{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Supermarket Regression 2 Notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "dataset = \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "dataset = pd.DataFrame.from_dict(json.loads(dataset))\n",
    "y_target = dataset['Product_Supermarket_Sales']\n",
    "dataset.drop(['Product_Supermarket_Sales'], axis=1, inplace=True)\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(dataset, y_target, test_size = 0.3)\n",
    "\n",
    "print(\"Training data is\", X_train.shape)\n",
    "print(\"Training target is\", y_train.shape)\n",
    "print(\"test data is\", X_test.shape)\n",
    "print(\"test target is\", y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import RobustScaler, StandardScaler\n",
    "scaler = RobustScaler()\n",
    "\n",
    "scaler.fit(X_train)\n",
    "\n",
    "X_train = scaler.transform(X_train) \n",
    "X_test = scaler.transform(X_test)\n",
    "\n",
    "X_train[:5, :5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.model_selection import KFold, cross_val_score\n",
    "\n",
    "\n",
    "def cross_validate(model, nfolds, feats, targets):\n",
    "    score = -1 * (cross_val_score(model, feats, targets, cv=nfolds, scoring='neg_mean_absolute_error'))\n",
    "    return np.mean(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "n_estimators=150\n",
    "max_depth=3\n",
    "max_features='sqrt'\n",
    "min_samples_split=4\n",
    "random_state=2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "\n",
    "gb_model = GradientBoostingRegressor(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features, min_samples_split=min_samples_split, random_state=random_state)\n",
    "\n",
    "mae_score = cross_validate(gb_model, 10, X_train, y_train)\n",
    "print(\"MAE Score: \", mae_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "outputs"
    ]
   },
   "outputs": [],
   "source": [
    "from flytekitplugins.papermill import record_outputs\n",
    "record_outputs(mae_score=float(mae_score))"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3.9.9 64-bit ('flytesnacks')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.9"
  },
  "vscode": {
   "interpreter": {
    "hash": "93d1c4f33f306e18e1c08a771c972fe86afbedaedb2338666e30a98a5179caac"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}