{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pathlib\n",
    "import logging\n",
    "\n",
    "import time as time\n",
    "from os import walk\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import copy as copy\n",
    "from datetime import datetime\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Machine Learning Libraries\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "from xgboost import XGBClassifier\n",
    "\n",
    "#Validation\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.utils import resample\n",
    "import sklearn.utils\n",
    "\n",
    "import pickle\n",
    "import random\n",
    "\n",
    "from MLUtils import MLUtils\n",
    "\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "import lightgbm as lgb\n",
    "import re\n",
    "\n",
    "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
    "from tqdm import tnrange, tqdm, tqdm_notebook\n",
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "\n",
    "import copy\n",
    "from sklearn.preprocessing import FunctionTransformer\n",
    "\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 297,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "# NOTE: this hides every warning for the rest of the session; comment it out\n",
    "# while debugging so library warnings become visible again.\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 388,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### Global Paths\n",
    "# All locations are built from dir_loc so the project root is set in one place.\n",
    "dir_loc = \".\"\n",
    "\n",
    "train_loc = dir_loc + \"/raw/train.csv\"\n",
    "test_loc = dir_loc + \"/raw/test.csv\"\n",
    "submission_loc = dir_loc + \"/raw/submission.csv\"\n",
    "\n",
    "model_save_loc = dir_loc + \"/models\"\n",
    "# Create the model directory up front; exist_ok makes re-running this cell safe.\n",
    "pathlib.Path(model_save_loc).mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 389,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset Shape: (233154, 41)\n",
      "Dataset Shape: (112392, 40)\n"
     ]
    }
   ],
   "source": [
    "#### read data\n",
    "# Single load of the train/test files from disk, kept under the *_df1 names.\n",
    "raw_train_df1 = MLUtils.read_dataset(train_loc)\n",
    "raw_test_df1 = MLUtils.read_dataset(test_loc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 390,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### working copies\n",
    "# Clone the objects loaded in the previous cell instead of parsing the same\n",
    "# CSV files a second time; deepcopy keeps raw_*_df independent of raw_*_df1.\n",
    "raw_train_df = copy.deepcopy(raw_train_df1)\n",
    "raw_test_df = copy.deepcopy(raw_test_df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 391,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "date and time: 04202019_15_49_21\n"
     ]
    }
   ],
   "source": [
    "#### log file\n",
    "# Hands the stdlib logging module and the project root to the MLUtils helper.\n",
    "MLUtils.createLogFile(dir_loc, logging)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 392,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | 0 | \n", "
---|---|
uniqueid | \n", "420825 | \n", "
disbursed_amount | \n", "50578 | \n", "
asset_cost | \n", "58400 | \n", "
ltv | \n", "89.55 | \n", "
branch_id | \n", "67 | \n", "
supplier_id | \n", "22807 | \n", "
manufacturer_id | \n", "45 | \n", "
current_pincode_id | \n", "1441 | \n", "
date.of.birth | \n", "01-01-84 | \n", "
employment.type | \n", "Salaried | \n", "
disbursaldate | \n", "03-08-18 | \n", "
state_id | \n", "6 | \n", "
employee_code_id | \n", "1998 | \n", "
mobileno_avl_flag | \n", "1 | \n", "
aadhar_flag | \n", "1 | \n", "
pan_flag | \n", "0 | \n", "
voterid_flag | \n", "0 | \n", "
driving_flag | \n", "0 | \n", "
passport_flag | \n", "0 | \n", "
perform_cns.score | \n", "0 | \n", "
perform_cns.score.description | \n", "No Bureau History Available | \n", "
pri.no.of.accts | \n", "0 | \n", "
pri.active.accts | \n", "0 | \n", "
pri.overdue.accts | \n", "0 | \n", "
pri.current.balance | \n", "0 | \n", "
pri.sanctioned.amount | \n", "0 | \n", "
pri.disbursed.amount | \n", "0 | \n", "
sec.no.of.accts | \n", "0 | \n", "
sec.active.accts | \n", "0 | \n", "
sec.overdue.accts | \n", "0 | \n", "
sec.current.balance | \n", "0 | \n", "
sec.sanctioned.amount | \n", "0 | \n", "
sec.disbursed.amount | \n", "0 | \n", "
primary.instal.amt | \n", "0 | \n", "
sec.instal.amt | \n", "0 | \n", "
new.accts.in.last.six.months | \n", "0 | \n", "
delinquent.accts.in.last.six.months | \n", "0 | \n", "
average.acct.age | \n", "0yrs 0mon | \n", "
credit.history.length | \n", "0yrs 0mon | \n", "
no.of_inquiries | \n", "0 | \n", "
loan_default | \n", "0 | \n", "