0% found this document useful (0 votes)
14 views

KnnImputer Ipynb

Uploaded by

UJJAWAL GAMER
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
14 views

KnnImputer Ipynb

Uploaded by

UJJAWAL GAMER
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 6

{

"cells": [
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"from sklearn.impute import KNNImputer,SimpleImputer\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('train.csv')[['Age','Pclass','Fare','Survived']]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Pclass</th>\n",
" <th>Fare</th>\n",
" <th>Survived</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>22.0</td>\n",
" <td>3</td>\n",
" <td>7.2500</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>71.2833</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>26.0</td>\n",
" <td>3</td>\n",
" <td>7.9250</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>53.1000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>35.0</td>\n",
" <td>3</td>\n",
" <td>8.0500</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Pclass Fare Survived\n",
"0 22.0 3 7.2500 0\n",
"1 38.0 1 71.2833 1\n",
"2 26.0 3 7.9250 1\n",
"3 35.0 1 53.1000 1\n",
"4 35.0 3 8.0500 0"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Age 19.86532\n",
"Pclass 0.00000\n",
"Fare 0.00000\n",
"Survived 0.00000\n",
"dtype: float64"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull().mean() * 100"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"X = df.drop(columns=['Survived'])\n",
"y = df['Survived']"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Pclass</th>\n",
" <th>Fare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>27.7208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>4.0</td>\n",
" <td>3</td>\n",
" <td>16.7000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>873</th>\n",
" <td>47.0</td>\n",
" <td>3</td>\n",
" <td>9.0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>182</th>\n",
" <td>9.0</td>\n",
" <td>3</td>\n",
" <td>31.3875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>876</th>\n",
" <td>20.0</td>\n",
" <td>3</td>\n",
" <td>9.8458</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Pclass Fare\n",
"30 40.0 1 27.7208\n",
"10 4.0 3 16.7000\n",
"873 47.0 3 9.0000\n",
"182 9.0 3 31.3875\n",
"876 20.0 3 9.8458"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"knn = KNNImputer(n_neighbors=3,weights='distance')\n",
"\n",
"X_train_trf = knn.fit_transform(X_train)\n",
"X_test_trf = knn.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7150837988826816"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr = LogisticRegression()\n",
"\n",
"lr.fit(X_train_trf,y_train)\n",
"\n",
"y_pred = lr.predict(X_test_trf)\n",
"\n",
"accuracy_score(y_test,y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# Comparison with Simple Imputer --> mean\n",
"\n",
"si = SimpleImputer()\n",
"\n",
"X_train_trf2 = si.fit_transform(X_train)\n",
"X_test_trf2 = si.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.6927374301675978"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr = LogisticRegression()\n",
"\n",
"lr.fit(X_train_trf2,y_train)\n",
"\n",
"y_pred2 = lr.predict(X_test_trf2)\n",
"\n",
"accuracy_score(y_test,y_pred2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

You might also like