In [1]: import pandas as pd
In [2]: dataset = pd.read_csv('Training.csv')
In [3]: dataset
Out[3]: itching skin_rash nodal_skin_eruptions continuous_sneezing shivering chills jo
0 1 1 1 0 0 0
1 0 1 1 0 0 0
2 1 0 1 0 0 0
3 1 1 0 0 0 0
4 1 1 1 0 0 0
... ... ... ... ... ... ...
4915 0 0 0 0 0 0
4916 0 1 0 0 0 0
4917 0 0 0 0 0 0
4918 0 1 0 0 0 0
4919 0 1 0 0 0 0
4920 rows × 133 columns
In [4]: dataset.shape
Out[4]: (4920, 133)
In [5]: #train test split
In [6]: from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
In [7]: X = dataset.drop('prognosis', axis=1)
y = dataset['prognosis']
# ecoding prognonsis
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_
In [8]: #Training top models
In [9]: from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
# Create a dictionary to store models
models = {
'SVC': SVC(kernel='linear'),
'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_stat
'KNeighbors': KNeighborsClassifier(n_neighbors=5),
'MultinomialNB': MultinomialNB()
}
# Loop through the models, train, test, and print results
for model_name, model in models.items():
# Train the model
model.fit(X_train, y_train)
# Test the model
predictions = model.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"{model_name} Accuracy: {accuracy}")
# Calculate confusion matrix
cm = confusion_matrix(y_test, predictions)
print(f"{model_name} Confusion Matrix:")
print(np.array2string(cm, separator=', '))
print("\n" + "="*40 + "\n")
SVC Accuracy: 1.0
SVC Confusion Matrix:
[[40, 0, 0, ..., 0, 0, 0],
[ 0, 43, 0, ..., 0, 0, 0],
[ 0, 0, 28, ..., 0, 0, 0],
...,
[ 0, 0, 0, ..., 34, 0, 0],
[ 0, 0, 0, ..., 0, 41, 0],
[ 0, 0, 0, ..., 0, 0, 31]]
========================================
RandomForest Accuracy: 1.0
RandomForest Confusion Matrix:
[[40, 0, 0, ..., 0, 0, 0],
[ 0, 43, 0, ..., 0, 0, 0],
[ 0, 0, 28, ..., 0, 0, 0],
...,
[ 0, 0, 0, ..., 34, 0, 0],
[ 0, 0, 0, ..., 0, 41, 0],
[ 0, 0, 0, ..., 0, 0, 31]]
========================================
GradientBoosting Accuracy: 1.0
GradientBoosting Confusion Matrix:
[[40, 0, 0, ..., 0, 0, 0],
[ 0, 43, 0, ..., 0, 0, 0],
[ 0, 0, 28, ..., 0, 0, 0],
...,
[ 0, 0, 0, ..., 34, 0, 0],
[ 0, 0, 0, ..., 0, 41, 0],
[ 0, 0, 0, ..., 0, 0, 31]]
========================================
KNeighbors Accuracy: 1.0
KNeighbors Confusion Matrix:
[[40, 0, 0, ..., 0, 0, 0],
[ 0, 43, 0, ..., 0, 0, 0],
[ 0, 0, 28, ..., 0, 0, 0],
...,
[ 0, 0, 0, ..., 34, 0, 0],
[ 0, 0, 0, ..., 0, 41, 0],
[ 0, 0, 0, ..., 0, 0, 31]]
========================================
MultinomialNB Accuracy: 1.0
MultinomialNB Confusion Matrix:
[[40, 0, 0, ..., 0, 0, 0],
[ 0, 43, 0, ..., 0, 0, 0],
[ 0, 0, 28, ..., 0, 0, 0],
...,
[ 0, 0, 0, ..., 34, 0, 0],
[ 0, 0, 0, ..., 0, 41, 0],
[ 0, 0, 0, ..., 0, 0, 31]]
========================================
In [10]: #single prediction
In [11]: # selecting svc
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
ypred = svc.predict(X_test)
accuracy_score(y_test,ypred)
Out[11]: 1.0
In [12]: # save svc
import pickle
pickle.dump(svc,open('svc.pkl','wb'))
In [13]: # load model
svc = pickle.load(open('svc.pkl','rb'))
In [14]: # test 1:
print("predicted disease :",svc.predict(X_test.iloc[0].values.reshape(1,-1)))
print("Actual Disease :", y_test[0])
predicted disease : [40]
Actual Disease : 40
C:\Users\Darshan\python\Lib\site-packages\sklearn\base.py:493: UserWarning: X doe
s not have valid feature names, but SVC was fitted with feature names
warnings.warn(
In [16]: #test 2:
print("predicted disease :",svc.predict(X_test.iloc[100].values.reshape(1,-1)))
print("Actual Disease :", y_test[100])
predicted disease : [39]
Actual Disease : 39
C:\Users\Darshan\python\Lib\site-packages\sklearn\base.py:493: UserWarning: X doe
s not have valid feature names, but SVC was fitted with feature names
warnings.warn(
In [17]: #Recommendation System and Prediction
#load database and use logic for recommendations
In [18]: sym_des = pd.read_csv("symtoms_df.csv")
precautions = pd.read_csv("precautions_df.csv")
workout = pd.read_csv("workout_df.csv")
description = pd.read_csv("description.csv")
medications = pd.read_csv('medications.csv')
diets = pd.read_csv("diets.csv")
In [19]: #============================================================
# custome and helping functions
#==========================helper funtions================
def helper(dis):
desc = description[description['Disease'] == predicted_disease]['Description
desc = " ".join([w for w in desc])
pre = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precautio
pre = [col for col in pre.values]
med = medications[medications['Disease'] == dis]['Medication']
med = [med for med in med.values]
die = diets[diets['Disease'] == dis]['Diet']
die = [die for die in die.values]
wrkout = workout[workout['disease'] == dis] ['workout']
return desc,pre,med,die,wrkout
symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'conti
diseases_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic c
# Model Prediction function
def get_predicted_value(patient_symptoms):
input_vector = np.zeros(len(symptoms_dict))
for item in patient_symptoms:
input_vector[symptoms_dict[item]] = 1
return diseases_list[svc.predict([input_vector])[0]]
In [20]: # Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separa
symptoms = input("Enter your symptoms.......")
user_symptoms = [s.strip() for s in symptoms.split(',')]
# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)
desc, pre, med, die, wrkout = helper(predicted_disease)
print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
print(i, ": ", p_i)
i += 1
print("=================medications==================")
for m_i in med:
print(i, ": ", m_i)
i += 1
print("=================workout==================")
for w_i in wrkout:
print(i, ": ", w_i)
i += 1
print("=================diets==================")
for d_i in die:
print(i, ": ", d_i)
i += 1
=================predicted disease============
Fungal infection
=================description==================
Fungal infection is a common skin condition caused by fungi.
=================precautions==================
1 : bath twice
2 : use detol or neem in bathing water
3 : keep infected area dry
4 : use clean cloths
=================medications==================
5 : ['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketocona
zole']
=================workout==================
6 : Avoid sugary foods
7 : Consume probiotics
8 : Increase intake of garlic
9 : Include yogurt in diet
10 : Limit processed foods
11 : Stay hydrated
12 : Consume green tea
13 : Eat foods rich in zinc
14 : Include turmeric in diet
15 : Eat fruits and vegetables
=================diets==================
16 : ['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']
C:\Users\Darshan\python\Lib\site-packages\sklearn\base.py:493: UserWarning: X doe
s not have valid feature names, but SVC was fitted with feature names
warnings.warn(
In [22]: # Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separa
symptoms = input("Enter your symptoms.......")
user_symptoms = [s.strip() for s in symptoms.split(',')]
# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)
desc, pre, med, die, wrkout = helper(predicted_disease)
print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
print(i, ": ", p_i)
i += 1
print("=================medications==================")
for m_i in med:
print(i, ": ", m_i)
i += 1
print("=================workout==================")
for w_i in wrkout:
print(i, ": ", w_i)
i += 1
print("=================diets==================")
for d_i in die:
print(i, ": ", d_i)
i += 1
=================predicted disease============
Impetigo
=================description==================
Impetigo is a highly contagious skin infection causing red sores that can break o
pen.
=================precautions==================
1 : soak affected area in warm water
2 : use antibiotics
3 : remove scabs with wet compressed cloth
4 : consult doctor
=================medications==================
5 : ['Topical antibiotics', 'Oral antibiotics', 'Antiseptics', 'Ointments', 'War
m compresses']
=================workout==================
6 : Maintain good hygiene
7 : Stay hydrated
8 : Consume nutrient-rich foods
9 : Limit sugary foods and beverages
10 : Include foods rich in vitamin C
11 : Consult a healthcare professional
12 : Follow medical recommendations
13 : Avoid scratching
14 : Take prescribed antibiotics
15 : Practice wound care
=================diets==================
16 : ['Impetigo Diet', 'Antibiotic treatment', 'Fruits and vegetables', 'Hydrati
on', 'Protein-rich foods']
C:\Users\Darshan\python\Lib\site-packages\sklearn\base.py:493: UserWarning: X doe
s not have valid feature names, but SVC was fitted with feature names
warnings.warn(
In [23]: # let's use pycharm flask app
# but install this version in pycharm
import sklearn
print(sklearn.__version__)
1.4.1.post1
In [ ]:
In [ ]: