from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import CalibrationDisplay
import matplotlib.pyplot as plt
# Load the Breast Cancer dataset and pull out the feature matrix and labels
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Base classifiers
svc = SVC()
# Seed the tree: scikit-learn permutes features at every split, so without a
# fixed random_state the fitted tree (and its curve) can differ between runs.
tree = DecisionTreeClassifier(random_state=42)
# The features are used unscaled, and the default iteration cap (100) is
# typically too low for the solver to converge on them; raise it so the
# logistic baseline is actually fitted to convergence.
log = LogisticRegression(C=0.5, max_iter=10000)
gnb = GaussianNB()

# Calibrated wrappers: each base estimator is refit inside 3-fold
# cross-validation and its scores are mapped to probabilities with the
# named method ("sigmoid" or "isotonic").
svc_sigmoid = CalibratedClassifierCV(svc, cv=3, method="sigmoid", ensemble=True)
tree_isotonic = CalibratedClassifierCV(tree, cv=3, method="isotonic", ensemble=True)
gnb_sigmoid = CalibratedClassifierCV(gnb, cv=3, method="sigmoid", ensemble=True)

# Display name -> estimator; the names are used as legend labels when plotting.
classifiers = {
    "Logistic": log,
    "Naive Bayes": gnb,
    "SVM + sigmoid": svc_sigmoid,
    "Decision Tree + Isotonic": tree_isotonic,
    "Naive Bayes + Sigmoid": gnb_sigmoid,
}
# Plot the probability calibration curve of every classifier on a shared axes
fig, ax = plt.subplots(figsize=(7, 5), dpi=150)

# Reference diagonal: a perfectly calibrated model lies on y = x.
# It is drawn once here with a label, and ref_line=False below stops
# CalibrationDisplay from stacking its own default diagonal on top of it.
ax.plot(
    [0, 1],
    [0, 1],
    linestyle='--',
    color='gray',
    label='Perfectly calibrated',
)

for name, clf in classifiers.items():
    # Fit on the training split, then draw the curve from held-out data.
    clf.fit(X_train, y_train)
    CalibrationDisplay.from_estimator(
        clf,
        X_test,
        y_test,
        n_bins=10,
        name=name,
        ref_line=False,
        ax=ax,
    )

# Use the axes object directly rather than pyplot's implicit "current axes".
ax.set_title('Probability Calibration Curve')
ax.legend(loc="best")
plt.show()