机器学习——支持向量机

最新推荐文章于 2022-10-29 09:31:36 发布

原创最新推荐文章于 2022-10-29 09:31:36 发布 · 677 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#机器学习 #支持向量机 #python

机器学习专栏收录该内容

18 篇文章

订阅专栏

本文通过实例讲解了如何使用Python的Scikit-Learn库实现支持向量机，包括线性核和高斯核的应用，展示了如何处理非线性可分数据，并探讨了不同C值对模型性能的影响，最终绘制决策边界。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

机器学习，支持向量机

import numpy as np  #导入数值分析模块
import scipy.io as scio #用来读取matlat数据文件
import matplotlib.pyplot as plt #导入绘图模块
import scipy.optimize #最优化
from sklearn import svm #导入支持向量机
# Load the first exercise data set. A raw string keeps the Windows
# backslashes literal instead of relying on invalid escape sequences.
data = scio.loadmat(r"D:\CourseraML\ex6\data\ex6data1.mat")
X, y = data["X"], data["y"]
# Split the rows of X by class with a boolean mask. Label 1 is the positive
# class and label 0 the negative class, matching the later data sets in this
# file. y is flattened so the comparison yields one boolean per row of X.
pos = X[y.ravel() == 1]  # positive samples
neg = X[y.ravel() == 0]  # negative samples
print(pos[:, 0])
print(y.flatten())

def dataPlot():
    """Scatter-plot the module-level ``pos`` and ``neg`` arrays on a new figure.

    Column 0 of each array is used as the x coordinate and column 1 as the
    y coordinate. A legend is added; nothing is returned.
    """
    plt.figure(figsize=(6, 4))
    # (samples, colour, marker, legend label) for each class.
    series = (
        (pos, "k", "+", "Positive sample"),
        (neg, "yellow", "o", "Negative sample"),
    )
    for samples, colour, marker, label in series:
        plt.scatter(samples[:, 0], samples[:, 1],
                    color=colour, marker=marker, label=label)
    plt.legend()
    
# Draw the scatter plot of the current pos/neg samples.
dataPlot()
# Write an empty line to stdout.
print()

在这里插入图片描述

def boundaryPlot(mysvm, xmin, xmax, ymin, ymax):
    """Plot the samples and overlay contours of the classifier's predictions.

    Args:
        mysvm: Classifier whose ``predict`` is called on an (n, 2) array of
            grid points.
        xmin, xmax: Horizontal extent of the evaluation grid.
        ymin, ymax: Vertical extent of the evaluation grid.
    """
    # 100 x 100 evaluation grid covering the requested rectangle.
    grid_x, grid_y = np.meshgrid(
        np.linspace(xmin, xmax, 100),
        np.linspace(ymin, ymax, 100),
    )
    # One row per grid point: (x, y).
    grid_points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    predicted = mysvm.predict(grid_points).reshape(grid_x.shape)
    dataPlot()  # scatter plot of the samples
    plt.contour(grid_x, grid_y, predicted)  # contour lines of the predictions
    plt.title("Decision boundary")
    
# The classifier has to exist before its boundary can be drawn; without this
# the call below raises NameError. C=1 (scikit-learn's default penalty) is
# used here -- NOTE(review): assumed value for this first plot, confirm.
linearsvm = svm.SVC(C=1, kernel="linear")
linearsvm.fit(X, y.ravel())
# Evaluate the boundary over the bounding box of the training samples.
boundaryPlot(linearsvm, np.min(X[:, 0]), np.max(X[:, 0]), np.min(X[:, 1]), np.max(X[:, 1]))

在这里插入图片描述

# Linear SVM with a large penalty (C=100).
linearsvm = svm.SVC(C=100, kernel="linear")
linearsvm.fit(X, y.ravel())
# Print the training accuracy; the bare expression discarded the result.
# Labels are flattened the same way as for fit().
print(linearsvm.score(X, y.ravel()))
# Evaluate the boundary over the bounding box of the training samples.
boundaryPlot(linearsvm, np.min(X[:, 0]), np.max(X[:, 0]), np.min(X[:, 1]), np.max(X[:, 1]))

C = 100 时的决策边界

高斯核：$K(x_1, x_2) = \exp\left(-\dfrac{\lVert x_1 - x_2 \rVert^2}{2\sigma^2}\right)$

def gaussKernel(x1, x2, sigma):
    """Return the Gaussian kernel value exp(-||x1 - x2||^2 / (2 * sigma^2)).

    Args:
        x1: First vector; any array-like (ndarray, list, tuple).
        x2: Second vector, broadcast-compatible with ``x1``.
        sigma: Kernel width; must be non-zero.

    Returns:
        The kernel value as a NumPy floating-point scalar.
    """
    # np.asarray lets plain Python sequences be subtracted element-wise;
    # ndarrays pass through unchanged.
    diff = np.asarray(x1) - np.asarray(x2)
    return np.exp(-(diff ** 2).sum() / (2 * sigma ** 2))
# Sanity check of the kernel on a small example; print the value so it is
# visible when the file runs as a script (a bare expression is discarded).
print(gaussKernel(np.array([1, 2, 1]), np.array([0, 4, -1]), 2.))

非线性可分

# Load the second data set. A raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences.
mat = scio.loadmat(r"D:\吴恩达机器学习与深度学习\CourseraML\ex6\data\ex6data2.mat")
X, y = mat["X"], mat["y"]
# Split the rows of X by class with a boolean mask; y is flattened so the
# comparison yields one boolean per row of X.
pos = X[y.ravel() == 1]  # positive samples
neg = X[y.ravel() == 0]  # negative samples
dataPlot()

在这里插入图片描述

sigma = 0.1
# scikit-learn's RBF kernel is exp(-gamma * ||x - x'||^2), while gaussKernel
# in this file is exp(-||x1 - x2||^2 / (2 * sigma^2)). The two agree only
# when gamma = 1 / (2 * sigma^2); sigma**-2 would be twice that.
gamma = 1.0 / (2.0 * sigma ** 2)
gauss_svm = svm.SVC(C=1, kernel="rbf", gamma=gamma)
gauss_svm.fit(X, y.ravel())
# boundaryPlot draws the samples itself, so no separate dataPlot() is needed.
boundaryPlot(gauss_svm, 0, 1, 0.4, 1)

在这里插入图片描述

第三份数据

# Load the third data set, which also carries a validation split. A raw
# string keeps the Windows backslashes literal instead of relying on invalid
# escape sequences.
mat = scio.loadmat(r"D:\吴恩达机器学习与深度学习\CourseraML\ex6\data\ex6data3.mat")
X, y = mat["X"], mat["y"]
Xval, yval = mat["Xval"], mat["yval"]
# Split the training rows by class with a boolean mask; y is flattened so
# the comparison yields one boolean per row of X.
pos = X[y.ravel() == 1]  # positive samples
neg = X[y.ravel() == 0]  # negative samples

dataPlot()

在这里插入图片描述

不同 C 值与 σ 值组合下的模型（在验证集上选择得分最高的组合）

# Candidate values; the same tuple is used for the penalty C and for sigma.
Cvalues = (0.01, 0.03, 0.1, 0.3, 1., 3., 10., 30.)
sigmavalues = Cvalues
best_pair, best_score = (0, 0), 0

# Exhaustive search: fit on the training set and score on the validation
# set, keeping the first pair that reaches the highest score.
# NOTE(review): gamma = sigma**-2 is not the 1/(2*sigma^2) implied by
# gaussKernel; it is kept because whatever refits from best_pair must use
# the same conversion.
for C, sigma in ((c, s) for c in Cvalues for s in sigmavalues):
    gamma = np.power(sigma, -2)
    model = svm.SVC(C=C, kernel="rbf", gamma=gamma)
    model.fit(X, y.flatten())
    this_score = model.score(Xval, yval)
    if this_score <= best_score:
        continue
    best_score, best_pair = this_score, (C, sigma)
print("best_pair = {}, best_score = {}".format(best_pair, best_score))

决策边界

# Refit with the selected (C, sigma) pair, converting sigma to gamma the
# same way the search did (sigma ** -2).
best_C, best_sigma = best_pair
best_gamma = np.power(best_sigma, -2)
gaus_svm = svm.SVC(C=best_C, kernel="rbf", gamma=best_gamma)
gaus_svm.fit(X, y.flatten())
# boundaryPlot draws the samples itself before overlaying the contour.
boundaryPlot(gaus_svm, -0.5, 0.3, -0.8, 0.6)