Generalized Linear Models
Ordinary Least Squares
>>> from sklearn.linear_model import LinearRegression
>>> reg = LinearRegression()
>>> reg.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
>>> reg.coef_
array([ 0.5,  0.5])
Ridge Regression
>>> from sklearn.linear_model import Ridge
>>> reg = Ridge(alpha=.5)
>>> reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)
>>> reg.coef_
array([ 0.34545455,  0.34545455])
>>> reg.intercept_
0.13636363636363638
Setting the regularization parameter: generalized cross-validation
>>> from sklearn.linear_model import RidgeCV
>>> reg = RidgeCV(alphas=[0.1, 1.0, 10.0])
>>> reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
>>> reg.alpha_
0.10000000000000001
Lasso
>>> from sklearn.linear_model import Lasso
>>> clf = Lasso(alpha=0.1)
>>> clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)
>>> clf.coef_
array([ 0.85,  0.  ])
>>> clf.intercept_
0.15000000000000002
Multi-task Lasso
>>> from sklearn import linear_model
>>> clf = linear_model.MultiTaskLasso(alpha=0.1)
>>> clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
>>> print(clf.coef_)
[[ 0.89393398  0.        ]
 [ 0.89393398  0.        ]]
>>> print(clf.intercept_)
[ 0.10606602  0.10606602]
Elastic Net
>>> from sklearn.linear_model import ElasticNetCV
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(n_features=2, random_state=0)
>>> regr = ElasticNetCV(cv=5, random_state=0)
>>> regr.fit(X, y)
>>> print(regr.alpha_)
0.19947279427
>>> print(regr.intercept_)
0.398882965428
>>> print(regr.predict([[0, 0]]))
[ 0.39888297]
Multi-task Elastic Net
>>> from sklearn import linear_model
>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)
>>> clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
>>> print(clf.coef_)
>>> print(clf.intercept_)
Least Angle Regression
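The original text gives no session for plain LARS itself, only for LARS Lasso below. As a minimal sketch (the toy data and the n_nonzero_coefs setting are my own illustration, not from the source), Lars can cap the number of active coefficients:

>>> from sklearn import linear_model
>>> reg = linear_model.Lars(n_nonzero_coefs=1)  # keep at most one nonzero coefficient
>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
>>> print(reg.coef_)  # only the second feature is selected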
LARS Lasso
>>> from sklearn import linear_model
>>> reg = linear_model.LassoLars(alpha=0.01)
>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
>>> print(reg.coef_)
[ 0.         -0.96325765]
Orthogonal Matching Pursuit (OMP)
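No session accompanies this heading in the source; the following is a hedged sketch of OrthogonalMatchingPursuit on a synthetic sparse problem (the make_regression setup and the n_nonzero_coefs value are my own choices):

>>> from sklearn.linear_model import OrthogonalMatchingPursuit
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(n_features=20, n_informative=3, random_state=0)  # only 3 features carry signal
>>> omp = OrthogonalMatchingPursuit(n_nonzero_coefs=3)  # greedily select at most 3 atoms
>>> omp.fit(X, y)
>>> print((omp.coef_ != 0).sum())  # at most 3 nonzero coefficients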
Bayesian Regression
>>> from sklearn import linear_model
>>> clf = linear_model.BayesianRidge()
>>> clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
>>> clf.predict([[1, 1]])
array([ 1.])
Logistic Regression
With an L2 penalty
With an L1 penalty
The LogisticRegression class implements these solvers: liblinear, newton-cg, lbfgs, sag, and saga.
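As a hedged illustration (the iris dataset and the specific solver/penalty pairings are my own choices, not from the source), the penalty must be compatible with the solver: liblinear supports L1, while lbfgs handles only L2:

>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.datasets import load_iris
>>> X, y = load_iris(return_X_y=True)
>>> clf_l1 = LogisticRegression(penalty='l1', solver='liblinear').fit(X, y)  # L1 needs liblinear or saga
>>> clf_l2 = LogisticRegression(penalty='l2', solver='lbfgs').fit(X, y)      # L2 works with all solvers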
Stochastic Gradient Descent (SGD)
>>> import numpy as np
>>> from sklearn import linear_model
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
>>> Y = np.array([1, 1, 2, 2])
>>> clf = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)  # set explicitly to avoid the 0.19 FutureWarning about changing defaults
>>> clf.fit(X, Y)
>>> clf.predict([[-0.8, -1]])
array([1])
Perceptron
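The source shows no session for the perceptron; here is a minimal sketch reusing the toy data from the SGD example above (the stopping parameters are assumptions of mine):

>>> from sklearn.linear_model import Perceptron
>>> import numpy as np
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
>>> Y = np.array([1, 1, 2, 2])
>>> clf = Perceptron(max_iter=1000, tol=1e-3)  # same explicit stopping rule as the SGD example
>>> clf.fit(X, Y)
>>> clf.predict([[-0.8, -1]])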
Passive Aggressive Algorithms
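Likewise, a hedged sketch of PassiveAggressiveClassifier on the same toy data (the C, max_iter, and tol values are my own choices):

>>> from sklearn.linear_model import PassiveAggressiveClassifier
>>> import numpy as np
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
>>> Y = np.array([1, 1, 2, 2])
>>> clf = PassiveAggressiveClassifier(C=1.0, max_iter=1000, tol=1e-3)  # C bounds the per-step update
>>> clf.fit(X, Y)
>>> clf.predict([[-0.8, -1]])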
Robust Regression
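No example accompanies this heading either; the sketch below contrasts HuberRegressor with ordinary least squares on data corrupted by outliers (the synthetic data is my own construction, not from the source):

>>> from sklearn.linear_model import HuberRegressor, LinearRegression
>>> import numpy as np
>>> rng = np.random.RandomState(0)
>>> X = rng.normal(size=(20, 1))
>>> y = 3.0 * X.ravel() + 0.5
>>> y[:2] += 20  # corrupt two samples with large outliers
>>> huber = HuberRegressor().fit(X, y)  # downweights the outliers
>>> ols = LinearRegression().fit(X, y)  # pulled toward the outliers
>>> print(huber.coef_, ols.coef_)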
Polynomial Regression
>>> from sklearn.preprocessing import PolynomialFeatures
>>> import numpy as np
>>> X = np.arange(6).reshape(3, 2)
>>> X
array([[0, 1],
       [2, 3],
       [4, 5]])
The features of X have been transformed from $[x_1, x_2]$ to $[1, x_1, x_2, x_1^2, x_1 x_2, x_2^2]$:
>>> poly = PolynomialFeatures(degree=2)
>>> poly.fit_transform(X)
array([[  1.,   0.,   1.,   0.,   0.,   1.],
       [  1.,   2.,   3.,   4.,   6.,   9.],
       [  1.,   4.,   5.,  16.,  20.,  25.]])
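In practice the transform is usually chained with a linear model; a hedged sketch using Pipeline (the cubic target below is my own example, not from the source):

>>> from sklearn.pipeline import Pipeline
>>> from sklearn.preprocessing import PolynomialFeatures
>>> from sklearn.linear_model import LinearRegression
>>> import numpy as np
>>> model = Pipeline([('poly', PolynomialFeatures(degree=3)),
...                   ('linear', LinearRegression(fit_intercept=False))])
>>> x = np.arange(5)
>>> y = x ** 3 - 2 * x  # cubic target with coefficients [0, -2, 0, 1]
>>> model.fit(x[:, np.newaxis], y)
>>> print(model.named_steps['linear'].coef_)  # recovers approximately [0., -2., 0., 1.]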