# QiaoRuoZhuo 2018-11-12
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 12 17:07:16 2018
@author: wp:lasso|ridge
"""
# Classic iris data set: `data` supplies the predictors and `target` the
# response that the regression models below are fitted to.
from sklearn.datasets import load_iris
iris = load_iris()
data_x, data_y = iris.data, iris.target
# Import the required packages and libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn that only ships the legacy module
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import Ridge,RidgeCV
from sklearn.linear_model import Lasso,LassoCV
from sklearn.metrics import mean_squared_error
# Split into training and test sets: train_size=0.7 puts 70% of the samples
# in the training split, and random_state pins the shuffle so reruns agree.
x_tr, x_te, y_tr, y_te = train_test_split(
    data_x,
    data_y,
    train_size=0.7,
    random_state=22,
)
######################ridge########################################
# Candidate regularisation strengths: 100 values spaced evenly on a log10
# scale from 1e-10 to 1e10. One ridge model is fitted per value.
alphas = np.power(10, np.linspace(-10, 10, num=100))
# Fit one ridge model per candidate alpha and collect its coefficient vector,
# in the same order as `alphas`, so the coefficient path can be plotted.
# NOTE(review): the `normalize` argument was removed from these estimators in
# scikit-learn 1.2 -- confirm the scikit-learn version this script targets.
ridge_cofficients = []
for alpha in alphas:
    # The statements below must be indented to form the loop body; flush-left
    # they raise an IndentationError.
    ridge = Ridge(alpha=alpha, normalize=True)
    ridge.fit(x_tr, y_tr)
    ridge_cofficients.append(ridge.coef_)
# Plot the relationship between alpha and the ridge regression coefficients.
# Font settings come first: presumably a CJK-capable font so the Chinese
# title renders, with ASCII minus signs for the tick labels.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': False,
})
# Select the plotting style
plt.style.use('ggplot')
# Draw on the current axes: one line per coefficient, alpha on a log x-axis.
path_axes = plt.gca()
path_axes.plot(alphas, ridge_cofficients)
path_axes.set_xscale('log')
path_axes.axis('tight')
path_axes.set_title(r'alpha系数与岭回归系数的关系')
path_axes.set_xlabel('Log Alpha')
path_axes.set_ylabel('Cofficients')
plt.show()
# Ridge cross-validation: choose alpha from `alphas` using 10-fold CV.
# scikit-learn scorers follow a "greater is better" convention, so mean
# squared error is requested as 'neg_mean_squared_error'; the bare
# 'mean_squared_error' name is rejected by scikit-learn >= 0.20.
# NOTE(review): the `normalize` argument was removed from these estimators in
# scikit-learn 1.2 -- confirm the scikit-learn version this script targets.
ridge_cv = RidgeCV(alphas=alphas, normalize=True,
                   scoring='neg_mean_squared_error', cv=10)
ridge_cv.fit(x_tr, y_tr)
# Best regularisation strength (lambda) selected by cross-validation
ridge_best_alpha = ridge_cv.alpha_
# Refit a plain ridge model with the selected alpha
ridge = Ridge(alpha=ridge_best_alpha, normalize=True)
ridge.fit(x_tr, y_tr)
ridge_predict = ridge.predict(x_te)
# Root-mean-squared error of the ridge predictions on the test split
rmse = np.sqrt(mean_squared_error(y_te, ridge_predict))
######################lasso##################################
# Lasso cross-validation: choose alpha from `alphas` using 10-fold CV.
# NOTE(review): the `normalize` argument was removed from these estimators in
# scikit-learn 1.2 -- confirm the scikit-learn version this script targets.
lasso_cv = LassoCV(alphas=alphas, normalize=True, cv=10, max_iter=10000)
lasso_cv.fit(x_tr, y_tr)
# Best regularisation strength (lambda) selected by cross-validation
lasso_best_alpha = lasso_cv.alpha_
# A bare expression only echoes in an interactive console; print it so the
# value is also shown when the file runs as a script.
print(lasso_best_alpha)
# Refit a plain lasso model with the selected alpha
lasso = Lasso(alpha=lasso_best_alpha, normalize=True, max_iter=10000)
lasso.fit(x_tr, y_tr)
lasso_predict = lasso.predict(x_te)  # predictions for the test split
# Root-mean-squared error of the lasso predictions on the test split; stored
# as `RMSE`, separate from the ridge score held in `rmse`.
RMSE = np.sqrt(mean_squared_error(y_te, lasso_predict))