使用L1和L2正则化方法实现波士顿房价预测

import pandas as pd  # 数据读取/处理
import numpy as np  # 数值计算
from sklearn.preprocessing import StandardScaler  # 数据标准化（正则化必须做！）
from sklearn.model_selection import train_test_split  # 划分训练/测试集

哦哦331

832人浏览 · 2025-12-05 23:04:41

哦哦331 · 2025-12-05 23:04:41 发布

在这里插入图片描述
import pandas as pd # 数据读取/处理
import numpy as np # 数值计算
from sklearn.preprocessing import StandardScaler # 数据标准化（正则化必须做！）
from sklearn.model_selection import train_test_split # 划分训练/测试集
from sklearn.linear_model import Ridge, Lasso # L2（Ridge）、L1（Lasso）模型
from sklearn.metrics import mean_squared_error, mean_absolute_error # 误差评估

读取数据：假设“波士顿房价xy.csv”同时包含特征和标签，其中最后一列是标签，其余各列是特征。

# --- L1 regularization (Lasso) ---
# Load the dataset. The code below treats the last column as the label and
# every other column as a feature.
data = pd.read_csv("波士顿房价xy.csv")

# Split into training and test sets. Features and label must come from
# different column slices; passing the same data for both is a common mistake.
x_train, x_test, y_train, y_test = train_test_split(
    data.iloc[:, :-1],  # features: all rows, every column except the last
    data.iloc[:, -1],  # label: all rows, last column only
    test_size=0.3,  # hold out 30% of the rows for testing
    random_state=22,  # fixed seed so the split is reproducible
)

# Standardize the features: the regularization penalty is sensitive to
# feature scale, so this step must not be skipped.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)  # fit on the training set, then transform
x_test = transfer.transform(x_test)  # reuse the training-set statistics

# Build and train the Lasso model. alpha is the regularization strength;
# the smaller it is, the weaker the regularization.
estimator = Lasso(alpha=0.005)
estimator.fit(x_train, y_train)

# Predict on the test set and report the mean squared error (MSE).
y_predict = estimator.predict(x_test)
error_mse = mean_squared_error(y_test, y_predict)
print("L1正则化均方误差：", error_mse)

# --- L2 regularization (Ridge) ---
# NOTE: loading, splitting and scaling repeat the Lasso section; they could
# be done once and shared. They are kept here so this section runs on its own.
data = pd.read_csv("波士顿房价xy.csv")

# Same split as the Lasso section: last column is the label, the rest are
# features; 30% test set with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    data.iloc[:, :-1],
    data.iloc[:, -1],
    test_size=0.3,
    random_state=22,
)

# Standardize the features using statistics from the training set only.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# Build and train the Ridge model (alpha is the regularization strength).
estimator = Ridge(alpha=0.005)
estimator.fit(x_train, y_train)

# Print the learned parameters: weights (coef_) and bias (intercept_).
print("L2正则化权重系数：", estimator.coef_)
print("L2正则化偏置：", estimator.intercept_)

# Predict on the test set and report the mean squared error (MSE).
y_predict = estimator.predict(x_test)
error_mse = mean_squared_error(y_test, y_predict)
print("L2正则化均方误差：", error_mse)

四、正则化的作用