孤问尘
孤问尘
Published on 2025-01-13 / 0 Visits
0
0

波士顿房价预测

此次实验使用 Jupyter Notebook 完成。

import numpy as np
import matplotlib.pyplot as plt
import random

一、超参数的设定

# Hyperparameters for the synthetic-data run: the gradient-descent step size
# and the number of training iterations.
lr, epoch = 0.005, 12

二、随机生成样本数据

# Build a noisy synthetic data set that follows y = 2x + 1.
n = int(input('请输入样本个数:'))  # sample count, read interactively
np.random.seed(0)  # fixed seed so the noise below is reproducible
x = np.arange(n)  # 0, 1, ..., n - 1
e = np.random.random(size=n) / 10  # uniform noise in [0, 0.1)
print(x, e, sep='\n')
y = x * 2 + 1 + e
y  # bare expression: rendered as the cell output in a notebook

三、循环迭代梯度下降

def train_linear(x, y, lr, epoch):
    '''
    Fit y = w * x + b by batch gradient descent on the mean squared error.

    Both parameters start at 0.0. On every iteration the current w, b and
    loss are recorded first and the parameters are updated afterwards, so
    the history lists hold the values *before* each update while the
    returned w and b are the values *after* the last update.

    :param x: independent variable, 1-D ndarray or any array-like
    :param y: target values, ndarray or array-like of the same length as x
    :param lr: learning rate, float
    :param epoch: number of gradient-descent iterations, int
    :return: tuple (w, b, w_list, b_list, loss_list); each list has
             ``epoch`` entries
    '''
    # Coerce to ndarrays so plain Python lists work too: with w = 0.0 the
    # expression ``w * x`` raises TypeError for a list.
    x = np.asarray(x)
    y = np.asarray(y)
    w = 0.0
    b = 0.0
    w_list = []
    b_list = []
    loss_list = []
    n = len(x)
    for _ in range(epoch):
        w_list.append(w)
        b_list.append(b)
        # Residual (prediction - target), shared by the loss and both gradients.
        residual = w * x + b - y
        loss_list.append(np.sum(residual ** 2) / n)
        # d(MSE)/dw = 2/n * sum(residual * x); d(MSE)/db = 2/n * sum(residual)
        w -= lr * 2 / n * np.sum(residual * x)
        b -= lr * 2 / n * np.sum(residual)
    return w, b, w_list, b_list, loss_list
# Train on the synthetic samples, then log the parameters and loss that were
# recorded at the start of every epoch.
w, b, w_list, b_list, loss_list = train_linear(x, y, lr, epoch)
for i, snapshot in enumerate(zip(w_list, b_list, loss_list)):
    print('epoch:{}, w:{:.3f}, b:{:.3f}, loss:{:.3f}'.format(i, *snapshot))

四、可视化

def plot(x, y, w_list, b_list, nrows, ncols):
    """Draw the fitted line of successive training rounds on a grid of subplots.

    Panel (i, j) shows the data points together with the line built from
    snapshot number ``i * ncols + j`` of ``w_list`` / ``b_list``, and is
    titled with that same round number.

    :param x: independent variable; must support ``w * x`` elementwise
              (an ndarray in the calls in this file)
    :param y: target values plotted against ``x``
    :param w_list: per-round slope values
    :param b_list: per-round intercept values
    :param nrows: number of subplot rows
    :param ncols: number of subplot columns
    :raises IndexError: if fewer than ``nrows * ncols`` snapshots are given
    """
    # squeeze=False always yields a 2-D axes array, so a single row or a
    # single column can be indexed as ax[i][j] like any other grid.
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(10, 8),
                           squeeze=False)
    for i in range(nrows):
        for j in range(ncols):
            # Row-major panel number. The stride is ncols (not nrows), so
            # the plotted snapshot matches the round shown in the title.
            k = i * ncols + j
            ax[i][j].plot(x, y, 'ko')
            ax[i][j].plot(x, w_list[k] * x + b_list[k], 'r-')
            ax[i][j].set_title('round{}'.format(k))
    plt.subplots_adjust(top=0.9, hspace=0.5)
    plt.show()
# Show the fitted line for the first 12 rounds on a 3 x 4 grid.
plot(x, y, w_list, b_list, 3, 4)

# Loss curve over the training rounds, each point labelled with its value.
fig = plt.figure(figsize=(10, 8))
plt.plot(loss_list, 'r-')
plt.title('loss')
plt.xlabel('round')
plt.ylabel('loss')
for i, loss_value in enumerate(loss_list):
    plt.text(i, loss_value, '{:.3f}'.format(loss_value))
# plt.show must be called; the bare attribute reference did nothing.
plt.show()

五、波士顿房价预测

# Load the data set: each non-empty line of prices.txt holds two
# comma-separated integers (first column -> squares, second -> prices).
squares = []
prices = []
with open('./prices.txt', 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file);
            # int('') would otherwise raise ValueError.
            continue
        fields = line.split(',')
        squares.append(int(fields[0]))
        prices.append(int(fields[1]))
squares = np.array(squares)
prices = np.array(prices)
print(squares)
print(prices)

# Train on the raw values with a very small learning rate.
# NOTE(review): presumably the tiny step is needed because the inputs are
# not scaled yet -- confirm against the data.
w, b, w_list, b_list, loss_list = train_linear(squares, prices, 0.00000001, 100)
# Iterate over the recorded history instead of repeating the epoch count.
for i in range(len(loss_list)):
    print('epoch:{}, w:{:.3f}, b:{:.3f}, loss:{:.3f}'.
          format(i, w_list[i], b_list[i], loss_list[i]))
plot(squares, prices, w_list, b_list, 4, 4)

六、数据标准化

def standed(x):
    """Standardize ``x`` to zero mean and unit standard deviation (z-score).

    Uses the population standard deviation (``np.std`` default).

    :param x: list or ndarray of numbers
    :return: new ndarray ``(x - mean) / std``; all zeros when every element
             of ``x`` is identical (std == 0)
    """
    x = np.array(x)
    mean = np.mean(x)
    std = np.std(x)
    if std == 0:
        # Constant input: every deviation is zero, and 0 / 0 would fill the
        # result with NaN, which then propagates through training.
        return np.zeros(x.shape)
    return (x - mean) / std
# Standardize both columns, then retrain with a much larger learning rate
# and plot the first 16 rounds.
squares, prices = standed(squares), standed(prices)
print(squares, prices, sep='\n')
w, b, w_list, b_list, loss_list = train_linear(squares, prices, 0.1, 100)
for i, snapshot in enumerate(zip(w_list, b_list, loss_list)):
    print('epoch:{}, w:{:.3f}, b:{:.3f}, loss:{:.3f}'.format(i, *snapshot))
plot(squares, prices, w_list, b_list, 4, 4)


Comment