Machine Learning with Keras - Predicting Housing Prices

This time we will predict Boston housing prices from the 1970s.

Regression (scalar output)

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
from keras.datasets import boston_housing

(train, train_targets), (test, test_targets) = boston_housing.load_data()
Using TensorFlow backend.
train.shape, test.shape
((404, 13), (102, 13))
train_targets  # housing prices, in thousands of dollars
array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4, 12.1,
       17.9, 23.1, 19.9, 15.7,  8.8, 50. , 22.5, 24.1, 27.5, 10.9, 30.8,
       32.9, 24. , 18.5, 13.3, 22.9, 34.7, 16.6, 17.5, 22.3, 16.1, 14.9,
       23.1, 34.9, 25. , 13.9, 13.1, 20.4, 20. , 15.2, 24.7, 22.2, 16.7,
       12.7, 15.6, 18.4, 21. , 30.1, 15.1, 18.7,  9.6, 31.5, 24.8, 19.1,
       22. , 14.5, 11. , 32. , 29.4, 20.3, 24.4, 14.6, 19.5, 14.1, 14.3,
       15.6, 10.5,  6.3, 19.3, 19.3, 13.4, 36.4, 17.8, 13.5, 16.5,  8.3,
       14.3, 16. , 13.4, 28.6, 43.5, 20.2, 22. , 23. , 20.7, 12.5, 48.5,
       14.6, 13.4, 23.7, 50. , 21.7, 39.8, 38.7, 22.2, 34.9, 22.5, 31.1,
       28.7, 46. , 41.7, 21. , 26.6, 15. , 24.4, 13.3, 21.2, 11.7, 21.7,
       19.4, 50. , 22.8, 19.7, 24.7, 36.2, 14.2, 18.9, 18.3, 20.6, 24.6,
       18.2,  8.7, 44. , 10.4, 13.2, 21.2, 37. , 30.7, 22.9, 20. , 19.3,
       31.7, 32. , 23.1, 18.8, 10.9, 50. , 19.6,  5. , 14.4, 19.8, 13.8,
       19.6, 23.9, 24.5, 25. , 19.9, 17.2, 24.6, 13.5, 26.6, 21.4, 11.9,
       22.6, 19.6,  8.5, 23.7, 23.1, 22.4, 20.5, 23.6, 18.4, 35.2, 23.1,
       27.9, 20.6, 23.7, 28. , 13.6, 27.1, 23.6, 20.6, 18.2, 21.7, 17.1,
        8.4, 25.3, 13.8, 22.2, 18.4, 20.7, 31.6, 30.5, 20.3,  8.8, 19.2,
       19.4, 23.1, 23. , 14.8, 48.8, 22.6, 33.4, 21.1, 13.6, 32.2, 13.1,
       23.4, 18.9, 23.9, 11.8, 23.3, 22.8, 19.6, 16.7, 13.4, 22.2, 20.4,
       21.8, 26.4, 14.9, 24.1, 23.8, 12.3, 29.1, 21. , 19.5, 23.3, 23.8,
       17.8, 11.5, 21.7, 19.9, 25. , 33.4, 28.5, 21.4, 24.3, 27.5, 33.1,
       16.2, 23.3, 48.3, 22.9, 22.8, 13.1, 12.7, 22.6, 15. , 15.3, 10.5,
       24. , 18.5, 21.7, 19.5, 33.2, 23.2,  5. , 19.1, 12.7, 22.3, 10.2,
       13.9, 16.3, 17. , 20.1, 29.9, 17.2, 37.3, 45.4, 17.8, 23.2, 29. ,
       22. , 18. , 17.4, 34.6, 20.1, 25. , 15.6, 24.8, 28.2, 21.2, 21.4,
       23.8, 31. , 26.2, 17.4, 37.9, 17.5, 20. ,  8.3, 23.9,  8.4, 13.8,
        7.2, 11.7, 17.1, 21.6, 50. , 16.1, 20.4, 20.6, 21.4, 20.6, 36.5,
        8.5, 24.8, 10.8, 21.9, 17.3, 18.9, 36.2, 14.9, 18.2, 33.3, 21.8,
       19.7, 31.6, 24.8, 19.4, 22.8,  7.5, 44.8, 16.8, 18.7, 50. , 50. ,
       19.5, 20.1, 50. , 17.2, 20.8, 19.3, 41.3, 20.4, 20.5, 13.8, 16.5,
       23.9, 20.6, 31.5, 23.3, 16.8, 14. , 33.8, 36.1, 12.8, 18.3, 18.7,
       19.1, 29. , 30.1, 50. , 50. , 22. , 11.9, 37.6, 50. , 22.7, 20.8,
       23.5, 27.9, 50. , 19.3, 23.9, 22.6, 15.2, 21.7, 19.2, 43.8, 20.3,
       33.2, 19.9, 22.5, 32.7, 22. , 17.1, 19. , 15. , 16.1, 25.1, 23.7,
       28.7, 37.2, 22.6, 16.4, 25. , 29.8, 22.1, 17.4, 18.1, 30.3, 17.5,
       24.7, 12.6, 26.5, 28.7, 13.3, 10.4, 24.4, 23. , 20. , 17.8,  7. ,
       11.8, 24.4, 13.8, 19.4, 25.2, 19.4, 19.4, 29.1])
train_targets.shape
(404,)

Each feature must be normalized, because the features have very different ranges.
For every feature, subtract that feature's mean and divide by its standard deviation.

mean = train.mean(axis=0)  # per-feature mean, computed over the samples
train -= mean
std = train.std(axis=0)    # per-feature standard deviation
train /= std

# normalize the test set with statistics computed on the training set
test -= mean
test /= std

mean,std
(array([3.74511057e+00, 1.14801980e+01, 1.11044307e+01, 6.18811881e-02,
        5.57355941e-01, 6.26708168e+00, 6.90106436e+01, 3.74027079e+00,
        9.44059406e+00, 4.05898515e+02, 1.84759901e+01, 3.54783168e+02,
        1.27408168e+01]),
 array([9.22929073e+00, 2.37382770e+01, 6.80287253e+00, 2.40939633e-01,
        1.17147847e-01, 7.08908627e-01, 2.79060634e+01, 2.02770050e+00,
        8.68758849e+00, 1.66168506e+02, 2.19765689e+00, 9.39946015e+01,
        7.24556085e+00]))
from tensorflow.keras import models, layers

def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train.shape[1],)))  # train.shape[1] == 13 features
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))  # no activation, so the output is not squashed into a fixed range (as sigmoid would do) and any value can come out

    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])  # mse = mean squared error, mae = mean absolute error (absolute distance between prediction and target)
    return model
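
To make the two metrics concrete, here is a small NumPy sketch (not part of the notebook, using made-up numbers) that computes them by hand:

import numpy as np

# hypothetical predictions and targets, in thousands of dollars (illustration only)
preds = np.array([22.0, 18.5, 31.0])
targets = np.array([21.1, 17.7, 34.7])

mse = np.mean((preds - targets) ** 2)    # mean squared error, used as the loss
mae = np.mean(np.abs(preds - targets))   # mean absolute error, used as the metric
print(mse, mae)  # about 5.05 and 1.8: predictions are off by roughly $1,800 on average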

K-fold cross-validation

import numpy as np
k = 4
num_val_samples = len(train) // k  # number of samples in each validation fold (total / k)
num_epochs = 100                   # number of epochs to train each fold's model
all_scores = []

for i in range(k):
    print('processing fold #',i)
    # data for validation (fold i)
    val_data = train[i * num_val_samples : (i+1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples : (i+1) * num_val_samples]

    # training data: everything except the validation fold
    partial_train = np.concatenate(  # join the two remaining pieces
        [
         train[: i*num_val_samples],
         train[(i+1)*num_val_samples:]
        ], 
        axis=0
    )
    partial_train_targets = np.concatenate(
        [
         train_targets[:i*num_val_samples],
         train_targets[(i+1)*num_val_samples:]
         ],
        axis=0
    )

    model = build_model()
    model.fit(partial_train, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)  # mean squared error, mean absolute error
    all_scores.append(val_mae)
processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
all_scores
[2.1257687, 2.9574735, 2.3732476, 2.7640991]
np.mean(all_scores)
2.5551472
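
As an aside, the same contiguous splits could also be produced with scikit-learn instead of slicing by hand; a minimal sketch, assuming scikit-learn is installed:

from sklearn.model_selection import KFold

fold_scores = []
for train_idx, val_idx in KFold(n_splits=k).split(train):  # contiguous folds, like the manual slicing above
    m = build_model()
    m.fit(train[train_idx], train_targets[train_idx],
          epochs=num_epochs, batch_size=1, verbose=0)
    _, fold_mae = m.evaluate(train[val_idx], train_targets[val_idx], verbose=0)
    fold_scores.append(fold_mae)
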
# longer run: 500 epochs per fold, saving the validation MAE at every epoch
num_epochs = 500
all_mae_histories = []

for i in range(k):
    print('processing fold #',i)
    val_data = train[i* num_val_samples : (i+1)*num_val_samples]
    val_targets = train_targets[i* num_val_samples : (i+1)*num_val_samples]

    partial_train = np.concatenate(
        [
         train[: i*num_val_samples],
         train[(i+1)*num_val_samples:]
        ], 
        axis=0
    )
    partial_train_targets = np.concatenate(
        [
         train_targets[:i*num_val_samples],
         train_targets[(i+1)*num_val_samples:]
         ],
        axis=0
    )

    model = build_model()
    history=model.fit(partial_train, partial_train_targets,
              validation_data=(val_data, val_targets),
              epochs=num_epochs, batch_size=1, verbose=0)
    
    mae_history=history.history['val_mae']
    all_mae_histories.append(mae_history)
processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
average_mae_history=[
                     np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)
]
len(average_mae_history)
500
import matplotlib.pyplot as plt

plt.plot(range(1,len(average_mae_history)+1), average_mae_history)

plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

[Plot: validation MAE by epoch]

To make the curve easier to read, let's drop the first few points, which are on a different scale from the rest,
and smooth what remains with an exponential moving average.

# exponential moving average: previous smoothed value * factor, plus the current point * (1 - factor)
def smooth(points, factor=0.9):
    smooth_points=[]
    for point in points:
        if smooth_points:
            previous=smooth_points[-1]
            smooth_points.append(previous * factor + point*(1-factor))
        else:
            smooth_points.append(point)
    return smooth_points

smooth_mae_history = smooth(average_mae_history[15:])

plt.plot(range(1, len(smooth_mae_history)+1), smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

[Plot: smoothed validation MAE by epoch, first 15 points excluded]

The model seems to start overfitting at around epoch 80.
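
Instead of reading the cut-off epoch from the plot by hand, an EarlyStopping callback could stop training automatically once the validation MAE stops improving. A rough sketch (the patience value and the 20% validation split are arbitrary choices, not from the notebook; restore_best_weights needs a reasonably recent Keras):

from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_mae', patience=10,
                           restore_best_weights=True)  # roll back to the best epoch's weights
model = build_model()
model.fit(train, train_targets,
          validation_split=0.2,   # hold out 20% of the training data for validation
          epochs=500, batch_size=16,
          callbacks=[early_stop], verbose=0)

Below, we follow the notebook and simply retrain on the full training set for 80 epochs.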

model=build_model()
model.fit(train, train_targets, epochs=80, batch_size=16, verbose=0)

test_mse, test_mae=model.evaluate(test, test_targets)
102/1 [==============================] - 0s 625us/sample - loss: 68.9733 - mae: 2.7475
test_mae
2.7474988
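
With the model trained, actual price predictions come from model.predict; a short sketch (the exact numbers will vary from run to run):

predictions = model.predict(test)
predictions[0]  # predicted price of the first test sample, in thousands of dollars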

Summary
Regression has no notion of accuracy; performance is usually reported as the mean absolute error (MAE).
When data is scarce, use K-fold cross-validation.
When data is scarce, reduce the number of hidden layers,
and when there are few labels, reduce the number of hidden units.
