Tensorflow
[Tensorflow] 사이트 모음
설화님
2024. 2. 15. 12:56
CNN 대박
[LSTM] 주가 예측
LSTM(Long Short-Term Memory)이 시계열 예측(time series forecasting)에 특화되어 있다 보니, 주식 가격을 예측해 보는 간단한 LSTM 예제 코드가 GitHub 등에 많이 나와 있다. '주가 예측'만큼 학습용 데이터를 손쉽게…
pasus.tistory.com
코드
# import libraries
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Load the price history, keep the numeric feature columns, standardise them
# and split chronologically into a training part (first 90 %) and a test part.
# ---------------------------------------------------------------------------

# Read the CSV file and discard the adjusted close, which is not used as a feature.
stock_data = pd.read_csv(r"C:\python\workplace\005930.KS1.csv")
stock_data.drop(['Adj Close'], axis=1, inplace=True)

# Keep the unscaled 'Open' prices and the parsed dates for plotting later on.
original_open = stock_data['Open'].values
dates = pd.to_datetime(stock_data['Date'])

# Feature columns are taken by position (columns 1..5, i.e. everything after
# the first column). They are expected to be Open, High, Low, Close, Volume,
# with 'Open' first -- later steps treat column 0 as the prediction target.
cols = list(stock_data)[1:6]
stock_data = stock_data[cols].astype(float)

# Chronological split point: the first 90 % of the rows are used for training.
n_train = int(0.9 * stock_data.shape[0])

# Fit the scaler on the TRAINING rows only. Fitting on the whole dataset would
# leak the mean/variance of the held-out test period into the training data
# and make the test results look better than they really are. The same
# (train-fitted) transform is then applied to every row.
scaler = StandardScaler()
scaler = scaler.fit(stock_data.iloc[:n_train])
stock_data_scaled = scaler.transform(stock_data)

# Split the scaled features and the matching dates into train and test parts.
train_data_scaled = stock_data_scaled[0:n_train]
train_dates = dates[0:n_train]

test_data_scaled = stock_data_scaled[n_train:]
test_dates = dates[n_train:]
# ---------------------------------------------------------------------------
# Turn the scaled series into supervised samples for the LSTM.
# Each sample is a window of `seq_len` consecutive rows (all features); its
# target is column 0 of the row lying `pred_days` steps after the window's
# last row. Windows are built separately for the train and test parts, so no
# window spans the split point.
# ---------------------------------------------------------------------------
pred_days = 1   # prediction period (how far ahead the target lies)
seq_len = 14    # sequence length = number of past days in one input window
input_dim = 5   # input dimension = ['Open', 'High', 'Low', 'Close', 'Volume']

# `stop` is the exclusive end index of a window; the last usable stop is the
# one whose target row is still inside the data.
train_stops = range(seq_len, n_train - pred_days + 1)
test_stops = range(seq_len, len(test_data_scaled) - pred_days + 1)

trainX = np.array([train_data_scaled[stop - seq_len:stop, :]
                   for stop in train_stops])
# The target is sliced (not indexed) so every target keeps shape (1,).
trainY = np.array([train_data_scaled[stop + pred_days - 1:stop + pred_days, 0]
                   for stop in train_stops])

testX = np.array([test_data_scaled[stop - seq_len:stop, :]
                  for stop in test_stops])
testY = np.array([test_data_scaled[stop + pred_days - 1:stop + pred_days, 0]
                  for stop in test_stops])
# ---------------------------------------------------------------------------
# Network definition: two stacked LSTM layers followed by a linear read-out.
# ---------------------------------------------------------------------------
window_shape = (trainX.shape[1], trainX.shape[2])  # (seq length, input dimension)

model = Sequential([
    # First recurrent layer emits the full sequence so a second LSTM can follow.
    LSTM(64, input_shape=window_shape, return_sequences=True),
    # Second recurrent layer keeps only its final output.
    LSTM(32, return_sequences=False),
    # One output unit per target value in a sample of trainY.
    Dense(trainY.shape[1]),
])
model.summary()

# Adam with an explicit learning rate; mean squared error as the loss.
learning_rate = 0.01
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='mse')
# ---------------------------------------------------------------------------
# Reuse previously saved weights when they can be loaded; otherwise train the
# model from scratch, save the new weights and show the loss curves.
# `history` is only defined when training actually ran.
# ---------------------------------------------------------------------------
weights_path = r"C:\python\workplace\lstm_weights.h5"

# Only the load call sits inside the `try`, and only the failures that mean
# "no usable weights" are caught. A bare `except:` would also swallow
# Ctrl-C (KeyboardInterrupt), SystemExit and genuine programming errors.
# NOTE(review): OSError is expected for a missing/unreadable file and
# ValueError for a file that does not match the model -- confirm against the
# installed Keras version.
try:
    model.load_weights(weights_path)
except (OSError, ValueError) as load_error:
    weights_loaded = False
    print("No weights found, training model from scratch")
    print(f"(load_weights failed with: {load_error!r})")
else:
    weights_loaded = True
    print("Loaded model weights from disk")

# Training happens outside the exception handler so that an error raised
# during fitting is reported on its own instead of being chained to the
# failed weight load.
if not weights_loaded:
    # Fit the model; the last 10 % of the training samples are held out
    # for validation.
    history = model.fit(trainX, trainY, epochs=30, batch_size=32,
                        validation_split=0.1, verbose=1)

    # Save model weights after training so the next run can skip it.
    model.save_weights(weights_path)

    # Training vs. validation loss per epoch.
    plt.plot(history.history['loss'], label='Training loss')
    plt.plot(history.history['val_loss'], label='Validation loss')
    plt.legend()
    plt.show()
# ---------------------------------------------------------------------------
# Predict on the test windows and convert both the predictions and the true
# targets back to the original price scale.
#
# The scaler was fitted on all feature columns, so `inverse_transform` needs
# a full-width array. A filler array is built with every row equal to the
# scaler's per-column means, column 0 is overwritten with the values of
# interest, and after the inverse transform only column 0 is kept.
# ---------------------------------------------------------------------------
prediction = model.predict(testX)
print(prediction.shape, testY.shape)

# Predictions -> original scale.
mean_values_pred = np.tile(scaler.mean_, (prediction.shape[0], 1))
mean_values_pred[:, 0] = prediction.squeeze()
y_pred = scaler.inverse_transform(mean_values_pred)[:, 0]
print(y_pred.shape)

# True targets -> original scale (same procedure).
mean_values_testY = np.tile(scaler.mean_, (testY.shape[0], 1))
mean_values_testY[:, 0] = testY.squeeze()
testY_original = scaler.inverse_transform(mean_values_testY)[:, 0]
print(testY_original.shape)
# ---------------------------------------------------------------------------
# Overview plot: the complete 'Open' history plus actual vs. predicted values
# over the test period.
# ---------------------------------------------------------------------------
plt.figure(figsize=(14, 5))

# Full original 'Open' series for context.
plt.plot(dates, original_open, color='green', label='Original Open Price')

# The first target in the test part belongs to the row `pred_days - 1` steps
# after the first window of `seq_len` rows, so the matching dates start at
# `seq_len + pred_days - 1`. Using `seq_len` alone is only correct for
# pred_days == 1 and gives x/y arrays of different length otherwise.
target_dates = test_dates[seq_len + pred_days - 1:]

plt.plot(target_dates, testY_original, color='blue', label='Actual Open Price')
plt.plot(target_dates, y_pred, color='red', linestyle='--', label='Predicted Open Price')
plt.xlabel('Date')
plt.ylabel('Open Price')
plt.title('Original, Actual and Predicted Open Price')
plt.legend()
plt.show()
# ---------------------------------------------------------------------------
# Zoomed plot: actual vs. predicted 'Open' price for the most recent test days.
# ---------------------------------------------------------------------------
zoom_days = 50  # number of trailing days to show

# Never ask for more points than exist. Without this clamp a short test set
# makes the start indices negative, the slices wrap around, and the x and y
# arrays end up with different lengths.
n_zoom = min(zoom_days, len(y_pred), len(test_dates))

# The last prediction corresponds to the last test date, so both series are
# aligned by counting back from their ends.
zoom_end = len(test_dates)
zoom_start = zoom_end - n_zoom
adjusted_start = len(y_pred) - n_zoom  # start index inside testY_original / y_pred
adjusted_end = adjusted_start + n_zoom

plt.figure(figsize=(14, 5))
plt.plot(test_dates[zoom_start:zoom_end],
         testY_original[adjusted_start:adjusted_end],
         color='blue',
         label='Actual Open Price')
plt.plot(test_dates[zoom_start:zoom_end],
         y_pred[adjusted_start:adjusted_end],
         color='red',
         linestyle='--',
         label='Predicted Open Price')
plt.xlabel('Date')
plt.ylabel('Open Price')
plt.title('Zoomed In Actual vs Predicted Open Price')
plt.legend()
plt.show()