As my sample code is shown below, I am working on a prediction model using LSTM neural networks using the CSV formatted dataset found in this link.
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
np.random.seed(7)
# Load data
#df = pd.read_csv('test32_C_data.csv')
df = pd.DataFrame(np.random.randint(0,100, size=(100,3)), columns = ['time', 'X', 'Y'])
n_features = 100
def create_sequences(data, window=15, step=1, prediction_distance=15):
x = []
y = []
for i in range(0, len(data) - window - prediction_distance, step):
x.append(data[i:i + window])
y.append(data[i + window + prediction_distance][1])
x, y = np.asarray(x), np.asarray(y)
return x, y
# Scaling prior to splitting
scaler_x = MinMaxScaler(feature_range=(0.01, 0.99))
scaler_y = MinMaxScaler(feature_range=(0.01, 0.99))
scaled_x = scaler_x.fit_transform(df.loc[:, "X"].reshape([-1,1]))
scaled_y = scaler_y.fit_transform(df.loc[:, "Y"].reshape([-1,1]))
scaled_data = np.column_stack((scaled_x, scaled_y))
# Build sequences
x_sequence, y_sequence = create_sequences(scaled_data)
test_len = int(len(x_sequence) * 0.90)
valid_len = int(len(x_sequence) * 0.90)
train_end = len(x_sequence) - (test_len + valid_len)
x_train, y_train = x_sequence[:train_end], y_sequence[:train_end]
x_valid, y_valid = x_sequence[train_end:train_end + valid_len], y_sequence[train_end:train_end + valid_len]
x_test, y_test = x_sequence[train_end + valid_len:], y_sequence[train_end + valid_len:]
# Initialising the RNN
model = Sequential()
# Adding the input layerand the LSTM layer
model.add(LSTM(15, input_shape=(15, 2)))
# Adding the output layer
model.add(Dense(1))
# Compiling the RNN
model.compile(loss='mse', optimizer='rmsprop')
# Fitting the RNN to the Training set
model.fit(x_train, y_train, epochs=5)
# Getting the predicted values
y_pred = model.predict(x_test)
# invert the predictions
y_pred = scaler_y.inverse_transform(y_pred)
y_test = scaler_y.inverse_transform(y_test)
Finally, I wanted to calculate the Root Mean Square Error (RMSE) from my prediction model as the following
rmse_out = math.sqrt(mean_squared_error(y_test[0], y_pred[:,0]))
However, it is throwing this error: TypeError: Singleton array 225.0 cannot be considered a valid collection. How can we fix this error?