/ EXERCISE

Time Series - Energy Production

Energy Production

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pmdarima import auto_arima
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_error

import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('./Data/EnergyProduction.csv', index_col='DATE', parse_dates=True)
df.plot();

png

len(df)
240
n_obs = 24

Exponential Smooting

train_data = df.iloc[:-n_obs]
test_data = df.iloc[-n_obs:]
print(train_data.shape, test_data.shape)
(216, 1) (24, 1)
plt.plot(train_data, label='train')
plt.plot(test_data, label='test')
plt.legend()
plt.show()

png

def exp_smoothing(train_data, trend, seasonal, test_data = test_data):
    fitted_model = ExponentialSmoothing(train_data, trend=trend, seasonal=seasonal,
                                       seasonal_periods=12).fit()
    test_predictions = fitted_model.forecast(24)
    plt.figure(figsize=(12,5))
    plt.plot(test_data, label='test')
    plt.plot(test_predictions, label='prediction')
    
    mae = mean_absolute_error(test_data, test_predictions)
    mse = mean_squared_error(test_data, test_predictions)
    rmse = np.sqrt(mse)
    
    print('mae :', mae)
    print('mse :', mse)
    print('rmse :', rmse)
exp_smoothing(train_data, 'mul', 'mul')
mae : 2.7145989390490537
mse : 11.860913777946623
rmse : 3.443967737646017

png

exp_smoothing(train_data, 'add','add')
mae : 2.768300423888428
mse : 11.775533525016849
rmse : 3.43154972643802

png

final_model = ExponentialSmoothing(df, trend='add',seasonal='add',
                                  seasonal_periods=12).fit()
forecast_prediction = final_model.forecast(36)
plt.figure(figsize=(12,8))
plt.plot(df)
plt.plot(forecast_prediction)
plt.show()

png

ARIMA

from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima

from statsmodels.tsa.arima_model import ARIMA, ARIMAResults
from statsmodels.tsa.statespace.sarimax import SARIMAX, SARIMAXResults
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tools.eval_measures import rmse
result = seasonal_decompose(df, model='add')
result.plot();

png

adfuller(df)[1]
0.9830559056338348
df_diff = df.diff().dropna()
adfuller(df_diff)[1]
4.226559012395704e-06
stepwise_fit = auto_arima(df, seasonal=True, trace=True, m=12)
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,1,1)[12] intercept   : AIC=842.617, Time=1.00 sec
 ARIMA(0,0,0)(0,1,0)[12] intercept   : AIC=1055.546, Time=0.01 sec
 ARIMA(1,0,0)(1,1,0)[12] intercept   : AIC=855.182, Time=0.15 sec
 ARIMA(0,0,1)(0,1,1)[12] intercept   : AIC=909.065, Time=0.11 sec
 ARIMA(0,0,0)(0,1,0)[12]             : AIC=1134.429, Time=0.01 sec
 ARIMA(2,0,2)(0,1,1)[12] intercept   : AIC=840.759, Time=0.70 sec
 ARIMA(2,0,2)(0,1,0)[12] intercept   : AIC=881.557, Time=0.19 sec
 ARIMA(2,0,2)(0,1,2)[12] intercept   : AIC=842.577, Time=1.42 sec
 ARIMA(2,0,2)(1,1,0)[12] intercept   : AIC=855.194, Time=0.43 sec
 ARIMA(2,0,2)(1,1,2)[12] intercept   : AIC=844.166, Time=1.83 sec
 ARIMA(1,0,2)(0,1,1)[12] intercept   : AIC=841.937, Time=0.28 sec
 ARIMA(2,0,1)(0,1,1)[12] intercept   : AIC=843.582, Time=0.38 sec
 ARIMA(3,0,2)(0,1,1)[12] intercept   : AIC=842.514, Time=0.90 sec
 ARIMA(2,0,3)(0,1,1)[12] intercept   : AIC=842.693, Time=0.94 sec
 ARIMA(1,0,1)(0,1,1)[12] intercept   : AIC=843.610, Time=0.18 sec
 ARIMA(1,0,3)(0,1,1)[12] intercept   : AIC=841.805, Time=0.40 sec
 ARIMA(3,0,1)(0,1,1)[12] intercept   : AIC=840.516, Time=0.75 sec
 ARIMA(3,0,1)(0,1,0)[12] intercept   : AIC=inf, Time=0.44 sec
 ARIMA(3,0,1)(1,1,1)[12] intercept   : AIC=842.397, Time=0.96 sec
 ARIMA(3,0,1)(0,1,2)[12] intercept   : AIC=842.368, Time=1.75 sec
 ARIMA(3,0,1)(1,1,0)[12] intercept   : AIC=855.891, Time=0.67 sec
 ARIMA(3,0,1)(1,1,2)[12] intercept   : AIC=inf, Time=1.87 sec
 ARIMA(3,0,0)(0,1,1)[12] intercept   : AIC=842.621, Time=0.20 sec
 ARIMA(4,0,1)(0,1,1)[12] intercept   : AIC=842.514, Time=0.89 sec
 ARIMA(2,0,0)(0,1,1)[12] intercept   : AIC=844.431, Time=0.16 sec
 ARIMA(4,0,0)(0,1,1)[12] intercept   : AIC=843.014, Time=0.24 sec
 ARIMA(4,0,2)(0,1,1)[12] intercept   : AIC=843.906, Time=0.78 sec
 ARIMA(3,0,1)(0,1,1)[12]             : AIC=843.021, Time=0.51 sec

Best model:  ARIMA(3,0,1)(0,1,1)[12] intercept
Total fit time: 18.180 seconds
stepwise_fit.summary()
SARIMAX Results
Dep. Variable: y No. Observations: 240
Model: SARIMAX(3, 0, 1)x(0, 1, 1, 12) Log Likelihood -413.258
Date: Sun, 23 May 2021 AIC 840.516
Time: 23:55:22 BIC 864.522
Sample: 0 HQIC 850.202
- 240
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
intercept 0.0475 0.046 1.044 0.296 -0.042 0.137
ar.L1 1.6309 0.166 9.801 0.000 1.305 1.957
ar.L2 -0.8739 0.155 -5.640 0.000 -1.178 -0.570
ar.L3 0.2156 0.091 2.376 0.017 0.038 0.394
ma.L1 -0.7385 0.174 -4.248 0.000 -1.079 -0.398
ma.S.L12 -0.5458 0.059 -9.265 0.000 -0.661 -0.430
sigma2 2.1498 0.148 14.512 0.000 1.859 2.440
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 64.01
Prob(Q): 0.95 Prob(JB): 0.00
Heteroskedasticity (H): 2.07 Skew: 0.44
Prob(H) (two-sided): 0.00 Kurtosis: 5.44



Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

model = SARIMAX(train_data, order=(3,0,1), seasonal_order=(0,1,1,12))
results = model.fit()
results.summary()
SARIMAX Results
Dep. Variable: EnergyIndex No. Observations: 216
Model: SARIMAX(3, 0, 1)x(0, 1, 1, 12) Log Likelihood -359.115
Date: Sun, 23 May 2021 AIC 730.230
Time: 23:56:54 BIC 750.138
Sample: 01-01-1970 HQIC 738.283
- 12-01-1987
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
ar.L1 1.7745 0.095 18.622 0.000 1.588 1.961
ar.L2 -0.9883 0.113 -8.780 0.000 -1.209 -0.768
ar.L3 0.2111 0.076 2.778 0.005 0.062 0.360
ma.L1 -0.8565 0.080 -10.649 0.000 -1.014 -0.699
ma.S.L12 -0.5650 0.053 -10.651 0.000 -0.669 -0.461
sigma2 1.9217 0.168 11.414 0.000 1.592 2.252
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 9.10
Prob(Q): 0.92 Prob(JB): 0.01
Heteroskedasticity (H): 2.05 Skew: 0.10
Prob(H) (two-sided): 0.00 Kurtosis: 4.01



Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

start = len(train_data)
end = len(train_data) + len(test_data) - 1
pred = results.predict(start, end, typ='level')
plt.plot(pred)
plt.plot(test_data)
plt.show()

png

error = rmse(test_data, pred)
error
array([ 8.31981553,  6.09823958,  6.79366971, 10.53638173, 11.99706146,
        8.69493785,  6.05820751,  5.93929791,  7.96124573, 10.79105154,
        9.2213997 ,  6.03056639,  9.51428606,  6.67546108,  6.18212862,
        9.35946757, 10.76244513,  7.70125307,  5.87907598,  5.9064441 ,
        7.08999389,  9.6500675 ,  8.20025993,  6.47869886])
model = SARIMAX(df, order=(3,0,1), seasonal_order=(0,1,1,12))
results = model.fit()
results.summary()
SARIMAX Results
Dep. Variable: EnergyIndex No. Observations: 240
Model: SARIMAX(3, 0, 1)x(0, 1, 1, 12) Log Likelihood -415.510
Date: Sun, 23 May 2021 AIC 843.021
Time: 23:58:43 BIC 863.597
Sample: 01-01-1970 HQIC 851.322
- 12-01-1989
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
ar.L1 1.7509 0.094 18.681 0.000 1.567 1.935
ar.L2 -0.9646 0.117 -8.235 0.000 -1.194 -0.735
ar.L3 0.2119 0.084 2.523 0.012 0.047 0.377
ma.L1 -0.8439 0.084 -10.022 0.000 -1.009 -0.679
ma.S.L12 -0.5587 0.057 -9.876 0.000 -0.670 -0.448
sigma2 2.1795 0.153 14.279 0.000 1.880 2.479
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 49.80
Prob(Q): 0.93 Prob(JB): 0.00
Heteroskedasticity (H): 2.08 Skew: 0.40
Prob(H) (two-sided): 0.00 Kurtosis: 5.14



Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

forecast = results.predict(len(df)-1, len(df)+11, typ='levels')
plt.plot(df['1983-01-01':])
plt.plot(forecast)
[<matplotlib.lines.Line2D at 0x14301670908>]

png

LSTM

from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Input
nobs = 12
train = df.iloc[:-nobs]
test = df.iloc[-nobs:]
mm_scaler = MinMaxScaler()
st_scaler = StandardScaler()
mm_scaler.fit(train)
st_scaler.fit(train)
StandardScaler()
mm_scaled_train = mm_scaler.transform(train)
mm_scaled_test = mm_scaler.transform(test)
st_scaled_train = st_scaler.transform(train)
st_scaled_test = st_scaler.transform(test)
n_input = 12
n_features = 1
mm_gen = TimeseriesGenerator(mm_scaled_train, mm_scaled_train, length=n_input,
                         batch_size=1)
st_gen = TimeseriesGenerator(st_scaled_train, st_scaled_train, length=n_input,
                         batch_size=1)
inputs = Input(shape=(n_input, n_features))
x = LSTM(100, activation='relu')(inputs)
x = Dense(1)(x)
WARNING:tensorflow:Layer lstm will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU
mm_model = Model(inputs, x)
st_model = Model(inputs, x)
mm_model.compile(optimizer='adam', loss='mse')
st_model.compile(optimizer='adam', loss='mse')
mm_hist = mm_model.fit_generator(mm_gen, epochs=50, verbose=0)
st_hist = st_model.fit_generator(st_gen, epochs=50, verbose=0)
WARNING:tensorflow:From <ipython-input-23-6ebd85666877>:1: Model.fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
Please use Model.fit, which supports generators.
plt.plot(mm_hist.history['loss'], label='MinMax')
plt.plot(st_hist.history['loss'], label='Standard')
plt.legend()
plt.show()

png

first_eval_batch = mm_scaled_train[-12:]
first_eval_batch = first_eval_batch.reshape((1, n_input, n_features))
mm_model.predict(first_eval_batch)
array([[1.1262071]], dtype=float32)
test_predictions = []
first_eval_batch = mm_scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))
for i in range(len(test)):
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)
    current_batch = np.append(current_batch[:, 1:, :], [[current_pred]], axis=1)
test_predictions
[array([1.1262071], dtype=float32),
 array([1.117718], dtype=float32),
 array([0.83300537], dtype=float32),
 array([0.5422754], dtype=float32),
 array([0.5601636], dtype=float32),
 array([0.8953703], dtype=float32),
 array([1.2141159], dtype=float32),
 array([1.146272], dtype=float32),
 array([0.80624], dtype=float32),
 array([0.52206135], dtype=float32),
 array([0.63773364], dtype=float32),
 array([1.1513337], dtype=float32)]
true_predictions = mm_scaler.inverse_transform(test_predictions)
test['predictions'] = true_predictions
test.plot(figsize=(12,9))
<AxesSubplot:xlabel='DATE'>

png

print(rmse(test['EnergyIndex'], test['predictions']))
6.790834064681344