EXERCISE

Time series - Alcohol Sales

Alcohol Sales

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('./Data/Alcohol_Sales.csv', index_col='DATE', parse_dates=True)
df.index.freq = 'MS'  # monthly observations, month-start frequency
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 325 entries, 1992-01-01 to 2019-01-01
Freq: MS
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   S4248SM144NCEN  325 non-null    int64
dtypes: int64(1)
memory usage: 5.1 KB
df.plot()
plt.show()

png

General Forecasting Model: Holt-Winters Exponential Smoothing

from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error
train_data = df.iloc[:-12]
test_data = df.iloc[-12:]
print(df.shape)
print(train_data.shape)
print(test_data.shape)
(325, 1)
(313, 1)
(12, 1)
fitted_model_mm = ExponentialSmoothing(train_data, trend='mul', seasonal='mul', seasonal_periods=12).fit()
fitted_model_aa = ExponentialSmoothing(train_data, trend='add', seasonal='add', seasonal_periods=12).fit()
c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\tsa\holtwinters\model.py:922: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.
  ConvergenceWarning,
test_predictions_mm = fitted_model_mm.forecast(12)
test_predictions_aa = fitted_model_aa.forecast(12)
plt.figure(figsize=(12,5))
plt.plot(train_data, label='Train')
plt.plot(test_data, label='Test')
plt.legend()
plt.show()

png

plt.figure(figsize=(12,5))
plt.plot(test_data, label='Test')
plt.plot(test_predictions_mm, label='Pred_mm')
plt.legend()
plt.show()

png

mae = mean_absolute_error(test_data, test_predictions_mm)
mse = mean_squared_error(test_data, test_predictions_mm)
rmse = np.sqrt(mse)
print('mae :', mae)
print('mse :', mse)
print('rmse :', rmse)
mae : 411.9015865536535
mse : 229275.45568794024
rmse : 478.82716682320796
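Only the multiplicative forecasts were scored above; the additive fit is never evaluated. A minimal sketch, reusing the forecasts already computed, to score both variants side by side (the additive numbers are not part of the original run, so treat them as something to reproduce):

for name, preds in [('mul/mul', test_predictions_mm), ('add/add', test_predictions_aa)]:
    # same metrics as above, applied to each Holt-Winters variant
    print(name,
          '- MAE:', mean_absolute_error(test_data, preds),
          '- RMSE:', np.sqrt(mean_squared_error(test_data, preds)))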
final_model = ExponentialSmoothing(df, trend='mul', seasonal='mul', seasonal_periods=12).fit()
c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\tsa\holtwinters\model.py:922: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.
  ConvergenceWarning,
forecast_prediction = final_model.forecast(36)
plt.figure(figsize=(12,5))
plt.plot(df)
plt.plot(forecast_prediction)
plt.show()

png

ARIMA

from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima

from statsmodels.tsa.arima_model import ARMA, ARIMA, ARMAResults, ARIMAResults  # deprecated module; newer statsmodels uses statsmodels.tsa.arima.model.ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX, SARIMAXResults
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tools.eval_measures import rmse
result = seasonal_decompose(df, model='add')
result.plot()
plt.show()

png

plot_acf(df, lags=40)
plt.show()

png

plot_pacf(df)
plt.show()

png

df_log = np.log(df)
df_log_diff = diff(df_log)
plot_acf(df_log_diff)
plot_pacf(df_log_diff)
plt.show()

png

png

print('p-value is : {}'.format(adfuller(df)[1]))
p-value is : 0.9987196267088919

The p-value is far above 0.05, so we fail to reject the ADF null hypothesis of a unit root: the series isn't stationary.

df_diff = diff(df, k_diff=1)
print('p-value is : {}'.format(adfuller(df_diff)[1]))
p-value is : 0.0003408284921168574

The p-value is well below 0.05, so the first-differenced series is stationary; one regular difference (d=1) suffices.
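For readability, a small helper (hypothetical, not part of the original notebook) prints the full ADF result instead of indexing the returned tuple for the p-value alone:

def adf_report(series, name=''):
    # unpack the full augmented Dickey-Fuller result (default autolag='AIC')
    stat, pvalue, lags, nobs, crit, icbest = adfuller(series)
    print(f'{name} ADF statistic: {stat:.4f}, p-value: {pvalue:.4f}, lags used: {lags}, observations: {nobs}')
    for level, value in crit.items():
        print(f'  critical value ({level}): {value:.4f}')

adf_report(df_diff['S4248SM144NCEN'], 'differenced sales')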

auto_arima

stepwise_fit = auto_arima(df, seasonal=True, trace=True, m=12)
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,1,1)[12]             : AIC=4514.024, Time=1.35 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=4868.181, Time=0.01 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=4731.674, Time=0.08 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=4629.449, Time=0.66 sec
 ARIMA(2,1,2)(0,1,1)[12]             : AIC=4527.109, Time=0.79 sec
 ARIMA(2,1,2)(1,1,0)[12]             : AIC=4539.016, Time=0.39 sec
 ARIMA(2,1,2)(2,1,1)[12]             : AIC=4499.168, Time=4.80 sec
 ARIMA(2,1,2)(2,1,0)[12]             : AIC=4523.762, Time=1.18 sec
 ARIMA(2,1,2)(2,1,2)[12]             : AIC=inf, Time=7.26 sec
 ARIMA(2,1,2)(1,1,2)[12]             : AIC=4508.118, Time=4.54 sec
 ARIMA(1,1,2)(2,1,1)[12]             : AIC=4526.711, Time=5.72 sec
 ARIMA(2,1,1)(2,1,1)[12]             : AIC=4504.012, Time=3.65 sec
 ARIMA(3,1,2)(2,1,1)[12]             : AIC=4491.408, Time=5.70 sec
 ARIMA(3,1,2)(1,1,1)[12]             : AIC=4506.197, Time=2.39 sec
 ARIMA(3,1,2)(2,1,0)[12]             : AIC=4514.299, Time=4.16 sec
 ARIMA(3,1,2)(2,1,2)[12]             : AIC=4444.011, Time=7.65 sec
 ARIMA(3,1,2)(1,1,2)[12]             : AIC=4499.385, Time=6.75 sec
 ARIMA(3,1,1)(2,1,2)[12]             : AIC=inf, Time=6.98 sec
 ARIMA(4,1,2)(2,1,2)[12]             : AIC=4460.069, Time=9.64 sec
 ARIMA(3,1,3)(2,1,2)[12]             : AIC=4448.232, Time=9.26 sec
 ARIMA(2,1,1)(2,1,2)[12]             : AIC=inf, Time=6.27 sec
 ARIMA(2,1,3)(2,1,2)[12]             : AIC=4449.248, Time=8.97 sec
 ARIMA(4,1,1)(2,1,2)[12]             : AIC=inf, Time=8.59 sec
 ARIMA(4,1,3)(2,1,2)[12]             : AIC=4460.041, Time=10.26 sec
 ARIMA(3,1,2)(2,1,2)[12] intercept   : AIC=4462.884, Time=8.87 sec

Best model:  ARIMA(3,1,2)(2,1,2)[12]          
Total fit time: 125.962 seconds
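Rather than copying the orders out of the trace by hand, the fitted pmdarima object exposes them directly:

print(stepwise_fit.order)           # expected: (3, 1, 2)
print(stepwise_fit.seasonal_order)  # expected: (2, 1, 2, 12)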
stepwise_fit.summary()
                                     SARIMAX Results
==========================================================================================
Dep. Variable:                                  y   No. Observations:                  325
Model:             SARIMAX(3, 1, 2)x(2, 1, 2, 12)   Log Likelihood               -2212.006
Date:                            Fri, 09 Apr 2021   AIC                            4444.011
Time:                                    11:46:26   BIC                            4481.441
Sample:                                         0   HQIC                           4458.971
                                            - 325
Covariance Type:                              opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0899      0.215     -0.418      0.676      -0.512       0.332
ar.L2          0.0985      0.087      1.128      0.259      -0.073       0.270
ar.L3          0.3208      0.069      4.639      0.000       0.185       0.456
ma.L1         -0.7431      0.223     -3.335      0.001      -1.180      -0.306
ma.L2         -0.1539      0.182     -0.845      0.398      -0.511       0.203
ar.S.L12       0.8702      0.068     12.850      0.000       0.737       1.003
ar.S.L24      -0.8249      0.059    -13.973      0.000      -0.941      -0.709
ma.S.L12      -1.1483      0.100    -11.534      0.000      -1.343      -0.953
ma.S.L24       0.6663      0.096      6.959      0.000       0.479       0.854
sigma2      8.337e+04   6834.224     12.199      0.000       7e+04     9.68e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.18   Jarque-Bera (JB):                10.24
Prob(Q):                              0.67   Prob(JB):                         0.01
Heteroskedasticity (H):               4.23   Skew:                            -0.11
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.86
===================================================================================



Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

model = SARIMAX(train_data, order=(3,1,2), seasonal_order=(2,1,2,12))
results = model.fit()
c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\base\model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  ConvergenceWarning)
results.summary()
                                     SARIMAX Results
==========================================================================================
Dep. Variable:                     S4248SM144NCEN   No. Observations:                  313
Model:             SARIMAX(3, 1, 2)x(2, 1, 2, 12)   Log Likelihood               -2130.091
Date:                            Fri, 09 Apr 2021   AIC                            4280.183
Time:                                    11:47:17   BIC                            4317.221
Sample:                                01-01-1992   HQIC                           4295.005
                                     - 01-01-2018
Covariance Type:                              opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2578      0.223     -1.158      0.247      -0.694       0.178
ar.L2          0.0882      0.104      0.845      0.398      -0.116       0.293
ar.L3          0.3605      0.085      4.235      0.000       0.194       0.527
ma.L1         -0.6401      0.225     -2.844      0.004      -1.081      -0.199
ma.L2         -0.2576      0.188     -1.373      0.170      -0.625       0.110
ar.S.L12       0.8397      0.124      6.754      0.000       0.596       1.083
ar.S.L24      -0.7472      0.087     -8.554      0.000      -0.918      -0.576
ma.S.L12      -1.1464      0.158     -7.235      0.000      -1.457      -0.836
ma.S.L24       0.6295      0.149      4.230      0.000       0.338       0.921
sigma2       1.02e+05   1.01e+04     10.119      0.000     8.22e+04    1.22e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.17   Jarque-Bera (JB):                 9.97
Prob(Q):                              0.68   Prob(JB):                         0.01
Heteroskedasticity (H):               4.70   Skew:                            -0.12
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.86
===================================================================================



Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

start = len(train_data)
end = len(train_data) + len(test_data) - 1  # indices spanning the 12-month test window
pred = results.predict(start, end, typ='levels')  # typ is a legacy ARIMA argument; statespace SARIMAX already predicts in levels
plt.plot(test_data)
plt.plot(pred)
plt.show()

png

error = rmse(test_data, pred)
error
array([2854.34960749, 1718.78826431, 1867.1512623 , 1812.19483997,
       2065.05758019, 1687.09691322, 1590.03025873, 1711.93822433,
       1559.00525313, 1517.72740288, 2247.46882941, 3260.6541987 ])
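The 12-element array is a broadcasting artifact: statsmodels' rmse converts its inputs to arrays, so the (12, 1) test DataFrame minus the length-12 prediction Series broadcasts to a 12x12 difference matrix, and the call returns one RMSE per column instead of a scalar. Selecting the column first gives the intended single number (the same form used at the end of the notebook, where it prints roughly 459):

error = rmse(test_data['S4248SM144NCEN'], pred)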
model = SARIMAX(df, order=(3,1,2), seasonal_order=(2,1,2,12))
results = model.fit()
c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\base\model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  ConvergenceWarning)
forecast = results.predict(len(df)-1, len(df)+11, typ='levels')  # starting at the last in-sample index makes the plotted forecast join the observed series
plt.plot(df['2016-01-01':])
plt.plot(forecast)
plt.show()

png

LSTM

from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Input
nobs = 12
train = df.iloc[:-nobs]  # same 12-month holdout as before
test = df.iloc[-nobs:]
scaler = MinMaxScaler()
scaler.fit(train)
MinMaxScaler()
scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)
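Note that the scaler is fit on the training slice only and merely applied to the held-out year, so no information from the test window leaks into the scaling.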
n_input = 12
n_features = 1

gen = TimeseriesGenerator(scaled_train, scaled_train, length=n_input, batch_size=1)
print(len(scaled_train))
print(len(scaled_test))
print(len(gen))
313
12
301
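As a sanity check on the 301 above: with batch_size=1, each generator sample pairs n_input consecutive scaled values with the single value that follows them, so the number of batches is len(scaled_train) - n_input = 313 - 12 = 301:

assert len(gen) == len(scaled_train) - n_input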
X, y = gen[0]
X, y
(array([[[0.03658432],
         [0.03649885],
         [0.08299855],
         [0.13103684],
         [0.1017181 ],
         [0.12804513],
         [0.12266006],
         [0.09453799],
         [0.09359774],
         [0.10496624],
         [0.10334217],
         [0.16283443]]]),
 array([[0.]]))
inputs = Input(shape=(n_input, n_features))  # 12 timesteps, 1 feature per step
x = LSTM(150, activation='relu')(inputs)
x = Dense(1)(x)  # single one-step-ahead output
WARNING:tensorflow:Layer lstm will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU
model = Model(inputs, x)
model.summary()
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 12, 1)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 150)               91200     
_________________________________________________________________
dense (Dense)                (None, 1)                 151       
=================================================================
Total params: 91,351
Trainable params: 91,351
Non-trainable params: 0
_________________________________________________________________
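As a check on the parameter count: an LSTM layer has 4 * units * (units + input_dim + 1) weights (four gates, each with recurrent weights, input weights, and a bias), here 4 * 150 * (150 + 1 + 1) = 91,200; the dense head adds 150 weights + 1 bias = 151, giving the 91,351 total in the summary.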
model.compile(optimizer='adam', loss='mse')
history = model.fit(gen, epochs=25)  # fit_generator is deprecated; fit() accepts generators directly in TF 2.x
Epoch 1/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0096
Epoch 2/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0094
Epoch 3/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0087
Epoch 4/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0066
Epoch 5/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0054
Epoch 6/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0039
Epoch 7/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0035
Epoch 8/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0031
Epoch 9/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0025
Epoch 10/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0021
Epoch 11/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0022
Epoch 12/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0023
Epoch 13/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0024
Epoch 14/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0021
Epoch 15/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 16/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0016
Epoch 17/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0020
Epoch 18/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0018
Epoch 19/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 20/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0015
Epoch 21/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 22/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0018
Epoch 23/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 24/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0019
Epoch 25/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0016
plt.plot(history.history['loss'])  # x-axis defaults to the epoch index, so range(...) is redundant
[<matplotlib.lines.Line2D at 0x24121def0c8>]

png

first_eval_batch = scaled_train[-12:]
first_eval_batch = first_eval_batch.reshape((1, n_input, n_features))
model.predict(first_eval_batch)
array([[0.78138685]], dtype=float32)

Forecast

test_predictions = []
first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))

for i in range(len(test)):
    # predict one step ahead from the current 12-month window
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)
    # slide the window: drop the oldest value, append the new prediction
    current_batch = np.append(current_batch[:,1:,:], [[current_pred]], axis=1)
test_predictions
[array([0.78138685], dtype=float32),
 array([0.9095711], dtype=float32),
 array([0.85770994], dtype=float32),
 array([1.0188285], dtype=float32),
 array([1.0919129], dtype=float32),
 array([0.84825593], dtype=float32),
 array([1.001781], dtype=float32),
 array([0.87678725], dtype=float32),
 array([0.9534033], dtype=float32),
 array([0.9890802], dtype=float32),
 array([1.060493], dtype=float32),
 array([0.696439], dtype=float32)]
true_predictions = scaler.inverse_transform(test_predictions)
true_predictions
array([[12172.44478464],
       [13672.07242996],
       [13065.34863776],
       [14950.27475297],
       [15805.28861511],
       [12954.74615234],
       [14750.83576441],
       [13288.53398246],
       [14184.86513752],
       [14602.24915051],
       [15437.7075181 ],
       [11178.64018607]])
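The inverse transform simply undoes the min-max scaling, x = x_scaled * (x_max - x_min) + x_min, with x_min and x_max taken from the training data, returning the predictions to the original sales units.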
test['Predictions'] = true_predictions  # test is a slice of df, so pandas may emit SettingWithCopyWarning here
test
            S4248SM144NCEN   Predictions
DATE
2018-02-01           10415  12172.444785
2018-03-01           12683  13672.072430
2018-04-01           11919  13065.348638
2018-05-01           14138  14950.274753
2018-06-01           14583  15805.288615
2018-07-01           12640  12954.746152
2018-08-01           14257  14750.835764
2018-09-01           12396  13288.533982
2018-10-01           13914  14184.865138
2018-11-01           14174  14602.249151
2018-12-01           15504  15437.707518
2019-01-01           10718  11178.640186
test.plot(figsize=(12,8))
<AxesSubplot:xlabel='DATE'>

png

test['Pred_ARIMA'] = pred
print(rmse(test['S4248SM144NCEN'], test['Predictions']))
print(rmse(test['S4248SM144NCEN'], test['Pred_ARIMA']))
873.1015843567056
458.91865971479376
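On this split, then, the tuned SARIMA model (RMSE ≈ 459) clearly outperforms the LSTM (≈ 873) and also edges out the Holt-Winters model from earlier (≈ 479).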