11 January 2021 / EXERCISE

Time series - Alcohol Sales

Alcohol Sales

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv('./Data/Alcohol_Sales.csv', index_col='DATE', parse_dates=True)

df.index.freq = 'MS'

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 325 entries, 1992-01-01 to 2019-01-01
Freq: MS
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   S4248SM144NCEN  325 non-null    int64
dtypes: int64(1)
memory usage: 5.1 KB

df.plot()
plt.show()

png

General Forecasting Model

from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error

train_data = df.iloc[:-12]
test_data = df.iloc[-12:]

print(df.shape)
print(train_data.shape)
print(test_data.shape)

(325, 1)
(313, 1)
(12, 1)

fitted_model_mm = ExponentialSmoothing(train_data, trend='mul', seasonal='mul',seasonal_periods=12).fit()
fitted_model_aa = ExponentialSmoothing(train_data, trend='add', seasonal='add',seasonal_periods=12).fit()

c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\tsa\holtwinters\model.py:922: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.
  ConvergenceWarning,

test_predictions_mm = fitted_model.forecast(12)
test_predictions_aa = fitted_model.forecast(12)

plt.figure(figsize=(12,5))
plt.plot(train_data, label='Train')
plt.plot(test_data, label='Test')
plt.legend()
plt.show()

png

plt.figure(figsize=(12,5))
plt.plot(test_data, label='Test')
plt.plot(test_predictions_mm, label='Pred_mm')
plt.legend()
plt.show()

png

mae = mean_absolute_error(test_data, test_predictions)
mse = mean_squared_error(test_data ,test_predictions)
rmse = np.sqrt(mse)

print('mae :', mae)
print('mse :', mse)
print('rmse :', rmse)

mae : 411.9015865536535
mse : 229275.45568794024
rmse : 478.82716682320796

final_model = ExponentialSmoothing(df, trend='mul', seasonal='mul', seasonal_periods=12).fit()

c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\tsa\holtwinters\model.py:922: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.
  ConvergenceWarning,

forecast_prediction = final_model.forecast(36)

plt.figure(figsize=(12,5))
plt.plot(df)
plt.plot(forecast_prediction)
plt.show()

png

ARIMA

from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima

from statsmodels.tsa.arima_model import ARMA, ARIMA, ARMAResults, ARIMAResults
from statsmodels.tsa.statespace.sarimax import SARIMAX, SARIMAXResults
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tools.eval_measures import rmse

result = seasonal_decompose(df, model='add')

result.plot()
plt.show()

png

plot_acf(df, lags=40)
plt.show()

png

plot_pacf(df)
plt.show()

png

df_log = np.log(df)

df_log_diff = diff(df_log)

plot_acf(df_log_diff)
plot_pacf(df_log_diff)
plt.show()

png

print('p-value is : {}'.format(adfuller(df)[1]))

p-value is : 0.9987196267088919

isn’t stationary

df_diff = diff(df, k_diff=1)

print('p-value is : {}'.format(adfuller(df_diff)[1]))

p-value is : 0.0003408284921168574

is stationary

auto_arima

stepwise_fit = auto_arima(df, seasonal=True, trace=True, m=12)

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,1,1)[12]             : AIC=4514.024, Time=1.35 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=4868.181, Time=0.01 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=4731.674, Time=0.08 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=4629.449, Time=0.66 sec
 ARIMA(2,1,2)(0,1,1)[12]             : AIC=4527.109, Time=0.79 sec
 ARIMA(2,1,2)(1,1,0)[12]             : AIC=4539.016, Time=0.39 sec
 ARIMA(2,1,2)(2,1,1)[12]             : AIC=4499.168, Time=4.80 sec
 ARIMA(2,1,2)(2,1,0)[12]             : AIC=4523.762, Time=1.18 sec
 ARIMA(2,1,2)(2,1,2)[12]             : AIC=inf, Time=7.26 sec
 ARIMA(2,1,2)(1,1,2)[12]             : AIC=4508.118, Time=4.54 sec
 ARIMA(1,1,2)(2,1,1)[12]             : AIC=4526.711, Time=5.72 sec
 ARIMA(2,1,1)(2,1,1)[12]             : AIC=4504.012, Time=3.65 sec
 ARIMA(3,1,2)(2,1,1)[12]             : AIC=4491.408, Time=5.70 sec
 ARIMA(3,1,2)(1,1,1)[12]             : AIC=4506.197, Time=2.39 sec
 ARIMA(3,1,2)(2,1,0)[12]             : AIC=4514.299, Time=4.16 sec
 ARIMA(3,1,2)(2,1,2)[12]             : AIC=4444.011, Time=7.65 sec
 ARIMA(3,1,2)(1,1,2)[12]             : AIC=4499.385, Time=6.75 sec
 ARIMA(3,1,1)(2,1,2)[12]             : AIC=inf, Time=6.98 sec
 ARIMA(4,1,2)(2,1,2)[12]             : AIC=4460.069, Time=9.64 sec
 ARIMA(3,1,3)(2,1,2)[12]             : AIC=4448.232, Time=9.26 sec
 ARIMA(2,1,1)(2,1,2)[12]             : AIC=inf, Time=6.27 sec
 ARIMA(2,1,3)(2,1,2)[12]             : AIC=4449.248, Time=8.97 sec
 ARIMA(4,1,1)(2,1,2)[12]             : AIC=inf, Time=8.59 sec
 ARIMA(4,1,3)(2,1,2)[12]             : AIC=4460.041, Time=10.26 sec
 ARIMA(3,1,2)(2,1,2)[12] intercept   : AIC=4462.884, Time=8.87 sec

Best model:  ARIMA(3,1,2)(2,1,2)[12]          
Total fit time: 125.962 seconds

stepwise_fit.summary()

SARIMAX Results
Dep. Variable:	y	No. Observations:	325
Model:	SARIMAX(3, 1, 2)x(2, 1, 2, 12)	Log Likelihood	-2212.006
Date:	Fri, 09 Apr 2021	AIC	4444.011
Time:	11:46:26	BIC	4481.441
Sample:	0	HQIC	4458.971
	- 325
Covariance Type:	opg

	coef	std err	z	P>\|z\|	[0.025	0.975]
ar.L1	-0.0899	0.215	-0.418	0.676	-0.512	0.332
ar.L2	0.0985	0.087	1.128	0.259	-0.073	0.270
ar.L3	0.3208	0.069	4.639	0.000	0.185	0.456
ma.L1	-0.7431	0.223	-3.335	0.001	-1.180	-0.306
ma.L2	-0.1539	0.182	-0.845	0.398	-0.511	0.203
ar.S.L12	0.8702	0.068	12.850	0.000	0.737	1.003
ar.S.L24	-0.8249	0.059	-13.973	0.000	-0.941	-0.709
ma.S.L12	-1.1483	0.100	-11.534	0.000	-1.343	-0.953
ma.S.L24	0.6663	0.096	6.959	0.000	0.479	0.854
sigma2	8.337e+04	6834.224	12.199	0.000	7e+04	9.68e+04

Ljung-Box (L1) (Q):	0.18	Jarque-Bera (JB):	10.24
Prob(Q):	0.67	Prob(JB):	0.01
Heteroskedasticity (H):	4.23	Skew:	-0.11
Prob(H) (two-sided):	0.00	Kurtosis:	3.86

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

model = SARIMAX(train_data, order=(3,1,2), seasonal_order=(2,1,2,12))

results = model.fit()

c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\base\model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  ConvergenceWarning)

results.summary()

SARIMAX Results
Dep. Variable:	S4248SM144NCEN	No. Observations:	313
Model:	SARIMAX(3, 1, 2)x(2, 1, 2, 12)	Log Likelihood	-2130.091
Date:	Fri, 09 Apr 2021	AIC	4280.183
Time:	11:47:17	BIC	4317.221
Sample:	01-01-1992	HQIC	4295.005
	- 01-01-2018
Covariance Type:	opg

	coef	std err	z	P>\|z\|	[0.025	0.975]
ar.L1	-0.2578	0.223	-1.158	0.247	-0.694	0.178
ar.L2	0.0882	0.104	0.845	0.398	-0.116	0.293
ar.L3	0.3605	0.085	4.235	0.000	0.194	0.527
ma.L1	-0.6401	0.225	-2.844	0.004	-1.081	-0.199
ma.L2	-0.2576	0.188	-1.373	0.170	-0.625	0.110
ar.S.L12	0.8397	0.124	6.754	0.000	0.596	1.083
ar.S.L24	-0.7472	0.087	-8.554	0.000	-0.918	-0.576
ma.S.L12	-1.1464	0.158	-7.235	0.000	-1.457	-0.836
ma.S.L24	0.6295	0.149	4.230	0.000	0.338	0.921
sigma2	1.02e+05	1.01e+04	10.119	0.000	8.22e+04	1.22e+05

Ljung-Box (L1) (Q):	0.17	Jarque-Bera (JB):	9.97
Prob(Q):	0.68	Prob(JB):	0.01
Heteroskedasticity (H):	4.70	Skew:	-0.12
Prob(H) (two-sided):	0.00	Kurtosis:	3.86

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

start = len(train_data)
end = len(train_data) + len(test_data) - 1

pred = results.predict(start, end, typ='level')

plt.plot(test_data)
plt.plot(pred)
plt.show()

png

error = rmse(test_data, pred)
error

array([2854.34960749, 1718.78826431, 1867.1512623 , 1812.19483997,
       2065.05758019, 1687.09691322, 1590.03025873, 1711.93822433,
       1559.00525313, 1517.72740288, 2247.46882941, 3260.6541987 ])

model = SARIMAX(df, order=(3,1,2), seasonal_order=(2,1,2,12))

results = model.fit()

c:\users\ilvna\.conda\envs\tf\lib\site-packages\statsmodels\base\model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  ConvergenceWarning)

forecast = results.predict(len(df)-1, len(df)+11, typ='levels')

plt.plot(df['2016-01-01':])
plt.plot(forecast)
plt.show()

png

LSTM

from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Input

nobs = 12

train = df.iloc[:-nobs]
test = df.iloc[-nobs:]

scaler = MinMaxScaler()
scaler.fit(train)

MinMaxScaler()

scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)

n_input = 12
n_features = 1

gen = TimeseriesGenerator(scaled_train, scaled_train, length=n_input, batch_size=1)

print(len(scaled_train))
print(len(scaled_test))
print(len(gen))

313
12
301

X, y = gen[0]

X, y

(array([[[0.03658432],
         [0.03649885],
         [0.08299855],
         [0.13103684],
         [0.1017181 ],
         [0.12804513],
         [0.12266006],
         [0.09453799],
         [0.09359774],
         [0.10496624],
         [0.10334217],
         [0.16283443]]]),
 array([[0.]]))

inputs = Input(shape=(n_input, n_features))
x = LSTM(150, activation='relu')(inputs)
x = Dense(1)(x)

WARNING:tensorflow:Layer lstm will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU

model = Model(inputs, x)

model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 12, 1)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 150)               91200     
_________________________________________________________________
dense (Dense)                (None, 1)                 151       
=================================================================
Total params: 91,351
Trainable params: 91,351
Non-trainable params: 0
_________________________________________________________________

model.compile(optimizer='adam', loss='mse')

history = model.fit_generator(gen, epochs = 25)

Epoch 1/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0096
Epoch 2/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0094
Epoch 3/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0087
Epoch 4/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0066
Epoch 5/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0054
Epoch 6/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0039
Epoch 7/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0035
Epoch 8/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0031: 0s
Epoch 9/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0025
Epoch 10/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0021
Epoch 11/25
301/301 [==============================] - 5s 16ms/step - loss: 0.0022
Epoch 12/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0023
Epoch 13/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0024
Epoch 14/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0021
Epoch 15/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 16/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0016
Epoch 17/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0020
Epoch 18/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0018
Epoch 19/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 20/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0015
Epoch 21/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 22/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0018
Epoch 23/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0017
Epoch 24/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0019
Epoch 25/25
301/301 [==============================] - 5s 17ms/step - loss: 0.0016

plt.plot(range(len(history.history['loss'])), model.history.history['loss'])

[<matplotlib.lines.Line2D at 0x24121def0c8>]

png

first_eval_batch = scaled_train[-12:]

first_eval_batch = first_eval_batch.reshape((1, n_input, n_features))

model.predict(first_eval_batch)

array([[0.78138685]], dtype=float32)

Forecast

test_predictions = []
first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))

for i in range(len(test)):
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)
    current_batch = np.append(current_batch[:,1:,:], [[current_pred]], axis=1)

test_predictions

[array([0.78138685], dtype=float32),
 array([0.9095711], dtype=float32),
 array([0.85770994], dtype=float32),
 array([1.0188285], dtype=float32),
 array([1.0919129], dtype=float32),
 array([0.84825593], dtype=float32),
 array([1.001781], dtype=float32),
 array([0.87678725], dtype=float32),
 array([0.9534033], dtype=float32),
 array([0.9890802], dtype=float32),
 array([1.060493], dtype=float32),
 array([0.696439], dtype=float32)]

true_predictions = scaler.inverse_transform(test_predictions)

true_predictions

array([[12172.44478464],
       [13672.07242996],
       [13065.34863776],
       [14950.27475297],
       [15805.28861511],
       [12954.74615234],
       [14750.83576441],
       [13288.53398246],
       [14184.86513752],
       [14602.24915051],
       [15437.7075181 ],
       [11178.64018607]])

test['Predictions'] = true_predictions

test

	S4248SM144NCEN	Predictions
DATE
2018-02-01	10415	12172.444785
2018-03-01	12683	13672.072430
2018-04-01	11919	13065.348638
2018-05-01	14138	14950.274753
2018-06-01	14583	15805.288615
2018-07-01	12640	12954.746152
2018-08-01	14257	14750.835764
2018-09-01	12396	13288.533982
2018-10-01	13914	14184.865138
2018-11-01	14174	14602.249151
2018-12-01	15504	15437.707518
2019-01-01	10718	11178.640186

test.plot(figsize=(12,8))

<AxesSubplot:xlabel='DATE'>

png

test['Pred_ARIMA'] = pred

print(rmse(test['S4248SM144NCEN'], test['Predictions']))
print(rmse(test['S4248SM144NCEN'], test['Pred_ARIMA']))

873.1015843567056
458.91865971479376