Wine Price Time Series Analysis¶
In this notebook, I'm going to analyze wine prices using domestic wine production, exports, imports, average wine prices, and population datasets.
Contents¶
- Setup
- Granger Causality Tests
- Stationarity Transformations
- Modeling
- Lag Order Selection
- Fit the Model
- Serial Correlation of Residuals
- Forecasting
- Accuracy
- Conclusion
Setup¶
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.style as style
from matplotlib.ticker import FuncFormatter
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.api import VAR
from statsmodels.stats.stattools import durbin_watson
# Making plots a bit more accessible
style.use('fivethirtyeight')
sns.set_palette('colorblind')
Import Data¶
df = pd.read_excel('../data/master-data.xlsx')
df.head()
Unnamed: 0 | month | population | price | price_adj | di | bulk | bottled | cider | effervescent | ... | landed_duty_paid_value_ukfrspde_imports | landed_duty_paid_value_adj_ukfrspde_imports | customs_value_ukfrspde_imports | customs_value_adj_ukfrspde_imports | quantity_ukfrspde_imports | charges_insurance_freight_ukfrspde_imports | charges_insurance_freight_adj_ukfrspde_imports | calculated_duties_ukfrspde_imports | calculated_duties_adj_ukfrspde_imports | frspger_25 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2000-01-31 | 281083000 | 5.458 | 5.299029 | 9309.1 | 1.131505e+08 | 1.244070e+08 | NaN | 6.909175e+06 | ... | 59812261 | 5.807016e+07 | 56706027 | 5.505440e+07 | 8768676 | 2406931 | 2.336826e+06 | 699303 | 678934.951456 | 0 |
1 | 1 | 2000-02-29 | 281299000 | 5.256 | 5.198813 | 9345.2 | 7.179357e+07 | 1.375283e+08 | NaN | 4.377026e+06 | ... | 77087577 | 7.624884e+07 | 73873201 | 7.306944e+07 | 8961916 | 2356486 | 2.330847e+06 | 857890 | 848555.885262 | 0 |
2 | 2 | 2000-03-31 | 281531000 | 5.471 | 5.311650 | 9370.3 | 4.635628e+07 | 1.603837e+08 | NaN | 9.321474e+06 | ... | 87219165 | 8.467880e+07 | 83500840 | 8.106878e+07 | 10474993 | 2804317 | 2.722638e+06 | 914008 | 887386.407767 | 0 |
3 | 3 | 2000-04-30 | 281763000 | 5.156 | 5.104950 | 9418.3 | 3.296724e+07 | 1.423004e+08 | 2.045088e+06 | 7.881046e+06 | ... | 87040067 | 8.617828e+07 | 83075769 | 8.225324e+07 | 11128077 | 2989501 | 2.959902e+06 | 974797 | 965145.544554 | 0 |
4 | 4 | 2000-05-31 | 281996000 | 5.530 | 5.426889 | 9457.3 | 3.178035e+07 | 1.612658e+08 | 6.959646e+06 | 6.334834e+06 | ... | 79534639 | 7.805166e+07 | 75599523 | 7.418991e+07 | 10874051 | 3037785 | 2.981143e+06 | 897331 | 880599.607458 | 0 |
5 rows × 83 columns
df.set_index('month', inplace=True)
df.drop(columns=['Unnamed: 0'], axis=1, inplace=True)
df.head()
population | price | price_adj | di | bulk | bottled | cider | effervescent | wine_gross | bulk_adj | ... | landed_duty_paid_value_ukfrspde_imports | landed_duty_paid_value_adj_ukfrspde_imports | customs_value_ukfrspde_imports | customs_value_adj_ukfrspde_imports | quantity_ukfrspde_imports | charges_insurance_freight_ukfrspde_imports | charges_insurance_freight_adj_ukfrspde_imports | calculated_duties_ukfrspde_imports | calculated_duties_adj_ukfrspde_imports | frspger_25 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
month | |||||||||||||||||||||
2000-01-31 | 281083000 | 5.458 | 5.299029 | 9309.1 | 1.131505e+08 | 1.244070e+08 | NaN | 6.909175e+06 | 2.444667e+08 | 0.402552 | ... | 59812261 | 5.807016e+07 | 56706027 | 5.505440e+07 | 8768676 | 2406931 | 2.336826e+06 | 699303 | 678934.951456 | 0 |
2000-02-29 | 281299000 | 5.256 | 5.198813 | 9345.2 | 7.179357e+07 | 1.375283e+08 | NaN | 4.377026e+06 | 2.136989e+08 | 0.255222 | ... | 77087577 | 7.624884e+07 | 73873201 | 7.306944e+07 | 8961916 | 2356486 | 2.330847e+06 | 857890 | 848555.885262 | 0 |
2000-03-31 | 281531000 | 5.471 | 5.311650 | 9370.3 | 4.635628e+07 | 1.603837e+08 | NaN | 9.321474e+06 | 2.160614e+08 | 0.164658 | ... | 87219165 | 8.467880e+07 | 83500840 | 8.106878e+07 | 10474993 | 2804317 | 2.722638e+06 | 914008 | 887386.407767 | 0 |
2000-04-30 | 281763000 | 5.156 | 5.104950 | 9418.3 | 3.296724e+07 | 1.423004e+08 | 2.045088e+06 | 7.881046e+06 | 1.811036e+08 | 0.117003 | ... | 87040067 | 8.617828e+07 | 83075769 | 8.225324e+07 | 11128077 | 2989501 | 2.959902e+06 | 974797 | 965145.544554 | 0 |
2000-05-31 | 281996000 | 5.530 | 5.426889 | 9457.3 | 3.178035e+07 | 1.612658e+08 | 6.959646e+06 | 6.334834e+06 | 1.924214e+08 | 0.112698 | ... | 79534639 | 7.805166e+07 | 75599523 | 7.418991e+07 | 10874051 | 3037785 | 2.981143e+06 | 897331 | 880599.607458 | 0 |
5 rows × 81 columns
Granger Causality Tests¶
maxlag=16
test = 'ssr_chi2test'
def grangers_causation_matrix(data, variables, test=test, verbose=False):
"""Check Granger Causality of all possible combinations of the Time series.
The rows are the response variable, columns are predictors. The values in the table
are the P-Values. P-Values lesser than the significance level (0.05), implies
the Null Hypothesis that the coefficients of the corresponding past values is
zero, that is, the X does not cause Y can be rejected.
data : pandas dataframe containing the time series variables
variables : list containing names of the time series variables.
"""
granger_df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
for c in granger_df.columns:
for r in granger_df.index:
test_result = grangercausalitytests(data[[r, c]], maxlag=16, verbose=False)
p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
min_p_value = np.min(p_values)
granger_df.loc[r, c] = min_p_value
granger_df.columns = [var + '_x' for var in variables]
granger_df.index = [var + '_y' for var in variables]
return granger_df
df.columns
Index(['population', 'price', 'price_adj', 'di', 'bulk', 'bottled', 'cider', 'effervescent', 'wine_gross', 'bulk_adj', 'bottled_adj', 'cider_adj', 'effervescent_adj', 'wine_gross_adj', 'charges_insurance_freight_italy_imports', 'charges_insurance_freight_adj_italy_imports', 'calculated_duties_italy_imports', 'calculated_duties_adj_italy_imports', 'dutiable_value_italy_imports', 'dutiable_value_adj_italy_imports', 'landed_duty_paid_value_italy_imports', 'landed_duty_paid_value_adj_italy_imports', 'quantity_italy_imports', 'quantity_adj_italy_imports', 'customs_value_italy_imports', 'customs_value_adj_italy_imports', 'charges_insurance_freight_australia_imports', 'charges_insurance_freight_adj_australia_imports', 'calculated_duties_australia_imports', 'calculated_duties_adj_australia_imports', 'dutiable_value_australia_imports', 'dutiable_value_adj_australia_imports', 'landed_duty_paid_value_australia_imports', 'landed_duty_paid_value_adj_australia_imports', 'quantity_australia_imports', 'quantity_adj_australia_imports', 'customs_value_australia_imports', 'customs_value_adj_australia_imports', 'charges_insurance_freight_chile_imports', 'charges_insurance_freight_adj_chile_imports', 'calculated_duties_chile_imports', 'calculated_duties_adj_chile_imports', 'dutiable_value_chile_imports', 'dutiable_value_adj_chile_imports', 'landed_duty_paid_value_chile_imports', 'landed_duty_paid_value_adj_chile_imports', 'quantity_chile_imports', 'quantity_adj_chile_imports', 'customs_value_chile_imports', 'customs_value_adj_chile_imports', 'cif_top3_adj', 'calc_duties_top3_adj', 'duty_val_top3_adj', 'duty_paid_val_top3_adj', 'quantity_top3', 'customs_value_top3_adj', 'quantity_exports', 'fas_value_adj_exports', 'dutiable_value_world_imports', 'dutiable_value_adj_world_imports', 'landed_duty_paid_value_world_imports', 'landed_duty_paid_value_adj_world_imports', 'customs_value_world_imports', 'customs_value_adj_world_imports', 'quantity_world_imports', 'charges_insurance_freight_world_imports', 'charges_insurance_freight_adj_world_imports', 'calculated_duties_world_imports', 'calculated_duties_adj_world_imports', 'dutiable_value_ukfrspde_imports', 'dutiable_value_adj_ukfrspde_imports', 'landed_duty_paid_value_ukfrspde_imports', 'landed_duty_paid_value_adj_ukfrspde_imports', 'customs_value_ukfrspde_imports', 'customs_value_adj_ukfrspde_imports', 'quantity_ukfrspde_imports', 'charges_insurance_freight_ukfrspde_imports', 'charges_insurance_freight_adj_ukfrspde_imports', 'calculated_duties_ukfrspde_imports', 'calculated_duties_adj_ukfrspde_imports', 'frspger_25'], dtype='object')
gc_df = df[['population', 'price_adj', 'bulk', 'bottled',
'dutiable_value_adj_world_imports', 'calculated_duties_adj_world_imports',
'dutiable_value_adj_ukfrspde_imports', 'calculated_duties_adj_ukfrspde_imports',
'quantity_world_imports', 'quantity_ukfrspde_imports']].copy()
granger_results = grangers_causation_matrix(data=df.dropna(), variables=df.columns)
granger_results.to_excel('../granger_results_master_df.xlsx')
gc_df.rename(columns={
'population': 'pop',
'price_adj': 'price',
'dutiable_value_adj_world_imports': 'value world imports',
'dutiable_value_adj_ukfrspde_imports': 'value subset imports',
'calculated_duties_adj_world_imports': 'duties charges world',
'calculated_duties_adj_ukfrspde_imports': 'duties charges subset',
'quantity_world_imports': 'total world imports',
'quantity_ukfrspde_imports': 'total subset imports'
}, inplace=True)
gc_test_results = grangers_causation_matrix(data=gc_df.dropna(), variables=gc_df.columns)
print(gc_test_results.to_latex())
\begin{tabular}{lrrrrrrrrrr} \toprule {} & pop\_x & price\_x & bulk\_x & bottled\_x & value world imports\_x & duties charges world\_x & value subset imports\_x & duties charges subset\_x & total world imports\_x & total subset imports\_x \\ \midrule pop\_y & 1.0 & 0.0001 & 0.0000 & 0.0000 & 0.0000 & 0.0000 & 0.0 & 0.0000 & 0.0000 & 0.0000 \\ price\_y & 0.0 & 1.0000 & 0.1349 & 0.0000 & 0.0000 & 0.0016 & 0.0 & 0.0283 & 0.0000 & 0.0000 \\ bulk\_y & 0.0 & 0.0000 & 1.0000 & 0.0000 & 0.0000 & 0.0000 & 0.0 & 0.0025 & 0.0000 & 0.0000 \\ bottled\_y & 0.0 & 0.0000 & 0.0000 & 1.0000 & 0.0000 & 0.0000 & 0.0 & 0.0019 & 0.0000 & 0.0000 \\ value world imports\_y & 0.0 & 0.0000 & 0.0000 & 0.0000 & 1.0000 & 0.0000 & 0.0 & 0.0000 & 0.0001 & 0.0000 \\ duties charges world\_y & 0.0 & 0.1854 & 0.0000 & 0.0000 & 0.0000 & 1.0000 & 0.0 & 0.0000 & 0.0000 & 0.0000 \\ value subset imports\_y & 0.0 & 0.0000 & 0.0000 & 0.0000 & 0.0000 & 0.0000 & 1.0 & 0.0000 & 0.0000 & 0.0000 \\ duties charges subset\_y & 0.0 & 0.4606 & 0.0002 & 0.0239 & 0.0000 & 0.0314 & 0.0 & 1.0000 & 0.0000 & 0.0000 \\ total world imports\_y & 0.0 & 0.0000 & 0.0000 & 0.0000 & 0.0014 & 0.0178 & 0.0 & 0.0316 & 1.0000 & 0.0002 \\ total subset imports\_y & 0.0 & 0.0000 & 0.0000 & 0.0000 & 0.0000 & 0.0000 & 0.0 & 0.0000 & 0.0000 & 1.0000 \\ \bottomrule \end{tabular}
Stationary Transformation¶
Before getting started, let's define the Dickey-Fuller test for checking for stationarity.
The hypothesis for the Augmented Dickey-Fuller test is as follows: $$h_0: \text{The series has a unit root}$$ $$h_1: \text{The series does not have a unit root}$$
def adf(col):
print('Augmented Dickey-Fuller Test:')
unit_root_test = adfuller(col, autolag='AIC')
dfoutput = pd.Series(unit_root_test[0:4], index=['t-stat:','p-value:','lags:','observations:'])
for key, value in unit_root_test[4].items():
dfoutput['critical value (%s):' % key] = value
print (dfoutput)
Let's also create a dataframe that contains all of the inputs to our multivariate timeseries model.
ts_df = df[['frspger_25']].copy()
Price¶
Let's start with the price data.
# Convert the additional 25% tariff indicator to a boolean
df['frspger_25'] = df['frspger_25'].astype('bool')
price_line_plot = sns.lineplot(data=df['price'].rolling(3).mean())
price_line_plot.set(title='Avg Wine Price in U.S. City (3-Month Avg)', ylabel='Average Price', xlabel='Month')
# shade in the timespan of the additional tariff
price_line_plot.axvspan(
xmin=df['frspger_25'].where(df['frspger_25']).first_valid_index(),
xmax=df['frspger_25'].where(df['frspger_25']).last_valid_index(),
color='gray',
alpha=0.2
)
price_line_plot.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '${:.0f}'.format(y)))
plt.gcf().set_size_inches(12, 8)
plt.savefig('../figures/avg-wine-price-in-us-city.png')
plt.show()
price_line_2018_plot = sns.lineplot(data=df.loc['2018-01-01':'2021-12-31']['price_adj'].rolling(3).mean())
price_line_2018_plot.set(title='Avg Wine Price in U.S. City 2018 to Present (3-Month Avg)', ylabel='Average Price', xlabel='Month')
# shade in the timespan of the additional tariff
price_line_2018_plot.axvspan(
xmin=df['frspger_25'].where(df['frspger_25']).first_valid_index(),
xmax=df['frspger_25'].where(df['frspger_25']).last_valid_index(),
color='gray',
alpha=0.2
)
price_line_2018_plot.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '${:.2f}'.format(y)))
plt.gcf().set_size_inches(12, 8)
plt.savefig('../figures/avg-wine-price-in-us-city-2018.png')
plt.show()
Alright, let's run the augmented Dickey-Fuller test.
adf(df['price_adj'])
Augmented Dickey-Fuller Test: t-stat: -1.863547 p-value: 0.349399 lags: 9.000000 observations: 254.000000 critical value (1%): -3.456360 critical value (5%): -2.872987 critical value (10%): -2.572870 dtype: float64
Since the test statistic is greater than the critical value (at the 5% level), we fail to reject the null hypothesis; the series doesn't have a unit root and is therefore non-stationary.
So I do need to make some adjustments to the series to make it stationary.
real_price_diff1 = df['price_adj'] - df['price_adj'].shift(1)
sns.lineplot(data=real_price_diff1)
<AxesSubplot:xlabel='month', ylabel='price_adj'>
adf(real_price_diff1.dropna())
Augmented Dickey-Fuller Test: t-stat: -8.068729e+00 p-value: 1.570433e-12 lags: 8.000000e+00 observations: 2.540000e+02 critical value (1%): -3.456360e+00 critical value (5%): -2.872987e+00 critical value (10%): -2.572870e+00 dtype: float64
It looks like taking the first difference of the data worked to pass the Dickey-Fuller test.
ts_df['price_adj_diff1'] = real_price_diff1
Domestic Wine Production¶
Bottled¶
domestic_production_df = df[['bulk', 'bottled', 'cider', 'effervescent', 'population']].copy()
domestic_production_df.head()
bulk | bottled | cider | effervescent | population | |
---|---|---|---|---|---|
month | |||||
2000-01-31 | 1.131505e+08 | 1.244070e+08 | NaN | 6.909175e+06 | 281083000 |
2000-02-29 | 7.179357e+07 | 1.375283e+08 | NaN | 4.377026e+06 | 281299000 |
2000-03-31 | 4.635628e+07 | 1.603837e+08 | NaN | 9.321474e+06 | 281531000 |
2000-04-30 | 3.296724e+07 | 1.423004e+08 | 2.045088e+06 | 7.881046e+06 | 281763000 |
2000-05-31 | 3.178035e+07 | 1.612658e+08 | 6.959646e+06 | 6.334834e+06 | 281996000 |
I'm not interested in the distinction between carbonated wines and regular wine or cider and wine. The Alcohol and Tobacco Tax and Trade Bureau (TTB) data had Bottled wine production broken into Still Wine, Cider, and Effervescent. I'm going to subtract out the cider and add in the effervescent wine to the bottled category.
domestic_production_df['bottled_total'] = domestic_production_df['bottled'] - domestic_production_df['cider'] + domestic_production_df['effervescent']
bottled_seasonal_decompose = seasonal_decompose(domestic_production_df['bottled_total'].dropna(), model='multiplicative', period=12)
bottled_seas = domestic_production_df['bottled_total'] - bottled_seasonal_decompose.seasonal
bulk_bottled_lineplot = sns.lineplot(data=bottled_seas)
bulk_bottled_lineplot.set(title='Bottled Wine Production in U.S.', xlabel='Month', ylabel='Liters Per Capita of Wine Production')
plt.show()
style.use('default')
bottled_seasonal_decompose.plot()
plt.show()
bulk_bottled_lineplot = sns.lineplot(data=bottled_seasonal_decompose.observed)
bulk_bottled_lineplot.set(title='Bottled Wine Production in U.S.', xlabel='Month', ylabel='Liters of Wine Production')
plt.show()
# test_data_diffed = domestic_production_df['bottled_total'] - domestic_production_df['bottled_total'].shift(1)
bottled_seasonal_decompose = seasonal_decompose(domestic_production_df['bottled_total'].dropna(), model='multiplicative', period=12)
test_data = bottled_seasonal_decompose.seasonal * bottled_seasonal_decompose.trend * bottled_seasonal_decompose.resid * bottled_seasonal_decompose.weights
test_data_seas = bottled_seasonal_decompose.observed / bottled_seasonal_decompose.seasonal / bottled_seasonal_decompose.trend / bottled_seasonal_decompose.weights
bulk_bottled_lineplot = sns.lineplot(data=test_data_seas)
bulk_bottled_lineplot.set(title='Bottled Wine Production in U.S.', xlabel='Month', ylabel='Liters of Wine Production')
plt.show()
test_seas = seasonal_decompose(test_data_seas.dropna())
test_seas.plot()
plt.show()
bulk_bottled_lineplot = sns.lineplot(data=bottled_seasonal_decompose.observed)
bulk_bottled_lineplot.set(title='Bottled Wine Production in U.S.', xlabel='Month', ylabel='Liters of Wine Production')
plt.show()
test_data_seas_rec = test_data_seas * bottled_seasonal_decompose.seasonal * bottled_seasonal_decompose.trend * bottled_seasonal_decompose.weights
bulk_bottled_lineplot = sns.lineplot(data=test_data_seas_rec)
bulk_bottled_lineplot.set(title='Bottled Wine Production in U.S.', xlabel='Month', ylabel='Liters of Wine Production')
plt.show()
from statsmodels.datasets import co2
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
data = co2.load(True).data
data = data.resample('M').mean().ffill()
from statsmodels.tsa.seasonal import STL
res = STL(domestic_production_df['bottled_total'].dropna(), seasonal_deg=1, trend_deg=1, robust=True).fit()
res.plot()
plt.show()
test_seas_adj = res.observed - res.seasonal - res.trend - res.weights
sns.lineplot(data=test_seas_adj)
<AxesSubplot:xlabel='month'>
test_seas_recompose = test_seas_adj + res.seasonal + res.trend + res.weights
sns.lineplot(data=test_seas_recompose)
<AxesSubplot:xlabel='month'>
sns.lineplot(data=res.observed)
<AxesSubplot:xlabel='month', ylabel='bottled_total'>
for c in domestic_production_df.columns:
if c != 'population':
col_name = c + '_per_capita'
domestic_production_df[col_name] = domestic_production_df[c] / domestic_production_df['population']
bulk_bottled_lineplot = sns.lineplot(data=domestic_production_df['bottled_total_per_capita'])
bulk_bottled_lineplot.set(title='Bottled Wine Production in U.S.', xlabel='Month', ylabel='Liters Per Capita of Wine Production')
plt.show()
It looks like there's both an upward trend over time and some seasonality. Let's take a first diff and then run the ADF test. If it doesn't pass, let's look closer at the seasonality and then take another diff accordingly.
domestic_production_df['bottled_total_per_capita_diff1'] = domestic_production_df['bottled_total_per_capita'] - domestic_production_df['bottled_total_per_capita'].shift(1)
wine_production_bottled_diff_plot = sns.lineplot(data=domestic_production_df['bottled_total_per_capita_diff1'])
wine_production_bottled_diff_plot.set(title='First Diff of Per-Capita U.S. Wine Production', xlabel='Month', ylabel='Difference in Liters Per Capita')
adf(domestic_production_df['bottled_total_per_capita_diff1'].dropna())
plt.show()
Augmented Dickey-Fuller Test: t-stat: -4.794225 p-value: 0.000056 lags: 14.000000 observations: 243.000000 critical value (1%): -3.457551 critical value (5%): -2.873509 critical value (10%): -2.573148 dtype: float64
Alright, that failed the ADF test.
decompose_result_mult = seasonal_decompose(domestic_production_df['bottled_total_per_capita'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot();
It definitely looks like there's seasonality in the data. Let's take a closer look at the seasonality to help identify the cycle.
seasonality_bottled_plot = seasonal['2010-01-01':'2013-01-01'].plot()
seasonality_bottled_plot.set(title='Seasonality of Per-Capita Wine Production')
plt.show()
It looks like there's an annual cycle of wine production.
domestic_production_df['bottled_total_per_capita_diff1_diff12'] = domestic_production_df['bottled_total_per_capita_diff1'] - domestic_production_df['bottled_total_per_capita_diff1'].shift(12)
adf(domestic_production_df['bottled_total_per_capita_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.796768e+00 p-value: 2.290410e-09 lags: 1.500000e+01 observations: 2.300000e+02 critical value (1%): -3.459106e+00 critical value (5%): -2.874190e+00 critical value (10%): -2.573512e+00 dtype: float64
It looks like taking the second diff worked and we're now passing the ADF test.
ts_df = ts_df.merge(domestic_production_df['bottled_total_per_capita_diff1_diff12'], left_index=True, right_index=True)
Bulk Wine¶
sns.lineplot(data=domestic_production_df['bulk'])
<AxesSubplot:xlabel='month', ylabel='bulk'>
It doesn't really look like there's an upward trend in bulk wine production. But it does look like there's lots of seasonality.
decompose_result_mult = seasonal_decompose(domestic_production_df['bulk'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
seasonality_bulk_plot = seasonal['2010-01-01':'2013-01-01'].plot()
seasonality_bulk_plot.set(title='Seasonality of Per-Capita Wine Production')
plt.show()
domestic_production_df['bulk_diff12'] = domestic_production_df['bulk'] - domestic_production_df['bulk'].shift(12)
domestic_production_df['bulk_per_capita_diff12'] = domestic_production_df['bulk_per_capita'] - domestic_production_df['bulk_per_capita'].shift(12)
adf(domestic_production_df['bulk_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -5.496346 p-value: 0.000002 lags: 12.000000 observations: 237.000000 critical value (1%): -3.458247 critical value (5%): -2.873814 critical value (10%): -2.573311 dtype: float64
adf(domestic_production_df['bulk_per_capita_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -5.662271e+00 p-value: 9.321169e-07 lags: 1.200000e+01 observations: 2.370000e+02 critical value (1%): -3.458247e+00 critical value (5%): -2.873814e+00 critical value (10%): -2.573311e+00 dtype: float64
Correcting for the seasonality of bulk wine production allowed the data to pass the ADF test.
ts_df = ts_df.merge(domestic_production_df[['bulk_diff12', 'bulk_per_capita_diff12']], left_index=True, right_index=True)
Wine Exports¶
Alright, let's look at the exports data. I think it'll be good to have a variable that's non-basic wine production (wine production that's used for some form of domestic consumption or input). So I'll start by defining a variable for non-basic wine quantity, nonbasic_quantity
.
df['nonbasic_quantity'] = df['bottled'] - df['cider'] - df['quantity_exports']
So we'll transform nonbasic_quantity
, quantity_exports
, and fas_value_adj_exports
variables to be stationary. And we'll also create per-capita variables for those datapoints.
exports_df = df[['nonbasic_quantity', 'quantity_exports', 'fas_value_adj_exports', 'population']].copy()
for c in exports_df.columns:
if c != 'population':
col_name = c + '_per_capita'
exports_df[col_name] = exports_df[c] / exports_df['population']
Non-Basic Wine Production¶
sns.lineplot(data=exports_df['nonbasic_quantity'])
<AxesSubplot:xlabel='month', ylabel='nonbasic_quantity'>
nonbasic_wine_production_plot = sns.lineplot(data=exports_df.loc['2010-01-01':'2021-12-31']['nonbasic_quantity_per_capita'].rolling(3).mean())
nonbasic_wine_production_plot.set(title='Non-Basic Wine Production (3-Month Avg)', ylabel='Non-Basic Quantity of Liters Per Capita', xlabel='Month')
# shade in the timespan of the additional tariff
nonbasic_wine_production_plot.axvspan(
xmin=df['frspger_25'].where(df['frspger_25']).first_valid_index(),
xmax=df['frspger_25'].where(df['frspger_25']).last_valid_index(),
color='gray',
alpha=0.2
)
# nonbasic_wine_production_plot.yaxis.set_major_formatter(FuncFormatter(lambda x, _: '{0:g}'.format(x/1e6)))
plt.gcf().set_size_inches(12, 8)
plt.savefig('../figures/non-basic-quantity-per-capita-2010.png')
plt.show()
For non-basic wine in the U.S., we see both some seasonality and an upward trend. This makes sense since it includes both bottled and bulk wine production.
decompose_result_mult = seasonal_decompose(exports_df['nonbasic_quantity'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
# diff non-basic quantity
exports_df['nonbasic_quantity_diff1'] = exports_df['nonbasic_quantity'] - exports_df['nonbasic_quantity'].shift(1)
exports_df['nonbasic_quantity_diff1_diff12'] = exports_df['nonbasic_quantity_diff1'] - exports_df['nonbasic_quantity_diff1'].shift(12)
# diff non-basic quantity per-capita
exports_df['nonbasic_quantity_per_capita_diff1'] = exports_df['nonbasic_quantity_per_capita'] - exports_df['nonbasic_quantity_per_capita'].shift(1)
exports_df['nonbasic_quantity_per_capita_diff1_diff12'] = exports_df['nonbasic_quantity_per_capita_diff1'] - exports_df['nonbasic_quantity_per_capita_diff1'].shift(12)
adf(exports_df['nonbasic_quantity_diff1_diff12'].dropna())
adf(exports_df['nonbasic_quantity_per_capita_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.669032e+00 p-value: 4.638129e-09 lags: 1.500000e+01 observations: 2.300000e+02 critical value (1%): -3.459106e+00 critical value (5%): -2.874190e+00 critical value (10%): -2.573512e+00 dtype: float64 Augmented Dickey-Fuller Test: t-stat: -6.674146e+00 p-value: 4.509486e-09 lags: 1.500000e+01 observations: 2.300000e+02 critical value (1%): -3.459106e+00 critical value (5%): -2.874190e+00 critical value (10%): -2.573512e+00 dtype: float64
Alright, so the nonbasic_quantity_*
variables are passing.
ts_df = ts_df.merge(exports_df[['nonbasic_quantity_diff1_diff12', 'nonbasic_quantity_per_capita_diff1_diff12']], left_index=True, right_index=True)
Exports Quantity¶
decompose_result_mult = seasonal_decompose(exports_df['quantity_exports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
This looks like there may just be a seasonal trend in it.
seasonality_bulk_plot = seasonal['2010-01-01':'2013-01-01'].plot()
seasonality_bulk_plot.set(title='Seasonality of Quantity of Wine Exports')
plt.show()
Let's do a 12-month diff and then run the ADF test.
exports_df['quantity_exports_diff12'] = exports_df['quantity_exports'] - exports_df['quantity_exports'].shift(12)
adf(exports_df['quantity_exports_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -4.698711 p-value: 0.000085 lags: 12.000000 observations: 239.000000 critical value (1%): -3.458011 critical value (5%): -2.873710 critical value (10%): -2.573256 dtype: float64
Cool, that passes.
ts_df = ts_df.merge(exports_df[['quantity_exports_diff12']], left_index=True, right_index=True)
Real FAS Value of Exports¶
decompose_result_mult = seasonal_decompose(exports_df['fas_value_adj_exports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
It looks like FAS Value has an upward trend and seasonality. From inspection, the seasonality seems to be in a 12-month cycle. So let's just take the diffs and then run the ADF test.
exports_df['fas_value_adj_exports_diff1'] = exports_df['fas_value_adj_exports'] - exports_df['fas_value_adj_exports'].shift(1)
exports_df['fas_value_adj_exports_diff1_diff12'] = exports_df['fas_value_adj_exports_diff1'] - exports_df['fas_value_adj_exports_diff1'].shift(12)
adf(exports_df['fas_value_adj_exports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.410001e+00 p-value: 1.901197e-08 lags: 1.600000e+01 observations: 2.340000e+02 critical value (1%): -3.458608e+00 critical value (5%): -2.873972e+00 critical value (10%): -2.573396e+00 dtype: float64
That worked.
ts_df = ts_df.merge(exports_df['fas_value_adj_exports_diff1_diff12'], left_index=True, right_index=True)
Imports¶
Now we'll adjust imports for stationarity. It may be good to add a variable that represents the percentage of world totals that are affected by the additional tariffs. So I'll do that first and also adjust for per-capita values.
imports_df = df[['dutiable_value_world_imports',
'dutiable_value_adj_world_imports',
'landed_duty_paid_value_world_imports',
'landed_duty_paid_value_adj_world_imports',
'customs_value_world_imports', 'customs_value_adj_world_imports',
'quantity_world_imports', 'charges_insurance_freight_world_imports',
'charges_insurance_freight_adj_world_imports',
'calculated_duties_world_imports',
'calculated_duties_adj_world_imports',
'dutiable_value_ukfrspde_imports',
'dutiable_value_adj_ukfrspde_imports',
'landed_duty_paid_value_ukfrspde_imports',
'landed_duty_paid_value_adj_ukfrspde_imports',
'customs_value_ukfrspde_imports', 'customs_value_adj_ukfrspde_imports',
'quantity_ukfrspde_imports',
'charges_insurance_freight_ukfrspde_imports',
'charges_insurance_freight_adj_ukfrspde_imports',
'calculated_duties_ukfrspde_imports',
'calculated_duties_adj_ukfrspde_imports', 'population']].copy()
# Proportion of imports' values provided by UK, Fr, Sp, and De
imports_df['dutiable_value_ukfrspde_proportion_imports'] = imports_df['dutiable_value_adj_ukfrspde_imports'] / imports_df['dutiable_value_adj_world_imports']
imports_df['landed_duty_paid_ukfrspde_proportion_imports'] = imports_df['landed_duty_paid_value_adj_ukfrspde_imports'] / imports_df['landed_duty_paid_value_adj_world_imports']
imports_df['customs_value_ukfrspde_proportion_imports'] = imports_df['customs_value_adj_ukfrspde_imports'] / imports_df['customs_value_adj_world_imports']
imports_df['quantity_ukfrspde_proportion_imports'] = imports_df['quantity_ukfrspde_imports'] / imports_df['quantity_world_imports']
imports_df['charges_insurance_freight_ukfrspde_proportion_imports'] = imports_df['charges_insurance_freight_adj_ukfrspde_imports'] / imports_df['charges_insurance_freight_adj_world_imports']
imports_df['calculated_duties_ukfrspde_proportion_imports'] = imports_df['calculated_duties_adj_ukfrspde_imports'] / imports_df['calculated_duties_adj_world_imports']
# Per-capita quantities of wine imports
imports_df['quantity_world_per_capita_imports'] = imports_df['quantity_world_imports'] / imports_df['population']
imports_df['quantity_ukfrspde_per_capita_imports'] = imports_df['quantity_ukfrspde_imports'] / imports_df['population']
# Incorporate new fields into original dataframe
df['dutiable_value_ukfrspde_proportion_imports'] = imports_df['dutiable_value_ukfrspde_proportion_imports']
df['landed_duty_paid_ukfrspde_proportion_imports'] = imports_df['landed_duty_paid_ukfrspde_proportion_imports']
df['customs_value_ukfrspde_proportion_imports'] = imports_df['customs_value_ukfrspde_proportion_imports']
df['quantity_ukfrspde_proportion_imports'] = imports_df['quantity_ukfrspde_proportion_imports']
df['charges_insurance_freight_ukfrspde_proportion_imports'] = imports_df['charges_insurance_freight_ukfrspde_proportion_imports']
df['calculated_duties_ukfrspde_proportion_imports'] = imports_df['calculated_duties_ukfrspde_proportion_imports']
df['quantity_world_per_capita_imports'] = imports_df['quantity_world_per_capita_imports']
df['quantity_ukfrspde_per_capita_imports'] = imports_df['quantity_ukfrspde_per_capita_imports']
U.K., France, Spain, Germany¶
cols = []
for c in imports_df.columns:
if 'ukfrspde' in c:
cols.append(c)
imports_subset1_df = imports_df[cols].copy()
imports_subset1_df = imports_subset1_df[['customs_value_adj_ukfrspde_imports', 'calculated_duties_adj_ukfrspde_imports',
'charges_insurance_freight_adj_ukfrspde_imports', 'quantity_ukfrspde_imports',
'quantity_ukfrspde_proportion_imports', 'quantity_ukfrspde_per_capita_imports']]
Customs Value¶
adf(imports_subset1_df['customs_value_adj_ukfrspde_imports'].dropna())
Augmented Dickey-Fuller Test: t-stat: -1.522449 p-value: 0.522378 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset1_df['customs_value_adj_ukfrspde_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset1_df['customs_value_adj_ukfrspde_imports_diff1'] = imports_subset1_df['customs_value_adj_ukfrspde_imports'] - imports_subset1_df['customs_value_adj_ukfrspde_imports'].shift(1)
imports_subset1_df['customs_value_adj_ukfrspde_imports_diff1_diff12'] = imports_subset1_df['customs_value_adj_ukfrspde_imports_diff1'] - imports_subset1_df['customs_value_adj_ukfrspde_imports_diff1'].shift(12)
adf(imports_subset1_df['customs_value_adj_ukfrspde_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -7.935206e+00 p-value: 3.428209e-12 lags: 1.300000e+01 observations: 2.370000e+02 critical value (1%): -3.458247e+00 critical value (5%): -2.873814e+00 critical value (10%): -2.573311e+00 dtype: float64
ts_df = ts_df.merge(imports_subset1_df['customs_value_adj_ukfrspde_imports_diff1_diff12'], left_index=True, right_index=True)
Calculated Duties¶
adf(imports_subset1_df['calculated_duties_adj_ukfrspde_imports'])
Augmented Dickey-Fuller Test: t-stat: -3.654396 p-value: 0.004802 lags: 15.000000 observations: 248.000000 critical value (1%): -3.456996 critical value (5%): -2.873266 critical value (10%): -2.573019 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset1_df['calculated_duties_adj_ukfrspde_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset1_df['calculated_duties_adj_ukfrspde_imports_diff1'] = imports_subset1_df['calculated_duties_adj_ukfrspde_imports'] - imports_subset1_df['calculated_duties_adj_ukfrspde_imports'].shift(1)
imports_subset1_df['calculated_duties_adj_ukfrspde_imports_diff1_diff12'] = imports_subset1_df['calculated_duties_adj_ukfrspde_imports_diff1'] - imports_subset1_df['calculated_duties_adj_ukfrspde_imports_diff1'].shift(12)
adf(imports_subset1_df['calculated_duties_adj_ukfrspde_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -5.431072 p-value: 0.000003 lags: 16.000000 observations: 234.000000 critical value (1%): -3.458608 critical value (5%): -2.873972 critical value (10%): -2.573396 dtype: float64
ts_df = ts_df.merge(imports_subset1_df['calculated_duties_adj_ukfrspde_imports_diff1_diff12'], left_index=True, right_index=True)
Charges, Insurance, and Freight¶
adf(imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports'])
Augmented Dickey-Fuller Test: t-stat: -1.451255 p-value: 0.557473 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports_diff1'] = imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports'] - imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports'].shift(1)
imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12'] = imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports_diff1'] - imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports_diff1'].shift(12)
adf(imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.201207e+00 p-value: 5.801371e-08 lags: 1.600000e+01 observations: 2.340000e+02 critical value (1%): -3.458608e+00 critical value (5%): -2.873972e+00 critical value (10%): -2.573396e+00 dtype: float64
ts_df = ts_df.merge(imports_subset1_df['charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12'], left_index=True, right_index=True)
Quantity¶
adf(imports_subset1_df['quantity_ukfrspde_imports'])
Augmented Dickey-Fuller Test: t-stat: -0.436341 p-value: 0.903852 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset1_df['quantity_ukfrspde_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset1_df['quantity_ukfrspde_imports_diff1'] = imports_subset1_df['quantity_ukfrspde_imports'] - imports_subset1_df['quantity_ukfrspde_imports'].shift(1)
imports_subset1_df['quantity_ukfrspde_imports_diff1_diff12'] = imports_subset1_df['quantity_ukfrspde_imports_diff1'] - imports_subset1_df['quantity_ukfrspde_imports_diff1'].shift(12)
adf(imports_subset1_df['quantity_ukfrspde_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.424159e+00 p-value: 1.761400e-08 lags: 1.300000e+01 observations: 2.370000e+02 critical value (1%): -3.458247e+00 critical value (5%): -2.873814e+00 critical value (10%): -2.573311e+00 dtype: float64
ts_df = ts_df.merge(imports_subset1_df['quantity_ukfrspde_imports_diff1_diff12'], left_index=True, right_index=True)
Quantity as a Proportion of World Imports¶
adf(imports_subset1_df['quantity_ukfrspde_proportion_imports'])
Augmented Dickey-Fuller Test: t-stat: -2.784062 p-value: 0.060587 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset1_df['quantity_ukfrspde_proportion_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12'] = imports_subset1_df['quantity_ukfrspde_proportion_imports'] - imports_subset1_df['quantity_ukfrspde_proportion_imports'].shift(12)
adf(imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -3.105613 p-value: 0.026141 lags: 13.000000 observations: 238.000000 critical value (1%): -3.458128 critical value (5%): -2.873762 critical value (10%): -2.573283 dtype: float64
imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12_diff1'] = imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12'] - imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12'].shift(1)
adf(imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12_diff1'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.683635e+00 p-value: 4.280055e-09 lags: 1.300000e+01 observations: 2.370000e+02 critical value (1%): -3.458247e+00 critical value (5%): -2.873814e+00 critical value (10%): -2.573311e+00 dtype: float64
ts_df = ts_df.merge(imports_subset1_df['quantity_ukfrspde_proportion_imports_diff12_diff1'], left_index=True, right_index=True)
Quantity Per Capita¶
adf(imports_subset1_df['quantity_ukfrspde_per_capita_imports'].dropna())
Augmented Dickey-Fuller Test: t-stat: -0.763268 p-value: 0.829674 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset1_df['quantity_ukfrspde_per_capita_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset1_df['quantity_ukfrspde_per_capita_imports_diff1'] = imports_subset1_df['quantity_ukfrspde_per_capita_imports'] - imports_subset1_df['quantity_ukfrspde_per_capita_imports'].shift(1)
imports_subset1_df['quantity_ukfrspde_per_capita_imports_diff1_diff12'] = imports_subset1_df['quantity_ukfrspde_per_capita_imports_diff1'] - imports_subset1_df['quantity_ukfrspde_per_capita_imports_diff1'].shift(12)
adf(imports_subset1_df['quantity_ukfrspde_per_capita_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.326419e+00 p-value: 2.978744e-08 lags: 1.300000e+01 observations: 2.370000e+02 critical value (1%): -3.458247e+00 critical value (5%): -2.873814e+00 critical value (10%): -2.573311e+00 dtype: float64
ts_df = ts_df.merge(imports_subset1_df['quantity_ukfrspde_per_capita_imports_diff1_diff12'], left_index=True, right_index=True)
World¶
cols = []
for c in imports_df.columns:
if 'world' in c:
cols.append(c)
imports_subset2_df = imports_df[cols].copy()
imports_subset2_df = imports_subset2_df[['customs_value_adj_world_imports', 'calculated_duties_adj_world_imports',
'charges_insurance_freight_adj_world_imports', 'quantity_world_imports', 'quantity_world_per_capita_imports']]
Customs Value¶
adf(imports_subset2_df['customs_value_adj_world_imports'].dropna())
Augmented Dickey-Fuller Test: t-stat: -1.459830 p-value: 0.553281 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset2_df['customs_value_adj_world_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset2_df['customs_value_adj_world_imports_diff1'] = imports_subset2_df['customs_value_adj_world_imports'] - imports_subset2_df['customs_value_adj_world_imports'].shift(1)
imports_subset2_df['customs_value_adj_world_imports_diff1_diff12'] = imports_subset2_df['customs_value_adj_world_imports_diff1'] - imports_subset2_df['customs_value_adj_world_imports_diff1'].shift(12)
adf(imports_subset2_df['customs_value_adj_world_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.834680e+00 p-value: 1.855488e-09 lags: 1.300000e+01 observations: 2.370000e+02 critical value (1%): -3.458247e+00 critical value (5%): -2.873814e+00 critical value (10%): -2.573311e+00 dtype: float64
ts_df = ts_df.merge(imports_subset2_df['customs_value_adj_world_imports_diff1_diff12'], left_index=True, right_index=True)
Calculated Duties¶
adf(imports_subset2_df['calculated_duties_adj_world_imports'])
Augmented Dickey-Fuller Test: t-stat: -3.584811 p-value: 0.006058 lags: 12.000000 observations: 251.000000 critical value (1%): -3.456674 critical value (5%): -2.873125 critical value (10%): -2.572944 dtype: float64
ts_df = ts_df.merge(imports_subset2_df['calculated_duties_adj_world_imports'], left_index=True, right_index=True)
Charges, Insurance, and Freight¶
adf(imports_subset2_df['charges_insurance_freight_adj_world_imports'])
Augmented Dickey-Fuller Test: t-stat: -2.183202 p-value: 0.212398 lags: 16.000000 observations: 247.000000 critical value (1%): -3.457105 critical value (5%): -2.873314 critical value (10%): -2.573044 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset2_df['charges_insurance_freight_adj_world_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset2_df['charges_insurance_freight_adj_world_imports_diff1'] = imports_subset2_df['charges_insurance_freight_adj_world_imports'] - imports_subset2_df['charges_insurance_freight_adj_world_imports'].shift(1)
imports_subset2_df['charges_insurance_freight_adj_world_imports_diff1_diff12'] = imports_subset2_df['charges_insurance_freight_adj_world_imports_diff1'] - imports_subset2_df['charges_insurance_freight_adj_world_imports_diff1'].shift(12)
adf(imports_subset2_df['charges_insurance_freight_adj_world_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.190991e+00 p-value: 6.123570e-08 lags: 1.600000e+01 observations: 2.340000e+02 critical value (1%): -3.458608e+00 critical value (5%): -2.873972e+00 critical value (10%): -2.573396e+00 dtype: float64
ts_df = ts_df.merge(imports_subset2_df['charges_insurance_freight_adj_world_imports_diff1_diff12'], left_index=True, right_index=True)
Quantity¶
adf(imports_subset2_df['quantity_world_imports'].dropna())
Augmented Dickey-Fuller Test: t-stat: -0.665185 p-value: 0.855563 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset2_df['quantity_world_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset2_df['quantity_world_imports_diff1'] = imports_subset2_df['quantity_world_imports'] - imports_subset2_df['quantity_world_imports'].shift(1)
imports_subset2_df['quantity_world_imports_diff1_diff12'] = imports_subset2_df['quantity_world_imports_diff1'] - imports_subset2_df['quantity_world_imports_diff1'].shift(12)
adf(imports_subset2_df['quantity_world_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.029991e+00 p-value: 1.425555e-07 lags: 1.600000e+01 observations: 2.340000e+02 critical value (1%): -3.458608e+00 critical value (5%): -2.873972e+00 critical value (10%): -2.573396e+00 dtype: float64
ts_df = ts_df.merge(imports_subset2_df['quantity_world_imports_diff1_diff12'], left_index=True, right_index=True)
Quantity Per Capita¶
adf(imports_subset2_df['quantity_world_per_capita_imports'].dropna())
Augmented Dickey-Fuller Test: t-stat: -0.972273 p-value: 0.763242 lags: 13.000000 observations: 250.000000 critical value (1%): -3.456781 critical value (5%): -2.873172 critical value (10%): -2.572969 dtype: float64
decompose_result_mult = seasonal_decompose(imports_subset2_df['quantity_world_per_capita_imports'].dropna(), model='additive')
trend = decompose_result_mult.trend
seasonal = decompose_result_mult.seasonal
residual = decompose_result_mult.resid
decompose_result_mult.plot()
imports_subset2_df['quantity_world_per_capita_imports_diff1'] = imports_subset2_df['quantity_world_per_capita_imports'] - imports_subset2_df['quantity_world_per_capita_imports'].shift(1)
imports_subset2_df['quantity_world_per_capita_imports_diff1_diff12'] = imports_subset2_df['quantity_world_per_capita_imports_diff1'] - imports_subset2_df['quantity_world_per_capita_imports_diff1'].shift(12)
adf(imports_subset2_df['quantity_world_per_capita_imports_diff1_diff12'].dropna())
Augmented Dickey-Fuller Test: t-stat: -6.062868e+00 p-value: 1.200914e-07 lags: 1.600000e+01 observations: 2.340000e+02 critical value (1%): -3.458608e+00 critical value (5%): -2.873972e+00 critical value (10%): -2.573396e+00 dtype: float64
ts_df = ts_df.merge(imports_subset2_df['quantity_world_per_capita_imports_diff1_diff12'], left_index=True, right_index=True)
Now we've made everything stationary for the multivariate time series.
display(ts_df.dropna().head())
frspger_25 | price_adj_diff1 | bottled_total_per_capita_diff1_diff12 | bulk_diff12 | bulk_per_capita_diff12 | nonbasic_quantity_diff1_diff12 | nonbasic_quantity_per_capita_diff1_diff12 | quantity_exports_diff12 | fas_value_adj_exports_diff1_diff12 | customs_value_adj_ukfrspde_imports_diff1_diff12 | calculated_duties_adj_ukfrspde_imports_diff1_diff12 | charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 | quantity_ukfrspde_imports_diff1_diff12 | quantity_ukfrspde_proportion_imports_diff12_diff1 | quantity_ukfrspde_per_capita_imports_diff1_diff12 | customs_value_adj_world_imports_diff1_diff12 | calculated_duties_adj_world_imports | charges_insurance_freight_adj_world_imports_diff1_diff12 | quantity_world_imports_diff1_diff12 | quantity_world_per_capita_imports_diff1_diff12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
month | ||||||||||||||||||||
2001-05-31 | 0 | 0.659528 | 0.016915 | 6.595232e+06 | 0.022032 | 2.428998e+06 | 0.008028 | 7204114.0 | 1.040327e+06 | 8.508741e+06 | 176039.000669 | 189967.702126 | 1441077.0 | 0.060835 | 0.005072 | -7.017318e+06 | 2.579096e+06 | -8.889423e+05 | -3193275.0 | -0.011262 |
2001-06-30 | 0 | 0.242013 | -0.050785 | 5.953839e+06 | 0.019816 | -1.053018e+07 | -0.037087 | 998658.0 | -7.739911e+06 | -5.426225e+06 | -75155.816947 | -283322.796341 | -1241802.0 | -0.040281 | -0.004398 | 2.364912e+05 | 2.717220e+06 | 1.200196e+03 | 1031146.0 | 0.003642 |
2001-07-31 | 0 | -0.434246 | 0.082204 | -1.084195e+07 | -0.039632 | 1.902590e+07 | 0.067087 | 7048878.0 | 8.425537e+06 | 7.139594e+06 | 122605.114091 | 565432.829575 | 1692652.0 | -0.018479 | 0.005988 | 2.828532e+07 | 2.990120e+06 | 1.895395e+06 | 7704403.0 | 0.027200 |
2001-08-31 | 0 | 0.380452 | -0.062038 | 7.302612e+07 | 0.250287 | -8.521039e+06 | -0.031190 | 295126.0 | -1.252128e+07 | -1.061431e+07 | -192850.754169 | -609574.553977 | -1839244.0 | 0.011007 | -0.006486 | -3.052870e+07 | 2.938348e+06 | -2.047266e+06 | -7049077.0 | -0.024939 |
2001-09-30 | 0 | -0.242006 | -0.058162 | -3.044768e+07 | -0.128668 | -1.687791e+07 | -0.057719 | -992607.0 | -1.852250e+06 | 1.310284e+07 | 63825.896916 | -297341.446600 | 432505.0 | 0.027586 | 0.001558 | 5.153565e+06 | 2.574896e+06 | -9.266976e+05 | -2195867.0 | -0.007474 |
Johansen Test¶
johan_test_df = ts_df[['price_adj_diff1',
'nonbasic_quantity_diff1_diff12',
'fas_value_adj_exports_diff1_diff12',
'quantity_ukfrspde_proportion_imports_diff12_diff1',
'calculated_duties_adj_world_imports',
'charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12',
'quantity_world_per_capita_imports_diff1_diff12',
'frspger_25'
]]
johan_test_df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 264 entries, 2000-01-31 to 2021-12-31 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 price_adj_diff1 263 non-null float64 1 nonbasic_quantity_diff1_diff12 246 non-null float64 2 fas_value_adj_exports_diff1_diff12 251 non-null float64 3 quantity_ukfrspde_proportion_imports_diff12_diff1 251 non-null float64 4 calculated_duties_adj_world_imports 264 non-null float64 5 charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 251 non-null float64 6 quantity_world_per_capita_imports_diff1_diff12 251 non-null float64 7 frspger_25 264 non-null int64 dtypes: float64(7), int64(1) memory usage: 26.7 KB
fig, axes = plt.subplots(nrows=4, ncols=2, dpi=120, figsize=(11,6))
for i, ax in enumerate(axes.flatten()):
data = johan_test_df[johan_test_df.columns[i]]
ax.plot(data, linewidth=1)
ax.set_title(johan_test_df.columns[i], size=9)
ax.tick_params(labelsize=5)
plt.tight_layout()
plt.show()
Let's define a function that provides a printout of the results of a cointegration johansen test of the variables for our analysis.
def johansen_test(var_df, critical_value=0.05):
output = coint_johansen(var_df, -1, 5)
d = {'0.90': 0, '0.95': 1, '0.99': 2}
trace_stat = output.lr1
critical_values = output.cvt[:, d[str(1-critical_value)]]
print('Cointegration Johansen Test:')
print('{:<62}'.format('Variable') + '{:<30}'.format('T-Stat > Critical Values') + '{:<20}'.format('Significant'))
print('--'*52)
for col, trace, cvt in zip(var_df.columns, trace_stat, critical_values):
print('{:<62}'.format(col) + '{:<30}'.format('{:<7}'.format(format(trace, '.3f')) + ' > ' + '{:<7}'.format(format(cvt, '.3f'))) + '{:<20}'.format(str(trace > cvt)))
# drop boolean datapoint
johan_test_df = johan_test_df.drop(columns=['frspger_25'])
johansen_test(johan_test_df.dropna())
Cointegration Johansen Test: Variable T-Stat > Critical Values Significant -------------------------------------------------------------------------------------------------------- price_adj_diff1 467.573 > 111.780 True nonbasic_quantity_diff1_diff12 352.348 > 83.938 True fas_value_adj_exports_diff1_diff12 256.515 > 60.063 True quantity_ukfrspde_proportion_imports_diff12_diff1 162.657 > 40.175 True calculated_duties_adj_world_imports 104.444 > 24.276 True charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 52.150 > 12.321 True quantity_world_per_capita_imports_diff1_diff12 1.223 > 4.130 False
Modeling¶
Before modeling, we need to do a train-test split to be able to forecast on the data. I should have done this much earlier on and used only the training data for checking for stationarity and granger causality. However, we're here now. New data will be published soon and we'll be able to incorporate that into the a new test set. For now, let's just pull out the most recent few observations for the test set (the most recent 3 months of data).
obs = 3
ts_train, ts_test = johan_test_df.dropna()[0:-obs], johan_test_df.dropna()[-obs:]
ts_train
price_adj_diff1 | nonbasic_quantity_diff1_diff12 | fas_value_adj_exports_diff1_diff12 | quantity_ukfrspde_proportion_imports_diff12_diff1 | calculated_duties_adj_world_imports | charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 | quantity_world_per_capita_imports_diff1_diff12 | |
---|---|---|---|---|---|---|---|
month | |||||||
2001-05-31 | 0.659528 | 2.428998e+06 | 1.040327e+06 | 0.060835 | 2.579096e+06 | 1.899677e+05 | -0.011262 |
2001-06-30 | 0.242013 | -1.053018e+07 | -7.739911e+06 | -0.040281 | 2.717220e+06 | -2.833228e+05 | 0.003642 |
2001-07-31 | -0.434246 | 1.902590e+07 | 8.425537e+06 | -0.018479 | 2.990120e+06 | 5.654328e+05 | 0.027200 |
2001-08-31 | 0.380452 | -8.521039e+06 | -1.252128e+07 | 0.011007 | 2.938348e+06 | -6.095746e+05 | -0.024939 |
2001-09-30 | -0.242006 | -1.687791e+07 | -1.852250e+06 | 0.027586 | 2.574896e+06 | -2.973414e+05 | -0.007474 |
... | ... | ... | ... | ... | ... | ... | ... |
2021-03-31 | 0.115665 | 1.685490e+07 | 3.008267e+07 | 0.042747 | 1.160764e+07 | 1.199310e+06 | -0.004261 |
2021-04-30 | 0.230637 | -1.143297e+07 | -1.641979e+06 | 0.027219 | 6.101831e+06 | 1.861444e+06 | 0.056351 |
2021-05-31 | -0.083969 | -2.850532e+07 | 5.475490e+06 | 0.032440 | 6.080915e+06 | 8.662714e+05 | 0.004597 |
2021-06-30 | -0.200000 | -8.828356e+06 | 4.892883e+06 | -0.034139 | 7.736218e+06 | 1.217117e+06 | 0.091626 |
2021-07-31 | -0.112057 | -1.164970e+07 | -5.591900e+06 | -0.022809 | 7.281041e+06 | -6.812625e+04 | -0.016523 |
243 rows × 7 columns
Lag Order Selection¶
model = VAR(ts_train)
/opt/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/base/tsa_model.py:524: ValueWarning: No frequency information was provided, so inferred frequency M will be used. warnings.warn('No frequency information was'
lag_orders = model.select_order(maxlags=12)
lag_orders.summary()
AIC | BIC | FPE | HQIC | |
---|---|---|---|---|
0 | 110.5 | 110.6 | 9.647e+47 | 110.5 |
1 | 105.0 | 105.8* | 4.046e+45 | 105.4* |
2 | 104.8 | 106.3 | 3.135e+45 | 105.4 |
3 | 104.7 | 107.0 | 3.094e+45 | 105.7 |
4 | 104.8 | 107.8 | 3.297e+45 | 106.0 |
5 | 104.9 | 108.6 | 3.627e+45 | 106.4 |
6 | 104.9 | 109.4 | 3.905e+45 | 106.8 |
7 | 105.0 | 110.2 | 4.155e+45 | 107.1 |
8 | 105.0 | 110.9 | 4.173e+45 | 107.4 |
9 | 105.1 | 111.8 | 5.029e+45 | 107.8 |
10 | 105.1 | 112.5 | 5.089e+45 | 108.1 |
11 | 104.8 | 112.9 | 3.982e+45 | 108.1 |
12 | 104.5* | 113.3 | 3.015e+45* | 108.0 |
It looks like the recommendation is for a lag order of 1. Let's go with that.
Fit the Model¶
tsmf = model.fit(3)
tsmf.summary()
Summary of Regression Results ================================== Model: VAR Method: OLS Date: Tue, 01, Nov, 2022 Time: 20:48:25 -------------------------------------------------------------------- No. of Equations: 7.00000 BIC: 106.850 Nobs: 240.000 HQIC: 105.517 Log likelihood: -14783.8 FPE: 2.72968e+45 AIC: 104.617 Det(Omega_mle): 1.47734e+45 -------------------------------------------------------------------- Results for equation price_adj_diff1 ================================================================================================================================= coefficient std. error t-stat prob --------------------------------------------------------------------------------------------------------------------------------- const 0.015077 0.042793 0.352 0.725 L1.price_adj_diff1 -0.764595 0.065621 -11.652 0.000 L1.nonbasic_quantity_diff1_diff12 0.000000 0.000000 0.491 0.624 L1.fas_value_adj_exports_diff1_diff12 -0.000000 0.000000 -1.695 0.090 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.299643 1.129742 -0.265 0.791 L1.calculated_duties_adj_world_imports -0.000000 0.000000 -0.454 0.650 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.000000 0.000000 -0.057 0.955 L1.quantity_world_per_capita_imports_diff1_diff12 0.580439 1.012552 0.573 0.566 L2.price_adj_diff1 -0.025577 0.084408 -0.303 0.762 L2.nonbasic_quantity_diff1_diff12 -0.000000 0.000000 -0.128 0.898 L2.fas_value_adj_exports_diff1_diff12 -0.000000 0.000000 -1.632 0.103 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.141833 1.235984 -0.115 0.909 L2.calculated_duties_adj_world_imports 0.000000 0.000000 0.421 0.674 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.000000 0.000000 -0.101 0.920 L2.quantity_world_per_capita_imports_diff1_diff12 -0.247800 1.151933 -0.215 0.830 L3.price_adj_diff1 -0.241458 0.065034 -3.713 0.000 L3.nonbasic_quantity_diff1_diff12 -0.000000 0.000000 -1.660 0.097 L3.fas_value_adj_exports_diff1_diff12 -0.000000 0.000000 -1.946 0.052 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 0.108096 1.119225 0.097 0.923 L3.calculated_duties_adj_world_imports 0.000000 0.000000 0.130 0.897 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.000000 0.000000 -0.268 0.789 L3.quantity_world_per_capita_imports_diff1_diff12 0.125428 0.995043 0.126 0.900 ================================================================================================================================= Results for equation nonbasic_quantity_diff1_diff12 ================================================================================================================================== coefficient std. error t-stat prob ---------------------------------------------------------------------------------------------------------------------------------- const -471212.879700 2472784.916441 -0.191 0.849 L1.price_adj_diff1 -1734764.070119 3791903.267623 -0.457 0.647 L1.nonbasic_quantity_diff1_diff12 -0.727078 0.067356 -10.795 0.000 L1.fas_value_adj_exports_diff1_diff12 0.165878 0.082726 2.005 0.045 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -21156896.256078 65282135.839963 -0.324 0.746 L1.calculated_duties_adj_world_imports -0.629333 0.967496 -0.650 0.515 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -1.405101 2.247186 -0.625 0.532 L1.quantity_world_per_capita_imports_diff1_diff12 -873292.058707 58510310.184761 -0.015 0.988 L2.price_adj_diff1 -3025289.478846 4877501.777911 -0.620 0.535 L2.nonbasic_quantity_diff1_diff12 -0.451445 0.078948 -5.718 0.000 L2.fas_value_adj_exports_diff1_diff12 0.046771 0.095992 0.487 0.626 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 -5344583.761086 71421304.011222 -0.075 0.940 L2.calculated_duties_adj_world_imports 0.979958 1.269687 0.772 0.440 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -4.289626 2.387918 -1.796 0.072 L2.quantity_world_per_capita_imports_diff1_diff12 7228281.145593 66564390.947730 0.109 0.914 L3.price_adj_diff1 -1158151.764997 3757982.910004 -0.308 0.758 L3.nonbasic_quantity_diff1_diff12 -0.121921 0.068760 -1.773 0.076 L3.fas_value_adj_exports_diff1_diff12 0.005104 0.083048 0.061 0.951 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 63993831.669793 64674374.218592 0.989 0.322 L3.calculated_duties_adj_world_imports -0.279341 0.994423 -0.281 0.779 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -4.341083 2.254766 -1.925 0.054 L3.quantity_world_per_capita_imports_diff1_diff12 47990961.941731 57498516.605342 0.835 0.404 ================================================================================================================================== Results for equation fas_value_adj_exports_diff1_diff12 ================================================================================================================================== coefficient std. error t-stat prob ---------------------------------------------------------------------------------------------------------------------------------- const 37334.641674 2120167.287879 0.018 0.986 L1.price_adj_diff1 -3792225.367884 3251180.162644 -1.166 0.243 L1.nonbasic_quantity_diff1_diff12 -0.045636 0.057751 -0.790 0.429 L1.fas_value_adj_exports_diff1_diff12 -0.689502 0.070929 -9.721 0.000 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -26064560.211913 55972942.883369 -0.466 0.641 L1.calculated_duties_adj_world_imports -0.160578 0.829531 -0.194 0.847 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 1.849618 1.926739 0.960 0.337 L1.quantity_world_per_capita_imports_diff1_diff12 -66593858.795996 50166775.457353 -1.327 0.184 L2.price_adj_diff1 -2115325.619397 4181972.984122 -0.506 0.613 L2.nonbasic_quantity_diff1_diff12 -0.047530 0.067690 -0.702 0.483 L2.fas_value_adj_exports_diff1_diff12 -0.341708 0.082303 -4.152 0.000 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 -36563424.849841 61236669.398746 -0.597 0.550 L2.calculated_duties_adj_world_imports -0.124123 1.088631 -0.114 0.909 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.234173 2.047403 -0.114 0.909 L2.quantity_world_per_capita_imports_diff1_diff12 -45161213.305984 57072349.190860 -0.791 0.429 L3.price_adj_diff1 1500206.420387 3222096.827438 0.466 0.642 L3.nonbasic_quantity_diff1_diff12 0.033324 0.058954 0.565 0.572 L3.fas_value_adj_exports_diff1_diff12 -0.055946 0.071205 -0.786 0.432 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 -25094936.426300 55451847.700407 -0.453 0.651 L3.calculated_duties_adj_world_imports 0.331729 0.852618 0.389 0.697 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.506317 1.933238 0.262 0.793 L3.quantity_world_per_capita_imports_diff1_diff12 -79613095.488324 49299263.028388 -1.615 0.106 ================================================================================================================================== Results for equation quantity_ukfrspde_proportion_imports_diff12_diff1 ================================================================================================================================= coefficient std. error t-stat prob --------------------------------------------------------------------------------------------------------------------------------- const -0.000765 0.003340 -0.229 0.819 L1.price_adj_diff1 0.006553 0.005122 1.279 0.201 L1.nonbasic_quantity_diff1_diff12 0.000000 0.000000 2.328 0.020 L1.fas_value_adj_exports_diff1_diff12 -0.000000 0.000000 -0.254 0.800 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.550955 0.088188 -6.248 0.000 L1.calculated_duties_adj_world_imports -0.000000 0.000000 -2.094 0.036 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.000000 0.000000 1.389 0.165 L1.quantity_world_per_capita_imports_diff1_diff12 0.016763 0.079040 0.212 0.832 L2.price_adj_diff1 0.002827 0.006589 0.429 0.668 L2.nonbasic_quantity_diff1_diff12 0.000000 0.000000 3.031 0.002 L2.fas_value_adj_exports_diff1_diff12 0.000000 0.000000 0.413 0.680 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.221619 0.096481 -2.297 0.022 L2.calculated_duties_adj_world_imports 0.000000 0.000000 1.088 0.276 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.000000 0.000000 -0.083 0.934 L2.quantity_world_per_capita_imports_diff1_diff12 0.103663 0.089920 1.153 0.249 L3.price_adj_diff1 -0.004199 0.005077 -0.827 0.408 L3.nonbasic_quantity_diff1_diff12 0.000000 0.000000 1.693 0.091 L3.fas_value_adj_exports_diff1_diff12 -0.000000 0.000000 -0.083 0.934 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.168831 0.087367 -1.932 0.053 L3.calculated_duties_adj_world_imports 0.000000 0.000000 0.799 0.424 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.000000 0.000000 0.521 0.602 L3.quantity_world_per_capita_imports_diff1_diff12 0.062297 0.077673 0.802 0.423 ================================================================================================================================= Results for equation calculated_duties_adj_world_imports ================================================================================================================================= coefficient std. error t-stat prob --------------------------------------------------------------------------------------------------------------------------------- const 382501.968492 170791.486436 2.240 0.025 L1.price_adj_diff1 -444624.472507 261900.981033 -1.698 0.090 L1.nonbasic_quantity_diff1_diff12 0.002484 0.004652 0.534 0.593 L1.fas_value_adj_exports_diff1_diff12 0.004654 0.005714 0.815 0.415 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -154248.563028 4508937.653129 -0.034 0.973 L1.calculated_duties_adj_world_imports 0.817246 0.066823 12.230 0.000 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.325554 0.155210 -2.098 0.036 L1.quantity_world_per_capita_imports_diff1_diff12 -2046356.216375 4041217.973245 -0.506 0.613 L2.price_adj_diff1 -164201.738905 336881.616029 -0.487 0.626 L2.nonbasic_quantity_diff1_diff12 0.001408 0.005453 0.258 0.796 L2.fas_value_adj_exports_diff1_diff12 0.005171 0.006630 0.780 0.435 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 3704473.026106 4932960.644566 0.751 0.453 L2.calculated_duties_adj_world_imports -0.049606 0.087695 -0.566 0.572 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.093466 0.164930 -0.567 0.571 L2.quantity_world_per_capita_imports_diff1_diff12 3114647.442846 4597501.059671 0.677 0.498 L3.price_adj_diff1 318344.193528 259558.153616 1.226 0.220 L3.nonbasic_quantity_diff1_diff12 0.002607 0.004749 0.549 0.583 L3.fas_value_adj_exports_diff1_diff12 0.002717 0.005736 0.474 0.636 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 477922.721793 4466960.483977 0.107 0.915 L3.calculated_duties_adj_world_imports 0.169018 0.068683 2.461 0.014 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.101660 0.155733 0.653 0.514 L3.quantity_world_per_capita_imports_diff1_diff12 1233050.447545 3971334.932369 0.310 0.756 ================================================================================================================================= Results for equation charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 ================================================================================================================================= coefficient std. error t-stat prob --------------------------------------------------------------------------------------------------------------------------------- const 40505.824584 100751.410496 0.402 0.688 L1.price_adj_diff1 -179786.321741 154497.708287 -1.164 0.245 L1.nonbasic_quantity_diff1_diff12 0.002284 0.002744 0.832 0.405 L1.fas_value_adj_exports_diff1_diff12 0.002982 0.003371 0.885 0.376 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -4050385.433467 2659862.255844 -1.523 0.128 L1.calculated_duties_adj_world_imports -0.161838 0.039420 -4.106 0.000 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.379397 0.091560 -4.144 0.000 L1.quantity_world_per_capita_imports_diff1_diff12 -930423.392156 2383950.274233 -0.390 0.696 L2.price_adj_diff1 -128607.445680 198729.448952 -0.647 0.518 L2.nonbasic_quantity_diff1_diff12 0.006832 0.003217 2.124 0.034 L2.fas_value_adj_exports_diff1_diff12 0.008452 0.003911 2.161 0.031 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 1657663.337941 2909997.173934 0.570 0.569 L2.calculated_duties_adj_world_imports 0.115377 0.051732 2.230 0.026 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.355994 0.097294 -3.659 0.000 L2.quantity_world_per_capita_imports_diff1_diff12 2048543.495791 2712106.593742 0.755 0.450 L3.price_adj_diff1 -18389.404099 153115.653644 -0.120 0.904 L3.nonbasic_quantity_diff1_diff12 0.008580 0.002802 3.062 0.002 L3.fas_value_adj_exports_diff1_diff12 0.010372 0.003384 3.065 0.002 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 -1952328.156781 2635099.551983 -0.741 0.459 L3.calculated_duties_adj_world_imports 0.045308 0.040517 1.118 0.263 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.073626 0.091868 -0.801 0.423 L3.quantity_world_per_capita_imports_diff1_diff12 -671317.676475 2342725.649487 -0.287 0.774 ================================================================================================================================= Results for equation quantity_world_per_capita_imports_diff1_diff12 ================================================================================================================================= coefficient std. error t-stat prob --------------------------------------------------------------------------------------------------------------------------------- const 0.000080 0.003740 0.021 0.983 L1.price_adj_diff1 -0.014061 0.005735 -2.452 0.014 L1.nonbasic_quantity_diff1_diff12 -0.000000 0.000000 -2.489 0.013 L1.fas_value_adj_exports_diff1_diff12 0.000000 0.000000 1.462 0.144 L1.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.212491 0.098731 -2.152 0.031 L1.calculated_duties_adj_world_imports -0.000000 0.000000 -1.570 0.117 L1.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.000000 0.000000 0.280 0.780 L1.quantity_world_per_capita_imports_diff1_diff12 -0.683618 0.088489 -7.725 0.000 L2.price_adj_diff1 -0.004120 0.007377 -0.558 0.577 L2.nonbasic_quantity_diff1_diff12 -0.000000 0.000000 -0.903 0.366 L2.fas_value_adj_exports_diff1_diff12 0.000000 0.000000 1.972 0.049 L2.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.143814 0.108015 -1.331 0.183 L2.calculated_duties_adj_world_imports -0.000000 0.000000 -0.712 0.477 L2.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.000000 0.000000 0.086 0.932 L2.quantity_world_per_capita_imports_diff1_diff12 -0.329176 0.100670 -3.270 0.001 L3.price_adj_diff1 0.009602 0.005683 1.689 0.091 L3.nonbasic_quantity_diff1_diff12 0.000000 0.000000 1.309 0.191 L3.fas_value_adj_exports_diff1_diff12 0.000000 0.000000 3.947 0.000 L3.quantity_ukfrspde_proportion_imports_diff12_diff1 -0.060460 0.097811 -0.618 0.536 L3.calculated_duties_adj_world_imports 0.000000 0.000000 2.557 0.011 L3.charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 0.000000 0.000000 0.112 0.911 L3.quantity_world_per_capita_imports_diff1_diff12 -0.182796 0.086959 -2.102 0.036 ================================================================================================================================= Correlation matrix of residuals price_adj_diff1 nonbasic_quantity_diff1_diff12 fas_value_adj_exports_diff1_diff12 quantity_ukfrspde_proportion_imports_diff12_diff1 calculated_duties_adj_world_imports charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 quantity_world_per_capita_imports_diff1_diff12 price_adj_diff1 1.000000 0.004062 -0.035227 -0.040193 -0.051722 -0.036526 -0.058171 nonbasic_quantity_diff1_diff12 0.004062 1.000000 -0.089079 0.030493 -0.053820 0.026258 -0.055443 fas_value_adj_exports_diff1_diff12 -0.035227 -0.089079 1.000000 0.146864 0.039319 0.279711 0.250276 quantity_ukfrspde_proportion_imports_diff12_diff1 -0.040193 0.030493 0.146864 1.000000 -0.035874 0.395026 -0.320231 calculated_duties_adj_world_imports -0.051722 -0.053820 0.039319 -0.035874 1.000000 -0.027366 0.061687 charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 -0.036526 0.026258 0.279711 0.395026 -0.027366 1.000000 0.399330 quantity_world_per_capita_imports_diff1_diff12 -0.058171 -0.055443 0.250276 -0.320231 0.061687 0.399330 1.000000
Test Causality¶
f_test_res = tsmf.test_causality('price_adj_diff1', ['nonbasic_quantity_diff1_diff12', 'fas_value_adj_exports_diff1_diff12', 'quantity_ukfrspde_proportion_imports_diff12_diff1', 'calculated_duties_adj_world_imports', 'charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12', 'quantity_world_per_capita_imports_diff1_diff12'], kind='f')
print(f_test_res)
<statsmodels.tsa.vector_ar.hypothesis_test_results.CausalityTestResults object. H_0: ['nonbasic_quantity_diff1_diff12', 'fas_value_adj_exports_diff1_diff12', 'quantity_ukfrspde_proportion_imports_diff12_diff1', 'calculated_duties_adj_world_imports', 'charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12', 'quantity_world_per_capita_imports_diff1_diff12'] do not Granger-cause price_adj_diff1: fail to reject at 5% significance level. Test statistic: 0.666, critical value: 1.611>, p-value: 0.847>
stable = tsmf.is_stable
print(stable)
<bound method VARProcess.is_stable of <statsmodels.tsa.vector_ar.var_model.VARResults object at 0x7fb68aafc790>>
Serial Correlation of Residuals¶
I'll use the Durbin Watson statistic for serial correlation. The value ranges from 0
to 4
with 0
indicating a positive correlation and 4
indicating a negative correlation. The value 2
is what the test aims for.
dw_output = durbin_watson(tsmf.resid)
for col, val in zip(ts_train.columns, dw_output):
print('{:<62}'.format(str(col), ':'), round(val, 3))
price_adj_diff1 1.95 nonbasic_quantity_diff1_diff12 2.079 fas_value_adj_exports_diff1_diff12 2.04 quantity_ukfrspde_proportion_imports_diff12_diff1 2.083 calculated_duties_adj_world_imports 1.988 charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 2.0 quantity_world_per_capita_imports_diff1_diff12 2.058
This looks like we don't have any serial correlation in the residuals. The only input that looks like it may have some serial correlation is the indicator variable for the additional 25% tariff.
Forecast¶
ts_train.columns
Index(['price_adj_diff1', 'nonbasic_quantity_diff1_diff12', 'fas_value_adj_exports_diff1_diff12', 'quantity_ukfrspde_proportion_imports_diff12_diff1', 'calculated_duties_adj_world_imports', 'charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12', 'quantity_world_per_capita_imports_diff1_diff12'], dtype='object')
lags = tsmf.k_ar
forecast_input = ts_test[['price_adj_diff1', 'nonbasic_quantity_diff1_diff12',
'fas_value_adj_exports_diff1_diff12',
'quantity_ukfrspde_proportion_imports_diff12_diff1',
'calculated_duties_adj_world_imports',
'charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12',
'quantity_world_per_capita_imports_diff1_diff12']].dropna().values[-lags:]
forecast_input
array([[-1.35037399e-01, 1.21611171e+07, -1.21178253e+07, -5.17580615e-03, 7.00927776e+06, -2.15299312e+06, -7.97691163e-02], [-1.46302920e-02, -1.48396828e+07, -8.42806774e+06, 3.32223558e-02, 5.81353470e+06, 1.06825123e+06, 1.76854645e-02], [-2.32410052e-01, 6.79203157e+06, -7.36217880e+06, -8.11764728e-03, 6.28677720e+06, -1.05282745e+06, -4.84722042e-02]])
pred = tsmf.forecast(y=forecast_input, steps=obs)
df_forecast = pd.DataFrame(pred, index=johan_test_df.index[-obs:], columns=ts_train.columns)
df_forecast
price_adj_diff1 | nonbasic_quantity_diff1_diff12 | fas_value_adj_exports_diff1_diff12 | quantity_ukfrspde_proportion_imports_diff12_diff1 | calculated_duties_adj_world_imports | charges_insurance_freight_adj_ukfrspde_imports_diff1_diff12 | quantity_world_per_capita_imports_diff1_diff12 | |
---|---|---|---|---|---|---|---|
month | |||||||
2021-10-31 | 0.279417 | 7.643647e+05 | 1.539493e+07 | -0.016273 | 6.598117e+06 | 267723.190574 | 0.036107 |
2021-11-30 | -0.095997 | 2.985375e+06 | -9.397345e+06 | 0.004606 | 6.243347e+06 | -204145.423263 | -0.019848 |
2021-12-31 | 0.138663 | -3.189388e+06 | 4.665659e+06 | 0.005685 | 6.147849e+06 | 312687.752751 | 0.009867 |
df[['price_adj']].tail(18)
price_adj | |
---|---|
month | |
2020-07-31 | 9.483945 |
2020-08-31 | 9.487003 |
2020-09-30 | 9.479693 |
2020-10-31 | 9.605364 |
2020-11-30 | 9.796169 |
2020-12-31 | 9.855172 |
2021-01-31 | 9.873563 |
2021-02-28 | 10.057515 |
2021-03-31 | 10.173180 |
2021-04-30 | 10.403817 |
2021-05-31 | 10.319847 |
2021-06-30 | 10.119847 |
2021-07-31 | 10.007790 |
2021-08-31 | 9.872753 |
2021-09-30 | 9.858122 |
2021-10-31 | 9.625712 |
2021-11-30 | 9.946201 |
2021-12-31 | 9.834599 |
Let's convert the forecasted differences to the actual amounts.
def convert_forecasts(original_df, forecast_df):
fc = forecast_df.copy()
odf = original_df.merge(fc, how='left', left_index=True, right_index=True)
for col in fc.columns:
col_name_odf = str.split(col, '_diff')[0]
col_name = col_name_odf + '_pred'
if 'diff12' in col:
odf[col_name] = fc[col] + odf[col_name_odf].shift(12)
if 'diff1' in col:
odf[col_name] = odf[col_name] + odf[col_name_odf].shift(1)
elif 'diff1' in col:
odf[col_name] = fc[col] + odf[col_name_odf].shift(1)
ret_cols = []
for col in odf.columns:
if 'pred' in col:
ret_cols.append(col)
return odf[ret_cols][-obs:]
df_results = convert_forecasts(df, df_forecast)
df_results.head()
price_adj_pred | nonbasic_quantity_pred | fas_value_adj_exports_pred | quantity_ukfrspde_proportion_imports_pred | charges_insurance_freight_adj_ukfrspde_imports_pred | quantity_world_per_capita_imports_pred | |
---|---|---|---|---|---|---|
month | ||||||
2021-10-31 | 10.137539 | 3.530810e+08 | 1.989196e+08 | 0.386987 | 1.235805e+07 | 0.720896 |
2021-11-30 | 9.529716 | 3.204310e+08 | 1.831348e+08 | 0.390680 | 1.177816e+07 | 0.679793 |
2021-12-31 | 10.084864 | NaN | 2.050764e+08 | 0.381782 | 1.195654e+07 | 0.675487 |
Accuracy¶
# accuracy metrics
def forecast_accuracy(forecast, actual):
# mean abs percentage error
mape = np.mean(np.abs(forecast - actual)/np.abs(actual))
# root mean squared error
rmse = np.mean((forecast - actual)**2)**.5
# correlation coefficient
corr = np.corrcoef(forecast, actual)[0,1]
# minmax accuracy
mins = np.amin(np.hstack([forecast[:, None], actual[:, None]]), axis=1)
maxs = np.amax(np.hstack([forecast[:, None], actual[:, None]]), axis=1)
minmax = 1 - np.mean(mins/maxs)
return {'mape': mape, 'rmse': rmse, 'corr': corr, 'minmax': minmax}
cols = [str.replace(c, '_pred', '') for c in df_results.columns]
for c in cols:
print('\nPrediction Accuracy: ' + c)
accuracy_prod = forecast_accuracy(df_results[c + '_pred'].values, df[c][-obs:])
for k, v in accuracy_prod.items():
print('{:<10}'.format(k + ': ') + str(round(v,4)))
Prediction Accuracy: price_adj mape: 0.0402 rmse: 0.4075 corr: -0.8146 minmax: 0.0391 Prediction Accuracy: nonbasic_quantity mape: 1.3067 rmse: 200013083.6011 corr: nan minmax: nan Prediction Accuracy: fas_value_adj_exports mape: 1.1937 rmse: 106077441.5318 corr: -0.9012 minmax: 0.5253 Prediction Accuracy: quantity_ukfrspde_proportion_imports mape: 0.9937 rmse: 0.1915 corr: 0.6387 minmax: 0.4937 Prediction Accuracy: charges_insurance_freight_adj_ukfrspde_imports mape: 0.8792 rmse: 5585963.7204 corr: 0.0702 minmax: 0.4593 Prediction Accuracy: quantity_world_per_capita_imports mape: 0.954 rmse: 0.3378 corr: 0.2997 minmax: 0.4868
<ipython-input-135-edd26c8ed0cc>:10: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. mins = np.amin(np.hstack([forecast[:, None], actual[:, None]]), axis=1) <ipython-input-135-edd26c8ed0cc>:11: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. maxs = np.amax(np.hstack([forecast[:, None], actual[:, None]]), axis=1)
Conclusion¶
The prediction accuracy looks pretty good for the Real Price variable, price_adj
, with a RMSE of 0.44 and a minmax getting close to zero.