Creating causal networks from monthly import and export data from WTO.
Creating causal trade networks.
General Process:
By the time trade data is aggregated to a national series, there is a lot of error and bias introduced.
This is an experiment, done out of curiosity, to see whether anything plausible comes out the other end.
This can be improved with bilateral, product-level, other economic, and alternative data, which are still to come.
Some additional techniques like feature engineering might also lead to some interesting outcomes.
from utils import *
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# Reference: https://aip.scitation.org/doi/10.1063/1.5025050

# Load the raw series and reshape them into a single wide table.
mcl, imports, exports = load_data()
data = wide_data(imports, exports)
print('data loaded')

# Cointegration screen (the second argument is passed to coint() as-is).
cointResults = coint(data, 2)
cointPairs = cointResults['series1'] + cointResults['series2']
# Series that show up in both the 'series1' and the 'series2' column.
cointSeries = set(cointResults['series1']) & set(cointResults['series2'])

# If series that are neither cointegrated nor Granger-causal should be
# dropped, filter them here to save time.
# The DataFrame is indexed with a sorted list rather than the set itself:
# recent pandas versions reject set indexers, and sorting makes the column
# order (and therefore the saved CSV) reproducible between runs.
grangerPData, onlyGrangeCause = granger(
    inputData=data[sorted(cointSeries)], maxlags=25, p_thresh=.01
)
grangerPairs = ["_".join(col) for col in onlyGrangeCause.columns]
onlyGrangeCause.to_csv('grangerResults.csv')
data loaded
# Display the first rows of the wide table built by wide_data().
data.head()
Albania-import | Argentina-import | Australia-import | Austria-import | Belarus-import | Belgium-import | Bolivia, Plurinational State of-import | Bosnia and Herzegovina-import | Brazil-import | Bulgaria-import | ... | Sweden-export | Switzerland-export | Thailand-export | Tunisia-export | Turkey-export | Ukraine-export | United Kingdom-export | United States of America-export | Uruguay-export | Viet Nam-export | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Year | |||||||||||||||||||||
2006-02-01 | 0.046154 | 0.000861 | -0.060991 | 0.002850 | 0.108728 | -0.023702 | -0.237500 | 0.330128 | -0.074985 | 0.015748 | ... | -0.027434 | 0.004764 | 0.064788 | 0.113971 | 0.180207 | 0.105263 | 0.117559 | 0.034935 | -0.121864 | -0.211488 |
2006-03-01 | 0.235294 | 0.170249 | 0.110741 | 0.198863 | 0.185457 | 0.195077 | 0.453552 | 0.277108 | 0.288900 | 0.245478 | ... | 0.275189 | 0.140864 | 0.166825 | 0.130913 | 0.223341 | 0.217576 | 0.244058 | 0.180906 | 0.297959 | 0.418460 |
2006-04-01 | -0.103175 | -0.065026 | -0.018433 | -0.125064 | -0.028700 | -0.175460 | -0.142857 | 0.052830 | -0.126158 | -0.047718 | ... | -0.172745 | -0.130984 | -0.170899 | -0.122568 | -0.128863 | -0.087122 | -0.112793 | -0.092512 | -0.047170 | -0.122848 |
2006-05-01 | 0.119469 | 0.110413 | 0.140888 | 0.173619 | 0.137312 | 0.144400 | 0.223684 | 0.236559 | 0.078620 | 0.228758 | ... | 0.159363 | 0.183649 | 0.176804 | 0.039911 | 0.090768 | 0.084639 | 0.139689 | 0.054835 | 0.231023 | 0.133400 |
2006-06-01 | 0.130435 | 0.011677 | -0.052542 | -0.040241 | -0.042282 | 0.017243 | -0.100358 | -0.050725 | 0.011536 | -0.008865 | ... | 0.001466 | -0.003652 | 0.011115 | 0.116205 | 0.109770 | 0.074181 | -0.051073 | 0.031758 | 0.002681 | 0.029938 |
5 rows × 146 columns
# Display the first rows of the second result returned by granger().
onlyGrangeCause.head()
var1 | Argentina-export | Argentina-export.1 | Argentina-export.2 | Argentina-export.3 | Argentina-export.4 | Argentina-export.5 | Argentina-export.6 | Argentina-export.7 | Argentina-export.8 | ... | Uruguay-import.66 | Uruguay-import.67 | Uruguay-import.68 | Uruguay-import.69 | Uruguay-import.70 | Uruguay-import.71 | Uruguay-import.72 | Uruguay-import.73 | Uruguay-import.74 | Uruguay-import.75 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | var2 | Bosnia and Herzegovina-import | Bulgaria-import | Canada-export | Chile-export | Chinese Taipei-export | Croatia-import | Czech Republic-import | Denmark-export | Ecuador-import | ... | Sweden-export | Switzerland-export | Switzerland-import | Thailand-import | Tunisia-export | Tunisia-import | Turkey-export | Turkey-import | United Kingdom-import | United States of America-export |
1 | pValueLag-1 | 0.0042808506364381506 | 0.07208862551391054 | 0.1940035499767957 | 0.054196694270651886 | 0.22931502699808656 | 0.0014824915353768811 | 0.5625975915885326 | 0.3935995905155081 | 0.15709687418719528 | ... | 0.5233306190456136 | 0.7155741085923247 | 0.9465560603468185 | 0.746997160930393 | 0.09903846812732454 | 0.010829285122714388 | 0.15181392678823297 | 0.19824005736941908 | 0.72420556118328 | 0.5150945013823669 |
2 | pValueLag-10 | 0.002398785324031496 | 3.062253019404723e-05 | 0.00019037719567158566 | 0.013678845208345948 | 0.01598875660121711 | 0.0029288101221571065 | 4.352266150739214e-08 | 4.22575627849754e-06 | 0.044043964529867426 | ... | 0.0002273730361093325 | 0.022692810239079843 | 0.08219597221500394 | 0.01911979557249607 | 0.005887029768317211 | 0.015384397864127335 | 0.0028296094556905867 | 0.0027694667090921044 | 1.0367859490055203e-05 | 0.0012037950030202214 |
3 | pValueLag-11 | 0.0022536957104008428 | 8.512330813648873e-05 | 0.00038184584729856585 | 0.025274065001320886 | 0.01643447001083835 | 0.0008137392581741112 | 1.717835935035261e-07 | 9.117462126779992e-06 | 0.0862683129403309 | ... | 0.00023621865071976305 | 0.05083125552104705 | 0.030245619378936696 | 0.024919993215420558 | 0.011228533702231924 | 0.003143374165063898 | 0.0017751096748003764 | 0.006445119080856879 | 6.29496393837463e-06 | 0.0009002882892177738 |
4 | pValueLag-12 | 0.010895339389087155 | 0.00012943226554918222 | 0.017113330310697738 | 0.07453439217958532 | 0.012569005109851058 | 0.02376205287602518 | 7.514566929784178e-05 | 0.0016951034840445001 | 0.36502534041935797 | ... | 0.00012718027179998108 | 0.06431712138610435 | 0.04825923066343767 | 0.0042541528358353235 | 0.00028732915889515215 | 0.00023624001412564778 | 0.002093765947386122 | 0.0003185152952184753 | 1.4230254592036397e-06 | 5.094801632924544e-05 |
5 rows × 6197 columns
def printSeriess(inputData, ncols):
    """Plot every column of ``inputData`` as its own small line chart.

    The charts are arranged on a grid that is ``ncols`` wide and has as many
    rows as are needed to fit all columns. Grid cells left over after the
    last column are hidden instead of being drawn as empty frames.

    Args:
        inputData: Table whose columns are plotted; each subplot is titled
            with the corresponding column label.
        ncols: Number of subplot columns in the grid.

    Returns:
        None. The figure is created through ``plt`` as a side effect.
    """
    from math import ceil

    nSeries = len(inputData.columns)
    nrows = ceil(nSeries / ncols)
    # squeeze=False keeps ``axes`` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes with no .flatten().
    _, axes = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        dpi=120,
        figsize=(ncols * 5, nrows * 3),
        squeeze=False,
    )
    for i, ax in enumerate(axes.flatten()):
        if i >= nSeries:
            # No series left for this grid cell.
            ax.set_visible(False)
            continue
        # Positional selection, so duplicated labels still map one-to-one.
        ax.plot(inputData.iloc[:, i], color='red', linewidth=1)
        # Decorations
        ax.set_title(inputData.columns[i])
        ax.xaxis.set_ticks_position('none')
        ax.get_xaxis().set_visible(False)
        ax.spines["top"].set_alpha(0)
        ax.tick_params(labelsize=6)
    plt.tight_layout()
def varResidProbPlot(model, keepSeries, ncols):
    """Draw a normal probability plot of the residuals of each kept series.

    One subplot is produced per entry selected from ``model.resid`` by
    ``keepSeries``, on a grid that is ``ncols`` wide. Grid cells left over
    after the last series are hidden.

    Args:
        model: Fitted model object exposing a ``resid`` table.
        keepSeries: Column labels of ``model.resid`` to plot.
        ncols: Number of subplot columns in the grid.

    Returns:
        None. The figure is created through ``plt`` as a side effect.
    """
    from math import ceil

    resids = model.resid[keepSeries]
    nSeries = len(resids.columns)
    nrows = ceil(nSeries / ncols)
    # squeeze=False keeps ``axes`` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes with no .flatten().
    _, axes = plt.subplots(
        nrows,
        ncols,
        dpi=120,
        figsize=(ncols * 5, nrows * 2.5),
        squeeze=False,
    )
    for i, ax in enumerate(axes.flatten()):
        if i >= nSeries:
            # No series left for this grid cell.
            ax.set_visible(False)
            continue
        stats.probplot(x=resids.iloc[:, i], dist="norm", plot=ax)
        ax.set(title=resids.columns[i] + ' residuals')
    plt.tight_layout()
# TODO: do proper logging
worksFor = []
# use 12 lags for the VAR model, as this is monthly data
lags = 12
# First-level labels of the Granger result columns. Built once as a set so
# the membership test inside the loop does not rescan every column.
grangerCauseSeries = {col[0] for col in onlyGrangeCause.columns}

# try to make charts for all series
for y in data.columns:
    # skip series that have no Granger results
    if y not in grangerCauseSeries:
        continue

    # select series using elastic select
    resultCoef, nonZeroCoef = elasticSelect(data, onlyGrangeCause, y)
    selectedSeries = [i[0] for i in nonZeroCoef]

    # panel cannot be wider than long, and at least two regressors are needed
    if not (1 < len(selectedSeries) < lags):
        continue

    selectedSeries.append(y)
    # poor man's logging :(
    worksFor.append(y)
    print(y)

    # Lag selection through the knee-finding function was not robust enough,
    # so the fixed ``lags`` value is used.
    modelData, model = varFunc(data, selectedSeries, lags)
    varPValues = varP(model, y, .05)
    varCoEffValues = varCoef(model, y, varPValues)

    # if nothing comes back as significant we can't plot anything
    if varCoEffValues.shape[0] <= 1:
        continue

    # Series.append was removed in pandas 2.0; pd.concat builds the same
    # combined series before de-duplicating.
    seriesKeep = pd.concat([varCoEffValues['series2'], pd.Series([y])]).unique()

    # view the selected series, then prob plots of the residuals
    printSeriess(data[seriesKeep], ncols=3)
    varResidProbPlot(model, seriesKeep, ncols=3)

    # create visualization: one layer per lag, edge weight = coefficient
    mnet = MultilayerNetwork(aspects=1, fullyInterconnected=False)
    for index, row in varCoEffValues.iterrows():
        mnet[row['series1'], row['series2'], 0, row['lag']] = row['coeff']

    fig = draw(mnet,
               # layout="spring",
               layershape="rectangle",
               # seems kinda random but this gets to about
               # 0.35 which makes it fit on one plot
               layergap=(2.5 / varCoEffValues['lag'].nunique()),
               autoscale=True,
               # defaultLayerLabelStyle='bold',
               defaultLayerAlpha=0.4,
               # camera_dist=10,
               # nodeLabelRule={},
               figsize=(10, 10),
               # alignedNodes=False,
               show=True)
    # NOTE(review): this call may be what produces the empty
    # "<Figure ... with 0 Axes>" outputs, if draw(show=True) has already
    # shown and closed its figure -- confirm before removing.
    plt.tight_layout()

pd.DataFrame(worksFor).to_csv('workingSeries.csv')
Bosnia and Herzegovina-import
Bulgaria-import Colombia-import
<Figure size 432x288 with 0 Axes>
Croatia-import
<Figure size 432x288 with 0 Axes>
Czech Republic-import
<Figure size 432x288 with 0 Axes>
Denmark-import
<Figure size 432x288 with 0 Axes>
El Salvador-import
<Figure size 432x288 with 0 Axes>
Malaysia-import
<Figure size 432x288 with 0 Axes>
Malta-import
<Figure size 432x288 with 0 Axes>
New Zealand-import
<Figure size 432x288 with 0 Axes>
Portugal-import
<Figure size 432x288 with 0 Axes>
Romania-import
<Figure size 432x288 with 0 Axes>
South Africa-import
<Figure size 432x288 with 0 Axes>
Switzerland-import
<Figure size 432x288 with 0 Axes>
Tunisia-import
<Figure size 432x288 with 0 Axes>
Turkey-import
<Figure size 432x288 with 0 Axes>
Chile-export
<Figure size 432x288 with 0 Axes>
Chinese Taipei-export
<Figure size 432x288 with 0 Axes>
Indonesia-export
<Figure size 432x288 with 0 Axes>
North Macedonia-export
<Figure size 432x288 with 0 Axes>
Norway-export
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>