import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')


# Trade values reshaped to one row per period and one column per
# (ReferenceArea, CounterpartReferenceArea) pair.
timeSeries = pd.pivot_table(
    pd.read_csv('dotsTimeSeries.csv'),
    index='period',
    columns=['ReferenceArea', 'CounterpartReferenceArea'],
    values='value',
)


# Period-over-period change of the logged values; the first row has no
# predecessor, and any pair with a remaining gap is dropped entirely.
# NOTE(review): this is pct_change of the log level, not a log difference -
# confirm that is the intended transformation.
tsPctChange = (
    np.log(timeSeries)
    .pct_change()
    .iloc[1:]
    .dropna(axis=1)
)
# Flatten the two-level column labels into 'Reference-Counterpart' strings.
# NOTE(review): later code recovers the parts with split('-'); an economy name
# that itself contains '-' would be split in the wrong place - verify the data.
tsPctChange.columns = ['-'.join(pair) for pair in tsPctChange.columns]
# Blank out changes beyond +/-1.5, then discard every pair that had one.
tsPctChange = (
    tsPctChange
    .mask((tsPctChange > 1.5) | (tsPctChange < -1.5))
    .dropna(axis=1)
)
tsPctChange.index = pd.to_datetime(tsPctChange.index)
tsPctChange = tsPctChange.loc[tsPctChange.index > '1985-01-01']


# Network statistics in long form, keyed by (economy label in 'index', PERIOD),
# with the columns this analysis does not use removed.
netStats = (
    pd.read_csv('DOTSnetStats.csv')
    .drop(columns=['Unnamed: 0', 'CONNECTIVITY', 'HAS_BRIDGE', 'TOTAL_NET_VALUE', 'PAGERANK_NUMPY'])
    .set_index(['index', 'PERIOD'])
)
# Reshape to one row per period and one column per (economy, statistic).
netStatsWide = pd.pivot_table(
    netStats.reset_index().melt(id_vars=['index', 'PERIOD']),
    index='PERIOD',
    columns=['index', 'variable'],
    values='value',
)
netStatsWide.index = pd.to_datetime(netStatsWide.index)
# Period-over-period change; drop the undefined first row and any
# (economy, statistic) column that still contains a gap.
netStatsWidePctChange = (
    netStatsWide
    .pct_change()
    .iloc[1:]
    .dropna(axis=1)
)
netStatsWidePctChange.index = pd.to_datetime(netStatsWidePctChange.index)
netStatsWidePctChange = netStatsWidePctChange.loc[netStatsWidePctChange.index > '1985-01-01']


# Correlation between the network-statistic columns of the long-form table
# (rows are every (economy, period) combination).
netStats.corr()


# First rows of the network-statistic change series.
netStatsWidePctChange.head()


# Correlation between every pair of (economy, statistic) change series.
netStatsWidePctChange.corr()


# First rows of the trade-flow change series.
tsPctChange.head()


# Economies appearing on each side of the '-'-joined pair labels, and their union.
importers=pd.Series(col.split('-')[0] for col in tsPctChange.columns).unique()
exporters=pd.Series(col.split('-')[1] for col in tsPctChange.columns).unique()
allEcons=sorted(set(importers) | set(exporters))
# Count of distinct network statistics. It gets its own name so the `netStats`
# DataFrame is not overwritten by an integer, which would break any later or
# repeated use of the table (e.g. re-running netStats.corr()).
numNetStats=pd.Series(col[1] for col in netStatsWidePctChange.columns).nunique()

print('The upper-bound on number of tests:', len(allEcons)*numNetStats)

# Output: The upper-bound on number of tests: 1216


# Univariate OLS: regress every trade-change series on each network-statistic
# change series of the economy named first in the pair label, one statistic
# at a time, and collect one result row per fit.
econs=pd.Series(col for col in tsPctChange.columns).unique()
regRows=[]
for tempSeries in econs:

    # Network statistics of the economy named first in the pair label.
    # NOTE(review): the file calls this side both "importer" and "exporter"
    # in different places - confirm which one it is.
    refEcon=tempSeries.split('-')[0]
    X_econ=netStatsWidePctChange[[col for col in netStatsWidePctChange.columns if col[0] == refEcon]]
    # statistics available for that economy (second level of the column labels)
    allNs=[col[1] for col in X_econ.columns]
    X_econ.columns=allNs

    # Trade series, kept as a one-column frame. The column label is already a
    # plain string, so it is not re-joined ('_'.join on a string would only
    # interleave its characters).
    y=tsPctChange[[tempSeries]]

    for tempNs in allNs:

        # has_constant='add' always prepends an intercept column, so the
        # statistic's coefficient sits at position 1.
        X = sm.add_constant(X_econ[tempNs], has_constant='add')

        # shuffle=False keeps time order: the last 10% of rows are held out.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)

        reg = sm.OLS(y_train, X_train).fit()

        y_pred = reg.predict(X_test)

        # .iloc is explicit positional access; plain [1] on a label-indexed
        # Series relies on a deprecated integer fallback.
        regRows.append({
            'index':tempSeries,
            'ns':reg.params.index[1],
            'coef':reg.params.iloc[1],
            'pvalue':reg.pvalues.iloc[1],
            'r2':reg.rsquared,
            'aic':reg.aic,
            'mse':mean_squared_error(y_test, y_pred),
        })


# One frame built in a single step (also well-defined when no fit was run).
regResults=pd.DataFrame(regRows, columns=['index', 'ns', 'coef', 'pvalue', 'r2', 'aic', 'mse'])


# Five best rows under each criterion. The isin filter keeps them in their
# original row order instead of ranking them.
regResults.loc[regResults.index.isin(regResults['pvalue'].nsmallest(5).index)]


regResults.loc[regResults.index.isin(regResults['r2'].nlargest(5).index)]


regResults.loc[regResults.index.isin(regResults['aic'].nsmallest(5).index)]


regResults.loc[regResults.index.isin(regResults['mse'].nsmallest(5).index)]


# Keep fits with p-value below 0.05 and R-squared above 0.5, renumbered from 0
# so the rows can be addressed by position.
filteredRegResults = regResults.query('pvalue<0.05 and r2>.5').reset_index(drop=True)
filteredRegResults


from math import ceil


# Scatter grid: one panel per filtered regression, trade change on x and the
# matching network-statistic change on y.
ncols=4

numResults = filteredRegResults.shape[0]
# At least one row, so plt.subplots does not fail when nothing passed the filter.
nrows = max(1, ceil(numResults / ncols))

width = ncols * 5
length = nrows * 3

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, dpi=120, figsize=(width,length))

for i, ax in enumerate(axes.flatten()):
    # The grid is padded to whole rows; surplus axes have no result row and
    # are left empty instead of raising a KeyError on a missing label.
    if i >= numResults:
        continue

    seriesName = filteredRegResults['index'][i]
    nsName = filteredRegResults['ns'][i]

    ax.scatter(
        x=tsPctChange[[seriesName]],
        # statistic of the economy named first in the pair label
        y=netStatsWidePctChange[[(seriesName.split('-')[0], nsName)]])

    ax.set_title(f"pvalue:{np.round(filteredRegResults['pvalue'][i], 4)},  r2:{np.round(filteredRegResults['r2'][i], 2)},  aic:{np.round(filteredRegResults['aic'][i], 2)}")
    ax.set_ylabel(f"{nsName} Percent Change")
    ax.set_xlabel(f"{seriesName}  Percent Change")

plt.tight_layout()


from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.decomposition import PCA
from numpy.linalg import eig


# %%
# Economies on each side of the '-'-joined pair labels, and their sorted union.
importers = pd.Series([pair.split('-')[0] for pair in tsPctChange.columns]).unique()
exporters = pd.Series([pair.split('-')[1] for pair in tsPctChange.columns]).unique()
allEcons = sorted(set(importers).union(exporters))


# Grid with one panel per economy, five panels per row.
ncols = 5
nrows = ceil(len(allEcons) / ncols)

width, length = ncols * 5, nrows * 3

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, dpi=120, figsize=(width, length))

# for i, ax in enumerate(axes.flatten()):
def myplot(score,coeff, i, ax, tempSeries, labels=None):
    """Draw a PCA biplot on ``ax``.

    Parameters:
        score: 2-D array; columns 0 and 1 are plotted as the point cloud.
        coeff: 2-D array with one row per variable; columns 0 and 1 give the
            tip of that variable's arrow.
        i: accepted for call compatibility; its value is not used.
        ax: object providing ``scatter``, ``arrow``, ``text`` and ``set_title``.
        tempSeries: text used as the panel title.
        labels: optional sequence of arrow labels; when omitted the arrows
            are labelled "Var1", "Var2", ...
    """
    pc1 = score[:, 0]
    pc2 = score[:, 1]
    numVars = coeff.shape[0]

    # Divide each component by its own range so the cloud has width 1 on both axes.
    pc1Scaled = pc1 * (1.0 / (pc1.max() - pc1.min()))
    pc2Scaled = pc2 * (1.0 / (pc2.max() - pc2.min()))
    ax.scatter(pc1Scaled, pc2Scaled)

    for k in range(numVars):
        dx, dy = coeff[k, 0], coeff[k, 1]
        ax.arrow(0, 0, dx, dy, color='r', alpha=0.5)
        caption = "Var" + str(k + 1) if labels is None else labels[k]
        # Place the label slightly beyond the arrow tip.
        ax.text(dx * 1.15, dy * 1.15, caption, color='g', ha='center', va='center')

    ax.set_title(tempSeries)


# One biplot per economy: PCA on that economy's standardized network-statistic
# changes, drawn on the matching panel of the grid.
for i, (tempSeries, ax) in enumerate(zip(allEcons, axes.flatten())):

    # columns of netStatsWidePctChange are (economy, statistic) pairs
    temp=netStatsWidePctChange[[col for col in netStatsWidePctChange.columns if col[0] == tempSeries]]
    # https://stackoverflow.com/questions/50796024/feature-variable-importance-after-a-pca-analysis

    # A biplot needs two principal components, hence at least two statistics;
    # with fewer, myplot's lookup of the second component would fail.
    if temp.shape[1] < 2:
        continue

    # Standardize so no statistic dominates the components through its scale.
    scaler = StandardScaler()
    X = scaler.fit_transform(temp)

    pca = PCA()
    x_new = pca.fit_transform(X)

    # Call the function. Use only the first 2 PCs.
    myplot(x_new[:,0:2],np.transpose(pca.components_[0:2, :]), i, ax, tempSeries, [col[1] for col in temp.columns])

# Layout only needs adjusting once, after every panel has been drawn.
plt.tight_layout()


# Worked example for a single economy: three-component PCA on its
# standardized network-statistic changes, plus the loading matrix.
econ = 'Argentina'
temp = netStatsWidePctChange.loc[:, [col[0] == econ for col in netStatsWidePctChange.columns]]
scaler = StandardScaler()
scaledData = pd.DataFrame(scaler.fit(temp).transform(temp))


pcaModel = PCA(n_components=3)


pcaModelFit = pcaModel.fit(scaledData)
principalComponents = pcaModelFit.transform(scaledData)

# share of total variance captured by the three components
pcaModelFit.explained_variance_ratio_.sum()

# Loadings: each component's weights scaled by the square root of its variance.
loadings = np.sqrt(pcaModelFit.explained_variance_) * pcaModelFit.components_.T

loading_matrix = pd.DataFrame(loadings, index=temp.columns)
print(pcaModelFit.explained_variance_ratio_.sum())
# statistics ordered by their loading on the first component
loading_matrix.sort_values(by=0, ascending=False)

# Output (sum of explained variance ratios): 0.7227717252387083


# Univariate OLS on principal components: for every trade-change series,
# reduce the network statistics of the economy named first in the pair label
# to n_components principal components and regress the series on each one.
econs=pd.Series(col for col in tsPctChange.columns).unique()
n_components=3
pcaRows=[]
for tempSeries in econs:

    # network statistics for reference econ
    refEcon=tempSeries.split('-')[0]
    X_econ=netStatsWidePctChange[[col for col in netStatsWidePctChange.columns if col[0] == refEcon]]

    # PCA cannot extract more components than there are statistics, so skip
    # economies with too few (this also covers economies with no data).
    if X_econ.shape[1] < n_components:
        continue

    # statistic names (second level of the column labels)
    allNs=[col[1] for col in X_econ.columns]
    X_econ.columns=allNs

    scaler = StandardScaler()
    scaledData = pd.DataFrame(scaler.fit_transform(X_econ))

    #####   PCA
    pcaModel = PCA(n_components=n_components)
    pcaModelFit = pcaModel.fit(scaledData)

    # Trade series for the reference economy, kept as a one-column frame. The
    # column label is already a plain string, so it is not re-joined.
    y=tsPctChange[[tempSeries]]

    # Component scores named '0', '1', '2' and given y's index so that
    # regressors and response line up row for row.
    X_econ = pd.DataFrame(
        pcaModelFit.transform(scaledData),
        columns=[str(col) for col in range(n_components)],
        index=y.index,
    )

    for tempNs in X_econ.columns:
        # has_constant='add' always prepends an intercept column, so the
        # component's coefficient sits at position 1.
        X = sm.add_constant(X_econ[tempNs], has_constant='add')

        # shuffle=False keeps time order: the last 10% of rows are held out.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)

        reg = sm.OLS(y_train, X_train).fit()

        y_pred = reg.predict(X_test)

        # .iloc is explicit positional access; plain [1] on a label-indexed
        # Series relies on a deprecated integer fallback.
        pcaRows.append({
            'index':tempSeries,
            'ns':reg.params.index[1],
            'coef':reg.params.iloc[1],
            'pvalue':reg.pvalues.iloc[1],
            'r2':reg.rsquared,
            'aic':reg.aic,
            'mse':mean_squared_error(y_test, y_pred),
        })

# One frame built in a single step (also well-defined when no fit was run).
regResultsPCA=pd.DataFrame(pcaRows, columns=['index', 'ns', 'coef', 'pvalue', 'r2', 'aic', 'mse'])


# Five best rows under each criterion. The isin filter keeps them in their
# original row order instead of ranking them.
regResultsPCA.loc[regResultsPCA.index.isin(regResultsPCA['pvalue'].nsmallest(5).index)]


regResultsPCA.loc[regResultsPCA.index.isin(regResultsPCA['r2'].nlargest(5).index)]


regResultsPCA.loc[regResultsPCA.index.isin(regResultsPCA['aic'].nsmallest(5).index)]


regResultsPCA.loc[regResultsPCA.index.isin(regResultsPCA['mse'].nsmallest(5).index)]


# largest coefficients in absolute value
regResultsPCA.loc[regResultsPCA.index.isin(regResultsPCA['coef'].abs().nlargest(5).index)]


# Keep fits with p-value below 0.1 and R-squared above 0.2, renumbered from 0
# so the rows can be addressed by position.
filteredregResultsPCA = regResultsPCA.query('pvalue<0.1 and r2>0.2').reset_index(drop=True)
filteredregResultsPCA


from math import ceil


# Scatter grid: one panel per filtered PCA regression, trade change on x and
# the selected principal-component score on y.
ncols=4

numResults = filteredregResultsPCA.shape[0]
# At least one row, so plt.subplots does not fail when nothing passed the filter.
nrows = max(1, ceil(numResults / ncols))

width = ncols * 5
length = nrows * 3

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, dpi=120, figsize=(width,length))

for i, ax in enumerate(axes.flatten()):
    # The grid is padded to whole rows; surplus axes stay empty.
    if i >= numResults:
        continue

    seriesName = filteredregResultsPCA['index'][i]
    pcLabel = filteredregResultsPCA['ns'][i]

    econ=seriesName.split('-')[0]
    temp=netStatsWidePctChange[[col for col in netStatsWidePctChange.columns if col[0] == econ]]
    if temp.shape[1] == 0:
        continue

    # The component scores are not stored with the regression results, so the
    # same scaling and three-component PCA are refit here.
    scaler = StandardScaler()
    scaledData = pd.DataFrame(scaler.fit_transform(temp))

    #####   PCA
    pcaModel = PCA(n_components=3)
    pcaModelFit = pcaModel.fit(scaledData)
    principalComponents = pcaModelFit.transform(scaledData)

    ax.scatter(
        x=tsPctChange[[seriesName]],
        # 'ns' holds the component's column position as text ('0', '1', '2')
        y=pd.DataFrame(principalComponents)[int(pcLabel)]
        )

    ax.set_title(f"pvalue:{np.round(filteredregResultsPCA['pvalue'][i], 5)},  r2:{np.round(filteredregResultsPCA['r2'][i], 2)},  aic:{np.round(filteredregResultsPCA['aic'][i], 2)}")
    # The y values are principal-component scores, not a percent change.
    ax.set_ylabel(f"Principal component {pcLabel} score")
    ax.set_xlabel(f"{seriesName}")

plt.tight_layout()

	DEGREE	IN_DEGREE	OUT_DEGREE	DEGREE_CENTRALITY	IN_DEGREE_CENTRALITY	OUT_DEGREE_CENTRALITY	AVG_NEIGHBOR_DEGREE	PAGERANK	KATZ	CLOSENESS_CENTRALITY	BETWEENNESS_CENTRALITY	CLUSTCOEF	NUM_NODES	NUM_EDGES	AVERAGECLUSTCOEF	TRIANGLES
DEGREE	1.000000	0.905947	0.962545	0.992468	0.907749	0.943530	0.539215	0.342109	0.030614	0.779620	0.579524	-0.582810	0.560734	0.578295	-0.366186	0.958205
IN_DEGREE	0.905947	1.000000	0.757224	0.879958	0.993021	0.722659	0.424597	0.352711	0.044777	0.888389	0.435932	-0.439187	0.675427	0.696580	-0.441086	0.926553
OUT_DEGREE	0.962545	0.757224	1.000000	0.967567	0.764474	0.992800	0.559940	0.301901	0.018554	0.633813	0.614865	-0.617850	0.432515	0.446061	-0.282453	0.884874
DEGREE_CENTRALITY	0.992468	0.879958	0.967567	1.000000	0.894707	0.962756	0.512342	0.380614	0.044864	0.748006	0.642281	-0.633303	0.491743	0.508283	-0.343765	0.922991
IN_DEGREE_CENTRALITY	0.907749	0.993021	0.764474	0.894707	1.000000	0.740622	0.395934	0.401356	0.059710	0.873463	0.493825	-0.480728	0.611031	0.631584	-0.427157	0.905375
OUT_DEGREE_CENTRALITY	0.943530	0.722659	0.992800	0.962756	0.740622	1.000000	0.531070	0.329623	0.031346	0.596529	0.667287	-0.661709	0.369877	0.382318	-0.258572	0.840450
AVG_NEIGHBOR_DEGREE	0.539215	0.424597	0.559940	0.512342	0.395934	0.531070	1.000000	-0.110202	-0.009636	0.526910	0.076573	-0.222635	0.560895	0.577173	-0.443239	0.484496
PAGERANK	0.342109	0.352711	0.301901	0.380614	0.401356	0.329623	-0.110202	1.000000	0.109268	0.213201	0.603015	-0.398695	-0.077618	-0.073561	0.041703	0.276802
KATZ	0.030614	0.044777	0.018554	0.044864	0.059710	0.031346	-0.009636	0.109268	1.000000	0.024699	0.084676	-0.088603	-0.013311	-0.012970	0.032814	0.003484
CLOSENESS_CENTRALITY	0.779620	0.888389	0.633813	0.748006	0.873463	0.596529	0.526910	0.213201	0.024699	1.000000	0.327792	-0.342888	0.804916	0.828565	-0.733591	0.785403
BETWEENNESS_CENTRALITY	0.579524	0.435932	0.614865	0.642281	0.493825	0.667287	0.076573	0.603015	0.084676	0.327792	1.000000	-0.577527	0.011448	0.014592	-0.088210	0.417665
CLUSTCOEF	-0.582810	-0.439187	-0.617850	-0.633303	-0.480728	-0.661709	-0.222635	-0.398695	-0.088603	-0.342888	-0.577527	1.000000	-0.101121	-0.111978	0.185684	-0.456913
NUM_NODES	0.560734	0.675427	0.432515	0.491743	0.611031	0.369877	0.560895	-0.077618	-0.013311	0.804916	0.011448	-0.101121	1.000000	0.969029	-0.544588	0.620465
NUM_EDGES	0.578295	0.696580	0.446061	0.508283	0.631584	0.382318	0.577173	-0.073561	-0.012970	0.828565	0.014592	-0.111978	0.969029	1.000000	-0.603055	0.652760
AVERAGECLUSTCOEF	-0.366186	-0.441086	-0.282453	-0.343765	-0.427157	-0.258572	-0.443239	0.041703	0.032814	-0.733591	-0.088210	0.185684	-0.544588	-0.603055	1.000000	-0.359057
TRIANGLES	0.958205	0.926553	0.884874	0.922991	0.905375	0.840450	0.484496	0.276802	0.003484	0.785403	0.417665	-0.456913	0.620465	0.652760	-0.359057	1.000000

index	Afghanistan										...	Yemen, P.D. Rep.
variable	AVERAGECLUSTCOEF	CLOSENESS_CENTRALITY	CLUSTCOEF	DEGREE	DEGREE_CENTRALITY	IN_DEGREE	IN_DEGREE_CENTRALITY	KATZ	NUM_EDGES	NUM_NODES	...	CLUSTCOEF	DEGREE	DEGREE_CENTRALITY	IN_DEGREE	IN_DEGREE_CENTRALITY	KATZ	NUM_EDGES	NUM_NODES	PAGERANK	TRIANGLES
PERIOD
1985-02-01	0.006149	0.008230	-0.017379	0.019608	0.019608	0.043478	0.043478	-0.304667	0.002810	0.0	...	-0.062441	-0.018868	-0.018868	-0.028571	-0.028571	0.017828	0.002810	0.0	-0.066568	0.024390
1985-03-01	-0.001313	0.000000	-0.027077	0.057692	0.057692	0.041667	0.041667	-0.090895	0.005324	0.0	...	0.037767	0.115385	0.115385	0.147059	0.147059	-0.079270	0.005324	0.0	0.061019	0.214286
1985-04-01	0.009252	0.012500	0.059892	-0.072727	-0.072727	0.040000	0.040000	1.068448	0.005575	0.0	...	-0.006298	-0.051724	-0.051724	-0.076923	-0.076923	0.120605	0.005575	0.0	-0.253603	-0.133795
1985-05-01	0.002814	0.008403	0.021888	0.058824	0.058824	0.076923	0.076923	0.021696	0.009286	0.0	...	0.017743	0.036364	0.036364	0.055556	0.055556	-0.094392	0.009286	0.0	0.052174	0.167776
1985-06-01	-0.015002	-0.028571	-0.044265	-0.092593	-0.092593	-0.214286	-0.214286	0.053878	-0.008514	0.0	...	-0.000867	-0.052632	-0.052632	-0.078947	-0.078947	-0.101800	-0.008514	0.0	0.216340	-0.168757

	index	Afghanistan										...	Yemen, P.D. Rep.
	variable	AVERAGECLUSTCOEF	CLOSENESS_CENTRALITY	CLUSTCOEF	DEGREE	DEGREE_CENTRALITY	IN_DEGREE	IN_DEGREE_CENTRALITY	KATZ	NUM_EDGES	NUM_NODES	...	CLUSTCOEF	DEGREE	DEGREE_CENTRALITY	IN_DEGREE	IN_DEGREE_CENTRALITY	KATZ	NUM_EDGES	NUM_NODES	PAGERANK	TRIANGLES
index	variable
Afghanistan	AVERAGECLUSTCOEF	1.000000	0.008563	0.268401	0.097012	0.112500	-0.034662	-0.013745	-0.020592	0.258409	-0.188789	...	0.069385	0.034931	0.033566	0.022539	0.021497	0.012239	0.308246	0.063992	-0.113081	0.018248
	CLOSENESS_CENTRALITY	0.008563	1.000000	0.069392	0.485661	0.487173	0.872371	0.877817	-0.015789	0.275666	0.015062	...	-0.010067	0.068281	0.069322	0.035814	0.036912	0.031849	0.105024	-0.061704	-0.053218	0.037660
	CLUSTCOEF	0.268401	0.069392	1.000000	-0.335240	-0.322883	0.030909	0.045627	-0.085917	0.027050	-0.143091	...	0.048122	-0.067967	-0.071540	-0.016755	-0.020086	-0.015965	0.060949	0.178997	-0.161005	-0.040235
	DEGREE	0.097012	0.485661	-0.335240	1.000000	0.997265	0.600504	0.602948	0.040934	0.229190	0.036113	...	0.029834	0.054439	0.056968	0.037052	0.039531	-0.035350	0.195018	-0.133065	0.026756	0.036045
	DEGREE_CENTRALITY	0.112500	0.487173	-0.322883	0.997265	1.000000	0.591579	0.599743	0.040700	0.193831	-0.037386	...	0.028419	0.054921	0.057617	0.037045	0.039675	-0.035741	0.196222	-0.141565	0.026265	0.036405
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
Yemen, P.D. Rep.	KATZ	0.012239	0.031849	-0.015965	-0.035350	-0.035741	0.011687	0.011442	-0.053146	0.018041	0.004437	...	0.175340	0.135799	0.135630	0.075658	0.075625	1.000000	0.026173	0.002194	-0.001177	-0.026618
	NUM_EDGES	0.308246	0.105024	0.060949	0.195018	0.196222	0.142425	0.144518	0.077890	0.336385	-0.007532	...	0.137823	0.268694	0.268028	0.209588	0.209158	0.026173	1.000000	0.024900	-0.057362	0.227043
	NUM_NODES	0.063992	-0.061704	0.178997	-0.133065	-0.141565	0.006879	-0.001520	-0.005665	0.005408	0.112827	...	0.189844	-0.045144	-0.064780	0.017885	0.000177	0.002194	0.024900	1.000000	0.047493	-0.016247
	PAGERANK	-0.113081	-0.053218	-0.161005	0.026756	0.026265	-0.011613	-0.012307	0.031506	-0.014342	0.008425	...	0.062099	0.223940	0.222865	0.309366	0.308527	-0.001177	-0.057362	0.047493	1.000000	0.320644
	TRIANGLES	0.018248	0.037660	-0.040235	0.036045	0.036405	0.125626	0.127260	0.189249	0.077119	-0.003858	...	0.300027	0.861270	0.860715	0.906685	0.906874	-0.026618	0.227043	-0.016247	0.320644	1.000000

	Argentina-Brazil	Argentina-Chile	Argentina-Japan	Australia-Canada	Australia-China	Australia-France	Australia-Germany	Australia-India	Australia-Italy	Australia-Japan	...	United Kingdom-Singapore	United Kingdom-Spain	United Kingdom-Sweden	United Kingdom-Switzerland	United Kingdom-Taiwan, Province of China	United Kingdom-Tanzania, United Republic of	United Kingdom-Thailand	United Kingdom-Turkey	United Kingdom-United Arab Emirates	United Kingdom-United States
period
1985-02-01	0.062522	-0.154539	-0.293125	0.142470	0.062065	-0.097768	0.065731	-0.131695	0.000170	-0.004596	...	-0.036131	-0.029949	0.006133	0.004249	-0.030834	-0.285420	-0.033476	-0.129838	-0.018257	-0.006231
1985-03-01	-0.157772	-0.028198	0.374053	0.018934	-0.088413	-0.003325	-0.037304	0.080223	-0.137057	-0.023654	...	0.025743	0.069935	0.006004	0.013794	0.069704	0.261847	0.056897	0.113487	0.076519	0.018622
1985-04-01	0.076867	0.130921	0.230518	-0.087285	0.164218	0.056872	-0.091518	0.128781	0.182579	0.051857	...	0.035510	-0.039669	0.050269	0.063971	0.030340	0.175368	0.097109	0.097419	0.006345	0.040990
1985-05-01	-0.079725	-0.085492	-0.072008	0.074769	0.035272	-0.029843	0.165752	0.010152	0.004218	-0.000145	...	0.013411	-0.008849	-0.044910	-0.018795	0.022171	0.196199	-0.045813	-0.046656	-0.076937	0.022249
1985-06-01	0.037981	0.148035	0.147914	0.027766	0.093546	0.115701	-0.122772	0.011665	-0.078161	0.012099	...	-0.056779	0.013070	0.005069	0.012590	-0.035781	-0.138247	-0.038296	0.012301	0.063594	-0.020316

	index	ns	coef	pvalue	r2	aic	mse
1858	France-Italy	PAGERANK	0.366934	3.058292e-72	0.560019	-1923.270376	0.000415
4186	Italy-Austria	PAGERANK	0.294486	1.078482e-65	0.525103	-1723.383263	0.000481
4330	Italy-France	PAGERANK	0.281747	1.283553e-79	0.596303	-1873.084271	0.000485
4346	Italy-Germany	PAGERANK	0.219648	5.352173e-74	0.568945	-2025.713641	0.000250
4762	Italy-Spain	PAGERANK	0.332262	3.005676e-61	0.499840	-1587.743934	0.000609

1. Load and clean data

2. Loop and linear regression

3. Filter univariate regression results

4. Visual check

5. PCA on network statistics to reduce dimensionality

	index	ns	coef	pvalue	r2	aic	mse
885	Canada-United States	DEGREE	0.098988	1.032284e-08	0.079926	-2663.455321	0.000103
886	Canada-United States	DEGREE_CENTRALITY	0.091503	6.636601e-08	0.071443	-2659.820961	0.000104
890	Canada-United States	NUM_EDGES	0.254925	9.073834e-10	0.090931	-2668.220270	0.000076
892	Canada-United States	OUT_DEGREE	0.067155	7.640546e-07	0.060235	-2655.069445	0.000106
895	Canada-United States	TRIANGLES	0.091143	5.187018e-09	0.083051	-2664.802277	0.000095

	index	ns	coef	pvalue	r2	aic	mse
6088	Netherlands-Germany	AVERAGECLUSTCOEF	0.328054	6.201868e-05	0.039943	-2443.170822	0.000059
6089	Netherlands-Germany	AVG_NEIGHBOR_DEGREE	0.163929	7.750331e-05	0.038913	-2442.746399	0.000059
6093	Netherlands-Germany	DEGREE	0.096945	9.414008e-03	0.016989	-2433.814378	0.000067
6098	Netherlands-Germany	NUM_EDGES	0.365567	8.341191e-12	0.111869	-2474.008866	0.000045
6103	Netherlands-Germany	TRIANGLES	0.253912	5.776218e-09	0.082563	-2461.152722	0.000053

		0	1	2
index	variable
Argentina	DEGREE_CENTRALITY	0.954604	0.128942	0.040180
	DEGREE	0.948894	0.176514	0.059231
	CLOSENESS_CENTRALITY	0.904727	-0.136198	-0.089371
	IN_DEGREE_CENTRALITY	0.903900	-0.108930	-0.080641
	IN_DEGREE	0.899232	-0.047406	-0.056052
	TRIANGLES	0.889398	0.252981	0.033791
	PAGERANK	0.059270	-0.464178	-0.388364
	AVERAGECLUSTCOEF	0.047154	0.108996	-0.881155
	KATZ	0.012647	-0.053412	-0.066499
	NUM_EDGES	-0.006441	0.832203	-0.374385
	NUM_NODES	-0.224252	0.776487	0.244186
	CLUSTCOEF	-0.840177	0.058003	-0.234825

	index	coef	pvalue	r2	aic	mse
867	Italy-Austria	0.006742	1.853681e-23	0.223638	-1528.740775	0.000760
894	Italy-France	0.005892	3.648805e-22	0.211911	-1608.181766	0.000866
897	Italy-Germany	0.004781	5.983764e-23	0.219047	-1790.378996	0.000484
948	Italy-Netherlands	0.006450	4.852923e-22	0.210780	-1533.830995	0.000644
1002	Italy-United Kingdom	0.005826	3.209635e-23	0.221491	-1639.549949	0.000803

	index	ns	coef	pvalue	r2	aic	mse
177	Canada-United States	0	0.000743	5.443885e-07	0.061794	-2655.726957	0.000107
178	Canada-United States	1	0.000766	6.290972e-03	0.018792	-2637.980310	0.000104
179	Canada-United States	2	0.001416	1.241709e-05	0.047375	-2649.687178	0.000095
564	Germany-Netherlands	0	0.000564	6.635361e-03	0.018553	-2453.067065	0.000099
565	Germany-Netherlands	1	0.001542	4.810872e-06	0.051754	-2466.695017	0.000077

	index	ns	coef	pvalue	r2	aic	mse
179	Canada-United States	2	0.001416	0.000012	0.047375	-2649.687178	0.000095
565	Germany-Netherlands	1	0.001542	0.000005	0.051754	-2466.695017	0.000077
1242	Netherlands-Germany	0	0.000361	0.104435	0.006678	-2429.682232	0.000069
1243	Netherlands-Germany	1	0.001509	0.000002	0.055816	-2449.773089	0.000059
1244	Netherlands-Germany	2	0.001605	0.000004	0.052761	-2448.493640	0.000055

	index	ns	coef	pvalue	r2	aic	mse
119	Canada-Denmark	2	0.026723	3.769401e-03	0.021100	7.875695	0.037339
640	Greece-Egypt	1	-0.021450	4.405205e-02	0.010251	110.083040	0.015408
1433	Portugal-Japan	2	-0.021662	1.224228e-02	0.015821	-183.288771	0.016500
1566	Sri Lanka-Japan	0	0.024138	3.983916e-16	0.154936	-634.561938	0.012071
1688	Switzerland-New Zealand	2	0.022450	7.622431e-07	0.060246	-515.664428	0.056722