from base64 import encode
import pandas as pd
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import MeanSquaredError

from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE

from sklearn.decomposition import PCA

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import plotly.express as px

import seaborn as sns

from math import ceil

# Seed values, presumably intended for reproducible runs.
# NOTE(review): neither name is referenced anywhere else in the visible code
# (the train/test split further down hard-codes random_state=42) -- confirm
# whether they are still needed or should be wired in.
seed = 11
rand_state = 11

# Report how many GPU devices TensorFlow can see on this machine.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


########## load data

# Folder that holds the trade extract read below.
# NOTE(review): this is an absolute, machine-specific path, and the name `dir`
# shadows the builtin of the same name. Both are kept as-is because other
# parts of the file may refer to this variable.
dir=r'C:\Users\yosty\Desktop\Desktop_Folder\03 - Data\wto\newest'

# Raw long-format records; the code below reads the columns 'Year',
# 'ReportingEconomy', 'PartnerEconomy', 'ProductOrSector' and 'Value'.
data = pd.read_csv(os.path.join(dir,'HS_M_0010.csv'))

# Country lookup table, read relative to the current working directory.
# NOTE(review): `mcl` is not used anywhere in the visible code.
mcl = pd.read_csv('countryList.csv',
                 dtype={'Name':'str',
                        'ISO':'int',
                        'IC':'str',
                        'Region':'str'})


# Wide table: one row per (reporter, partner, year), one column per
# product/sector, restricted to the years 2000-2020 inclusive. Combinations
# with no record are filled with 0.
# The aggregation is spelled 'sum' rather than np.sum: pandas treats both the
# same today, but passing the NumPy callable emits a FutureWarning on recent
# versions.
encodeData = pd.pivot_table(data[data['Year'].isin(range(2000,2021))],
                          index= ['ReportingEconomy', 'PartnerEconomy', 'Year'],
                          columns='ProductOrSector',
                          values='Value',
                          aggfunc='sum').fillna(0)

# Notebook-style preview; has no effect when run as a plain script.
encodeData.head()


# Long format: one row per (reporter, partner, year, product) with the traded
# amount in 'value'.
econDataPercentTotal = encodeData.melt(ignore_index=False).reset_index()

# Total across all products for each (reporter, partner, year) row.
# Only the numeric 'value' column is aggregated; the previous form summed
# every column (including the product-name strings) and then discarded all
# but 'value'.
econDataPercentTotal['total'] = (
    econDataPercentTotal
    .groupby(['ReportingEconomy', 'PartnerEconomy', 'Year'])['value']
    .transform('sum'))

# Share of each product in its row total. Despite the column name this is a
# fraction (value / total), not a number scaled to 0-100.
econDataPercentTotal['percent'] = econDataPercentTotal['value'] / econDataPercentTotal['total']

# Drop the helper columns. The result has to be assigned back: DataFrame.drop
# returns a new frame and the original statement threw it away.
econDataPercentTotal = econDataPercentTotal.drop(['value', 'total'], axis=1)

# Back to wide format: one row per (reporter, partner, year), one column per
# product, holding the shares. 'sum' replaces np.sum to avoid the pandas
# FutureWarning about NumPy callables; the aggregation itself is unchanged.
econDataPercentTotal = pd.pivot_table(econDataPercentTotal,
                          columns= 'ProductOrSector',
                          index=['ReportingEconomy', 'PartnerEconomy', 'Year'],
                          values='percent',
                          aggfunc='sum')

# Notebook-style preview; has no effect when run as a plain script.
econDataPercentTotal.head()


# Products with the largest mean share across all rows (nlargest defaults to
# the top five).
econDataPercentTotal.mean().sort_values().nlargest()

ProductOrSector
Nuclear reactors, boilers, machinery and mechanical appliances; parts thereof                                                                                                              0.093345
Electrical machinery and equipment and parts thereof; sound recorders and reproducers, television image and sound recorders and reproducers, and parts and accessories of such articles    0.085582
Mineral fuels, mineral oils and products of their distillation; bituminous substances; mineral waxes                                                                                       0.058911
Vehicles other than railway or tramway rollingstock, and parts and accessories thereof                                                                                                     0.042113
Pharmaceutical products                                                                                                                                                                    0.031543
dtype: float64


class shallow(Model):
  """Autoencoder with a single Dense layer on each side.

  The encoder is one 8-unit ReLU layer and the decoder is one 96-unit
  sigmoid layer.

  NOTE(review): ``latent_dim`` is stored on the instance but does not
  affect any layer size; the encoder output width is fixed at 8.
  """

  def __init__(self, latent_dim=10):
    super().__init__()
    self.latent_dim = latent_dim

    # Compress the input down to the 8-unit code.
    bottleneck = Dense(8, activation="relu")
    self.encoder = tf.keras.Sequential([bottleneck])

    # Expand the code back out to 96 units.
    reconstruction = Dense(96, activation="sigmoid")
    self.decoder = tf.keras.Sequential([reconstruction])

  def call(self, x):
    """Encode ``x`` and return the decoder's reconstruction of it."""
    return self.decoder(self.encoder(x))

class deep(Model):
  """Autoencoder with three Dense layers on each side.

  Encoder widths are 48 -> 24 -> 8 (all ReLU); the decoder mirrors them with
  24 -> 48 (ReLU) followed by a 96-unit sigmoid output layer.

  NOTE(review): ``latent_dim`` is stored on the instance but does not
  affect any layer size; the encoder output width is fixed at 8.
  """

  def __init__(self, latent_dim=10):
    super().__init__()
    self.latent_dim = latent_dim

    encoder_layers = [
      Dense(48, activation="relu"),
      Dense(24, activation="relu"),
      Dense(8, activation="relu"),
    ]
    self.encoder = tf.keras.Sequential(encoder_layers)

    decoder_layers = [
      Dense(24, activation="relu"),
      Dense(48, activation="relu"),
      Dense(96, activation="sigmoid"),
    ]
    self.decoder = tf.keras.Sequential(decoder_layers)

  def call(self, x):
    """Encode ``x`` and return the decoder's reconstruction of it."""
    return self.decoder(self.encoder(x))


class deeep(Model):
  """Autoencoder with eleven Dense layers on each side.

  Encoder widths step down by 8 from 88 to 8 (all ReLU). The decoder steps
  back up by 8 from 16 to 88 (ReLU) and ends in a 96-unit sigmoid layer.

  NOTE(review): ``latent_dim`` is stored on the instance but does not
  affect any layer size; the encoder output width is fixed at 8.
  """

  def __init__(self, latent_dim=10):
    super().__init__()
    self.latent_dim = latent_dim

    # 88, 80, ..., 16, 8
    encoder_layers = [Dense(units, activation="relu")
                      for units in range(88, 0, -8)]
    self.encoder = tf.keras.Sequential(encoder_layers)

    # 16, 24, ..., 80, 88, then the sigmoid output layer
    decoder_layers = [Dense(units, activation="relu")
                      for units in range(16, 96, 8)]
    decoder_layers.append(Dense(96, activation="sigmoid"))
    self.decoder = tf.keras.Sequential(decoder_layers)

  def call(self, x):
    """Encode ``x`` and return the decoder's reconstruction of it."""
    return self.decoder(self.encoder(x))



def pca_plot(score, coeff, pcaModelFit, labels=None):
    """Draw a biplot of the first two principal components.

    The scores are scattered after rescaling each axis by its own range, and
    one arrow per row of ``coeff`` is drawn from the origin.

    Args:
        score: 2-D array; column 0 and column 1 are used as the x and y
            coordinates of the points.
        coeff: 2-D array with one row per arrow; column 0 and column 1 are
            the arrow's x and y components.
        pcaModelFit: fitted PCA object. ``explained_variance_`` is used for
            the axis labels and ``explained_variance_ratio_`` for the title;
            both must have at least two entries.
        labels: optional sequence with one text label per row of ``coeff``.
            When omitted the arrows are labelled "Var1", "Var2", ...

    Returns:
        The matplotlib Axes the plot was drawn on.
    """
    xs = score[:,0]
    ys = score[:,1]

    # Rescale each axis by its range so the points share a plot with the
    # arrows. A zero range (all scores equal) would divide by zero, so the
    # scores are left unscaled in that case.
    x_range = xs.max() - xs.min()
    y_range = ys.max() - ys.min()
    scalex = 1.0 / x_range if x_range else 1.0
    scaley = 1.0 / y_range if y_range else 1.0

    ax = sns.scatterplot(
        x=xs * scalex,
        y=ys * scaley)

    ax.set(xlabel = f'One: {np.round(pcaModelFit.explained_variance_[0],2)}',
            ylabel = f'Two: {np.round(pcaModelFit.explained_variance_[1],2)}')

    # The title reports a percentage, so it must use the variance *ratio*
    # scaled to 0-100 rather than the raw explained variance.
    total_pct = 100 * (pcaModelFit.explained_variance_ratio_[0]
                       + pcaModelFit.explained_variance_ratio_[1])
    ax.set_title(f'PCA Total Variance Explained: {np.round(total_pct, 2)}%')

    # Only build a legend when something is labelled; calling legend() with
    # no labelled artists just emits a "No handles with labels" warning.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(fontsize = 15,
                  bbox_to_anchor= (1.03, 1),
                  title_fontsize = 18,
                  shadow = True,
                  facecolor = 'white')

    # Draw on `ax` explicitly instead of relying on pyplot's current axes.
    for i, loading in enumerate(coeff):
        dx, dy = loading[0], loading[1]
        ax.arrow(0, 0, dx, dy, color = 'r', alpha = 0.5)
        text = "Var"+str(i+1) if labels is None else labels[i]
        # Place the label slightly beyond the arrow tip.
        ax.text(dx * 1.15, dy * 1.15, text, color = 'g', ha = 'center', va = 'center')

    return ax



def distributions_plots(X, title=None):
    """Plot a 100-bin histogram of every column of ``X`` on a 3-wide grid.

    Args:
        X: DataFrame; each column gets its own subplot, in column order.
        title: value appended to the figure title
            ("Encoding Distributions: <title>").

    Returns:
        The matplotlib Figure holding the grid.
    """
    ncols = 3
    n_features = len(X.columns)
    # Always ask for at least one row: plt.subplots rejects nrows=0, which is
    # what a frame with no columns would otherwise produce.
    nrows = max(1, ceil(n_features / ncols))
    width = ncols * 5
    length = nrows * 3

    # squeeze=False keeps `axes` two-dimensional whatever the grid size.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, dpi=120,
                             figsize=(width, length), squeeze=False)
    flat_axes = axes.ravel()

    for i in range(n_features):
        flat_axes[i].hist(X.iloc[:, i], bins=100)

    # Hide the grid cells left over when the column count is not a multiple
    # of ncols, so they do not show up as empty axes.
    for unused in flat_axes[n_features:]:
        unused.set_visible(False)

    fig.suptitle(f"Encoding Distributions: {title}")

    return fig


# Scale every product-share column to [0, 1].
# NOTE(review): the scaler is fitted on the full data set before the
# train/test split, so test rows influence the scaling -- confirm this is
# acceptable for the analysis.
scaler= MinMaxScaler()

X= scaler.fit_transform(econDataPercentTotal)


# Hold out 10% of the rows; the encodings plotted below come from this part.
X_train, X_test = train_test_split(X, test_size=.1, random_state=42, shuffle=True)

models= [shallow, deep, deeep]

for model in models:

    print(model)

    # Train the autoencoder to reconstruct its own input.
    autoencoder = model()
    autoencoder.compile(optimizer='adam', loss='mae')
    autoencoder.fit(X_train, X_train,
                    epochs=10,
                    shuffle=True)

    # Encodings of the held-out rows, one column per encoder output unit.
    encoded = pd.DataFrame(autoencoder.encoder(X_test).numpy())

    # distributions_plots creates its own figure, so no plt.figure() call is
    # made here (the earlier one only produced an empty figure). The class
    # name is passed instead of the class object so the title reads "shallow"
    # rather than "<class '__main__.shallow'>".
    distributions_plots(encoded, title=model.__name__)

    pcaModel = PCA()
    pcaModelFit = pcaModel.fit(encoded)
    principalComponents = pcaModelFit.transform(encoded)

    # pca_plot draws on the current axes, so give it a fresh figure.
    plt.figure()
    pca_plot(
        score= principalComponents[:,0:2],
        coeff= np.transpose(pcaModel.components_[0:2, :]),
        pcaModelFit= pcaModelFit)

    # 2-D t-SNE embedding of the encodings; perplexity is the rounded square
    # root of the number of encoded rows.
    # NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter`;
    # kept as-is to match the installed version -- confirm when upgrading.
    tsne_results = TSNE(
        n_components=2,
        perplexity=np.round(encoded.shape[0]**0.5, 0),
        learning_rate='auto',
        n_iter=1000,
        init='pca',
        n_jobs=16).fit_transform(encoded)

    plt.figure()
    sns.scatterplot(
        x=tsne_results[:,0],
        y=tsne_results[:,1]).set_title(f"tsne of embeddings {model.__name__}")

<class '__main__.shallow'>
Epoch 1/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0249
Epoch 2/10
7030/7030 [==============================] - 11s 2ms/step - loss: 0.0095
Epoch 3/10
7030/7030 [==============================] - 11s 2ms/step - loss: 0.0090
Epoch 4/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0087
Epoch 5/10
7030/7030 [==============================] - 9s 1ms/step - loss: 0.0084
Epoch 6/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0084
Epoch 7/10
7030/7030 [==============================] - 11s 2ms/step - loss: 0.0082
Epoch 8/10
7030/7030 [==============================] - 9s 1ms/step - loss: 0.0082
Epoch 9/10
7030/7030 [==============================] - 9s 1ms/step - loss: 0.0082
Epoch 10/10
7030/7030 [==============================] - 9s 1ms/step - loss: 0.0082

No handles with labels found to put in legend.
c:\Users\yosty\Envs\python\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning: The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.
  warnings.warn(

<class '__main__.deep'>
Epoch 1/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0129
Epoch 2/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0085
Epoch 3/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0082
Epoch 4/10
7030/7030 [==============================] - 11s 2ms/step - loss: 0.0081
Epoch 5/10
7030/7030 [==============================] - 11s 2ms/step - loss: 0.0079
Epoch 6/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0078
Epoch 7/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0078
Epoch 8/10
7030/7030 [==============================] - 11s 1ms/step - loss: 0.0074
Epoch 9/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0071
Epoch 10/10
7030/7030 [==============================] - 10s 1ms/step - loss: 0.0069

No handles with labels found to put in legend.
c:\Users\yosty\Envs\python\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning: The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.
  warnings.warn(

<class '__main__.deeep'>
Epoch 1/10
7030/7030 [==============================] - 21s 3ms/step - loss: 0.0121
Epoch 2/10
7030/7030 [==============================] - 20s 3ms/step - loss: 0.0104
Epoch 3/10
7030/7030 [==============================] - 22s 3ms/step - loss: 0.0104
Epoch 4/10
7030/7030 [==============================] - 24s 3ms/step - loss: 0.0104
Epoch 5/10
7030/7030 [==============================] - 22s 3ms/step - loss: 0.0104
Epoch 6/10
7030/7030 [==============================] - 22s 3ms/step - loss: 0.0104
Epoch 7/10
7030/7030 [==============================] - 23s 3ms/step - loss: 0.0104
Epoch 8/10
7030/7030 [==============================] - 22s 3ms/step - loss: 0.0104
Epoch 9/10
7030/7030 [==============================] - 19s 3ms/step - loss: 0.0104
Epoch 10/10
7030/7030 [==============================] - 21s 3ms/step - loss: 0.0104

No handles with labels found to put in legend.
c:\Users\yosty\Envs\python\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning: The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.
  warnings.warn(

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

		ProductOrSector	Aircraft, spacecraft, and parts thereof	Albuminoidal substances; modified starches; glues; enzymes	Aluminium and articles thereof	Animal or vegetable fats and oils and their cleavage products; prepared edible fats; animal or vegetable waxes	Arms and ammunition; parts and accessories thereof	Articles of apparel and clothing accessories, knitted or crocheted	Articles of apparel and clothing accessories, not knitted or crocheted	Articles of iron or steel	Articles of leather; saddlery and harness; travel goods, handbags and similar containers; articles of animal gut (other than silkworm gut)	Articles of stone, plaster, cement, asbestos, mica or similar materials	...	Tools, implements, cutlery, spoons and forks, of base metal; parts thereof of base metal	Toys, games and sports requisites; parts and accessories thereof	Umbrellas, sun umbrellas, walkingsticks, seatsticks, whips, ridingcrops and parts thereof	Vegetable plaiting materials; vegetable products not elsewhere specified or included	Vehicles other than railway or tramway rollingstock, and parts and accessories thereof	Wadding, felt and nonwovens; special yarns; twine, cordage, ropes and cables and articles thereof	Wood and articles of wood; wood charcoal	Wool, fine or coarse animal hair; horsehair yarn and woven fabric	Works of art, collectors' pieces and antiques	Zinc and articles thereof
ReportingEconomy	PartnerEconomy	Year
Albania	Afghanistan	2005	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
		2008	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	19.0	0.0
		2010	0.0	0.0	0.0	0.0	0.0	193.0	847.0	0.0	0.0	0.0	...	0.0	150.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
		2012	0.0	0.0	0.0	0.0	0.0	2735.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
		2013	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0