
import datetime

import tensorflow as tf

import matplotlib.pyplot as plt

import numpy as np

import numba

def tSNE(X, ndims=2, perplexity=30, seed=0, max_iter=500, stop_lying_iter=100, mom_switch_iter=400):
    """The t-SNE algorithm

    Embeds the rows of ``X`` into ``ndims`` dimensions by gradient descent
    (with momentum and per-coordinate adaptive gains) on the exact t-SNE
    cost. The input array is not modified; a normalized float copy is used.

    Args:
        X: the high-dimensional coordinates, a 2-D array of shape (N, D)
        ndims: number of dimensions in output domain
        perplexity: target perplexity handed to computeGaussianPerplexity
        seed: seed for the random initial embedding. A non-negative integer
            gives a reproducible result; ``None`` or a negative value draws
            fresh entropy from the operating system.
        max_iter: number of gradient-descent iterations
        stop_lying_iter: iteration at which the early exaggeration of the
            P-values is removed
        mom_switch_iter: iteration at which the momentum switches from its
            initial value to its final value

    Returns:
        Points of X in low dimension, a numpy array of shape (N, ndims)
    """
    momentum = 0.5
    final_momentum = 0.8
    eta = 200.0
    exaggeration = 12.0  # early-exaggeration factor applied to P

    # Work on a float copy so the caller's array is left untouched and
    # integer input (e.g. raw pixel values) can be centered and scaled.
    X = np.array(X, dtype=float)
    N, _D = X.shape

    # normalize input
    X -= X.mean(axis=0)  # zero mean
    scale = np.abs(X).max()
    if scale > 0:  # constant input is already all zeros after centering
        X /= scale  # largest absolute value becomes 1

    # compute input similarity for exact t-SNE
    P = computeGaussianPerplexity(X, perplexity)

    # symmetrize and normalize input similarities
    P = P + P.T
    P /= P.sum()

    # lie about the P-values
    P *= exaggeration

    # initialize solution
    rng = np.random.default_rng(seed if seed is not None and seed >= 0 else None)
    Y = rng.standard_normal((N, ndims)) * 0.0001

    # perform main training loop
    gains = np.ones_like(Y)
    uY = np.zeros_like(Y)
    for i in range(max_iter):
        # compute gradient, update gains: grow the gain where the gradient
        # and the previous update disagree in sign, shrink it otherwise,
        # and never let it fall below 0.1
        dY = computeExactGradient(P, Y)
        gains = np.where(np.sign(dY) != np.sign(uY), gains + 0.2, gains * 0.8).clip(0.1)

        # gradient update with momentum and gains
        uY = momentum * uY - eta * gains * dY
        Y = Y + uY

        # make the solution zero-mean
        Y -= Y.mean(axis=0)

        # Stop lying about the P-values after a while, and switch momentum
        if i == stop_lying_iter:
            P /= exaggeration
        if i == mom_switch_iter:
            momentum = final_momentum

        # print progress
        if (i % 50) == 0:
            C = evaluateError(P, Y)
            now = datetime.datetime.now()
            print(f"{now} - Iteration {i}: Error = {C}")

    return Y


def computeExactGradient(P, Y):
    """Gradient of t-SNE cost function

    Computes, for every point n,
    ``dY[n] = sum_{m != n} (P[n,m] - Q[n,m]/sum_Q) * Q[n,m] * (Y[n] - Y[m])``
    where ``Q[n,m] = 1 / (1 + ||Y[n] - Y[m]||^2)``.

    Args:
        P: similarity matrix, shape (N,N)
        Y: low-dimensional coordinates, shape (N,D)

    Returns:
        dY, a numpy array of shape (N,D)
    """
    N, _D = Y.shape
    if N < 2:
        # No pairs exist, so there is nothing to attract or repel.
        return np.zeros_like(Y)

    # compute squared Euclidean distance matrix of Y, the Q matrix, and the normalization sum
    DD = computeSquaredEuclideanDistance(Y)
    Q = 1.0 / (1.0 + DD)
    # Self-pairs are excluded from the normalization, matching evaluateError
    # (which also removes the diagonal before normalizing Q).
    np.fill_diagonal(Q, 0.0)
    sum_Q = Q.sum()

    # compute gradient
    mult = (P - (Q / sum_Q)) * Q  # diagonal is zero because Q's diagonal is

    # sum_m mult[n,m] * (Y[n] - Y[m])
    #   = Y[n] * sum_m mult[n,m]  -  sum_m mult[n,m] * Y[m]
    # evaluated for all n at once instead of a Python double loop.
    dY = mult.sum(axis=1, keepdims=True) * Y - mult @ Y
    return dY


def evaluateError(P, Y):
    """Evaluate t-SNE cost function

    Returns ``sum(P * log(P / Q))``, where Q is the normalized Student-t
    similarity matrix of the embedding ``Y``. A small epsilon is added to
    numerator and denominator so zero entries do not produce ``log(0)``.

    Args:
        P: similarity matrix, shape (N,N)
        Y: low-dimensional coordinates, shape (N,D)

    Returns:
        Total t-SNE error C
    """
    eps = np.finfo(np.float32).eps

    DD = computeSquaredEuclideanDistance(Y)

    # Compute Q-matrix and normalization sum; the diagonal (self-similarity)
    # is replaced by a negligible value before normalizing.
    Q = 1 / (1 + DD)
    np.fill_diagonal(Q, eps)
    Q /= Q.sum()

    # Sum t-SNE error: sum P log(P/Q)
    error = P * np.log((P + eps) / (Q + eps))
    return error.sum()


def computeGaussianPerplexity(X, perplexity):
    """Compute Gaussian Perplexity

    For each row n, searches for the Gaussian precision ``beta`` whose
    row-normalized kernel ``exp(-beta * DD[n])`` has entropy equal to
    ``log(perplexity)``, then stores that normalized row in P. The search
    doubles/halves ``beta`` until the target is bracketed and bisects
    afterwards, giving up after 200 tries per row.

    Args:
        X: numpy array of shape (N,D)
        perplexity: double

    Returns:
        Similarity matrix P, shape (N,N); each row sums to 1
    """
    eps = np.finfo(np.float32).eps
    tol = 1e-5
    max_tries = 200
    # H below is measured in nats (natural log), so the target entropy must
    # be the natural log of the perplexity as well.
    target_H = np.log(perplexity)

    # Compute the squared Euclidean distance matrix
    N, _D = X.shape
    DD = computeSquaredEuclideanDistance(X)

    # Compute the Gaussian kernel row by row
    P = np.zeros_like(DD)
    for n in range(N):
        beta = 1.0
        min_beta = -np.inf
        max_beta = np.inf

        # iterate until we get a good perplexity
        for _ in range(max_tries):
            # compute Gaussian kernel row
            P[n] = np.exp(-beta * DD[n])
            P[n, n] = eps  # keeps the row sum strictly positive

            # compute entropy of current row
            # Gaussians to be row-normalized to make it a probability
            # then H = sum_i -p[i] log(p[i]),  p[i] = P[n,i] / sum_P
            #        = sum_i -p[i] (-beta * DD[n,i] - log(sum_P))
            #        = beta * sum_i p[i] * DD[n,i] + log(sum_P)
            sum_P = P[n].sum()
            H = beta * (DD[n] @ P[n]) / sum_P + np.log(sum_P)

            # Evaluate if entropy within tolerance level
            Hdiff = H - target_H
            if -tol < Hdiff < tol:
                break

            if Hdiff > 0:
                # entropy too high: kernel too wide, raise beta
                min_beta = beta
                if np.isinf(max_beta):
                    beta *= 2
                else:
                    beta = (beta + max_beta) / 2
            else:
                # entropy too low: kernel too narrow, lower beta
                max_beta = beta
                if np.isinf(min_beta):
                    beta /= 2
                else:
                    beta = (beta + min_beta) / 2

        # normalize this row
        P[n] /= P[n].sum()

    assert not np.isnan(P).any()
    return P


def computeSquaredEuclideanDistance(X):
    """Compute squared distance

    Args:
        X: numpy array of shape (N,D)

    Returns:
        numpy array of shape (N,N) of squared distances; entry [i, j] is
        ``sum((X[i] - X[j]) ** 2)``, so the matrix is symmetric with a zero
        diagonal
    """
    # Float arithmetic avoids wrap-around when X holds unsigned integers.
    X = np.asarray(X, dtype=float)
    N, _D = X.shape
    DD = np.zeros((N, N))
    for i in range(N):
        # One broadcasted subtraction per row replaces the inner Python loop
        # while keeping memory at O(N*D) rather than O(N*N*D).
        diff = X[i] - X
        DD[i] = (diff * diff).sum(axis=1)
    return DD

# Load MNIST; only the training split is used below.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# pick 1000 samples from the dataset
# (indices are drawn over the training split because that is the array
# they are used to index)
n_samples = 1000
rows = np.random.choice(X_train.shape[0], n_samples, replace=False)
X_data = X_train[rows].reshape(n_samples, -1).astype("float")
X_label = y_train[rows]

# run t-SNE to transform into 2D and visualize in scatter plot
Y = tSNE(X_data, 2, 30, 0, 500, 100, 400)

plt.scatter(Y[:,0], Y[:,1], c=X_label)