import datetime

import matplotlib.pyplot as plt
import numba
import numpy as np
import tensorflow as tf
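
# Exact t-SNE from scratch: Gaussian similarities in the input space are matched
# to Student t-distributed similarities in the low-dimensional output by gradient
# descent on the KL divergence. The heavy loops are JIT-compiled with Numba, and
# the result is demonstrated on a 1000-digit MNIST sample at the bottom.
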
def tSNE(X, ndims=2, perplexity=30, seed=0, max_iter=500, stop_lying_iter=100, mom_switch_iter=400):
    """The t-SNE algorithm

    Args:
        X: the high-dimensional coordinates, a numpy array of shape (N, D)
        ndims: number of dimensions in the output domain
        perplexity: target perplexity of the Gaussian kernels in the input domain
        seed: random seed for the initial solution
        max_iter: number of gradient descent iterations
        stop_lying_iter: iteration at which the early exaggeration of P ends
        mom_switch_iter: iteration at which momentum switches to its final value

    Returns:
        Points of X in low dimension, a numpy array of shape (N, ndims)
    """
    momentum = 0.5
    final_momentum = 0.8
    eta = 200.0

    N, _D = X.shape
    np.random.seed(seed)

    # normalize input
    X -= X.mean(axis=0)    # zero mean
    X /= np.abs(X).max()   # max-abs scaling into [-1, 1]

    # compute input similarity for exact t-SNE
    P = computeGaussianPerplexity(X, perplexity)

    # symmetrize and normalize input similarities
    P = P + P.T
    P /= P.sum()

    # lie about the P-values (early exaggeration)
    P *= 12.0

    # initialize solution
    Y = np.random.randn(N, ndims) * 0.0001

    # perform main training loop
    gains = np.ones_like(Y)
    uY = np.zeros_like(Y)
    for i in range(max_iter):
        # compute gradient, update gains
        dY = computeExactGradient(P, Y)
        gains = np.where(np.sign(dY) != np.sign(uY), gains + 0.2, gains * 0.8).clip(0.1)

        # gradient update with momentum and gains
        uY = momentum * uY - eta * gains * dY
        Y = Y + uY

        # make the solution zero-mean
        Y -= Y.mean(axis=0)

        # stop lying about the P-values after a while, and switch momentum
        if i == stop_lying_iter:
            P /= 12.0
        if i == mom_switch_iter:
            momentum = final_momentum

        # print progress
        if (i % 50) == 0:
            C = evaluateError(P, Y)
            now = datetime.datetime.now()
            print(f"{now} - Iteration {i}: Error = {C}")
    return Y
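
# The exact t-SNE gradient implemented below. With w_nm = (1 + ||y_n - y_m||^2)^-1
# and q_nm = w_nm / sum_Q, the gradient of the cost C = sum_nm p_nm log(p_nm / q_nm) is
#   dC/dy_n = 4 * sum_m (p_nm - q_nm) * w_nm * (y_n - y_m)
# computeExactGradient() drops the constant factor of 4; it is absorbed by the
# learning rate eta in tSNE().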
@numba.jit(nopython=True)
def computeExactGradient(P, Y):
    """Gradient of the t-SNE cost function

    Args:
        P: similarity matrix of shape (N, N)
        Y: low-dimensional coordinates of shape (N, ndims)

    Returns:
        dY, a numpy array of shape (N, ndims)
    """
    N, _D = Y.shape
    # compute the squared Euclidean distance matrix of Y, the Q matrix, and the
    # normalization sum; as in evaluateError(), the diagonal is excluded from
    # the normalization because q_nn is not part of the model
    DD = computeSquaredEuclideanDistance(Y)
    Q = 1 / (1 + DD)
    np.fill_diagonal(Q, np.finfo(np.float32).eps)
    sum_Q = Q.sum()
    # compute gradient
    mult = (P - (Q / sum_Q)) * Q
    dY = np.zeros_like(Y)
    for n in range(N):
        for m in range(N):
            if n == m:
                continue
            dY[n] += (Y[n] - Y[m]) * mult[n, m]
    return dY
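
# For illustration only, the same gradient in vectorized NumPy form (a sketch,
# not called by tSNE(); computeExactGradientVectorized is a name introduced here).
# The loop over m collapses into a row sum and a matrix product:
#   sum_m mult[n,m] * (Y[n] - Y[m]) = rowsum(mult)[n] * Y[n] - (mult @ Y)[n]
def computeExactGradientVectorized(P, Y):
    DD = computeSquaredEuclideanDistance(Y)
    Q = 1 / (1 + DD)
    np.fill_diagonal(Q, np.finfo(np.float32).eps)
    mult = (P - Q / Q.sum()) * Q
    return mult.sum(axis=1).reshape(-1, 1) * Y - mult @ Y
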
@numba.jit(nopython=True)
def evaluateError(P, Y):
    """Evaluate the t-SNE cost function

    Args:
        P: similarity matrix of shape (N, N)
        Y: low-dimensional coordinates of shape (N, ndims)

    Returns:
        Total t-SNE error C
    """
    DD = computeSquaredEuclideanDistance(Y)
    # compute the Q matrix and normalization sum
    Q = 1 / (1 + DD)
    np.fill_diagonal(Q, np.finfo(np.float32).eps)
    Q /= Q.sum()
    # sum the t-SNE error: C = sum_ij P log(P / Q), the KL divergence between P
    # and Q; the eps terms guard against taking the log of, or dividing by, zero
    error = P * np.log((P + np.finfo(np.float32).eps) / (Q + np.finfo(np.float32).eps))
    return error.sum()
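
# Note: KL(P || Q) >= 0, with equality only when Q reproduces P exactly, so the
# error printed every 50 iterations should trend downward (it drops once at
# stop_lying_iter, when the exaggerated P values are rescaled).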
@numba.jit(nopython=True)
def computeGaussianPerplexity(X, perplexity):
    """Compute Gaussian kernel rows with a fixed perplexity

    Args:
        X: numpy array of shape (N, D)
        perplexity: double, the target perplexity of each row

    Returns:
        Similarity matrix P of shape (N, N)
    """
    # compute the squared Euclidean distance matrix
    N, _D = X.shape
    DD = computeSquaredEuclideanDistance(X)

    # compute the Gaussian kernel row by row
    P = np.zeros_like(DD)
    for n in range(N):
        found = False
        beta = 1.0
        min_beta = -np.inf
        max_beta = np.inf
        tol = 1e-5

        # binary-search beta until the row's entropy matches log(perplexity)
        n_iter = 0
        while not found and n_iter < 200:
            # compute Gaussian kernel row
            P[n] = np.exp(-beta * DD[n])
            P[n, n] = np.finfo(np.float32).eps

            # compute the entropy of the current row: the Gaussians are to be
            # row-normalized to make them a probability, so
            #   H = sum_i -p[i] log(p[i])
            #     = sum_i -p[i] * (-beta * DD[n,i] - log(sum_P))
            #     = beta * sum_i p[i] * DD[n,i] + log(sum_P)
            sum_P = P[n].sum()
            H = beta * (DD[n] @ P[n]) / sum_P + np.log(sum_P)

            # evaluate whether the entropy is within the tolerance level;
            # natural log here, since H above is measured in nats
            Hdiff = H - np.log(perplexity)
            if -tol < Hdiff < tol:
                found = True
                break
            if Hdiff > 0:
                # entropy too high: the kernel is too wide, increase beta
                min_beta = beta
                if np.isinf(max_beta):
                    beta *= 2
                else:
                    beta = (beta + max_beta) / 2
            else:
                # entropy too low: the kernel is too narrow, decrease beta
                max_beta = beta
                if np.isinf(min_beta):
                    beta /= 2
                else:
                    beta = (beta + min_beta) / 2
            n_iter += 1
        # normalize this row
        P[n] /= P[n].sum()
    assert not np.isnan(P).any()
    return P
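
# Quick sanity check (illustrative only; rowPerplexity is a helper introduced
# here, not part of the algorithm). Perplexity is defined as exp(H) with H the
# Shannon entropy in nats, so each normalized row of the returned P should give
# a value close to the requested perplexity:
def rowPerplexity(p_row):
    p = p_row / p_row.sum()
    return np.exp(-(p * np.log(p)).sum())
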
@numba.jit(nopython=True)
def computeSquaredEuclideanDistance(X):
    """Compute the pairwise squared Euclidean distances

    Args:
        X: numpy array of shape (N, D)

    Returns:
        numpy array of shape (N, N) of squared distances
    """
    N, _D = X.shape
    DD = np.zeros((N, N))
    for i in range(N - 1):
        for j in range(i + 1, N):
            # fill both halves of the symmetric matrix
            diff = X[i] - X[j]
            DD[i, j] = diff @ diff
            DD[j, i] = DD[i, j]
    return DD
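
# For reference, an equivalent vectorized form (a sketch; the loop version above
# is kept because it JIT-compiles well and avoids large temporaries), using
# ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j:
def computeSquaredEuclideanDistanceVectorized(X):
    sq = (X ** 2).sum(axis=1)
    DD = sq.reshape(-1, 1) + sq.reshape(1, -1) - 2 * (X @ X.T)
    return np.maximum(DD, 0.0)  # clamp tiny negatives from floating-point round-off
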
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# pick 1000 random samples from the training set
rows = np.random.choice(X_train.shape[0], 1000, replace=False)
X_data = X_train[rows].reshape(1000, -1).astype("float")
X_label = y_train[rows]

# run t-SNE to transform into 2D and visualize in a scatter plot
Y = tSNE(X_data, ndims=2, perplexity=30, seed=0, max_iter=500, stop_lying_iter=100, mom_switch_iter=400)
plt.figure(figsize=(8, 8))
plt.scatter(Y[:, 0], Y[:, 1], c=X_label)
plt.show()
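
# Optional cross-check (assumes scikit-learn is installed; nothing above needs it):
# the reference implementation should produce a qualitatively similar layout.
# from sklearn.manifold import TSNE
# Y_ref = TSNE(n_components=2, perplexity=30, random_state=0).fit_transform(X_data)
# plt.scatter(Y_ref[:, 0], Y_ref[:, 1], c=X_label)
# plt.show()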