# Search

# Scientific Functions in NumPy and SciPy
import datetime

import matplotlib.pyplot as plt
import numba
import numpy as np
import tensorflow as tf

def tSNE(X, ndims=2, perplexity=30, seed=0, max_iter=500, stop_lying_iter=100, mom_switch_iter=400):
    """The t-SNE algorithm (exact, O(N^2) per iteration).

    Args:
        X: the high-dimensional coordinates, array of shape (N, D). A float
            copy is normalized internally; the caller's array is not modified.
        ndims: number of dimensions in output domain
        perplexity: target perplexity handed to computeGaussianPerplexity
        seed: seed for NumPy's global random generator, which draws the
            initial embedding
        max_iter: number of gradient-descent iterations
        stop_lying_iter: iteration at which the exaggeration of P is undone
        mom_switch_iter: iteration at which momentum switches to its final value

    Returns:
        Points of X in low dimension, array of shape (N, ndims)
    """
    exaggeration = 12.0
    momentum = 0.5
    final_momentum = 0.8
    eta = 200.0  # learning rate

    np.random.seed(seed)

    # normalize a private float copy so the caller's data is left untouched
    # and integer input does not break the in-place arithmetic
    X = np.array(X, dtype=np.float64)
    N, _D = X.shape
    X -= X.mean(axis=0)  # zero mean
    scale = np.abs(X).max()
    if scale > 0:  # a constant input would otherwise divide by zero
        X /= scale  # scaled into [-1, 1]

    # compute input similarity for exact t-SNE
    P = computeGaussianPerplexity(X, perplexity)

    # symmetrize and normalize input similarities
    P = P + P.T
    P /= P.sum()

    # lie about the P-values (undone at stop_lying_iter)
    P *= exaggeration

    # initialize solution
    Y = np.random.randn(N, ndims) * 0.0001

    # perform main training loop
    gains = np.ones_like(Y)
    uY = np.zeros_like(Y)
    for i in range(max_iter):
        # compute gradient, then raise the gain where the gradient sign
        # differs from the previous update's sign and shrink it otherwise
        dY = computeExactGradient(P, Y)
        gains = np.where(np.sign(dY) != np.sign(uY), gains + 0.2, gains * 0.8).clip(0.1)

        # gradient update with momentum and gains (step against the gradient)
        uY = momentum * uY - eta * gains * dY
        Y = Y + uY

        # make the solution zero-mean
        Y -= Y.mean(axis=0)

        # Stop lying about the P-values after a while, and switch momentum
        if i == stop_lying_iter:
            P /= exaggeration
        if i == mom_switch_iter:
            momentum = final_momentum

        # print progress
        if (i % 50) == 0:
            C = evaluateError(P, Y)
            now = datetime.datetime.now()
            print(f"{now} - Iteration {i}: Error = {C}")
    return Y


@numba.jit(nopython=True)
def computeExactGradient(P, Y):
    """Gradient of the t-SNE cost function.

    For every point n this accumulates
    ``sum_m (P[n,m] - Q[n,m] / sum_Q) * Q[n,m] * (Y[n] - Y[m])``
    where ``Q = 1 / (1 + squared distance)``.

    Args:
        P: similarity matrix
        Y: low-dimensional coordinates

    Returns:
        dY, a numpy array of shape (N,D)
    """
    N, _D = Y.shape

    # compute squared Euclidean distance matrix of Y, the Q matrix, and the normalization sum
    DD = computeSquaredEuclideanDistance(Y)
    Q = 1 / (1 + DD)
    sum_Q = Q.sum()

    # per-pair multiplier: attraction (P) minus repulsion (normalized Q)
    mult = (P - (Q / sum_Q)) * Q

    dY = np.zeros_like(Y)
    for n in range(N):
        for m in range(N):
            if n == m:
                continue
            dY[n] += (Y[n] - Y[m]) * mult[n, m]
    return dY

@numba.jit(nopython=True)
def evaluateError(P, Y):
    """Evaluate t-SNE cost function, sum P * log(P / Q).

    Args:
        P: similarity matrix
        Y: low-dimensional coordinates

    Returns:
        Total t-SNE error C
    """
    # small constant that keeps the logarithm and the division finite
    eps = np.finfo(np.float32).eps

    DD = computeSquaredEuclideanDistance(Y)

    # Compute Q-matrix and normalization sum; the diagonal is replaced by
    # eps before the matrix is normalized to sum to one
    Q = 1 / (1 + DD)
    np.fill_diagonal(Q, eps)
    Q /= Q.sum()

    # Sum t-SNE error: sum P log(P/Q)
    error = P * np.log((P + eps) / (Q + eps))
    return error.sum()

@numba.jit(nopython=True)
def computeGaussianPerplexity(X, perplexity):
    """Compute Gaussian Perplexity

    For every row a precision ``beta`` is found by bisection so that the
    entropy of the row-normalized Gaussian kernel matches the requested
    perplexity; the row is then normalized to sum to one.

    Args:
        X: numpy array of shape (N,D)
        perplexity: double

    Returns:
        Similarity matrix P, numpy array of shape (N,N) with rows summing to 1
    """
    max_tries = 200  # cap on bisection steps per row
    tol = 1e-5
    eps = np.finfo(np.float32).eps
    # H below is computed with natural logarithms, so the target entropy
    # must be ln(perplexity) for exp(H) to equal the requested perplexity
    target_entropy = np.log(perplexity)

    # Compute the squared Euclidean distance matrix
    N, _D = X.shape
    DD = computeSquaredEuclideanDistance(X)

    # Compute the Gaussian kernel row by row
    P = np.zeros_like(DD)
    for n in range(N):
        beta = 1.0
        min_beta = -np.inf
        max_beta = np.inf

        # iterate until we get a good perplexity
        for _ in range(max_tries):
            # compute Gaussian kernel row
            P[n] = np.exp(-beta * DD[n])
            P[n, n] = eps

            # compute entropy of current row
            # Gaussians to be row-normalized to make it a probability
            # then H = sum_i -P[i] log(P[i])
            #        = sum_i -P[i] (-beta * DD[n] - log(sum_P))
            #        = sum_i P[i] * beta * DD[n] + log(sum_P)
            sum_P = P[n].sum()
            H = beta * (DD[n] * P[n]).sum() / sum_P + np.log(sum_P)

            # Evaluate if entropy within tolerance level
            Hdiff = H - target_entropy
            if -tol < Hdiff < tol:
                break

            if Hdiff > 0:
                # entropy too high: raise beta (narrow the kernel)
                min_beta = beta
                if max_beta == np.inf:
                    beta *= 2
                else:
                    beta = (beta + max_beta) / 2
            else:
                # entropy too low: lower beta (widen the kernel)
                max_beta = beta
                if min_beta == -np.inf:
                    beta /= 2
                else:
                    beta = (beta + min_beta) / 2

        # normalize this row
        P[n] /= P[n].sum()

    assert not np.isnan(P).any()
    return P

@numba.jit(nopython=True)
def computeSquaredEuclideanDistance(X):
    """Compute squared distance

    Args:
        X: numpy array of shape (N,D)

    Returns:
        numpy array of shape (N,N) of squared distances; symmetric with a
        zero diagonal
    """
    N, _D = X.shape
    DD = np.zeros((N, N))
    # visit each unordered pair once and mirror it across the diagonal
    for i in range(N - 1):
        for j in range(i + 1, N):
            diff = X[i] - X[j]
            dist = (diff * diff).sum()
            DD[i, j] = dist
            DD[j, i] = dist
    return DD

# Demo: embed a random subset of MNIST with t-SNE and plot it.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# pick 1000 distinct samples from the dataset; the indices are drawn from the
# same array they are applied to (the sample count, not the shape tuple)
rows = np.random.choice(X_train.shape[0], 1000, replace=False)
# flatten every selected sample into a single feature row
X_data = X_train[rows].reshape(len(rows), -1).astype("float")
X_label = y_train[rows]

# run t-SNE to transform into 2D and visualize in scatter plot
Y = tSNE(X_data, 2, 30, 0, 500, 100, 400)
plt.figure(figsize=(8, 8))
plt.scatter(Y[:, 0], Y[:, 1], c=X_label)
plt.show()

#### Barbara Cartland

# You should write because you love the shape of Flow and sentences and the creation of different words on a page.

#### Subscribe 