In [28]:
# code from last time

import numpy as np

def softmax(S):
    """Column-wise softmax of a score matrix S (classes x samples).

    Subtracts the per-column maximum before exponentiating so that large
    scores do not overflow np.exp (the commented-out print of S.max/S.min
    in the original suggests this was already a concern); the subtraction
    cancels in the ratio, so the result is mathematically unchanged.
    """
    expS = np.exp(S - S.max(axis=0, keepdims=True))
    return expS / expS.sum(axis=0)

def loss(WT, X, y):
    """Cross-entropy loss of a linear softmax classifier, plus its gradient.

    Parameters
    ----------
    WT : (c, d) array -- transposed weight matrix, one row per class.
    X  : (d, n) array -- n inputs as columns.
    y  : length-n sequence of true class indices.

    Returns
    -------
    L : float -- cross-entropy loss summed over the n samples.
    dLdWT : (c, d) array -- gradient of L with respect to WT.
    """
    n = X.shape[1]
    cols = np.arange(n)

    # forward pass for the loss itself
    S = WT @ X               # class scores, shape (c, n)
    P = softmax(S)           # class probabilities, shape (c, n)
    Py = P[y, cols]          # probability assigned to the true class of each sample
    L = -np.log(Py).sum()

    # Backward pass.  For softmax followed by cross-entropy, dL/dS reduces
    # to P minus the one-hot encoding of y; this closed form replaces the
    # original Python loop over the classes (which also read the class
    # count from a global `c` instead of the shape of P).
    dLdS = P.copy()
    dLdS[y, cols] -= 1.0

    dLdWT = dLdS @ X.T

    return L, dLdWT          # loss itself and its gradient

from PIL import Image
import glob

def charclass(png):
    """Extract the character-class name from a png file name.

    File names look like '<prefix>__<class>.png'; the class is the text
    between the double underscore and the '.png' extension.
    """
    after_separator = png.split('__')[1]
    return after_separator[:-4]      # drop the trailing '.png'

pngs = sorted( glob.glob('handwriting_f19/pngs/*.png') )
h,w = np.array(Image.open(pngs[0]))[:,:,0].shape #125,100
selection = sorted( {charclass(png) for png in pngs} )

# Load flattened images as columns of big array X
X = np.empty((h*w,len(pngs)))
for i,png in enumerate(pngs):
X[:,i] = 255 - np.array(Image.open(png))[:,:,0].reshape(h*w)
X /= X.sum()  # to avoid having huge numbers in our network

# Get the true class numbers of all the images
y = [selection.index(charclass(png)) for png in pngs]  # true classes of images

n = len(pngs)
c = len(selection)
hw = h*w

# Suppose we have no idea what the weights should be, and start at all zeros:
WT = np.zeros( (c, hw) )

# get loss and its gradient
L,gradL = loss( WT, X, y )

# change the weights by a multiple "stepsize" of the negative gradient
stepsize = 1000000 # 100   # 1
L,gradL = loss( WT, X, y )
L

import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(10,1))
for j in range(c):
plt.subplot(1,c,j+1)
plt.imshow( WT[j].reshape(h,w), cmap='seismic' )
plt.axis('off')

In [29]:
# Gradient descent: repeatedly change the weights by a multiple
# "stepsize" of the negative gradient of the loss.
stepsize = 10000000  # 100   # 1
WT = np.zeros((c, hw))
for k in range(1000):
    L, gradL = loss(WT, X, y)
    if k == 0: print(L, 'initially')
    print(L, '\r', end='')
    WT = WT - stepsize * gradL  # NOTE(review): update step restored -- the
                                # printed losses can only decrease if WT changes
L, gradL = loss(WT, X, y)
print(L)
plt.figure(figsize=(10, 1))
for j in range(c):
    plt.subplot(1, c, j + 1)
    plt.imshow(WT[j].reshape(h, w), cmap='seismic')
    plt.axis('off')

3486.1138307929855 initially
211.6370183393639

In [30]:
from os.path import join

def loadimages(folder):
    """Load every '*.png' in `folder` as columns of one big array.

    NOTE(review): the `def` line was missing from the extracted source;
    the name `loadimages` is taken from the call sites below.

    Returns
    -------
    X : (h*w, n) array of inverted, UN-normalized pixel values --
        normalization is deliberately left to the caller so that train
        and test sets can share the same scaling factor.
    y : list of true class indices, one per image.
    (h, w) : image height and width in pixels.
    """
    pngs = sorted(glob.glob(join(folder, '*.png')))
    h, w = np.array(Image.open(pngs[0]))[:, :, 0].shape  # 125, 100
    selection = sorted({charclass(png) for png in pngs})

    # Load flattened images as columns of big array X
    X = np.empty((h * w, len(pngs)))
    for i, png in enumerate(pngs):
        X[:, i] = 255 - np.array(Image.open(png))[:, :, 0].reshape(h * w)
    # X /= X.sum()  # this should be done outside

    # Get the true class numbers of all the images
    y = [selection.index(charclass(png)) for png in pngs]  # true classes of images
    return X, y, (h, w)

In [31]:
Xtrain, ytrain, (_, _) = loadimages('handwriting_f19/pngs')
# TODO(review): the line loading Xtest, ytest (presumably loadimages on a
# test folder) appears to be missing from the extracted source -- it must
# run before the normalization below.
Xnorm = Xtrain.sum()
Xtrain = Xtrain / Xnorm
Xtest = Xtest / Xnorm  # should normalize Xtest with the same factor as for Xtrain !!!!

stepsize = 1e7  # 100   # 1
WT = np.zeros((c, hw))
Ltrainlist, Ltestlist = [], []
nsteps = 50
for k in range(nsteps):
    L, gradL = loss(WT, Xtrain, ytrain)
    if k == 0: print(L, 'initially')
    print(k, 'of', nsteps, L, ' ' * 40, '\r', end='')
    Ltest, _ = loss(WT, Xtest, ytest)   # test loss tracked but never trained on
    Ltrainlist.append(L)
    Ltestlist.append(Ltest)
    WT = WT - stepsize * gradL  # NOTE(review): update step restored -- losses
                                # only change over iterations if WT changes

L, gradL = loss(WT, X, y)  # final loss on the original (full) data set
print(L)

plt.figure()
npts = len(Ltrainlist)
plt.semilogy(range(npts), Ltrainlist, label='train')
plt.semilogy(range(npts), Ltestlist, label='test')

plt.figure(figsize=(10, 1))
for j in range(c):
    plt.subplot(1, c, j + 1)
    plt.imshow(WT[j].reshape(h, w), cmap='seismic')
    plt.axis('off')

3486.1138307929855 initially
905.7839843944075973833626


### Add classification to the loss function

In [32]:
def loss(WT, X, y):
    """Cross-entropy loss, its gradient, and the argmax class predictions.

    Same as the earlier `loss` but additionally returns the predicted
    class of each sample so classification accuracy can be tracked.

    Parameters
    ----------
    WT : (c, d) array -- transposed weight matrix, one row per class.
    X  : (d, n) array -- n inputs as columns.
    y  : length-n sequence of true class indices.

    Returns
    -------
    L : float -- cross-entropy loss summed over the n samples.
    dLdWT : (c, d) array -- gradient of L with respect to WT.
    ypredicted : (n,) int array -- argmax class per column of X.
    """
    n = X.shape[1]
    cols = np.arange(n)

    # forward pass for the loss itself
    S = WT @ X
    P = softmax(S)
    ypredicted = np.argmax(P, axis=0)

    Py = P[y, cols]          # probability assigned to the true class of each sample
    L = -np.log(Py).sum()

    # Backward pass: dL/dS = P - onehot(y) for softmax + cross-entropy;
    # replaces the original loop over a global class count `c`.
    dLdS = P.copy()
    dLdS[y, cols] -= 1.0

    dLdWT = dLdS @ X.T

    return L, dLdWT, ypredicted  # loss itself, its gradient, and predictions

In [35]:
Xtrain, ytrain, (_, _) = loadimages('handwriting_f19/pngs')
# TODO(review): loading of Xtest, ytest appears to be missing from the
# extracted source -- it must happen before the normalization below.
Xnorm = Xtrain.sum()
Xtrain = Xtrain / Xnorm
Xtest = Xtest / Xnorm  # should normalize Xtest with the same factor as for Xtrain !!!!

WT = np.zeros((c, hw))

Ltrainlist, Ltestlist, trainaccuracylist, testaccuracylist = [], [], [], []

# A way of setting a schedule for the learning rate: (steps, stepsize) pairs.
schedule = [(100, 1e7), (100, 1e8), (100, 2e8), (100, 4e8), (400, 1e9), (100, 2e9)]

for nsteps, stepsize in schedule:
    print()
    for k in range(nsteps):
        L, gradL, ypredicted = loss(WT, Xtrain, ytrain)
        accuracy = 100 * (ytrain == ypredicted).sum() / len(ytrain)
        trainaccuracylist.append(accuracy)
        # if k==0: print(L,'initially')
        Ltest, _, ypredicted = loss(WT, Xtest, ytest)
        accuracy = 100 * (ytest == ypredicted).sum() / len(ytest)
        testaccuracylist.append(accuracy)
        Ltrainlist.append(L)
        Ltestlist.append(Ltest)
        print('{:>5} of {:>5} steps: loss = {:>10.3f}, accuracy = {:>10.3}%\r'.format(k, nsteps, L, accuracy), end='')
        WT = WT - stepsize * gradL  # NOTE(review): update step restored
L, gradL, _ = loss(WT, X, y)
print()
print(L)

plt.figure()
npts = len(Ltrainlist)
plt.semilogy(range(npts), Ltrainlist, label='train')
plt.semilogy(range(npts), Ltestlist, label='test')
plt.legend()
plt.title('loss')

plt.figure()
# plot 100 - accuracy on a log scale so small error rates stay visible
plt.semilogy(range(npts), 100 - np.array(trainaccuracylist), label='train')
plt.semilogy(range(npts), 100 - np.array(testaccuracylist), label='test')
plt.title('percent inaccuracy')
plt.legend()

plt.figure(figsize=(15, 8))
for j in range(c):
    plt.subplot(2, (c + 1) // 2, j + 1)  # c/2 is a float in Python 3; subplot needs ints
    plt.imshow(WT[j].reshape(h, w), cmap='seismic')
    plt.axis('off')
    plt.title(selection[j])

   99 of   100 steps: loss =    631.072, accuracy =       91.9%
99 of   100 steps: loss =    202.084, accuracy =       94.6%
99 of   100 steps: loss =    111.954, accuracy =       94.6%
99 of   100 steps: loss =     67.119, accuracy =       94.5%
399 of   400 steps: loss =     24.398, accuracy =       94.1%
99 of   100 steps: loss =     21.349, accuracy =       93.7%
21.32676530784488


### Compare with the class averages of the training set

In [34]:
# Show the per-class mean training image for comparison with the learned weights.
plt.figure(figsize=(15, 8))
ytrain = np.array(ytrain)  # so the boolean column mask below works
for j in range(c):
    plt.subplot(2, (c + 1) // 2, j + 1)  # c/2 is a float in Python 3; subplot needs ints
    plt.imshow(Xtrain[:, ytrain == j].mean(axis=1).reshape(h, w), cmap='seismic')
    plt.axis('off')


# Let's replicate the above with TensorFlow

In [32]:
import tensorflow
tensorflow.__version__

Out[32]:
'2.0.0'
In [ ]:
def preparedata(folder):  ############## needs some mods
    """Load every '*.png' in `folder` as columns of one big array.

    Like loadimages, but the pixel array is normalized here by its global
    sum to avoid having huge numbers in our network.

    Returns X, the list y of true class indices, and the image size (h, w).
    """
    pngs = sorted(glob.glob(join(folder, '*.png')))
    h, w = np.array(Image.open(pngs[0]))[:, :, 0].shape  # 125, 100
    selection = sorted({charclass(png) for png in pngs})

    # One flattened, inverted image per column of X.
    X = np.empty((h * w, len(pngs)))
    for i, png in enumerate(pngs):
        X[:, i] = 255 - np.array(Image.open(png))[:, :, 0].reshape(h * w)
    X /= X.sum()  # keep the numbers in the network small

    # True class number of every image, from the sorted class list.
    y = [selection.index(charclass(png)) for png in pngs]
    return X, y, (h, w)

In [ ]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense   # fully connected layer
from tensorflow.keras.utils import to_categorical

# A single dense softmax layer: 10 outputs (one per character class),
# one input per pixel of a 125x100 image.
model = Sequential()
model.add( Dense( 10, input_dim=125*100, activation='softmax' ) )    # 10 is the number of nodes i.e. the # outputs

from tensorflow.keras.optimizers import SGD
sgd = SGD( learning_rate = 1  )
model.compile( optimizer=sgd, loss='categorical_crossentropy' )

folder = 'handwriting_f19/pngs'  # TODO(review): `folder` was undefined in the source -- confirm path

# preparedata returns three values, not two as the original unpacking assumed
X, y, (h, w) = preparedata( folder )
Y = to_categorical(y)  # generate the 1-hot encoding of the label data

# Keras expects one sample per ROW, so transpose X (our columns are images).
# (Original line was the placeholder `model.fit( X?, Y, verbose=2 )`.)
model.fit( X.T, Y, verbose=2 )