Extracting features from the images

In [5]:
%pylab inline
from PIL import Image
import glob
import numpy as np
Populating the interactive namespace from numpy and matplotlib
In [18]:
pngs = sorted(glob.glob('handwriting/pngs/*.png'))
features = ['ink','aspect','topheaviness','rheaviness']
n = len(pngs)
d = len(features)
F = zeros((n,d))
for k,png in enumerate(pngs):
    img = Image.open(png)
    a = array(img)
    a = a[:,:,0]
    a = 255 - a  # invert color to make ink be high values    
    a = array(a,dtype=float)
    h,w = a.shape
    
    ink = a.sum()
    if ink==0:
        continue
    F[k,0]  =  ink/(h*w*255)   # total ink (feature 0)  rescaled by max possible value
    
    # make coordinate arrays
    x = np.linspace(0,w,w,endpoint=False)
    y = np.linspace(0,h,h,endpoint=False)
    X,Y = np.meshgrid(x,y)
    
    xc = (a*X).sum()/ink  # center of mass
    yc = (a*Y).sum()/ink
    #print(xc,yc)
    
    #a>0
    xmin = X[ a>0 ].min()
    xmax = X[ a>0 ].max()
    ymin = Y[ a>0 ].min()
    ymax = Y[ a>0 ].max()
    
    aspect = (ymax-ymin)/(xmax-xmin)
    F[k,1] = log10(aspect)
    
    xc = (xmax+xmin)/2
    yc = (ymax+ymin)/2
    hi_ink = a[ Y>yc ].sum()  # ink above center
    topheaviness = hi_ink/ink
    r_ink = a[ X>xc ].sum()   # ink right of center
    rheaviness = r_ink/ink
    
    F[k,2] = topheaviness
    F[k,3] = rheaviness

    #break
F[:10,:]  # first 10 rows of feature array
Out[18]:
array([[ 0.10606965,  0.09526807,  0.49100406,  0.51943969],
       [ 0.11931608, -0.08783036,  0.48692943,  0.47377997],
       [ 0.1286629 , -0.00653087,  0.47156272,  0.48056999],
       [ 0.11962729,  0.09548376,  0.52347684,  0.52120049],
       [ 0.14772392,  0.12493874,  0.3830123 ,  0.56255654],
       [ 0.17657286, -0.01393578,  0.51544527,  0.525889  ],
       [ 0.12226855, -0.00673338,  0.51194285,  0.53776066],
       [ 0.11811922, -0.1309291 ,  0.49975698,  0.53475518],
       [ 0.13104753,  0.01848341,  0.48597366,  0.5212825 ],
       [ 0.0768571 ,  0.48063783,  0.98093329,  0.48157824]])
In [7]:
X,Y = np.meshgrid([7,8],[1,2,3])
X
Out[7]:
array([[7, 8],
       [7, 8],
       [7, 8]])
In [8]:
Y
Out[8]:
array([[1, 1],
       [2, 2],
       [3, 3]])

Can we plot these points in 4D feature space by plotting all 2D projections?

In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.figure(figsize=(8,8))
for i in range(d):
    for j in range(d):
        plt.subplot(d,d,i*d+j+1)
        if i==j: 
            plt.text(.5,.5,features[i],ha='center')
        else:
            # plot F[:,i] vs F[:,j]
            plt.plot(F[:,j], F[:,i], 'b.', alpha=0.25)
        plt.xticks([])
        plt.yticks([])