In [2]:
from PIL import Image
In [20]:
import glob
%pylab inline
from numpy import *
set_printoptions(linewidth=200)
Populating the interactive namespace from numpy and matplotlib
In [36]:
pngs = []
pngs += glob.glob('pngs/*_09.png')
pngs += glob.glob('pngs/*__8.png')
pngs += glob.glob('pngs/*__1.png')
for png in pngs:
    print(png)
    img = Image.open(png)
    #imshow(img)
    a = array(img)
    print( a.shape )
    a = a[:,:,0]   #  select the red layer (because red, green, blue all the same)
    print( a.shape,a.dtype,a.max() )
    a = array(255-a,dtype=float)
    h,w = a.shape
    #for i in range(h):
    #    for j in range(w):
    #        print( str( int( a[i,j]> 0) ) , end='' )
    #    print()
    break
pngs/039_20170329_aishanib_09.png
(125, 100, 4)
(125, 100) uint8 255

Extract features

In [37]:
n = len(pngs)
features = ['ink','width','height','topheaviness','rightheaviness','log aspect']
d = len(features)
F = empty((n,d))  # array of feature vectors


for i,png in enumerate(pngs):
    img = Image.open(png)
    a = array(img)
    #print( a.shape )
    a = a[:,:,0]   #  select the red layer (because red, green, blue all the same)
    h,w = a.shape

    x = linspace(0,w,w,endpoint=False)
    y = linspace(0,h,h,endpoint=False)

    X,Y = meshgrid(x,y)
    
    #print( a.shape,a.dtype,a.max() )
    a = array(255-a,dtype=float)
    
    ink = a.sum()
    F[i,0] = ink/(255*w*h/5)   # can we normalize this sensibly?
    
    xmin = X[ a>0 ].min()  # the minimum x value where a>0
    xmax = X[ a>0 ].max()
    ymin = Y[ a>0 ].min()  # the minimum y value where a>0
    ymax = Y[ a>0 ].max()
    width  = xmax - xmin
    height = ymax - ymin
    F[i,1] = width/w   # can we normalize this sensibly?
    F[i,2] = height/h   # can we normalize this sensibly?
    
    xc = (xmin+xmax)/2   # center of character
    yc = (ymin+ymax)/2
    
    # could alteranatively use center of mass
    # xc = (a*X).sum()/ink
    # yc = (a*Y).sum()/ink
    
    # total ink above center
    F[i,3] = a[ Y>yc ].sum()/ink

    # total ink right of center
    F[i,4] = a[ X>xc ].sum()/ink

    # aspect
    F[i,5] = log10(height/width)
    
    
print(F)
[[ 0.08680314  0.19        0.128       0.51762835  0.55660769 -0.07463362]
 [ 0.02950275  0.09        0.08        0.43720757  0.48149724  0.04575749]
 [ 0.05732078  0.17        0.096       0.40884462  0.48656341 -0.15126768]
 ..., 
 [ 0.59177412  0.61        0.736       0.55075599  0.20197161  0.17845799]
 [ 0.31515294  0.39        0.72        0.4654074   0.30769001  0.3631779 ]
 [ 0.33817255  0.13        0.832       0.49046084  0.36325811  0.90308999]]
In [ ]:
We cannot make a picture of dots in 6D space.
We could make an array of all coordinate plane projections.
In [39]:
figure(figsize=(12,12))
for i in range(d):
    for j in range(d):
        # plot the i,j coordinate plane projections
        subplot(d,d,i*d+j+1)
        if i==j: 
            text(.5,.5,features[i],ha='center')
        else:
            plot(F[:,j],F[:,i],'bo',alpha=0.5)
        xticks([])
        yticks([])
            
In [42]:
c = list(set([ png[-6:-4] for png in pngs]))
c
Out[42]:
['09', '_1', '_8']
In [43]:
colors = 'rgb'
colordict = {k:colors[i] for i,k in enumerate(c)}
colordict
Out[43]:
{'09': 'r', '_1': 'g', '_8': 'b'}
In [45]:
figure(figsize=(12,12))
for i in range(d):
    for j in range(d):
        # plot the i,j coordinate plane projections
        subplot(d,d,i*d+j+1)
        if i==j: 
            text(.5,.5,features[i],ha='center')
        else:
            for k,png in enumerate(pngs):
                plot(F[k,j],F[k,i],'o',alpha=0.1,color=colordict[png[-6:-4]])
        xticks([])
        yticks([])
In [ ]: