In [ ]:
import matplotlib.pyplot as plt
In [ ]:
# Create a 15x15 figure: the canvas that all six subplots are drawn on.
fig = plt.figure(figsize=(15, 15))
# Lay the subplots out on a 3-row by 2-column grid, filling positions 1..6 in order.
axes = [fig.add_subplot(3, 2, position) for position in range(1, 7)]
# x-axis values: nyears consecutive years starting at firstyear.
years = range(firstyear, firstyear + nyears)
In [ ]:
# Draw one panel per letter, comparing the 'M' and 'F' series of `d[letter]`
# against `years`.
# NOTE(review): presumably each series holds one count per year — confirm where `d` is built.
for ax, letter in zip(axes, 'eiouay'):
    # Attach the label to each line where it is drawn, so the legend cannot
    # fall out of sync if the plotting order ever changes.
    ax.plot(years, d[letter]['M'], label='Male')
    ax.plot(years, d[letter]['F'], color='r', label='Female')
    ax.set_title(letter.upper() + ' Male vs Female')
    ax.set_xlabel('Year')
    ax.set_ylabel('Letter Count')
    ax.legend()

Regular expressions, cont'd

Match a phone number

In [2]:
import re

# Sample sentence: one real phone number plus a look-alike made of letters.
s = (
    'I think the Math Dept phone number is 716-645-6284. '
    'The following is not a phone number abc-def-ghij.'
)
# Three digits, a dash, three digits, a dash, four digits.
pattern = '[0-9]{3}-[0-9]{3}-[0-9]{4}'
re.findall(pattern, s)
Out[2]:
['716-645-6284']
In [4]:
# Same phone-number pattern, using the \d shorthand for a digit.
# Written as a raw string so the backslashes reach the regex engine untouched:
# in a plain literal '\d' is an invalid escape sequence and triggers a warning.
pattern = r'\d{3}-\d{3}-\d{4}'
re.findall(pattern, s)
Out[4]:
['716-645-6284']
In [5]:
# The number now has an extra digit attached at each end of the digit groups.
s = (
    'I think the Math Dept phone number is 5716-645-62848. '
    'The following is not a phone number abc-def-ghij.'
)
# Search with whatever `pattern` is currently defined in the session.
re.findall(pattern, s)
Out[5]:
['716-645-6284']
In [ ]:
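# \b matches a word boundary: the position between a word character (letter, digit or underscore) and a non-word character or the start/end of the string.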
In [10]:
# An over-long digit run (not a phone number) followed by the real number.
s = (
    'I think the Math Dept phone number is 5716-645-62848, I mean 716-645-6284. '
    'The following is not a phone number abc-def-ghij.'
)
# \b pins each end of the match to a word boundary, so digits embedded in a
# longer run of digits are rejected. A raw string keeps every backslash
# literal, avoiding the mix of '\\b' and the invalid '\d' escape.
pattern = r'\b\d{3}-\d{3}-\d{4}\b'
re.findall(pattern, s)
Out[10]:
['716-645-6284']

UB class schedule

In [11]:
# Read the whole saved HTML file into memory as a single string.
with open('Undergraduate_UB_Academic_Schedule.html') as html_file:
    s = html_file.read()
In [ ]:
# NOTE(review): empty placeholder — presumably the regex for searching the schedule HTML is still to be written; confirm.
pat = ''
In [14]:
import csv

csvfile = '2189_all_mth_courses.csv'   # to get this file, append its name to the course website URL

# newline='' is what the csv module asks for when it is handed a file object,
# so that newlines embedded inside quoted fields are parsed correctly.
with open(csvfile, newline='') as f:
    r = csv.DictReader(f, delimiter='\t')   # the file is tab-separated despite its .csv name

    # Look at the first data row only (None when the file has no data rows).
    row = next(r, None)
    if row is not None:
        print(row['ROOM'])

    # Column names taken from the header line; fieldnames is None for an
    # empty file, so fall back to an empty list instead of raising TypeError.
    for item in r.fieldnames or []:
        print(item)
Capen 262
TERMSOURCEKEY
TERM
CATALOGNUMBERSOURCEKEY
CLASSNUMBERSECTION
CLASSSECTION
CLASSSECTIONSOURCEKEY
COURSESOURCEKEY
COURSETYPESOURCEKEY
COURSEDESCRIPTION
ENROLLTOTAL
SECTIONCOUNT
SECTIONCAPACITY
ROOMCAPACITY
ROOM
FACULTY
FACULTYSOURCEKEY
CLASSMEETINGPATTERN
CLASSMEETINGPATTERNSOURCEKEY
CLASSSTARTTIME
CLASSENDTIME
CAMPUS
ACADEMICORG
In [15]:
import pandas
In [16]:
# Load the tab-separated course listing into a DataFrame.
df = pandas.read_csv(
    '2189_all_mth_courses.csv',
    sep='\t',
)
In [17]:
# Preview the first five rows of the table.
df.head(n=5)
Out[17]:
TERMSOURCEKEY TERM CATALOGNUMBERSOURCEKEY CLASSNUMBERSECTION CLASSSECTION CLASSSECTIONSOURCEKEY COURSESOURCEKEY COURSETYPESOURCEKEY COURSEDESCRIPTION ENROLLTOTAL ... ROOMCAPACITY ROOM FACULTY FACULTYSOURCEKEY CLASSMEETINGPATTERN CLASSMEETINGPATTERNSOURCEKEY CLASSSTARTTIME CLASSENDTIME CAMPUS ACADEMICORG
0 2189 Fall 2018 101LR MTH 101LR C C C 11269 LEC Intro to Contemporary Math 54 ... 58 Capen 262 Mangahas Kutluhan,Johanna 37919103 Monday-Wednesday-Friday MWF 10:00AM 10:50AM North Campus Mathematics
1 2189 Fall 2018 115LR MTH 115LR SP1 SP1 SP1 11277 LEC Survey of Algebra & Trig 70 ... 0 SIM HQ Arr Cho,Wee Weng 38012960 Unspecified UNSP Unknown Unknown Overseas Campus Mathematics
2 2189 Fall 2018 115LR MTH 115LR SP2 SP2 SP2 11277 LEC Survey of Algebra & Trig 55 ... 0 SIM HQ Arr Cho,Wee Weng 38012960 Unspecified UNSP Unknown Unknown Overseas Campus Mathematics
3 2189 Fall 2018 121LR MTH 121LR H H H 11288 LEC Surv Calculus & Appl 1 60 ... 60 Norton 218 Gu,Jiaxi 50025739 Monday-Wednesday-Friday MWF 3:00PM 3:50PM North Campus Mathematics
4 2189 Fall 2018 121LR MTH 121LR VT VT VT 11288 LEC Surv Calculus & Appl 1 163 ... 0 Online Casper,Michael Justin 29804368 Arranged ARR 12:00AM 12:00AM Virtual Mathematics

5 rows × 22 columns

In [18]:
# Select the ROOM column (all rows) as a Series.
df.loc[:, 'ROOM']
Out[18]:
0       Capen 262
1      SIM HQ Arr
2      SIM HQ Arr
3      Norton 218
4          Online
5         Nsc 228
6         Nsc 220
7         Knox 04
8         Nsc 222
9      Norton 218
10        Nsc 216
11     Obrian 112
12        Nsc 222
13     Frnczk 422
14     Clemen 322
15     Norton 218
16     Norton 218
17      Clemen 06
18       Math 250
19      Clemen 17
20     SIM HQ Arr
21     Clemen 322
22      Clemen 04
23        Knox 04
24      Clemen 04
25      Clemen 06
26     Frnczk 422
27     Obrian 214
28        Nsc 220
29     Clemen 322
          ...    
387       Arr Arr
388       Arr Arr
389       Arr Arr
390       Arr Arr
391       Arr Arr
392       Arr Arr
393       Arr Arr
394       Arr Arr
395       Arr Arr
396       Arr Arr
397       Arr Arr
398       Arr Arr
399       Arr Arr
400       Arr Arr
401       Arr Arr
402       Arr Arr
403       Arr Arr
404       Arr Arr
405      Math Arr
406       Arr Arr
407       Arr Arr
408       Arr Arr
409       Arr Arr
410       Arr Arr
411       Arr Arr
412       Arr Arr
413       Arr Arr
414       Arr Arr
415       Arr Arr
416       Arr Arr
Name: ROOM, Length: 417, dtype: object
In [20]:
# Convert the ROOM column into a plain Python list.
df['ROOM'].tolist()
Out[20]:
['Capen 262',
 'SIM HQ Arr',
 'SIM HQ Arr',
 'Norton 218',
 'Online',
 'Nsc 228',
 'Nsc 220',
 'Knox 04',
 'Nsc 222',
 'Norton 218',
 'Nsc 216',
 'Obrian 112',
 'Nsc 222',
 'Frnczk 422',
 'Clemen 322',
 'Norton 218',
 'Norton 218',
 'Clemen 06',
 'Math 250',
 'Clemen 17',
 'SIM HQ Arr',
 'Clemen 322',
 'Clemen 04',
 'Knox 04',
 'Clemen 04',
 'Clemen 06',
 'Frnczk 422',
 'Obrian 214',
 'Nsc 220',
 'Clemen 322',
 'Clemen 322',
 'Nsc 228',
 'Online',
 'Clemen 322',
 'Nsc 222',
 'Clemen 322',
 'Norton 218',
 'Nsc 222',
 'Arr Arr',
 'Clemen 04',
 'Nsc 228',
 'Nsc 218',
 'Nsc 205',
 'Nsc 220',
 'Knox 14',
 'Nsc 220',
 'Clemen 322',
 'Alumni 97',
 'Nsc 205',
 'Clemen 322',
 'Clemen 322',
 'Norton 218',
 'Nsc 220',
 'Baldy 101',
 'Math 150',
 'Math 150',
 'Arr Arr',
 'Nsc 222',
 'Nsc 220',
 'Nsc 205',
 'Nsc 205',
 'Nsc 216',
 'Nsc 228',
 'Nsc 210',
 'Nsc 216',
 'Nsc 218',
 'Nsc 218',
 'Nsc 216',
 'Clemen 322',
 'Norton 218',
 'Nsc 218',
 'Nsc 218',
 'Baldy 101',
 'Nsc 218',
 'Nsc 216',
 'Nsc 205',
 'Math 250',
 'Math 250',
 'Math 122',
 'Nsc 205',
 'Math 250',
 'Math 250',
 'Math 250',
 'Math 150',
 'Math 250',
 'Math 150',
 'Math 150',
 'Math 250',
 'Obrian 112',
 'Math 150',
 'Math 150',
 'Nsc 205',
 'Unknown',
 'Math 250',
 'Math 250',
 'Math 250',
 'Math 150',
 'Math 250',
 'Math 122',
 'Unknown',
 'Math 250',
 'Math 150',
 'Obrian 112',
 'Math 150',
 'Math 250',
 'Math 150',
 'Math 122',
 'Math 122',
 'Math 122',
 'Park 145',
 'Math 235',
 'Math 122',
 'Math 235',
 'Math 122',
 'Math 235',
 'Math 122',
 'Norton 216',
 'Talbrt 106',
 'SIM HQ Arr',
 'SIM HQ Arr',
 'Talbrt 103',
 'Norton 210',
 'Talbrt 106',
 'Bell 337',
 'Park 145',
 'Park 146',
 'Baldy 108',
 'Bell 337',
 'Clemen 102',
 'Talbrt 106',
 'Talbrt 106',
 'Park 440',
 'Talbrt 103',
 'Bell 138',
 'Online',
 'Norton 210',
 'Talbrt 103',
 'Talbrt 111',
 'Clemen 04',
 'Talbrt 106',
 'Capen 108',
 'Norton 216',
 'Park 146',
 'Capen 109',
 'Clemen 103',
 'Capen 108',
 'Talbrt 106',
 'Obrian 214',
 'Park 440',
 'Talbrt 103',
 'Frnczk 408',
 'Norton 216',
 'Capen 110',
 'Park 146',
 'Norton 216',
 'Park 440',
 'SIM HQ Arr',
 'Norton 213',
 'Park 146',
 'Online',
 'Norton 214',
 'Arr Arr',
 'Talbrt 106',
 'Talbrt 111',
 'Cooke 127B',
 'Capen 110',
 'Clemen 119',
 'Cooke 127A',
 'Capen 260',
 'Park 145',
 'Baldy 110',
 'Cooke 248',
 'Park 145',
 'Norton 210',
 'Clemen 102',
 'Talbrt 111',
 'Park 146',
 'Cooke 127B',
 'Arr Arr',
 'Capen 109',
 'Obrian 209',
 'Cooke 127B',
 'Bell 337',
 'Clemen 106',
 'Obrian 214',
 'Norton 214',
 'Park 145',
 'Talbrt 111',
 'Park 440',
 'Bell 138',
 'Cooke 248',
 'Park 250',
 'Cooke 127A',
 'Cooke 114',
 'Clemen 04',
 'Obrian 214',
 'Obrian 214',
 'Norton 216',
 'Talbrt 103',
 'Norton 210',
 'Talbrt 111',
 'Bell 138',
 'Park 145',
 'Park 146',
 'Bell 337',
 'Bell 337',
 'Talbrt 106',
 'Park 250',
 'Norton 210',
 'Capen 110',
 'Bell 138',
 'Capen 108',
 'Talbrt 111',
 'Talbrt 106',
 'Norton 216',
 'Math 250',
 'Math 150',
 'Park 146',
 'Park 440',
 'Talbrt 103',
 'Norton 214',
 'Bell 138',
 'Frnczk 408',
 'Bell 138',
 'Arr Arr',
 'Baldy 108',
 'Park 145',
 'Clemen 04',
 'Talbrt 103',
 'Park 440',
 'Norton 216',
 'Capen 110',
 'Talbrt 106',
 'Obrian 214',
 'Cooke 248',
 'Bell 138',
 'Bell 138',
 'Talbrt 103',
 'Clemen 06',
 'Park 145',
 'Talbrt 113',
 'Cooke 114',
 'Capen 260',
 'Cooke 127B',
 'Capen 260',
 'Capen 260',
 'Capen 260',
 'Clemen 04',
 'Cooke 127A',
 'Capen 260',
 'Math 150',
 'Talbrt 103',
 'Unknown',
 'Capen 260',
 'Capen 260',
 'Clemen 04',
 'Clemen 06',
 'Capen 260',
 'Park 146',
 'Talbrt 106',
 'Clemen 103',
 'Capen 258',
 'Park 146',
 'Clemen 19',
 'Talbrt 113',
 'Cooke 248',
 'Obrian 209',
 'Norton 216',
 'Baldy 108',
 'Math 250',
 'Math 250',
 'Math 122',
 'Obrian 214',
 'Bell 138',
 'Math 250',
 'Math 150',
 'Math 250',
 'Math 122',
 'Math 250',
 'Math 150',
 'Capen 110',
 'Math 250',
 'Math Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Math Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Math Arr',
 'Math Arr',
 'Math Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Math Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Math Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Math Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr',
 'Arr Arr']