In [1]:
import requests
In [2]:
# Fetch the EFF home page. A timeout is passed because requests otherwise
# waits indefinitely for a server that never answers.
r = requests.get('http://eff.org', timeout=10)
In [3]:
# Check what kind of object requests.get returned.
type(r)
Out[3]:
requests.models.Response
In [4]:
# Take the body of the response as text.
s = r.text
In [5]:
# Confirm that the text attribute is an ordinary Python string.
type(s)
Out[5]:
str
In [6]:
# Show only the first 500 characters rather than the whole page.
s[:500]
Out[6]:
'<!DOCTYPE html>\n  <!--[if IEMobile 7]><html class="no-js ie iem7" lang="en" dir="ltr"><![endif]-->\n  <!--[if lte IE 6]><html class="no-js ie lt-ie9 lt-ie8 lt-ie7" lang="en" dir="ltr"><![endif]-->\n  <!--[if (IE 7)&(!IEMobile)]><html class="no-js ie lt-ie9 lt-ie8" lang="en" dir="ltr"><![endif]-->\n  <!--[if IE 8]><html class="no-js ie lt-ie9" lang="en" dir="ltr"><![endif]-->\n  <!--[if (gte IE 9)|(gt IEMobile 7)]><html class="no-js ie" lang="en" dir="ltr" prefix="fb: http://ogp.me/ns/fb# og: http://'
In [7]:
# Plain request: no headers are passed, so requests sends its own default ones.
# timeout keeps the call from blocking forever if the server does not answer.
r = requests.get('http://blue.math.buffalo.edu', timeout=10)

Spoofing your user agent string

In [8]:
# Browser-like User-Agent value, written as adjacent literals that Python
# joins into one string.
fakeua = (
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:59.0) "
    "Gecko/20100101 Firefox/59.0"
)
In [9]:
# Request headers carrying the browser-like User-Agent string held in fakeua.
myheaders = {'User-Agent': fakeua}
In [10]:
# Same request, this time sending the custom headers (browser-like User-Agent).
# timeout keeps the call from blocking forever if the server does not answer.
r = requests.get('http://blue.math.buffalo.edu', headers=myheaders, timeout=10)

Let's load a large text from a file

In [11]:
# Read the whole play into one string, closing the file by hand.
# The encoding is given explicitly: the text contains non-ASCII characters and
# open() otherwise uses the platform's locale encoding.
# NOTE(review): assumes hamlet.txt is UTF-8 - confirm.
f = open('hamlet.txt', encoding='utf-8')
try:
    s = f.read()  # entire file into a single string
finally:
    f.close()  # runs even if read() raises, so the handle is not leaked
In [12]:
# or to get the file closed automatically:
# (the encoding is explicit so the result does not depend on the platform
#  default; NOTE(review): assumes hamlet.txt is UTF-8 - confirm)
with open('hamlet.txt', encoding='utf-8') as f:
    s = f.read()
In [13]:
# Length of the text in characters.
len(s)
Out[13]:
197305
In [14]:
# Substring membership test on the raw text (matching is case-sensitive).
'To be, or not to be' in s
Out[14]:
True
In [15]:
# Rough word count: the number of whitespace-separated tokens in the text.
nwords = len(s.split())  # by default splits on any whitespace
nwords
Out[15]:
35064

"Word-count": for each distinct word in text, how many times does it appear?

Let's clean the text up a little first

In [16]:
# Normalise case so that e.g. 'The' and 'the' count as the same word.
# This rebinds s, so the original capitalisation is no longer available below.
s = s.lower()  # convert any uppercase to lower case
In [17]:
# The capitalised phrase no longer matches once the text is lower-cased.
'To be, or not to be' in s
Out[17]:
False
In [18]:
# The all-lower-case form of the phrase is found.
'to be, or not to be' in s
Out[18]:
True

Get rid of any punctuation

In [19]:
# Characters to strip before counting words (ASCII punctuation only).
# NOTE(review): the word counts displayed further down still contain keys that
# begin with a curly quote or end with an em dash, so typographic punctuation
# in the text is not covered by this set - confirm whether that is intended.
punc = """,.:;?!()[]"'@#_"""
punc
Out[19]:
',.:;?!()[]"\'@#_'
In [20]:
# Iterating over a string yields its characters one at a time.
for c in punc:
    print(c)
,
.
:
;
?
!
(
)
[
]
"
'
@
#
_
In [21]:
# Delete every character listed in punc from the text in a single pass.
# str.maketrans('', '', punc) builds a table mapping each of those characters
# to None, and str.translate drops them. The result is the same as calling
# s.replace(c, '') once per character, without rescanning the whole text
# for each one.
# Characters are removed, not replaced by a space, so words joined only by
# punctuation run together.
s = s.translate(str.maketrans('', '', punc))
In [22]:
# Split the cleaned text into whitespace-separated tokens.
words = s.split()
In [23]:
# Token count after cleaning.
len(words)
Out[23]:
35064

Word count

Make a dictionary with words as keys and counts as values

In [24]:
# Tally occurrences: each distinct word becomes a key, its count the value.
wc = {}
for word in words:
    # get() yields 0 for a word not seen before, so no separate
    # membership test is needed.
    wc[word] = wc.get(word, 0) + 1
In [25]:
# Aside: split() with no argument treats a run of whitespace as one separator.
'The lazy    dog ...'.split()
Out[25]:
['The', 'lazy', 'dog', '...']
In [26]:
# Small example dictionary for trying out dict operations.
d = {'a': 4, 'b': 77}
In [27]:
# Assigning to a key that is not present yet adds a new entry.
d['blah'] = 999
d
Out[27]:
{'a': 4, 'b': 77, 'blah': 999}
In [28]:
# Display the word -> count dictionary.
wc
Out[28]:
{'videlicet': 1,
 'teeth': 2,
 'sadness': 1,
 'checks': 1,
 'scape': 2,
 'th’assay': 1,
 'kills': 2,
 'outlive': 1,
 'sin’s': 1,
 'serious': 1,
 'amen': 1,
 'fatted': 1,
 'compounded': 1,
 'reading': 3,
 'day': 21,
 'wont': 3,
 '90': 2,
 'kites': 1,
 'libertine': 1,
 'priam’s': 1,
 'chariest': 1,
 'knave': 5,
 'bore': 4,
 'wealth': 1,
 'spirits': 5,
 'amaze': 1,
 '‘down': 1,
 'utter’d': 1,
 'mouse': 2,
 'hallow’d': 1,
 'screen’d': 1,
 'stay': 11,
 'eleven': 1,
 'splenative': 1,
 'week': 1,
 'aptly': 1,
 'height': 1,
 'fellow’s': 1,
 'behav’d': 1,
 'exclusion': 1,
 'mean': 10,
 'saying': 1,
 'wwwgutenbergorg/donate': 2,
 'seen': 22,
 'various': 1,
 'beer-barrel': 1,
 'target': 1,
 'let’s': 10,
 'we’ll': 13,
 'unshaped': 1,
 'sprung': 1,
 'open': 2,
 'fiction': 1,
 'casual': 1,
 'cannon': 5,
 'while': 14,
 'howe’er': 1,
 'sensible': 1,
 'unto': 8,
 'lightness': 1,
 'beauty': 6,
 'array': 1,
 'thoughts': 17,
 'mood': 1,
 'palmy': 1,
 'lost': 9,
 'thou—': 1,
 'story': 3,
 'baseness': 1,
 'undiscover’d': 1,
 'wore': 1,
 'spoke': 2,
 'dirty': 1,
 'put': 24,
 'ungor’d': 1,
 'flourish': 5,
 'link’d': 1,
 'joint-labourer': 1,
 'burst': 3,
 'nomination': 1,
 'dane': 6,
 'stockings': 1,
 'twentieth': 1,
 'gutenberg': 31,
 'proclaim': 2,
 'donate': 3,
 'sinners': 1,
 'rouse': 3,
 'permitted': 2,
 'kill’d': 6,
 'meantime': 2,
 'eternal': 2,
 'gait': 2,
 'silver’d': 1,
 'user': 3,
 'th’impression': 1,
 'aught—': 1,
 'presentment': 1,
 'richer': 2,
 'events': 1,
 'rummage': 1,
 'nature’s': 2,
 'so’': 4,
 'bells': 1,
 'shipwrights': 1,
 'unmatch’d': 1,
 'outface': 1,
 'riband': 1,
 'slow': 3,
 'make—': 1,
 'suffered': 1,
 'ten': 4,
 'robe': 1,
 'reason': 14,
 'seeming': 3,
 'hecuba': 4,
 'shoon': 1,
 'employ': 1,
 'progress': 1,
 'exact': 1,
 'aid': 1,
 'guards': 1,
 'glow-worm': 1,
 'conception': 1,
 'shipwright': 2,
 'scopes': 1,
 'wwwgutenbergorg/license': 1,
 'stalks': 1,
 'be': 239,
 'touch’d': 1,
 'lords': 4,
 'under': 11,
 'fly': 5,
 'shrewdly': 1,
 'whisper': 2,
 '-': 3,
 'husbands—begin': 1,
 'potency': 1,
 'fitness': 2,
 'constantly': 1,
 'unprotected': 1,
 'future': 3,
 'quick': 8,
 'finger’d': 1,
 'shouldst': 2,
 'attendants': 8,
 'whereat': 1,
 'delays': 1,
 'daughter—have': 1,
 'mallicho': 1,
 'grown': 4,
 'shortens': 1,
 'throat': 4,
 'dry': 3,
 'pah': 1,
 'appear’d': 3,
 'within': 31,
 'answerest': 1,
 'tellus’': 1,
 'welcome—his': 1,
 'worm’s': 1,
 'united': 15,
 'lasting': 2,
 'scarcely': 2,
 'prisoner': 2,
 'tenantless': 1,
 'hideous': 2,
 'guiltless': 1,
 'gonzago’s': 1,
 'aim': 1,
 'show': 23,
 'repulsed—a': 1,
 'insolence': 1,
 't’expel': 1,
 'sickly': 2,
 'finger': 2,
 'exclaim': 1,
 'sanctity': 1,
 'cart': 1,
 '99775': 1,
 'shroud': 1,
 'ache': 1,
 'fruitful': 1,
 'think—or': 1,
 'unlink': 1,
 '1524': 1,
 'suffers': 3,
 'th’university': 1,
 'whipped': 1,
 'left': 3,
 'quest': 1,
 'wing': 1,
 'preaching': 1,
 'scourge': 2,
 'meet': 10,
 'advances': 1,
 'compliance': 5,
 't’express': 1,
 'noblest': 1,
 'by': 144,
 'received': 6,
 'procession': 1,
 'dowry': 1,
 'govern': 2,
 'loam': 2,
 'drive': 2,
 'pull’d': 1,
 'contact': 4,
 'chief': 4,
 'data': 1,
 'desires': 4,
 'undergo': 1,
 'in’t': 10,
 'heart': 29,
 'also': 3,
 'chamberlain': 1,
 'large': 1,
 'problem': 1,
 'determination': 1,
 'lawful': 1,
 'ass—': 1,
 'neutral': 1,
 'them—that': 1,
 'breathing': 2,
 'warranty': 2,
 'souls': 5,
 'seeks': 2,
 'warrant': 5,
 'steel': 3,
 'lapwing': 1,
 'us—thou': 1,
 'clemency': 1,
 'before': 26,
 'places': 1,
 'uphoarded': 1,
 'cut': 2,
 'granted': 1,
 'larger': 1,
 'saviour’s': 1,
 'exception': 1,
 'thought-sick': 1,
 'stealers': 1,
 'murderer': 3,
 'eruption': 1,
 'good-night': 1,
 'wand’ring': 1,
 'scenes': 1,
 'accounted': 1,
 'royally': 1,
 'paid': 7,
 'old': 22,
 'tragedians': 1,
 'far': 11,
 'return’d': 4,
 'educational': 1,
 'purging': 2,
 'ignorant': 2,
 'behold': 1,
 'yon': 1,
 'metal': 1,
 'holy': 2,
 'why': 60,
 'grinning': 1,
 'dish': 1,
 'thieves': 1,
 'shapes': 2,
 'thirty': 3,
 'proceeded': 1,
 'dearth': 1,
 'foresaid': 1,
 'jade': 1,
 'sirs': 2,
 'yawn': 1,
 'o’erweigh': 1,
 'deprive': 1,
 'sovereign': 3,
 'wipe': 2,
 'ein': 1,
 'knowing': 1,
 'tardy': 2,
 'months': 4,
 'firm': 1,
 'fat': 6,
 'wormwood': 2,
 'wrist': 1,
 'feast': 3,
 'heat': 4,
 'unmaster’d': 1,
 'whensoever': 1,
 'thin': 1,
 'obscure': 1,
 'importunate': 1,
 'remiss': 1,
 'demonstrated': 1,
 'join': 2,
 'adulterate': 1,
 'offence': 7,
 'subscribe': 1,
 'ago': 1,
 'inheritor': 1,
 'obey': 7,
 'canon': 1,
 'o’ercame': 1,
 'lady’s': 1,
 'wicked': 6,
 'perdition': 1,
 'westward': 1,
 'strumpet': 2,
 'picture': 2,
 'sees': 1,
 'lids': 1,
 'impress': 1,
 'jump': 2,
 'i’m': 1,
 'intents': 1,
 'school': 1,
 'siz’d': 1,
 'garrison’d': 1,
 'awe': 2,
 'lamond': 1,
 'seed': 1,
 'magic': 1,
 'sister': 6,
 'ay': 37,
 'wits': 4,
 'clouds': 4,
 'enlarg’d': 1,
 'four': 5,
 'conceited': 1,
 'defect': 5,
 'blastments': 1,
 'unweeded': 1,
 'look': 37,
 'grand': 1,
 'express’d': 1,
 'pit': 3,
 'augury': 1,
 'paint': 1,
 'airy': 1,
 'where': 60,
 'low': 1,
 'bride-bed': 1,
 'current': 1,
 'thee': 58,
 'necessity': 1,
 'honours': 1,
 'graces': 2,
 'th’air': 1,
 'exempt': 2,
 'different': 2,
 'besmirch': 1,
 'distributing': 7,
 'monster': 1,
 'hell': 10,
 'compact': 1,
 'unfellowed': 1,
 'captain': 10,
 'craft': 2,
 'thrown': 2,
 'denied': 1,
 'restrain’d': 1,
 'francisco': 11,
 'allowance': 2,
 'loud': 3,
 'sweep': 2,
 '‘laertes': 1,
 'by’r': 2,
 'unsure': 1,
 'greeks': 1,
 'waits': 1,
 'potent': 2,
 '‘well': 1,
 'manners—that': 1,
 'spots': 1,
 'hop’d': 1,
 'assume': 3,
 'wrap': 1,
 'o’ertook': 1,
 'away—go': 1,
 'ambitious': 2,
 'painting': 1,
 'if': 135,
 '‘closes': 2,
 'depart': 1,
 'beckons': 3,
 'among': 1,
 'a-down': 1,
 'bastard': 1,
 'opinion': 1,
 'waist': 1,
 'controversy': 1,
 'gave’t': 1,
 'cool': 1,
 'loosed': 1,
 'unshaken': 1,
 'place': 10,
 'rul’d': 3,
 'believe': 19,
 'regulating': 1,
 'villanous': 1,
 'firmament': 1,
 'fang’d—': 1,
 'bugs': 1,
 'fanned': 1,
 'impart': 4,
 'muddy': 1,
 'flagon': 1,
 'attend': 1,
 'done—must': 1,
 'opportunities': 1,
 'likelihood': 1,
 'courtiers': 1,
 'contents': 2,
 'altitude': 1,
 'indirections': 1,
 'handsaw': 1,
 'orchard': 2,
 'do’t': 9,
 'spread': 3,
 'conclusions': 1,
 'confederate': 1,
 'legal': 2,
 'loneliness—we': 1,
 'aright': 1,
 'strengthen': 1,
 'livery': 3,
 'points': 2,
 'hawk': 1,
 'desire': 7,
 'element': 1,
 'gentry': 2,
 'pernicious': 1,
 'frost': 1,
 'grinding': 1,
 'repast': 1,
 'taints': 1,
 'contagion': 2,
 'folly': 1,
 'acres': 1,
 'unnatural': 4,
 'crash': 1,
 'individual': 4,
 'thing—': 1,
 '’t': 2,
 'wars': 2,
 'perfume': 2,
 'marriages': 1,
 'desirous': 1,
 'avoid': 3,
 'distract': 1,
 'wanton': 3,
 'rub': 2,
 'horses': 2,
 'escoted': 1,
 'exceed': 1,
 'liquor': 2,
 'rot': 1,
 'ones': 2,
 'meditation': 1,
 'meed': 1,
 'hard': 5,
 'brook': 2,
 'infusion': 1,
 'maximum': 1,
 'deserve': 1,
 'after': 14,
 'dalliance': 1,
 'liability': 3,
 'hits': 3,
 'kiss’d': 1,
 'was': 86,
 '’twere': 11,
 'priest': 4,
 'prayer': 2,
 'quit': 2,
 'spacious': 1,
 'windy': 1,
 'dispriz’d': 1,
 'sent': 14,
 'wot': 1,
 'calculate': 1,
 'ambiguous': 1,
 'and': 1051,
 'words': 20,
 'cannot': 33,
 'imports': 4,
 'shame': 8,
 'epitaph': 2,
 'lock’d': 2,
 'cheerfully': 2,
 'including': 8,
 'capitol': 1,
 'dallying': 1,
 'mourn’d': 1,
 'often': 4,
 'arm’d': 2,
 'stuck': 1,
 'applicable': 3,
 'fellows': 3,
 'vow': 2,
 'shut': 1,
 'heaven': 43,
 'hic': 1,
 'proceed': 2,
 'dreams': 3,
 'advantage': 1,
 'bespeak': 1,
 'lands': 4,
 'clepe': 1,
 'perfections': 1,
 'law’s': 1,
 'reasons': 2,
 'hey': 2,
 'embrace': 2,
 'nemean': 1,
 'lose': 7,
 'israel': 1,
 'view': 2,
 'tenants': 1,
 'sister’s': 1,
 'would’': 1,
 'england': 20,
 'sage': 1,
 'feed': 6,
 'whose': 31,
 'priests': 1,
 'dust': 7,
 'almost': 12,
 'wretch': 3,
 'harping': 1,
 'ape': 2,
 'columbines': 1,
 'tenures': 1,
 'tears': 7,
 'fares': 2,
 'familiar': 1,
 'tithe': 1,
 'privy': 1,
 'fight': 3,
 'rather': 5,
 'compilation': 1,
 'defy': 1,
 'ourselves—to': 1,
 'springes': 1,
 '‘high': 1,
 'alaska': 1,
 'son’s': 1,
 'demands': 2,
 'aslant': 1,
 'dejected': 1,
 'knock': 1,
 'crowing': 1,
 'audit': 1,
 'gather': 3,
 'enactures': 1,
 'responsive': 1,
 'marked': 2,
 'blasted': 2,
 'virtues': 2,
 'carried': 1,
 'fear': 20,
 'malefactions': 1,
 'ancient': 1,
 'barr’d': 1,
 'grandsire': 1,
 'aroused': 1,
 'pocky': 1,
 'continual': 1,
 'arms': 9,
 'but’': 1,
 'arrest—o': 1,
 'commutual': 1,
 'strangely': 2,
 'practically': 1,
 'farm': 2,
 'prais’d': 1,
 'orb': 1,
 'c': 1,
 'wittenberg': 4,
 '‘one’': 1,
 'personal': 1,
 'ut': 1,
 'enemy': 3,
 'post-haste': 1,
 'incestuous': 3,
 'she': 46,
 'smiles': 1,
 'praying': 1,
 'shakespeare': 4,
 'draw': 9,
 'sum': 2,
 'paintings': 1,
 'wait': 2,
 'about': 26,
 'flaxen': 1,
 'comrade': 1,
 'yield': 2,
 'philosophy': 2,
 'confess': 6,
 'o’erwhelm': 1,
 'tend': 4,
 'question': 16,
 'ha’': 3,
 'forestall': 1,
 'really': 1,
 'process': 3,
 'prompted': 1,
 'medium': 5,
 'between': 15,
 'bout': 1,
 'churches': 1,
 'ladies': 4,
 'circumstances': 1,
 'sitting': 1,
 'glares': 1,
 'resolutes': 1,
 'sere': 1,
 'remember': 11,
 'princes': 1,
 'compound': 1,
 'lo': 3,
 'churchyard': 2,
 'think': 46,
 'calumny': 1,
 'matter—': 1,
 'promotion': 1,
 'brings': 1,
 'song': 1,
 'dark': 1,
 'axe': 2,
 '1e6': 1,
 'neglected': 2,
 'swamp': 1,
 'worst': 1,
 'fall': 9,
 'language': 1,
 'espials': 1,
 'pity': 4,
 'presence': 3,
 'lesser': 1,
 'shadows': 1,
 'italian': 1,
 'compare': 1,
 'defects': 1,
 'centre': 1,
 'unfold': 3,
 'outdated': 1,
 'obeys': 1,
 'requite': 2,
 'imposed': 1,
 'select': 1,
 'accessible': 1,
 'paragraph': 11,
 'appointment': 1,
 'recks': 1,
 'odd': 2,
 'beguile': 2,
 'i’ll': 57,
 'deaths': 1,
 'looked': 1,
 'periodic': 1,
 'stabs': 1,
 'room': 28,
 'peculiar': 1,
 'prick’d': 1,
 'sandal': 1,
 'longed': 1,
 'shell': 1,
 'undertakings': 1,
 'ardour': 1,
 'provisions': 1,
 'witness': 2,
 'robustious': 1,
 'follow’d': 1,
 'foundations': 3,
 'recorders': 3,
 'hammers': 1,
 'grows—the': 1,
 'opposition': 2,
 'truant': 2,
 'overlooked': 1,
 'shoulder': 3,
 'lodg’d': 1,
 'exits': 1,
 'method': 3,
 'find': 17,
 'steward': 1,
 'wisest': 3,
 'sets': 2,
 'court': 6,
 'sharp': 1,
 'amaz’d': 1,
 'web': 5,
 'serpent': 2,
 'facility': 1,
 'hazard': 1,
 'flash': 1,
 'advice': 3,
 'available': 2,
 'lock': 2,
 'unwilling': 1,
 'replace': 1,
 'confines': 1,
 'made—': 1,
 'rises': 3,
 'blossoms': 1,
 'bare': 1,
 'caught': 2,
 'trademark/copyright': 1,
 'corrupt': 1,
 'bawdy': 1,
 'mortal': 5,
 'praised': 1,
 'volley': 1,
 'abstracts': 1,
 'unhand': 1,
 '‘lord': 1,
 'yourself': 14,
 'lungs': 2,
 'more': 99,
 'assay': 2,
 'mind’s': 2,
 '1d': 1,
 'trumpet': 5,
 'be’': 1,
 'destroy': 4,
 'unsatisfied': 1,
 'codes': 1,
 'plac’d': 2,
 'ecstacy': 1,
 'normandy—': 1,
 'table-book': 1,
 'ills': 1,
 'imagine—': 1,
 'taken': 2,
 'accuse': 1,
 'turns': 1,
 'pinch': 1,
 'stops': 2,
 'tells': 1,
 'admiration': 3,
 'anchor’s': 1,
 'yonder': 2,
 'bad': 6,
 '1f6': 1,
 'ruin': 1,
 'flame': 2,
 'solidity': 1,
 'palm': 2,
 'soul’s': 1,
 'feels': 1,
 'danes': 4,
 'calendar': 1,
 'let': 94,
 'nights': 3,
 'fadoms': 1,
 'heel': 2,
 'over': 4,
 'too': 51,
 'all—': 1,
 'proclaims': 2,
 'omen': 1,
 'rugged': 2,
 'marvellous': 2,
 'gore': 1,
 'e’en': 12,
 'reconcilement': 1,
 '1f4': 1,
 'rightly': 1,
 'lucianus': 3,
 'aloof': 2,
 'antic': 1,
 'mine—an': 1,
 'scope': 3,
 'air': 12,
 'offences': 1,
 'shovel': 1,
 'middle-aged': 1,
 'blood': 19,
 'bethought': 1,
 'greeting': 1,
 'they': 105,
 'caviare': 1,
 'retiring': 1,
 'for’s': 2,
 'weapons': 1,
 'rebels': 1,
 'sale’—': 1,
 'theme': 3,
 'periwig-pated': 1,
 'yet': 42,
 'whom': 7,
 'bestow': 4,
 'afeard': 1,
 'desert': 2,
 'mind': 12,
 'likely': 2,
 'pair': 2,
 'adam': 1,
 'ungart’red': 1,
 'swords': 1,
 'immediate': 4,
 'mars': 1,
 'seem': 10,
 'delights': 3,
 'uncle': 10,
 '&c': 6,
 'bell': 2,
 'methods': 1,
 'spills': 1,
 'wish': 4,
 'one': 45,
 'blister': 1,
 'shameful': 1,
 'curb': 2,
 'single': 2,
 'once': 19,
 'unknowing': 1,
 'uses': 3,
 '‘there': 1,
 'murder—': 1,
 'precepts': 2,
 'sponge—what': 1,
 'pluck': 3,
 'rotten': 2,
 'keeps': 5,
 'inward': 3,
 'permanent': 2,
 'death': 37,
 'digs': 1,
 'able': 1,
 'wide': 5,
 'forgeries': 1,
 'bearing': 1,
 'traitorous': 1,
 'motive': 5,
 'gibes': 1,
 'shuffling': 2,
 'drabbing': 1,
 'english': 3,
 'oppress’d': 1,
 'chiefly': 1,
 'people': 4,
 'thicker': 1,
 'cunnings—': 1,
 'heraldry': 2,
 'smelt': 1,
 'convey': 1,
 'forest': 1,
 'lordship': 8,
 'hands': 12,
 'tokens': 1,
 'th’opposed': 1,
 'anon': 6,
 'vanish’d': 1,
 'brands': 1,
 'leads': 1,
 'function': 1,
 'wine': 1,
 'knows': 3,
 'peal': 1,
 'suck’d': 2,
 'tongue': 15,
 'her': 91,
 'delve': 1,
 'barefoot': 1,
 'peevish': 1,
 'lofty': 1,
 'jove': 2,
 'protected': 5,
 'such-like': 1,
 'saw’t': 1,
 'candied': 1,
 'charity': 1,
 'better’d': 1,
 'venom': 2,
 'temper’d': 1,
 'contriving': 1,
 'revel': 1,
 'fish': 2,
 'entreaty': 2,
 'unpregnant': 1,
 'is’t': 17,
 'quarrel': 2,
 'took': 4,
 '‘would’': 1,
 'natures': 1,
 'birth': 4,
 'illusion': 1,
 'bed-rid': 1,
 'desperate': 6,
 'jaws': 1,
 'fix’d': 3,
 'remove': 3,
 'calumnious': 1,
 'grounds': 1,
 'poor': 20,
 'moderate': 1,
 'discomfort': 1,
 'matron’s': 1,
 'wouldst': 6,
 'agreeing': 1,
 'as': 230,
 'polonius’': 1,
 'king’': 2,
 'hundred': 2,
 'leaping': 1,
 'carp': 1,
 'bears': 2,
 'deal': 1,
 'palpable': 1,
 'bird': 2,
 'importun’d': 1,
 'stone': 2,
 'rich': 3,
 'prison-house': 1,
 'applaud': 1,
 'force': 3,
 'favours': 2,
 'benefit': 1,
 'truncheon’s': 1,
 'b’': 4,
 'money': 5,
 'machine': 2,
 'breath': 9,
 'popp’d': 1,
 'dream': 5,
 'image': 4,
 'hang': 4,
 'nonce': 1,
 'repair': 2,
 'awhile': 10,
 'ill': 6,
 'information': 9,
 'owner': 6,
 'dungeons': 1,
 'consent': 4,
 'hugger-mugger': 1,
 'dumb': 6,
 'honourable': 3,
 'one—to': 1,
 'device': 1,
 'howsoever': 1,
 'capital': 2,
 'parley': 1,
 'praises': 1,
 'herald': 1,
 'having': 2,
 'entreated': 1,
 'unkind': 1,
 'th’other': 1,
 'recoveries': 2,
 'mirth': 2,
 'extolment': 1,
 'norway': 13,
 'trust': 2,
 'persevere': 1,
 'given': 11,
 'affairs': 1,
 'sundays': 1,
 'official': 3,
 'unite': 1,
 'fire': 12,
 'givers': 1,
 'speed': 3,
 'voltemand': 9,
 'play’d': 2,
 'adventurous': 1,
 'bodes': 1,
 'sheeted': 1,
 'murder': 12,
 'battlements': 1,
 'mess': 1,
 'unknown': 2,
 'sleeper’s': 1,
 'lief': 1,
 'operant': 1,
 'looking': 1,
 'pound': 1,
 'house': 7,
 'vigour': 1,
 'on’t': 8,
 'lobby': 2,
 'that’s': 16,
 'reverted': 1,
 'pall': 1,
 'vienna': 1,
 'beard': 5,
 'nymph': 1,
 'fits': 2,
 'blackest': 1,
 'flies': 1,
 'immediately': 1,
 'numerous': 1,
 'backward': 1,
 'beating': 3,
 'gregory': 1,
 'artless': 1,
 'right': 15,
 'receiv’d': 2,
 ...}
In [29]:
# Back to the small example dictionary.
d
Out[29]:
{'a': 4, 'b': 77, 'blah': 999}
In [30]:
# View of the dictionary's keys.
d.keys()
Out[30]:
dict_keys(['b', 'blah', 'a'])
In [31]:
# View of the dictionary's values.
d.values()
Out[31]:
dict_values([77, 999, 4])
In [32]:
# View of the dictionary's (key, value) pairs.
d.items()
Out[32]:
dict_items([('b', 77), ('blah', 999), ('a', 4)])
In [33]:
# Each element produced by items() is a (key, value) tuple.
for item in d.items():
    print(item)
('b', 77)
('blah', 999)
('a', 4)
In [34]:
# Materialise the (word, count) pairs of the word-count dictionary as a list.
wcl = list(wc.items())
In [35]:
# Display the list of (word, count) pairs.
wcl
Out[35]:
[('videlicet', 1),
 ('teeth', 2),
 ('sadness', 1),
 ('checks', 1),
 ('scape', 2),
 ('th’assay', 1),
 ('kills', 2),
 ('outlive', 1),
 ('sin’s', 1),
 ('serious', 1),
 ('amen', 1),
 ('fatted', 1),
 ('compounded', 1),
 ('reading', 3),
 ('day', 21),
 ('wont', 3),
 ('90', 2),
 ('kites', 1),
 ('libertine', 1),
 ('priam’s', 1),
 ('chariest', 1),
 ('knave', 5),
 ('bore', 4),
 ('wealth', 1),
 ('spirits', 5),
 ('amaze', 1),
 ('‘down', 1),
 ('utter’d', 1),
 ('mouse', 2),
 ('hallow’d', 1),
 ('screen’d', 1),
 ('stay', 11),
 ('eleven', 1),
 ('splenative', 1),
 ('week', 1),
 ('aptly', 1),
 ('height', 1),
 ('fellow’s', 1),
 ('behav’d', 1),
 ('exclusion', 1),
 ('mean', 10),
 ('saying', 1),
 ('wwwgutenbergorg/donate', 2),
 ('seen', 22),
 ('various', 1),
 ('beer-barrel', 1),
 ('target', 1),
 ('let’s', 10),
 ('we’ll', 13),
 ('unshaped', 1),
 ('sprung', 1),
 ('open', 2),
 ('fiction', 1),
 ('casual', 1),
 ('cannon', 5),
 ('while', 14),
 ('howe’er', 1),
 ('sensible', 1),
 ('unto', 8),
 ('lightness', 1),
 ('beauty', 6),
 ('array', 1),
 ('thoughts', 17),
 ('mood', 1),
 ('palmy', 1),
 ('lost', 9),
 ('thou—', 1),
 ('story', 3),
 ('baseness', 1),
 ('undiscover’d', 1),
 ('wore', 1),
 ('spoke', 2),
 ('dirty', 1),
 ('put', 24),
 ('ungor’d', 1),
 ('flourish', 5),
 ('link’d', 1),
 ('joint-labourer', 1),
 ('burst', 3),
 ('nomination', 1),
 ('dane', 6),
 ('stockings', 1),
 ('twentieth', 1),
 ('gutenberg', 31),
 ('proclaim', 2),
 ('donate', 3),
 ('sinners', 1),
 ('rouse', 3),
 ('permitted', 2),
 ('kill’d', 6),
 ('meantime', 2),
 ('eternal', 2),
 ('gait', 2),
 ('silver’d', 1),
 ('user', 3),
 ('th’impression', 1),
 ('aught—', 1),
 ('presentment', 1),
 ('richer', 2),
 ('events', 1),
 ('rummage', 1),
 ('nature’s', 2),
 ('so’', 4),
 ('bells', 1),
 ('shipwrights', 1),
 ('unmatch’d', 1),
 ('outface', 1),
 ('riband', 1),
 ('slow', 3),
 ('make—', 1),
 ('suffered', 1),
 ('ten', 4),
 ('robe', 1),
 ('reason', 14),
 ('seeming', 3),
 ('hecuba', 4),
 ('shoon', 1),
 ('employ', 1),
 ('progress', 1),
 ('exact', 1),
 ('aid', 1),
 ('guards', 1),
 ('glow-worm', 1),
 ('conception', 1),
 ('shipwright', 2),
 ('scopes', 1),
 ('wwwgutenbergorg/license', 1),
 ('stalks', 1),
 ('be', 239),
 ('touch’d', 1),
 ('lords', 4),
 ('under', 11),
 ('fly', 5),
 ('shrewdly', 1),
 ('whisper', 2),
 ('-', 3),
 ('husbands—begin', 1),
 ('potency', 1),
 ('fitness', 2),
 ('constantly', 1),
 ('unprotected', 1),
 ('future', 3),
 ('quick', 8),
 ('finger’d', 1),
 ('shouldst', 2),
 ('attendants', 8),
 ('whereat', 1),
 ('delays', 1),
 ('daughter—have', 1),
 ('mallicho', 1),
 ('grown', 4),
 ('shortens', 1),
 ('throat', 4),
 ('dry', 3),
 ('pah', 1),
 ('appear’d', 3),
 ('within', 31),
 ('answerest', 1),
 ('tellus’', 1),
 ('welcome—his', 1),
 ('worm’s', 1),
 ('united', 15),
 ('lasting', 2),
 ('scarcely', 2),
 ('prisoner', 2),
 ('tenantless', 1),
 ('hideous', 2),
 ('guiltless', 1),
 ('gonzago’s', 1),
 ('aim', 1),
 ('show', 23),
 ('repulsed—a', 1),
 ('insolence', 1),
 ('t’expel', 1),
 ('sickly', 2),
 ('finger', 2),
 ('exclaim', 1),
 ('sanctity', 1),
 ('cart', 1),
 ('99775', 1),
 ('shroud', 1),
 ('ache', 1),
 ('fruitful', 1),
 ('think—or', 1),
 ('unlink', 1),
 ('1524', 1),
 ('suffers', 3),
 ('th’university', 1),
 ('whipped', 1),
 ('left', 3),
 ('quest', 1),
 ('wing', 1),
 ('preaching', 1),
 ('scourge', 2),
 ('meet', 10),
 ('advances', 1),
 ('compliance', 5),
 ('t’express', 1),
 ('noblest', 1),
 ('by', 144),
 ('received', 6),
 ('procession', 1),
 ('dowry', 1),
 ('govern', 2),
 ('loam', 2),
 ('drive', 2),
 ('pull’d', 1),
 ('contact', 4),
 ('chief', 4),
 ('data', 1),
 ('desires', 4),
 ('undergo', 1),
 ('in’t', 10),
 ('heart', 29),
 ('also', 3),
 ('chamberlain', 1),
 ('large', 1),
 ('problem', 1),
 ('determination', 1),
 ('lawful', 1),
 ('ass—', 1),
 ('neutral', 1),
 ('them—that', 1),
 ('breathing', 2),
 ('warranty', 2),
 ('souls', 5),
 ('seeks', 2),
 ('warrant', 5),
 ('steel', 3),
 ('lapwing', 1),
 ('us—thou', 1),
 ('clemency', 1),
 ('before', 26),
 ('places', 1),
 ('uphoarded', 1),
 ('cut', 2),
 ('granted', 1),
 ('larger', 1),
 ('saviour’s', 1),
 ('exception', 1),
 ('thought-sick', 1),
 ('stealers', 1),
 ('murderer', 3),
 ('eruption', 1),
 ('good-night', 1),
 ('wand’ring', 1),
 ('scenes', 1),
 ('accounted', 1),
 ('royally', 1),
 ('paid', 7),
 ('old', 22),
 ('tragedians', 1),
 ('far', 11),
 ('return’d', 4),
 ('educational', 1),
 ('purging', 2),
 ('ignorant', 2),
 ('behold', 1),
 ('yon', 1),
 ('metal', 1),
 ('holy', 2),
 ('why', 60),
 ('grinning', 1),
 ('dish', 1),
 ('thieves', 1),
 ('shapes', 2),
 ('thirty', 3),
 ('proceeded', 1),
 ('dearth', 1),
 ('foresaid', 1),
 ('jade', 1),
 ('sirs', 2),
 ('yawn', 1),
 ('o’erweigh', 1),
 ('deprive', 1),
 ('sovereign', 3),
 ('wipe', 2),
 ('ein', 1),
 ('knowing', 1),
 ('tardy', 2),
 ('months', 4),
 ('firm', 1),
 ('fat', 6),
 ('wormwood', 2),
 ('wrist', 1),
 ('feast', 3),
 ('heat', 4),
 ('unmaster’d', 1),
 ('whensoever', 1),
 ('thin', 1),
 ('obscure', 1),
 ('importunate', 1),
 ('remiss', 1),
 ('demonstrated', 1),
 ('join', 2),
 ('adulterate', 1),
 ('offence', 7),
 ('subscribe', 1),
 ('ago', 1),
 ('inheritor', 1),
 ('obey', 7),
 ('canon', 1),
 ('o’ercame', 1),
 ('lady’s', 1),
 ('wicked', 6),
 ('perdition', 1),
 ('westward', 1),
 ('strumpet', 2),
 ('picture', 2),
 ('sees', 1),
 ('lids', 1),
 ('impress', 1),
 ('jump', 2),
 ('i’m', 1),
 ('intents', 1),
 ('school', 1),
 ('siz’d', 1),
 ('garrison’d', 1),
 ('awe', 2),
 ('lamond', 1),
 ('seed', 1),
 ('magic', 1),
 ('sister', 6),
 ('ay', 37),
 ('wits', 4),
 ('clouds', 4),
 ('enlarg’d', 1),
 ('four', 5),
 ('conceited', 1),
 ('defect', 5),
 ('blastments', 1),
 ('unweeded', 1),
 ('look', 37),
 ('grand', 1),
 ('express’d', 1),
 ('pit', 3),
 ('augury', 1),
 ('paint', 1),
 ('airy', 1),
 ('where', 60),
 ('low', 1),
 ('bride-bed', 1),
 ('current', 1),
 ('thee', 58),
 ('necessity', 1),
 ('honours', 1),
 ('graces', 2),
 ('th’air', 1),
 ('exempt', 2),
 ('different', 2),
 ('besmirch', 1),
 ('distributing', 7),
 ('monster', 1),
 ('hell', 10),
 ('compact', 1),
 ('unfellowed', 1),
 ('captain', 10),
 ('craft', 2),
 ('thrown', 2),
 ('denied', 1),
 ('restrain’d', 1),
 ('francisco', 11),
 ('allowance', 2),
 ('loud', 3),
 ('sweep', 2),
 ('‘laertes', 1),
 ('by’r', 2),
 ('unsure', 1),
 ('greeks', 1),
 ('waits', 1),
 ('potent', 2),
 ('‘well', 1),
 ('manners—that', 1),
 ('spots', 1),
 ('hop’d', 1),
 ('assume', 3),
 ('wrap', 1),
 ('o’ertook', 1),
 ('away—go', 1),
 ('ambitious', 2),
 ('painting', 1),
 ('if', 135),
 ('‘closes', 2),
 ('depart', 1),
 ('beckons', 3),
 ('among', 1),
 ('a-down', 1),
 ('bastard', 1),
 ('opinion', 1),
 ('waist', 1),
 ('controversy', 1),
 ('gave’t', 1),
 ('cool', 1),
 ('loosed', 1),
 ('unshaken', 1),
 ('place', 10),
 ('rul’d', 3),
 ('believe', 19),
 ('regulating', 1),
 ('villanous', 1),
 ('firmament', 1),
 ('fang’d—', 1),
 ('bugs', 1),
 ('fanned', 1),
 ('impart', 4),
 ('muddy', 1),
 ('flagon', 1),
 ('attend', 1),
 ('done—must', 1),
 ('opportunities', 1),
 ('likelihood', 1),
 ('courtiers', 1),
 ('contents', 2),
 ('altitude', 1),
 ('indirections', 1),
 ('handsaw', 1),
 ('orchard', 2),
 ('do’t', 9),
 ('spread', 3),
 ('conclusions', 1),
 ('confederate', 1),
 ('legal', 2),
 ('loneliness—we', 1),
 ('aright', 1),
 ('strengthen', 1),
 ('livery', 3),
 ('points', 2),
 ('hawk', 1),
 ('desire', 7),
 ('element', 1),
 ('gentry', 2),
 ('pernicious', 1),
 ('frost', 1),
 ('grinding', 1),
 ('repast', 1),
 ('taints', 1),
 ('contagion', 2),
 ('folly', 1),
 ('acres', 1),
 ('unnatural', 4),
 ('crash', 1),
 ('individual', 4),
 ('thing—', 1),
 ('’t', 2),
 ('wars', 2),
 ('perfume', 2),
 ('marriages', 1),
 ('desirous', 1),
 ('avoid', 3),
 ('distract', 1),
 ('wanton', 3),
 ('rub', 2),
 ('horses', 2),
 ('escoted', 1),
 ('exceed', 1),
 ('liquor', 2),
 ('rot', 1),
 ('ones', 2),
 ('meditation', 1),
 ('meed', 1),
 ('hard', 5),
 ('brook', 2),
 ('infusion', 1),
 ('maximum', 1),
 ('deserve', 1),
 ('after', 14),
 ('dalliance', 1),
 ('liability', 3),
 ('hits', 3),
 ('kiss’d', 1),
 ('was', 86),
 ('’twere', 11),
 ('priest', 4),
 ('prayer', 2),
 ('quit', 2),
 ('spacious', 1),
 ('windy', 1),
 ('dispriz’d', 1),
 ('sent', 14),
 ('wot', 1),
 ('calculate', 1),
 ('ambiguous', 1),
 ('and', 1051),
 ('words', 20),
 ('cannot', 33),
 ('imports', 4),
 ('shame', 8),
 ('epitaph', 2),
 ('lock’d', 2),
 ('cheerfully', 2),
 ('including', 8),
 ('capitol', 1),
 ('dallying', 1),
 ('mourn’d', 1),
 ('often', 4),
 ('arm’d', 2),
 ('stuck', 1),
 ('applicable', 3),
 ('fellows', 3),
 ('vow', 2),
 ('shut', 1),
 ('heaven', 43),
 ('hic', 1),
 ('proceed', 2),
 ('dreams', 3),
 ('advantage', 1),
 ('bespeak', 1),
 ('lands', 4),
 ('clepe', 1),
 ('perfections', 1),
 ('law’s', 1),
 ('reasons', 2),
 ('hey', 2),
 ('embrace', 2),
 ('nemean', 1),
 ('lose', 7),
 ('israel', 1),
 ('view', 2),
 ('tenants', 1),
 ('sister’s', 1),
 ('would’', 1),
 ('england', 20),
 ('sage', 1),
 ('feed', 6),
 ('whose', 31),
 ('priests', 1),
 ('dust', 7),
 ('almost', 12),
 ('wretch', 3),
 ('harping', 1),
 ('ape', 2),
 ('columbines', 1),
 ('tenures', 1),
 ('tears', 7),
 ('fares', 2),
 ('familiar', 1),
 ('tithe', 1),
 ('privy', 1),
 ('fight', 3),
 ('rather', 5),
 ('compilation', 1),
 ('defy', 1),
 ('ourselves—to', 1),
 ('springes', 1),
 ('‘high', 1),
 ('alaska', 1),
 ('son’s', 1),
 ('demands', 2),
 ('aslant', 1),
 ('dejected', 1),
 ('knock', 1),
 ('crowing', 1),
 ('audit', 1),
 ('gather', 3),
 ('enactures', 1),
 ('responsive', 1),
 ('marked', 2),
 ('blasted', 2),
 ('virtues', 2),
 ('carried', 1),
 ('fear', 20),
 ('malefactions', 1),
 ('ancient', 1),
 ('barr’d', 1),
 ('grandsire', 1),
 ('aroused', 1),
 ('pocky', 1),
 ('continual', 1),
 ('arms', 9),
 ('but’', 1),
 ('arrest—o', 1),
 ('commutual', 1),
 ('strangely', 2),
 ('practically', 1),
 ('farm', 2),
 ('prais’d', 1),
 ('orb', 1),
 ('c', 1),
 ('wittenberg', 4),
 ('‘one’', 1),
 ('personal', 1),
 ('ut', 1),
 ('enemy', 3),
 ('post-haste', 1),
 ('incestuous', 3),
 ('she', 46),
 ('smiles', 1),
 ('praying', 1),
 ('shakespeare', 4),
 ('draw', 9),
 ('sum', 2),
 ('paintings', 1),
 ('wait', 2),
 ('about', 26),
 ('flaxen', 1),
 ('comrade', 1),
 ('yield', 2),
 ('philosophy', 2),
 ('confess', 6),
 ('o’erwhelm', 1),
 ('tend', 4),
 ('question', 16),
 ('ha’', 3),
 ('forestall', 1),
 ('really', 1),
 ('process', 3),
 ('prompted', 1),
 ('medium', 5),
 ('between', 15),
 ('bout', 1),
 ('churches', 1),
 ('ladies', 4),
 ('circumstances', 1),
 ('sitting', 1),
 ('glares', 1),
 ('resolutes', 1),
 ('sere', 1),
 ('remember', 11),
 ('princes', 1),
 ('compound', 1),
 ('lo', 3),
 ('churchyard', 2),
 ('think', 46),
 ('calumny', 1),
 ('matter—', 1),
 ('promotion', 1),
 ('brings', 1),
 ('song', 1),
 ('dark', 1),
 ('axe', 2),
 ('1e6', 1),
 ('neglected', 2),
 ('swamp', 1),
 ('worst', 1),
 ('fall', 9),
 ('language', 1),
 ('espials', 1),
 ('pity', 4),
 ('presence', 3),
 ('lesser', 1),
 ('shadows', 1),
 ('italian', 1),
 ('compare', 1),
 ('defects', 1),
 ('centre', 1),
 ('unfold', 3),
 ('outdated', 1),
 ('obeys', 1),
 ('requite', 2),
 ('imposed', 1),
 ('select', 1),
 ('accessible', 1),
 ('paragraph', 11),
 ('appointment', 1),
 ('recks', 1),
 ('odd', 2),
 ('beguile', 2),
 ('i’ll', 57),
 ('deaths', 1),
 ('looked', 1),
 ('periodic', 1),
 ('stabs', 1),
 ('room', 28),
 ('peculiar', 1),
 ('prick’d', 1),
 ('sandal', 1),
 ('longed', 1),
 ('shell', 1),
 ('undertakings', 1),
 ('ardour', 1),
 ('provisions', 1),
 ('witness', 2),
 ('robustious', 1),
 ('follow’d', 1),
 ('foundations', 3),
 ('recorders', 3),
 ('hammers', 1),
 ('grows—the', 1),
 ('opposition', 2),
 ('truant', 2),
 ('overlooked', 1),
 ('shoulder', 3),
 ('lodg’d', 1),
 ('exits', 1),
 ('method', 3),
 ('find', 17),
 ('steward', 1),
 ('wisest', 3),
 ('sets', 2),
 ('court', 6),
 ('sharp', 1),
 ('amaz’d', 1),
 ('web', 5),
 ('serpent', 2),
 ('facility', 1),
 ('hazard', 1),
 ('flash', 1),
 ('advice', 3),
 ('available', 2),
 ('lock', 2),
 ('unwilling', 1),
 ('replace', 1),
 ('confines', 1),
 ('made—', 1),
 ('rises', 3),
 ('blossoms', 1),
 ('bare', 1),
 ('caught', 2),
 ('trademark/copyright', 1),
 ('corrupt', 1),
 ('bawdy', 1),
 ('mortal', 5),
 ('praised', 1),
 ('volley', 1),
 ('abstracts', 1),
 ('unhand', 1),
 ('‘lord', 1),
 ('yourself', 14),
 ('lungs', 2),
 ('more', 99),
 ('assay', 2),
 ('mind’s', 2),
 ('1d', 1),
 ('trumpet', 5),
 ('be’', 1),
 ('destroy', 4),
 ('unsatisfied', 1),
 ('codes', 1),
 ('plac’d', 2),
 ('ecstacy', 1),
 ('normandy—', 1),
 ('table-book', 1),
 ('ills', 1),
 ('imagine—', 1),
 ('taken', 2),
 ('accuse', 1),
 ('turns', 1),
 ('pinch', 1),
 ('stops', 2),
 ('tells', 1),
 ('admiration', 3),
 ('anchor’s', 1),
 ('yonder', 2),
 ('bad', 6),
 ('1f6', 1),
 ('ruin', 1),
 ('flame', 2),
 ('solidity', 1),
 ('palm', 2),
 ('soul’s', 1),
 ('feels', 1),
 ('danes', 4),
 ('calendar', 1),
 ('let', 94),
 ('nights', 3),
 ('fadoms', 1),
 ('heel', 2),
 ('over', 4),
 ('too', 51),
 ('all—', 1),
 ('proclaims', 2),
 ('omen', 1),
 ('rugged', 2),
 ('marvellous', 2),
 ('gore', 1),
 ('e’en', 12),
 ('reconcilement', 1),
 ('1f4', 1),
 ('rightly', 1),
 ('lucianus', 3),
 ('aloof', 2),
 ('antic', 1),
 ('mine—an', 1),
 ('scope', 3),
 ('air', 12),
 ('offences', 1),
 ('shovel', 1),
 ('middle-aged', 1),
 ('blood', 19),
 ('bethought', 1),
 ('greeting', 1),
 ('they', 105),
 ('caviare', 1),
 ('retiring', 1),
 ('for’s', 2),
 ('weapons', 1),
 ('rebels', 1),
 ('sale’—', 1),
 ('theme', 3),
 ('periwig-pated', 1),
 ('yet', 42),
 ('whom', 7),
 ('bestow', 4),
 ('afeard', 1),
 ('desert', 2),
 ('mind', 12),
 ('likely', 2),
 ('pair', 2),
 ('adam', 1),
 ('ungart’red', 1),
 ('swords', 1),
 ('immediate', 4),
 ('mars', 1),
 ('seem', 10),
 ('delights', 3),
 ('uncle', 10),
 ('&c', 6),
 ('bell', 2),
 ('methods', 1),
 ('spills', 1),
 ('wish', 4),
 ('one', 45),
 ('blister', 1),
 ('shameful', 1),
 ('curb', 2),
 ('single', 2),
 ('once', 19),
 ('unknowing', 1),
 ('uses', 3),
 ('‘there', 1),
 ('murder—', 1),
 ('precepts', 2),
 ('sponge—what', 1),
 ('pluck', 3),
 ('rotten', 2),
 ('keeps', 5),
 ('inward', 3),
 ('permanent', 2),
 ('death', 37),
 ('digs', 1),
 ('able', 1),
 ('wide', 5),
 ('forgeries', 1),
 ('bearing', 1),
 ('traitorous', 1),
 ('motive', 5),
 ('gibes', 1),
 ('shuffling', 2),
 ('drabbing', 1),
 ('english', 3),
 ('oppress’d', 1),
 ('chiefly', 1),
 ('people', 4),
 ('thicker', 1),
 ('cunnings—', 1),
 ('heraldry', 2),
 ('smelt', 1),
 ('convey', 1),
 ('forest', 1),
 ('lordship', 8),
 ('hands', 12),
 ('tokens', 1),
 ('th’opposed', 1),
 ('anon', 6),
 ('vanish’d', 1),
 ('brands', 1),
 ('leads', 1),
 ('function', 1),
 ('wine', 1),
 ('knows', 3),
 ('peal', 1),
 ('suck’d', 2),
 ('tongue', 15),
 ('her', 91),
 ('delve', 1),
 ('barefoot', 1),
 ('peevish', 1),
 ('lofty', 1),
 ('jove', 2),
 ('protected', 5),
 ('such-like', 1),
 ('saw’t', 1),
 ('candied', 1),
 ('charity', 1),
 ('better’d', 1),
 ('venom', 2),
 ('temper’d', 1),
 ('contriving', 1),
 ('revel', 1),
 ('fish', 2),
 ('entreaty', 2),
 ('unpregnant', 1),
 ('is’t', 17),
 ('quarrel', 2),
 ('took', 4),
 ('‘would’', 1),
 ('natures', 1),
 ('birth', 4),
 ('illusion', 1),
 ('bed-rid', 1),
 ('desperate', 6),
 ('jaws', 1),
 ('fix’d', 3),
 ('remove', 3),
 ('calumnious', 1),
 ('grounds', 1),
 ('poor', 20),
 ('moderate', 1),
 ('discomfort', 1),
 ('matron’s', 1),
 ('wouldst', 6),
 ('agreeing', 1),
 ('as', 230),
 ('polonius’', 1),
 ('king’', 2),
 ('hundred', 2),
 ('leaping', 1),
 ('carp', 1),
 ('bears', 2),
 ('deal', 1),
 ('palpable', 1),
 ('bird', 2),
 ('importun’d', 1),
 ('stone', 2),
 ('rich', 3),
 ('prison-house', 1),
 ('applaud', 1),
 ('force', 3),
 ('favours', 2),
 ('benefit', 1),
 ('truncheon’s', 1),
 ('b’', 4),
 ('money', 5),
 ('machine', 2),
 ('breath', 9),
 ('popp’d', 1),
 ('dream', 5),
 ('image', 4),
 ('hang', 4),
 ('nonce', 1),
 ('repair', 2),
 ('awhile', 10),
 ('ill', 6),
 ('information', 9),
 ('owner', 6),
 ('dungeons', 1),
 ('consent', 4),
 ('hugger-mugger', 1),
 ('dumb', 6),
 ('honourable', 3),
 ('one—to', 1),
 ('device', 1),
 ('howsoever', 1),
 ('capital', 2),
 ('parley', 1),
 ('praises', 1),
 ('herald', 1),
 ('having', 2),
 ('entreated', 1),
 ('unkind', 1),
 ('th’other', 1),
 ('recoveries', 2),
 ('mirth', 2),
 ('extolment', 1),
 ('norway', 13),
 ('trust', 2),
 ('persevere', 1),
 ('given', 11),
 ('affairs', 1),
 ('sundays', 1),
 ('official', 3),
 ('unite', 1),
 ('fire', 12),
 ('givers', 1),
 ('speed', 3),
 ('voltemand', 9),
 ('play’d', 2),
 ('adventurous', 1),
 ('bodes', 1),
 ('sheeted', 1),
 ('murder', 12),
 ('battlements', 1),
 ('mess', 1),
 ('unknown', 2),
 ('sleeper’s', 1),
 ('lief', 1),
 ('operant', 1),
 ('looking', 1),
 ('pound', 1),
 ('house', 7),
 ('vigour', 1),
 ('on’t', 8),
 ('lobby', 2),
 ('that’s', 16),
 ('reverted', 1),
 ('pall', 1),
 ('vienna', 1),
 ('beard', 5),
 ('nymph', 1),
 ('fits', 2),
 ('blackest', 1),
 ('flies', 1),
 ('immediately', 1),
 ('numerous', 1),
 ('backward', 1),
 ('beating', 3),
 ('gregory', 1),
 ('artless', 1),
 ('right', 15),
 ('receiv’d', 2),
 ...]
In [36]:
# sorted() returns a new list in ascending order; duplicates are kept
mylist = [3, 4, 4, 2, 1]
sorted(mylist)
Out[36]:
[1, 2, 3, 4, 4]
In [37]:
# strings are compared character by character, giving alphabetical order here
mylist = ['banana', 'apple', 'orange']
sorted(mylist)
Out[37]:
['apple', 'banana', 'orange']
In [38]:
# tuples are compared by first element, then by second element to break ties
mylist = [(5, 3), (5, 4), (2, 99)]
sorted(mylist)
Out[38]:
[(2, 99), (5, 3), (5, 4)]
In [39]:
# tuples compare element by element, so the (word, count) pairs come out ordered by word
sorted(wcl)
Out[39]:
[('$1', 1),
 ('$5000', 1),
 ('&c', 6),
 ('*', 4),
 ('***', 3),
 ('*****', 2),
 ('-', 3),
 ('1', 1),
 ('1500', 1),
 ('1524', 1),
 ('1524-0txt', 1),
 ('1524-0zip', 1),
 ('1998', 1),
 ('1a', 1),
 ('1b', 1),
 ('1c', 2),
 ('1d', 1),
 ('1e', 2),
 ('1e1', 5),
 ('1e2', 1),
 ('1e3', 1),
 ('1e4', 1),
 ('1e5', 1),
 ('1e6', 1),
 ('1e7', 3),
 ('1e8', 4),
 ('1e9', 3),
 ('1f', 1),
 ('1f1', 1),
 ('1f2', 1),
 ('1f3', 5),
 ('1f4', 1),
 ('1f5', 1),
 ('1f6', 1),
 ('2', 1),
 ('20%', 1),
 ('2001', 1),
 ('2017', 1),
 ('3', 3),
 ('30', 2),
 ('4', 3),
 ('5', 1),
 ('50', 1),
 ('501c3', 1),
 ('596-1887', 1),
 ('60', 1),
 ('64-6221541', 1),
 ('750175', 1),
 ('801', 1),
 ('809', 1),
 ('84116', 1),
 ('90', 2),
 ('99775', 1),
 ('a', 607),
 ('a-cursing', 1),
 ('a-down', 1),
 ('a-down-a’', 1),
 ('a-foot', 1),
 ('a-making', 1),
 ('a-work', 1),
 ('abate', 1),
 ('abatements', 1),
 ('abhorred', 1),
 ('abide', 1),
 ('able', 1),
 ('aboard', 3),
 ('abominably', 1),
 ('about', 26),
 ('above', 4),
 ('abridgement', 1),
 ('abroad', 1),
 ('absent', 1),
 ('absolute', 2),
 ('abstinence', 1),
 ('abstracts', 1),
 ('absurd', 2),
 ('abuse', 1),
 ('abuses', 1),
 ('abus’d', 1),
 ('accent', 2),
 ('accept', 1),
 ('accepted', 2),
 ('accepting', 1),
 ('accepts', 1),
 ('access', 11),
 ('accessed', 1),
 ('accessible', 1),
 ('accident', 3),
 ('accidental', 1),
 ('accidents', 1),
 ('accord', 1),
 ('accordance', 2),
 ('according', 2),
 ('account', 1),
 ('accounted', 1),
 ('accurst', 1),
 ('accuse', 1),
 ('ache', 1),
 ('achievements', 1),
 ('acquaint', 1),
 ('acquire', 1),
 ('acquittance', 1),
 ('acres', 1),
 ('across', 1),
 ('act', 24),
 ('acted', 1),
 ('acting', 1),
 ('action', 10),
 ('actions', 2),
 ('active', 2),
 ('actively', 1),
 ('actor', 3),
 ('actors', 2),
 ('acts', 2),
 ('actual', 1),
 ('adam', 1),
 ('adam’s', 1),
 ('adders', 1),
 ('addicted', 1),
 ('addition', 4),
 ('additional', 4),
 ('additions', 1),
 ('address', 3),
 ('addresses', 1),
 ('adheres', 1),
 ('adieu', 7),
 ('adjoin’d', 1),
 ('admirable', 1),
 ('admiration', 3),
 ('admit', 2),
 ('admittance', 1),
 ('adoption', 1),
 ('adulterate', 1),
 ('advancement', 2),
 ('advances', 1),
 ('advancing', 1),
 ('advanc’d', 1),
 ('advantage', 1),
 ('adventurous', 1),
 ('advice', 3),
 ('advise', 1),
 ('aeneas’', 1),
 ('afar', 1),
 ('afeard', 1),
 ('affair', 3),
 ('affairs', 1),
 ('affectation', 1),
 ('affection', 4),
 ('affections', 1),
 ('afflict', 1),
 ('affliction', 2),
 ('afflicts', 1),
 ('affrighted', 1),
 ('affront', 1),
 ('afraid', 1),
 ('after', 14),
 ('afternoon', 1),
 ('afterwards', 1),
 ('again', 32),
 ('against', 24),
 ('again—what', 1),
 ('age', 9),
 ('agent', 1),
 ('ago', 1),
 ('agree', 9),
 ('agreed', 1),
 ('agreeing', 1),
 ('agreement', 18),
 ('ah', 3),
 ('aid', 1),
 ('aim', 1),
 ('aim’d', 1),
 ('air', 12),
 ('airs', 1),
 ('airy', 1),
 ('ak', 1),
 ('alack', 3),
 ('alarm', 1),
 ('alas', 11),
 ('alaska', 1),
 ('alexander', 5),
 ('all', 137),
 ('allegiance', 1),
 ('alleys', 1),
 ('allow', 2),
 ('allowance', 2),
 ('allowed', 1),
 ('allow’d', 1),
 ('all—', 1),
 ('all’s', 2),
 ('almost', 12),
 ('alone', 10),
 ('along', 3),
 ('aloof', 2),
 ('already', 7),
 ('also', 3),
 ('alteration', 1),
 ('alternate', 1),
 ('altitude', 1),
 ('altogether', 1),
 ('always', 1),
 ('am', 55),
 ('amaze', 1),
 ('amazement', 2),
 ('amaz’d', 1),
 ('ambassador', 1),
 ('ambassadors', 3),
 ('amber', 1),
 ('ambiguous', 1),
 ('ambition', 6),
 ('ambitious', 2),
 ('amble', 1),
 ('amen', 1),
 ('amiss', 2),
 ('amities', 1),
 ('among', 1),
 ('an', 55),
 ('anchor’s', 1),
 ('ancient', 1),
 ('and', 1051),
 ('and’t', 1),
 ('angel', 4),
 ('angels', 3),
 ('anger', 1),
 ('angle', 1),
 ('angry', 1),
 ('animals', 1),
 ('ankle', 1),
 ('annexment', 1),
 ('annual', 1),
 ('anoint', 1),
 ('anon', 6),
 ('another', 20),
 ('another’s', 1),
 ('answer', 14),
 ('answerest', 1),
 ('answer’d', 1),
 ('antic', 1),
 ('anticipation', 1),
 ('antique', 2),
 ('antiquity', 1),
 ('any', 49),
 ('anyone', 5),
 ('anything', 7),
 ('anywhere', 2),
 ('apart', 2),
 ('ape', 2),
 ('apiece', 1),
 ('apoplex’d', 1),
 ('appal', 1),
 ('apparel', 1),
 ('apparition', 2),
 ('appear', 4),
 ('appearing', 1),
 ('appears', 4),
 ('appear’d', 3),
 ('appetite', 1),
 ('applaud', 1),
 ('appliance', 1),
 ('applicable', 3),
 ('apply', 1),
 ('appointment', 1),
 ('apprehension', 2),
 ('approach', 1),
 ('approve', 2),
 ('appurtenance', 1),
 ('apt', 2),
 ('aptly', 1),
 ('archive', 13),
 ('ardour', 1),
 ('are', 154),
 ('argal', 3),
 ('argues', 1),
 ('argument', 4),
 ('aright', 1),
 ('arise', 1),
 ('arm', 5),
 ('armed', 3),
 ('armour', 3),
 ('arms', 9),
 ('army', 1),
 ('arm’d', 2),
 ('aroused', 1),
 ('arraign', 1),
 ('arrant', 2),
 ('arras', 5),
 ('array', 1),
 ('arrests', 1),
 ('arrest—o', 1),
 ('arriv’d', 1),
 ('arrow', 1),
 ('arrows', 2),
 ('art', 17),
 ('artery', 1),
 ('article', 2),
 ('articles', 1),
 ('artless', 1),
 ('as', 230),
 ('as-is', 1),
 ('ascii', 2),
 ('ashamed', 1),
 ('aside', 11),
 ('ask', 2),
 ('asked', 1),
 ('asking', 2),
 ('aslant', 1),
 ('asleep', 2),
 ('aspect', 1),
 ('ass', 3),
 ('assail', 1),
 ('assault', 1),
 ('assay', 2),
 ('assays', 1),
 ('assay’d', 1),
 ('assigns', 2),
 ('assistance', 1),
 ('assistant', 2),
 ('associated', 8),
 ('assume', 3),
 ('assurance', 2),
 ('assure', 1),
 ('assur’d', 2),
 ('ass—', 1),
 ('asunder', 1),
 ('at', 100),
 ('attached', 1),
 ('attend', 1),
 ('attendant', 1),
 ('attendants', 8),
 ('attended', 2),
 ('attends', 1),
 ('attent', 1),
 ('attractive', 1),
 ('attribute', 1),
 ('audience', 5),
 ('audit', 1),
 ('aught', 9),
 ('aught—', 1),
 ('augury', 1),
 ('aunt-mother', 1),
 ('auspicious', 1),
 ('author', 3),
 ('authorities', 1),
 ('available', 2),
 ('avoid', 3),
 ('avouch', 1),
 ('awake', 2),
 ('away', 27),
 ('away--you', 1),
 ('away—go', 1),
 ('awe', 2),
 ('awhile', 10),
 ('awry', 1),
 ('axe', 2),
 ('ay', 37),
 ('aye', 1),
 ('ayry', 1),
 ('a’', 1),
 ('b', 2),
 ('babe', 1),
 ('baby', 2),
 ('back', 8),
 ('backed', 1),
 ('backward', 1),
 ('bad', 6),
 ('bade', 1),
 ('bait', 1),
 ('baker’s', 1),
 ('bak’d', 2),
 ('ban', 1),
 ('bands', 1),
 ('bank', 1),
 ('baptista', 1),
 ('bar', 1),
 ('barbary', 2),
 ('barber’s', 1),
 ('bare', 1),
 ('barefac’d', 1),
 ('barefoot', 1),
 ('bark', 1),
 ('bark’d', 1),
 ('barnardo', 30),
 ('barren', 1),
 ('barr’d', 1),
 ('base', 5),
 ('based', 2),
 ('baseness', 1),
 ('baser', 2),
 ('basket', 2),
 ('bastard', 1),
 ('bat', 2),
 ('bated', 1),
 ('battalions', 1),
 ('batten', 1),
 ('battery', 1),
 ('battlements', 1),
 ('bawd', 1),
 ('bawdry', 1),
 ('bawds', 1),
 ('bawdy', 1),
 ('be', 239),
 ('beam', 1),
 ('bean', 2),
 ('bear', 14),
 ('beard', 5),
 ('beards', 1),
 ('beard—prythee', 1),
 ('bearers', 2),
 ('bearing', 1),
 ('bears', 2),
 ('bear’t', 1),
 ('beast', 5),
 ('beasts', 2),
 ('beast—', 1),
 ('beaten', 1),
 ('beating', 3),
 ('beats', 1),
 ('beauteous', 1),
 ('beautied', 1),
 ('beauties', 1),
 ('beautified', 1),
 ('beauty', 6),
 ('beaver', 1),
 ('became', 1),
 ('because', 2),
 ('beck', 1),
 ('beckons', 3),
 ('becomes', 3),
 ('bed', 11),
 ('bed-rid', 1),
 ('bedded', 1),
 ('beds', 1),
 ('been', 28),
 ('beer-barrel', 1),
 ('beetles', 1),
 ('befall’n', 1),
 ('befitted', 1),
 ('before', 26),
 ('beg', 7),
 ('beget', 1),
 ('beggar', 3),
 ('beggars', 1),
 ('beggars’', 1),
 ('beggar’d', 1),
 ('begin', 5),
 ('beginning', 1),
 ('begins', 2),
 ('beguile', 2),
 ('begun', 4),
 ('behaviour', 2),
 ('behav’d', 1),
 ('behind', 9),
 ('behold', 1),
 ('behove', 1),
 ('behoves', 1),
 ('being', 12),
 ('belief', 1),
 ('believe', 19),
 ('believed', 1),
 ('belike', 2),
 ('bell', 2),
 ('bellow', 1),
 ('bellowed', 1),
 ('bells', 1),
 ('belov’d', 1),
 ('below', 6),
 ('bend', 3),
 ('bended', 1),
 ('beneath', 4),
 ('benefit', 1),
 ('benetted', 1),
 ('bent', 2),
 ('bent—i', 1),
 ('berattle', 1),
 ('beseech', 7),
 ('beseech’d', 1),
 ('beshrew', 1),
 ('besides', 1),
 ('besmirch', 1),
 ('bespeak', 1),
 ('best', 11),
 ('bestial', 1),
 ('bestow', 4),
 ('bestowed', 1),
 ('bestow’d', 1),
 ('bet', 1),
 ('beteem', 1),
 ('bethought', 1),
 ('betime', 1),
 ('betimes', 1),
 ('betoken', 1),
 ('better', 14),
 ('better—their', 1),
 ('better’d', 1),
 ('between', 15),
 ('bevy', 1),
 ('beware', 2),
 ('bewept', 1),
 ('beyond', 2),
 ('be’', 1),
 ('bias', 1),
 ('bid', 6),
 ('bier', 1),
 ('bilboes', 1),
 ('binary', 1),
 ('bird', 2),
 ('birds', 1),
 ('birth', 4),
 ('bisson', 1),
 ('bites', 1),
 ('bitter', 3),
 ('black', 8),
 ('blackest', 1),
 ('blame', 3),
 ('blank', 2),
 ('blanket', 1),
 ('blanks', 1),
 ('blast', 2),
 ('blasted', 2),
 ('blasting', 1),
 ('blastments', 1),
 ('blasts', 1),
 ('blaze', 1),
 ('blazes', 1),
 ('blazon', 1),
 ('bleed', 2),
 ('bleeding', 1),
 ('blench', 1),
 ('bless', 3),
 ('blessing', 5),
 ('bles’d', 2),
 ('blister', 1),
 ('bloat', 1),
 ('blood', 19),
 ('bloodily', 1),
 ('bloody', 7),
 ('blossoms', 1),
 ('blow', 3),
 ('blown', 2),
 ('blows', 2),
 ('blue', 1),
 ('blunted', 1),
 ('blurs', 1),
 ('blush', 2),
 ('board', 1),
 ('boarded', 1),
 ('bodes', 1),
 ('bodies', 6),
 ('bodikin', 1),
 ('bodiless', 1),
 ('bodkin', 1),
 ('body', 17),
 ('boist’rous', 1),
 ('bold', 3),
 ('bonds', 1),
 ('bones', 4),
 ('bonnet', 1),
 ('bonny', 1),
 ('book', 2),
 ('books', 1),
 ('bore', 4),
 ('born', 2),
 ('borne', 3),
 ('born—he', 1),
 ('borrower', 1),
 ('borrowing', 1),
 ('borrow’d', 1),
 ('bosom', 4),
 ('botch', 1),
 ('both', 30),
 ('boughs', 1),
 ('bought', 1),
 ('bound', 7),
 ('bounded', 1),
 ('bounds', 1),
 ('bounteous', 1),
 ('bounty', 1),
 ('bourn', 1),
 ('bout', 1),
 ('bouts', 1),
 ('bow', 3),
 ('bowl', 1),
 ('box', 2),
 ('boy', 3),
 ('boys', 1),
 ('brain', 6),
 ('brainish', 1),
 ('brains', 5),
 ('branches', 1),
 ('brands', 1),
 ('brave', 3),
 ('bravery', 1),
 ('bray', 1),
 ('brazen', 1),
 ('braz’d', 1),
 ('breach', 3),
 ('bread', 1),
 ('breadth', 1),
 ('break', 8),
 ('breaking', 2),
 ('breaks', 2),
 ('breast', 1),
 ('breath', 9),
 ('breathe', 4),
 ('breathes', 1),
 ('breathing', 2),
 ('breed', 2),
 ('breeder', 1),
 ('breeding', 1),
 ('brevity', 1),
 ('bride-bed', 1),
 ('brief', 5),
 ('bring', 12),
 ('bringing', 1),
 ('brings', 1),
 ('broad', 2),
 ('broke', 2),
 ('broken', 1),
 ('brokers', 1),
 ('brooch', 1),
 ('brood', 1),
 ('brook', 2),
 ('brothel', 1),
 ('brother', 9),
 ('brothers', 2),
 ('brother’s', 7),
 ('brought', 6),
 ('brow', 4),
 ('brows', 1),
 ('bruit', 1),
 ('brute', 1),
 ('brutus', 1),
 ('bubbles', 1),
 ('budge', 1),
 ('buffets', 1),
 ('bugs', 1),
 ('build', 1),
 ('builds', 2),
 ('built', 1),
 ('bulk', 2),
 ('bulwark', 1),
 ('bung-hole', 1),
 ('burden', 1),
 ('burial', 4),
 ('burial—', 1),
 ('buried', 6),
 ('burn', 2),
 ('burning', 2),
 ('burns', 2),
 ('burnt', 1),
 ('burst', 3),
 ('business', 12),
 ('busy', 1),
 ('but', 273),
 ('button', 1),
 ('buttons', 1),
 ('but’', 1),
 ('buy', 1),
 ('buyer', 1),
 ('buys', 1),
 ('buzz', 2),
 ('buzzers', 1),
 ('by', 144),
 ('by—they', 1),
 ('by’r', 2),
 ('b’', 4),
 ('c', 1),
 ('cabin', 1),
 ('caesar', 2),
 ('cain’s', 1),
 ('calamity', 1),
 ('calculate', 1),
 ('calculated', 1),
 ('calendar', 1),
 ('calf', 1),
 ('calf-skins', 1),
 ('call', 22),
 ('called', 1),
 ('calls', 4),
 ('call’d', 1),
 ('call’t', 1),
 ('calm', 2),
 ('calmly', 1),
 ('calumnious', 1),
 ('calumny', 1),
 ('calves', 1),
 ('came', 9),
 ('camel', 2),
 ('can', 44),
 ('candied', 1),
 ('canker', 2),
 ('cannon', 5),
 ('cannoneer', 1),
 ('cannons', 1),
 ('cannot', 33),
 ('canon', 1),
 ('canoniz’d', 1),
 ('canopy', 1),
 ('canst', 3),
 ('cap', 2),
 ('cap-à-pie', 1),
 ('capability', 1),
 ('capable', 1),
 ('capable—do', 1),
 ('capital', 2),
 ('capitol', 1),
 ('capons', 1),
 ('caps', 1),
 ('captain', 10),
 ('captains', 1),
 ('carbuncles', 1),
 ('card', 3),
 ('carefully', 1),
 ('careless', 1),
 ('carnal', 1),
 ('carouses', 1),
 ('carp', 1),
 ('carpenter', 2),
 ('carriage', 1),
 ('carriages', 5),
 ('carried', 1),
 ('carries', 1),
 ('carrion—', 1),
 ('carry', 4),
 ('carrying', 1),
 ('cart', 1),
 ('carters', 1),
 ('carve', 1),
 ('case', 1),
 ('cases', 1),
 ('cast', 6),
 ('castle', 30),
 ('casual', 1),
 ('cat', 1),
 ('cataplasm', 1),
 ('catch', 2),
 ('caught', 2),
 ('cause', 19),
 ('cautel', 1),
 ('caution—i', 1),
 ('caviare', 1),
 ('cease', 3),
 ('celebrated', 1),
 ('celestial', 2),
 ('cell', 1),
 ('cellarage', 1),
 ('censure', 4),
 ('centre', 1),
 ('cerements', 1),
 ('ceremony', 3),
 ('certain', 7),
 ('certainty', 1),
 ('chalice', 1),
 ('challenger', 1),
 ('chamber', 4),
 ('chamberlain', 1),
 ('chameleon’s', 1),
 ('chance', 2),
 ('chances', 2),
 ('change', 5),
 ('changed', 1),
 ('changeling', 1),
 ('changes', 1),
 ('chanson', 1),
 ('chapel', 2),
 ('chapless', 1),
 ('character', 3),
 ('charge', 14),
 ('charges', 1),
 ('chariest', 1),
 ('charitable', 3),
 ('charities', 1),
 ('charity', 1),
 ('charm', 1),
 ('chase', 1),
 ('chaste', 3),
 ('chaunted', 1),
 ('check', 4),
 ('checking', 1),
 ('checks', 1),
 ('cheek', 2),
 ('cheer', 3),
 ('cheerfully', 2),
 ('cherub', 1),
 ('chide', 1),
 ('chief', 4),
 ('chiefest', 1),
 ('chiefly', 1),
 ('child', 2),
 ('children', 3),
 ('choice', 6),
 ('choler', 2),
 ('choose', 5),
 ('chop-fallen', 1),
 ('chopine', 1),
 ('chorus', 1),
 ('chough', 1),
 ('christian', 6),
 ('christians', 1),
 ('chronicles', 1),
 ('church', 2),
 ('churches', 1),
 ('churchyard', 2),
 ('churchyards', 1),
 ('churlish', 1),
 ('cicatrice', 1),
 ('circumscrib’d', 1),
 ('circumstance', 6),
 ('circumstances', 1),
 ('circumvent', 1),
 ('city', 3),
 ('clad', 1),
 ('claim', 2),
 ('clamb’ring', 1),
 ('clamour', 1),
 ('clapped', 1),
 ('claudio', 1),
 ('claudius', 3),
 ('claw’d', 1),
 ('clay', 3),
 ('clear', 1),
 ('clearly', 2),
 ('cleave', 1),
 ('cleft', 1),
 ('clemency', 1),
 ('clepe', 1),
 ('cliff', 1),
 ('climatures', 1),
 ('cloak', 1),
 ('close', 3),
 ('closely', 1),
 ('closes', 2),
 ('closet', 3),
 ('clothe', 1),
 ('clothes', 2),
 ('cloud', 1),
 ('clouds', 4),
 ('clout', 1),
 ('clouts', 1),
 ('clown', 47),
 ('clowns', 3),
 ('clutch', 1),
 ('co-mingled', 1),
 ('coach', 1),
 ('coagulate', 1),
 ('cock', 6),
 ('cockle', 1),
 ('codes', 1),
 ('coil', 1),
 ('coinage', 1),
 ('cold', 6),
 ('coldly', 2),
 ('collateral', 1),
 ('colleagued', 1),
 ('collected', 2),
 ('collection', 6),
 ('colour', 5),
 ('columbines', 1),
 ('combat', 1),
 ('combated', 1),
 ('combination', 1),
 ('combined', 1),
 ('come', 106),
 ('comedy', 2),
 ('comes', 24),
 ('comfort', 1),
 ('coming', 7),
 ('comma', 1),
 ('command', 10),
 ('commanded', 1),
 ('commandment', 4),
 ('commands', 1),
 ('commencement', 1),
 ('commend', 4),
 ('commendable', 1),
 ('commended', 1),
 ('comment', 1),
 ('commerce', 1),
 ('commercial', 1),
 ('commission', 5),
 ('committed', 1),
 ('common', 7),
 ('commune', 1),
 ('commutual', 1),
 ('compact', 1),
 ('companies', 1),
 ('companions', 1),
 ('company', 1),
 ('compare', 1),
 ('compass', 1),
 ('compelled', 1),
 ('compell’d', 1),
 ('competent', 1),
 ('compilation', 1),
 ('complete', 1),
 ('complexion', 3),
 ('compliance', 5),
 ('comply', 8),
 ('complying', 3),
 ('compost', 1),
 ('compos’d', 1),
 ('compound', 1),
 ('compounded', 1),
 ('compressed', 1),
 ('compulsatory', 1),
 ('compulsive', 1),
 ('computer', 2),
 ('computers', 2),
 ('comrade', 1),
 ('com’st', 2),
 ('conceal’d', 1),
 ('conceit', 5),
 ('conceited', 1),
 ('conceive', 1),
 ('concept', 2),
 ('conception', 1),
 ('concernancy', 1),
 ('concerning', 2),
 ('concernings', 1),
 ('concluded', 1),
 ('conclusions', 1),
 ('condolement', 1),
 ('confederate', 1),
 ('conference', 1),
 ('confess', 6),
 ('confession', 3),
 ('confine', 2),
 ('confines', 1),
 ('confin’d', 1),
 ('confirmation', 1),
 ('confirmed', 1),
 ('confound', 2),
 ('confront', 1),
 ('confusion', 1),
 ('congregation', 1),
 ('conjectures', 1),
 ('conjoin’d', 1),
 ('conjunctive', 1),
 ('conjuration', 1),
 ('conjure', 1),
 ('conjures', 1),
 ('conjuring', 1),
 ('conqueror', 1),
 ('conquest', 1),
 ('conscience', 8),
 ('consent', 4),
 ('consequence', 2),
 ('consequence’', 2),
 ('consequential', 1),
 ('consider', 2),
 ('considerable', 2),
 ('considered', 1),
 ('consider’d', 1),
 ('consonancy', 1),
 ('constant', 2),
 ('constantly', 1),
 ('consummation', 1),
 ('contact', 4),
 ('contagion', 2),
 ('contagious', 1),
 ('contain', 2),
 ('containing', 2),
 ('contend', 1),
 ('content', 3),
 ('contents', 2),
 ('continent', 2),
 ('continual', 1),
 ('contract', 2),
 ('contracted', 1),
 ('contraction', 1),
 ('contrary', 1),
 ('contributions', 2),
 ('contrive', 2),
 ('contriving', 1),
 ('controversy', 1),
 ('contumely', 1),
 ('convenience', 1),
 ('conveniently', 1),
 ('conversation', 1),
 ('converse', 1),
 ('convert', 3),
 ('converted', 1),
 ('convey', 1),
 ('conveyance', 1),
 ('conveyances', 1),
 ('convocation', 1),
 ('convoy', 1),
 ('cool', 1),
 ('copied', 3),
 ('copies', 7),
 ('copy', 12),
 ('copying', 4),
 ('copyright', 19),
 ('cop’d', 1),
 ('core', 1),
 ('cornelius', 7),
 ('corner', 1),
 ('coronation', 1),
 ('coronet', 1),
 ('corporation', 1),
 ...]
In [40]:
# sort by arbitrary function of elements
def second(x):
    """Return x[1], the second item of a pair (the count in a (word, count) tuple)."""
    return x[1]
# use the count as the sort key, largest counts first
sorted(wcl, key=second, reverse=True)
Out[40]:
[('the', 1300),
 ('and', 1051),
 ('to', 818),
 ('of', 800),
 ('you', 623),
 ('a', 607),
 ('i', 545),
 ('my', 516),
 ('in', 510),
 ('hamlet', 463),
 ('it', 430),
 ('that', 402),
 ('is', 362),
 ('this', 347),
 ('not', 339),
 ('with', 317),
 ('his', 297),
 ('but', 273),
 ('for', 273),
 ('your', 251),
 ('be', 239),
 ('me', 233),
 ('as', 230),
 ('lord', 220),
 ('he', 219),
 ('what', 204),
 ('him', 195),
 ('king', 194),
 ('so', 193),
 ('or', 190),
 ('have', 187),
 ('will', 176),
 ('do', 166),
 ('horatio', 154),
 ('are', 154),
 ('no', 153),
 ('we', 147),
 ('by', 144),
 ('on', 138),
 ('all', 137),
 ('if', 135),
 ('our', 123),
 ('polonius', 120),
 ('queen', 117),
 ('shall', 116),
 ('from', 111),
 ('o', 110),
 ('come', 106),
 ('they', 105),
 ('good', 105),
 ('laertes', 104),
 ('thou', 102),
 ('at', 100),
 ('more', 99),
 ('now', 98),
 ('let', 94),
 ('how', 93),
 ('her', 91),
 ('project', 89),
 ('most', 87),
 ('was', 86),
 ('thy', 86),
 ('ophelia', 86),
 ('may', 86),
 ('like', 80),
 ('would', 79),
 ('there', 78),
 ('rosencrantz', 78),
 ('us', 77),
 ('them', 75),
 ('know', 75),
 ('well', 74),
 ('’tis', 73),
 ('sir', 72),
 ('go', 70),
 ('which', 70),
 ('must', 69),
 ('enter', 68),
 ('guildenstern', 66),
 ('very', 66),
 ('did', 66),
 ('love', 66),
 ('hath', 64),
 ('then', 63),
 ('give', 63),
 ('first', 63),
 ('speak', 62),
 ('such', 61),
 ('why', 60),
 ('where', 60),
 ('thee', 58),
 ('make', 58),
 ('i’ll', 57),
 ('out', 57),
 ('should', 57),
 ('their', 57),
 ('gutenberg-tm', 56),
 ('upon', 56),
 ('when', 56),
 ('an', 55),
 ('some', 55),
 ('am', 55),
 ('work', 55),
 ('say', 53),
 ('here', 53),
 ('than', 53),
 ('these', 52),
 ('too', 51),
 ('father', 51),
 ('man', 51),
 ('much', 50),
 ('any', 49),
 ('see', 48),
 ('clown', 47),
 ('marcellus', 47),
 ('she', 46),
 ('think', 46),
 ('who', 46),
 ('one', 45),
 ('had', 44),
 ('can', 44),
 ('scene', 44),
 ('heaven', 43),
 ('time', 43),
 ('yet', 42),
 ('tell', 42),
 ('thus', 40),
 ('up', 39),
 ('mother', 39),
 ('own', 38),
 ('exit', 38),
 ('ay', 37),
 ('look', 37),
 ('death', 37),
 ('nor', 37),
 ('exeunt', 35),
 ('night', 35),
 ('play', 35),
 ('take', 35),
 ('hear', 34),
 ('soul', 34),
 ('cannot', 33),
 ('life', 33),
 ('could', 33),
 ('works', 33),
 ('osric', 33),
 ('again', 32),
 ('into', 32),
 ('ghost', 32),
 ('gutenberg', 31),
 ('within', 31),
 ('whose', 31),
 ('dead', 31),
 ('might', 31),
 ('indeed', 31),
 ('god', 31),
 ('set', 31),
 ('mine', 31),
 ('barnardo', 30),
 ('nothing', 30),
 ('castle', 30),
 ('part', 30),
 ('both', 30),
 ('heart', 29),
 ('other', 29),
 ('made', 29),
 ('pray', 29),
 ('room', 28),
 ('down', 28),
 ('dear', 28),
 ('hold', 28),
 ('leave', 28),
 ('without', 28),
 ('been', 28),
 ('away', 27),
 ('use', 27),
 ('were', 27),
 ('doth', 27),
 ('electronic', 27),
 ('nature', 27),
 ('does', 27),
 ('before', 26),
 ('about', 26),
 ('never', 26),
 ('head', 26),
 ('sweet', 26),
 ('world', 26),
 ('great', 26),
 ('terms', 26),
 ('matter', 26),
 ('second', 25),
 ('nay', 25),
 ('even', 25),
 ('denmark', 25),
 ('put', 24),
 ('act', 24),
 ('though', 24),
 ('against', 24),
 ('itself', 24),
 ('full', 24),
 ('comes', 24),
 ('show', 23),
 ('true', 23),
 ('means', 23),
 ('eyes', 23),
 ('seen', 22),
 ('old', 22),
 ('i’', 22),
 ('foundation', 22),
 ('two', 22),
 ('those', 22),
 ('follow', 22),
 ('madness', 22),
 ('call', 22),
 ('day', 21),
 ('off', 21),
 ('himself', 21),
 ('state', 21),
 ('fair', 21),
 ('mad', 21),
 ('fortinbras', 21),
 ('done', 21),
 ('words', 20),
 ('england', 20),
 ('fear', 20),
 ('poor', 20),
 ('players', 20),
 ('states', 20),
 ('son', 20),
 ('still', 20),
 ('till', 20),
 ('player', 20),
 ('hand', 20),
 ('many', 20),
 ('friends', 20),
 ('earth', 20),
 ('another', 20),
 ('keep', 20),
 ('believe', 19),
 ('blood', 19),
 ('once', 19),
 ('copyright', 19),
 ('makes', 19),
 ('reynaldo', 19),
 ('cause', 19),
 ('end', 19),
 ('since', 18),
 ('agreement', 18),
 ('father’s', 18),
 ('there’s', 18),
 ('long', 18),
 ('little', 18),
 ('gentleman', 18),
 ('thoughts', 17),
 ('find', 17),
 ('is’t', 17),
 ('thing', 17),
 ('noble', 17),
 ('th’', 17),
 ('body', 17),
 ('form', 17),
 ('else', 17),
 ('farewell', 17),
 ('young', 17),
 ('ear', 17),
 ('art', 17),
 ('faith', 17),
 ('question', 16),
 ('that’s', 16),
 ('has', 16),
 ('stand', 16),
 ('watch', 16),
 ('license', 16),
 ('hast', 16),
 ('majesty', 16),
 ('speech', 16),
 ('free', 16),
 ('youth', 16),
 ('welcome', 16),
 ('sword', 16),
 ('marry', 16),
 ('united', 15),
 ('between', 15),
 ('tongue', 15),
 ('right', 15),
 ('daughter', 15),
 ('eye', 15),
 ('live', 15),
 ('forth', 15),
 ('drink', 15),
 ('therefore', 15),
 ('said', 15),
 ('help', 15),
 ('please', 15),
 ('something', 15),
 ('last', 15),
 ('rest', 15),
 ('donations', 15),
 ('while', 14),
 ('reason', 14),
 ('after', 14),
 ('sent', 14),
 ('yourself', 14),
 ('gone', 14),
 ('thine', 14),
 ('heard', 14),
 ('law', 14),
 ('mark', 14),
 ('answer', 14),
 ('charge', 14),
 ('grace', 14),
 ('lady', 14),
 ('better', 14),
 ('dost', 14),
 ('bear', 14),
 ('sings', 14),
 ('spirit', 14),
 ('grief', 14),
 ('purpose', 14),
 ('phrase', 14),
 ('o’er', 14),
 ('we’ll', 13),
 ('norway', 13),
 ('further', 13),
 ('duty', 13),
 ('same', 13),
 ('gertrude', 13),
 ('kind', 13),
 ('sense', 13),
 ('haste', 13),
 ('archive', 13),
 ('way', 13),
 ('ebook', 13),
 ('things', 13),
 ('virtue', 13),
 ('none', 13),
 ('ever', 13),
 ('ho', 13),
 ('literary', 13),
 ('goes', 13),
 ('almost', 12),
 ('e’en', 12),
 ('air', 12),
 ('mind', 12),
 ('hands', 12),
 ('fire', 12),
 ('murder', 12),
 ('friend', 12),
 ('revenge', 12),
 ('hamlet’s', 12),
 ('saw', 12),
 ('together', 12),
 ('ii', 12),
 ('ere', 12),
 ('what’s', 12),
 ('gentlemen', 12),
 ('business', 12),
 ('voice', 12),
 ('fit', 12),
 ('madam', 12),
 ('copy', 12),
 ('grave', 12),
 ('oft', 12),
 ('ourselves', 12),
 ('name', 12),
 ('bring', 12),
 ('being', 12),
 ('each', 12),
 ('sleep', 12),
 ('word', 12),
 ('stay', 11),
 ('under', 11),
 ('far', 11),
 ('francisco', 11),
 ('’twere', 11),
 ('remember', 11),
 ('paragraph', 11),
 ('given', 11),
 ('late', 11),
 ('fee', 11),
 ('tonight', 11),
 ('foul', 11),
 ('thousand', 11),
 ('elsinore', 11),
 ('honest', 11),
 ('face', 11),
 ('doubt', 11),
 ('wind', 11),
 ('aside', 11),
 ('takes', 11),
 ('passion', 11),
 ('men', 11),
 ('thought', 11),
 ('through', 11),
 ('trademark', 11),
 ('fortune', 11),
 ('deed', 11),
 ('best', 11),
 ('villain', 11),
 ('access', 11),
 ('bed', 11),
 ('alas', 11),
 ('mean', 10),
 ('let’s', 10),
 ('meet', 10),
 ('in’t', 10),
 ('hell', 10),
 ('captain', 10),
 ('place', 10),
 ('seem', 10),
 ('uncle', 10),
 ('awhile', 10),
 ('peace', 10),
 ('memory', 10),
 ('excellent', 10),
 ('ears', 10),
 ('fine', 10),
 ('command', 10),
 ('myself', 10),
 ('action', 10),
 ('general', 10),
 ('skull', 10),
 ('alone', 10),
 ('receive', 10),
 ('he’s', 10),
 ('days', 10),
 ('thanks', 10),
 ('news', 10),
 ('ground', 10),
 ('refund', 10),
 ('neither', 10),
 ('honour', 10),
 ('laws', 10),
 ('near', 10),
 ('particular', 10),
 ('lie', 10),
 ('lost', 9),
 ('do’t', 9),
 ('arms', 9),
 ('draw', 9),
 ('fall', 9),
 ('breath', 9),
 ('information', 9),
 ('voltemand', 9),
 ('sight', 9),
 ('sure', 9),
 ('hope', 9),
 ('custom', 9),
 ('agree', 9),
 ('brother', 9),
 ('seek', 9),
 ('thank', 9),
 ('wife', 9),
 ('walk', 9),
 ('shows', 9),
 ('fell', 9),
 ('judgment', 9),
 ('get', 9),
 ('strange', 9),
 ('came', 9),
 ('courtier', 9),
 ('behind', 9),
 ('swear', 9),
 ('soft', 9),
 ('falls', 9),
 ('aught', 9),
 ('light', 9),
 ('age', 9),
 ('lay', 9),
 ('o’', 9),
 ('unto', 8),
 ('quick', 8),
 ('attendants', 8),
 ('shame', 8),
 ('including', 8),
 ('lordship', 8),
 ('on’t', 8),
 ('only', 8),
 ('understand', 8),
 ('gives', 8),
 ('return', 8),
 ('three', 8),
 ('devil', 8),
 ('provide', 8),
 ('dies', 8),
 ('sit', 8),
 ('iv', 8),
 ('person', 8),
 ('fellow', 8),
 ('service', 8),
 ('comply', 8),
 ('hither', 8),
 ('associated', 8),
 ('sun', 8),
 ('shot', 8),
 ('pale', 8),
 ('times', 8),
 ('iii', 8),
 ('to’t', 8),
 ('read', 8),
 ('conscience', 8),
 ('gave', 8),
 ('power', 8),
 ('truly', 8),
 ('ha', 8),
 ('music', 8),
 ('hour', 8),
 ('grow', 8),
 ('wherein', 8),
 ('fashion', 8),
 ('king’s', 8),
 ('here’s', 8),
 ('wilt', 8),
 ('husband', 8),
 ('joy', 8),
 ('heavens', 8),
 ('known', 8),
 ('break', 8),
 ('black', 8),
 ('back', 8),
 ('says', 8),
 ('freely', 8),
 ('less', 8),
 ('noise', 8),
 ('letters', 8),
 ('woe', 8),
 ('found', 8),
 ('looks', 8),
 ('paid', 7),
 ('offence', 7),
 ('obey', 7),
 ('distributing', 7),
 ('desire', 7),
 ('lose', 7),
 ('dust', 7),
 ('tears', 7),
 ('whom', 7),
 ('house', 7),
 ('section', 7),
 ('either', 7),
 ('tax', 7),
 ('brother’s', 7),
 ('adieu', 7),
 ('needs', 7),
 ('whole', 7),
 ('coming', 7),
 ('march', 7),
 ('others', 7),
 ('ebooks', 7),
 ('common', 7),
 ('turn', 7),
 ('visit', 7),
 ('top', 7),
 ('copies', 7),
 ('already', 7),
 ('country', 7),
 ('every', 7),
 ('enough', 7),
 ('rank', 7),
 ('beg', 7),
 ('wholesome', 7),
 ('bound', 7),
 ('certain', 7),
 ('list', 7),
 ('cornelius', 7),
 ('fool', 7),
 ('beseech', 7),
 ('woman', 7),
 ('bloody', 7),
 ('anything', 7),
 ('sound', 7),
 ('unless', 7),
 ('located', 7),
 ('permission', 7),
 ('start', 7),
 ('loves', 7),
 ('seems', 7),
 ('beauty', 6),
 ('dane', 6),
 ('kill’d', 6),
 ('received', 6),
 ('fat', 6),
 ('wicked', 6),
 ('sister', 6),
 ('feed', 6),
 ('confess', 6),
 ('court', 6),
 ('bad', 6),
 ('&c', 6),
 ('anon', 6),
 ('desperate', 6),
 ('wouldst', 6),
 ('ill', 6),
 ('owner', 6),
 ('dumb', 6),
 ('mother’s', 6),
 ('whether', 6),
 ('wrong', 6),
 ('course', 6),
 ('piece', 6),
 ('using', 6),
 ('crown', 6),
 ('point', 6),
 ('volunteers', 6),
 ('marriage', 6),
 ('lies', 6),
 ('hall', 6),
 ('health', 6),
 ('new', 6),
 ('door', 6),
 ('poison', 6),
 ('effect', 6),
 ('toward', 6),
 ('wisdom', 6),
 ('seal’d', 6),
 ('v', 6),
 ('struck', 6),
 ('christian', 6),
 ('damned', 6),
 ('vile', 6),
 ('discourse', 6),
 ('fie', 6),
 ('brain', 6),
 ('try', 6),
 ('slain', 6),
 ('round', 6),
 ('heavy', 6),
 ('herself', 6),
 ('water', 6),
 ('home', 6),
 ('mouth', 6),
 ('choice', 6),
 ('pardon', 6),
 ('cock', 6),
 ('bid', 6),
 ('guilty', 6),
 ('dull', 6),
 ('pyrrhus', 6),
 ('sorrow', 6),
 ('shape', 6),
 ('except', 6),
 ('withal', 6),
 ('lack', 6),
 ('brought', 6),
 ('dare', 6),
 ('cold', 6),
 ('touch', 6),
 ('season', 6),
 ('platform', 6),
 ('below', 6),
 ('property', 6),
 ('laid', 6),
 ('maid', 6),
 ('ambition', 6),
 ('circumstance', 6),
 ('sea', 6),
 ('move', 6),
 ('bodies', 6),
 ('fare', 6),
 ('foils', 6),
 ('distribute', 6),
 ('pass', 6),
 ('double', 6),
 ('note', 6),
 ('laugh', 6),
 ('vows', 6),
 ('yours', 6),
 ('false', 6),
 ('whilst', 6),
 ('stood', 6),
 ('proof', 6),
 ('wager', 6),
 ('speaks', 6),
 ('hot', 6),
 ('prologue', 6),
 ('buried', 6),
 ('danish', 6),
 ('prince', 6),
 ('hit', 6),
 ('truth', 6),
 ('collection', 6),
 ('distribution', 6),
 ('cast', 6),
 ('knave', 5),
 ('spirits', 5),
 ('cannon', 5),
 ('flourish', 5),
 ('fly', 5),
 ('compliance', 5),
 ('souls', 5),
 ('warrant', 5),
 ('four', 5),
 ('defect', 5),
 ('hard', 5),
 ('rather', 5),
 ('medium', 5),
 ('web', 5),
 ('mortal', 5),
 ('trumpet', 5),
 ('keeps', 5),
 ('wide', 5),
 ('motive', 5),
 ('protected', 5),
 ('money', 5),
 ('dream', 5),
 ('beard', 5),
 ('conceit', 5),
 ('patience', 5),
 ('cries', 5),
 ('quiet', 5),
 ('mercy', 5),
 ('following', 5),
 ('woul’t', 5),
 ('treason', 5),
 ('table', 5),
 ('anyone', 5),
 ('1f3', 5),
 ('in’s', 5),
 ('teach', 5),
 ('france', 5),
 ('arm', 5),
 ('save', 5),
 ('shalt', 5),
 ('soldiers', 5),
 ('need', 5),
 ('flesh', 5),
 ('knew', 5),
 ('next', 5),
 ('prepare', 5),
 ('parts', 5),
 ('honesty', 5),
 ('wwwgutenbergorg', 5),
 ('themselves', 5),
 ('whereon', 5),
 ('perhaps', 5),
 ('sing', 5),
 ('damn’d', 5),
 ('slave', 5),
 ('perchance', 5),
 ('choose', 5),
 ('natural', 5),
 ('presently', 5),
 ('brains', 5),
 ('dangerous', 5),
 ('direct', 5),
 ('half', 5),
 ('pay', 5),
 ('1e1', 5),
 ('writ', 5),
 ('told', 5),
 ('holds', 5),
 ('prison', 5),
 ('arras', 5),
 ('colour', 5),
 ('reads', 5),
 ('send', 5),
 ('visage', 5),
 ('morning', 5),
 ('provided', 5),
 ('throw', 5),
 ('mass', 5),
 ('cost', 5),
 ('warlike', 5),
 ('messenger', 5),
 ('countenance', 5),
 ('disposition', 5),
 ('begin', 5),
 ('fault', 5),
 ('longer', 5),
 ('drown’d', 5),
 ('moon', 5),
 ('fortune’s', 5),
 ('short', 5),
 ('stir', 5),
 ('star', 5),
 ('land', 5),
 ('cry', 5),
 ('’gainst', 5),
 ('wit', 5),
 ('man’s', 5),
 ('commission', 5),
 ('change', 5),
 ('servant', 5),
 ('base', 5),
 ('liberty', 5),
 ('alexander', 5),
 ('’twas', 5),
 ('figure', 5),
 ('audience', 5),
 ('went', 5),
 ('plain', 5),
 ('report', 5),
 ('replacement', 5),
 ('humbly', 5),
 ('hide', 5),
 ('beast', 5),
 ('carriages', 5),
 ('gentle', 5),
 ('die', 5),
 ('silence', 5),
 ('sudden', 5),
 ('twelve', 5),
 ('fast', 5),
 ('gracious', 5),
 ('limited', 5),
 ('yes', 5),
 ('blessing', 5),
 ('hearing', 5),
 ('french', 5),
 ('drown', 5),
 ('its', 5),
 ('brief', 5),
 ('year', 5),
 ('main', 5),
 ('eat', 5),
 ('nunnery', 5),
 ('dread', 5),
 ('kingdom', 5),
 ('bore', 4),
 ('so’', 4),
 ('ten', 4),
 ('hecuba', 4),
 ('lords', 4),
 ('grown', 4),
 ('throat', 4),
 ('contact', 4),
 ('chief', 4),
 ('desires', 4),
 ('return’d', 4),
 ('months', 4),
 ('heat', 4),
 ('wits', 4),
 ('clouds', 4),
 ('impart', 4),
 ('unnatural', 4),
 ('individual', 4),
 ('priest', 4),
 ('imports', 4),
 ('often', 4),
 ('lands', 4),
 ('wittenberg', 4),
 ('shakespeare', 4),
 ('tend', 4),
 ('ladies', 4),
 ('pity', 4),
 ('destroy', 4),
 ('danes', 4),
 ('over', 4),
 ('bestow', 4),
 ('immediate', 4),
 ('wish', 4),
 ('people', 4),
 ('took', 4),
 ('birth', 4),
 ('b’', 4),
 ('image', 4),
 ('hang', 4),
 ('consent', 4),
 ('flowers', 4),
 ('favour', 4),
 ('writing', 4),
 ('stars', 4),
 ('treasure', 4),
 ('requirements', 4),
 ('sleeping', 4),
 ('profit', 4),
 ('secret', 4),
 ('turn’d', 4),
 ('hence', 4),
 ('length', 4),
 ('ecstasy', 4),
 ('commandment', 4),
 ('drift', 4),
 ('defective', 4),
 ('glad', 4),
 ('snow', 4),
 ('imagination', 4),
 ('creating', 4),
 ('stage', 4),
 ('thrice', 4),
 ('ease', 4),
 ('meant', 4),
 ('sits', 4),
 ('format', 4),
 ('you’ll', 4),
 ('white', 4),
 ('died', 4),
 ('polonius’s', 4),
 ('discretion', 4),
 ('poison’d', 4),
 ('used', 4),
 ('william', 4),
 ('sleeps', 4),
 ('begun', 4),
 ('*', 4),
 ('gifts', 4),
 ('lov’d', 4),
 ('lives', 4),
 ('public', 4),
 ('fingers', 4),
 ('possible', 4),
 ('habit', 4),
 ('ready', 4),
 ('native', 4),
 ('weep', 4),
 ('wretched', 4),
 ('seeing', 4),
 ('pate', 4),
 ('fain', 4),
 ('didst', 4),
 ('lead', 4),
 ('thence', 4),
 ('copying', 4),
 ('tender', 4),
 ('brow', 4),
 ('th’earth', 4),
 ('providing', 4),
 ('online', 4),
 ('yea', 4),
 ('modesty', 4),
 ('lights', 4),
 ('lest', 4),
 ('re-enter', 4),
 ('motion', 4),
 ('straight', 4),
 ('methought', 4),
 ('ways', 4),
 ('censure', 4),
 ('entertainment', 4),
 ('few', 4),
 ('salt', 4),
 ('mock', 4),
 ('stop', 4),
 ('instant', 4),
 ('draws', 4),
 ('weeds', 4),
 ('support', 4),
 ('six', 4),
 ('gross', 4),
 ('sigh', 4),
 ('editions', 4),
 ('status', 4),
 ('damages', 4),
 ('ye', 4),
 ('above', 4),
 ('trumpets', 4),
 ('cousin', 4),
 ('cursed', 4),
 ('appear', 4),
 ('special', 4),
 ('herein', 4),
 ('holder', 4),
 ('beneath', 4),
 ('bosom', 4),
 ('thrift', 4),
 ('addition', 4),
 ('thyself', 4),
 ('evil', 4),
 ('honour’d', 4),
 ('grows', 4),
 ('methinks', 4),
 ('violence', 4),
 ('1e8', 4),
 ('rose', 4),
 ('mission', 4),
 ('distributed', 4),
 ('thinking', 4),
 ('practice', 4),
 ('tomorrow', 4),
 ('calls', 4),
 ('puts', 4),
 ('forget', 4),
 ('lend', 4),
 ('angel', 4),
 ('follows', 4),
 ('spade', 4),
 ('years', 4),
 ('pause', 4),
 ('check', 4),
 ('leisure', 4),
 ('tale', 4),
 ('deep', 4),
 ('cup', 4),
 ('quite', 4),
 ('kill', 4),
 ('subject', 4),
 ('quality', 4),
 ('written', 4),
 ('appears', 4),
 ('sailors', 4),
 ('whereto', 4),
 ('affection', 4),
 ('forgot', 4),
 ('horrible', 4),
 ('strong', 4),
 ('pleasure', 4),
 ('stronger', 4),
 ('bones', 4),
 ('wi’', 4),
 ('twice', 4),
 ...]
In [41]:
# lambda construct for defining a function inline; sorts by count, most frequent first
wcl = sorted(wcl, key=lambda pair: pair[1], reverse=True)

Plot of frequency vs rank

In [42]:
import matplotlib.pyplot as plt
%matplotlib inline
from numpy import *
In [43]:
# Rank-frequency plot: position i in wcl gets rank i+1, so wcl has to be
# sorted by descending count before this cell is run.
rank = arange(1, len(wcl) + 1)
freqs = [count for word, count in wcl]
plt.plot(rank, freqs)
# label the axes so that the figure can be read on its own
plt.xlabel('rank')
plt.ylabel('frequency');  # trailing ; keeps the returned Text object out of the cell output
In [44]:
# frequency vs rank with a logarithmic frequency axis
plt.semilogy(rank, freqs)
plt.xlabel('rank')
plt.ylabel('frequency');  # trailing ; keeps the returned Text object out of the cell output
In [45]:
# frequency vs rank with logarithmic scales on both axes
plt.loglog(rank, freqs)
plt.xlabel('rank')
plt.ylabel('frequency');  # trailing ; keeps the returned Text object out of the cell output

Zipf's Law

In [ ]:
# Sort by word length
In [48]:
# Shortest words first. sorted() is ascending by default and stable, so words
# of equal length keep the relative order they had before this cell.
wcl = sorted(wcl, key=lambda pair: len(pair[0]))
In [49]:
# display the (word, count) list in its current order
wcl
Out[49]:
[('a', 607),
 ('i', 545),
 ('o', 110),
 ('v', 6),
 ('*', 4),
 ('-', 3),
 ('3', 3),
 ('4', 3),
 ('b', 2),
 ('c', 1),
 ('s', 1),
 ('5', 1),
 ('2', 1),
 ('1', 1),
 ('to', 818),
 ('of', 800),
 ('my', 516),
 ('in', 510),
 ('it', 430),
 ('is', 362),
 ('be', 239),
 ('me', 233),
 ('as', 230),
 ('he', 219),
 ('so', 193),
 ('or', 190),
 ('do', 166),
 ('no', 153),
 ('we', 147),
 ('by', 144),
 ('on', 138),
 ('if', 135),
 ('at', 100),
 ('us', 77),
 ('go', 70),
 ('an', 55),
 ('am', 55),
 ('up', 39),
 ('ay', 37),
 ('i’', 22),
 ('ho', 13),
 ('ii', 12),
 ('o’', 9),
 ('iv', 8),
 ('ha', 8),
 ('&c', 6),
 ('b’', 4),
 ('ye', 4),
 ('lo', 3),
 ('ah', 3),
 ('oh', 3),
 ('‘i', 3),
 ('90', 2),
 ('’t', 2),
 ('1e', 2),
 ('vi', 2),
 ('‘a', 2),
 ('30', 2),
 ('1c', 2),
 ('ut', 1),
 ('1d', 1),
 ('se', 1),
 ('la', 1),
 ('1a', 1),
 ('1b', 1),
 ('$1', 1),
 ('ak', 1),
 ('et', 1),
 ('dr', 1),
 ('a’', 1),
 ('po', 1),
 ('pg', 1),
 ('60', 1),
 ('50', 1),
 ('1f', 1),
 ('the', 1300),
 ('and', 1051),
 ('you', 623),
 ('not', 339),
 ('his', 297),
 ('but', 273),
 ('for', 273),
 ('him', 195),
 ('are', 154),
 ('all', 137),
 ('our', 123),
 ('now', 98),
 ('let', 94),
 ('how', 93),
 ('her', 91),
 ('was', 86),
 ('thy', 86),
 ('may', 86),
 ('sir', 72),
 ('did', 66),
 ('why', 60),
 ('out', 57),
 ('say', 53),
 ('too', 51),
 ('man', 51),
 ('any', 49),
 ('see', 48),
 ('she', 46),
 ('who', 46),
 ('one', 45),
 ('had', 44),
 ('can', 44),
 ('yet', 42),
 ('own', 38),
 ('nor', 37),
 ('god', 31),
 ('set', 31),
 ('use', 27),
 ('nay', 25),
 ('put', 24),
 ('act', 24),
 ('old', 22),
 ('two', 22),
 ('day', 21),
 ('off', 21),
 ('mad', 21),
 ('son', 20),
 ('end', 19),
 ('th’', 17),
 ('ear', 17),
 ('art', 17),
 ('has', 16),
 ('eye', 15),
 ('law', 14),
 ('way', 13),
 ('air', 12),
 ('saw', 12),
 ('ere', 12),
 ('fit', 12),
 ('oft', 12),
 ('far', 11),
 ('fee', 11),
 ('men', 11),
 ('bed', 11),
 ('lie', 10),
 ('get', 9),
 ('age', 9),
 ('lay', 9),
 ('sit', 8),
 ('sun', 8),
 ('iii', 8),
 ('joy', 8),
 ('woe', 8),
 ('tax', 7),
 ('top', 7),
 ('beg', 7),
 ('fat', 6),
 ('bad', 6),
 ('ill', 6),
 ('new', 6),
 ('fie', 6),
 ('try', 6),
 ('bid', 6),
 ('sea', 6),
 ('hot', 6),
 ('hit', 6),
 ('fly', 5),
 ('web', 5),
 ('1f3', 5),
 ('arm', 5),
 ('pay', 5),
 ('1e1', 5),
 ('cry', 5),
 ('wit', 5),
 ('die', 5),
 ('yes', 5),
 ('its', 5),
 ('eat', 5),
 ('so’', 4),
 ('ten', 4),
 ('yea', 4),
 ('few', 4),
 ('six', 4),
 ('1e8', 4),
 ('cup', 4),
 ('wi’', 4),
 ('dry', 3),
 ('pit', 3),
 ('ha’', 3),
 ('win', 3),
 ('1e7', 3),
 ('dew', 3),
 ('wed', 3),
 ('boy', 3),
 ('ass', 3),
 ('1e9', 3),
 ('bow', 3),
 ('***', 3),
 ('cut', 2),
 ('awe', 2),
 ('rub', 2),
 ('vow', 2),
 ('hey', 2),
 ('ape', 2),
 ('sum', 2),
 ('axe', 2),
 ('odd', 2),
 ('lot', 2),
 ('toy', 2),
 ('met', 2),
 ('ice', 2),
 ('dog', 2),
 ('toe', 2),
 ('box', 2),
 ('me’', 2),
 ('wag', 2),
 ('hid', 2),
 ('jig', 2),
 ('hat', 2),
 ('dig', 2),
 ('run', 2),
 ('kin', 2),
 ('got', 2),
 ('war', 2),
 ('sat', 2),
 ('rat', 2),
 ('jot', 2),
 ('fed', 2),
 ('wax', 2),
 ('on—', 2),
 ('red', 2),
 ('bat', 2),
 ('apt', 2),
 ('lap', 2),
 ('rue', 2),
 ('’em', 2),
 ('sin', 2),
 ('vii', 2),
 ('ask', 2),
 ('cap', 2),
 ('foe', 2),
 ('aid', 1),
 ('pah', 1),
 ('aim', 1),
 ('yon', 1),
 ('ein', 1),
 ('ago', 1),
 ('i’m', 1),
 ('low', 1),
 ('rot', 1),
 ('wot', 1),
 ('hic', 1),
 ('orb', 1),
 ('1e6', 1),
 ('be’', 1),
 ('1f6', 1),
 ('1f4', 1),
 ('dug', 1),
 ('1f5', 1),
 ('buy', 1),
 ('led', 1),
 ('gem', 1),
 ('it—', 1),
 ('sty', 1),
 ('‘we', 1),
 ('ply', 1),
 ('row', 1),
 ('aye', 1),
 ('1f2', 1),
 ('cue', 1),
 ('ore', 1),
 ('gib', 1),
 ('hap', 1),
 ('1e3', 1),
 ('is—', 1),
 ('fix', 1),
 ('foh', 1),
 ('hue', 1),
 ('lug', 1),
 ('i’d', 1),
 ('fox', 1),
 ('1e2', 1),
 ('fay', 1),
 ('woo', 1),
 ('dip', 1),
 ('non', 1),
 ('due', 1),
 ('mew', 1),
 ('gis', 1),
 ('cat', 1),
 ('ban', 1),
 ('irs', 1),
 ('up—', 1),
 ('bet', 1),
 ('owl', 1),
 ('key', 1),
 ('raw', 1),
 ('pox', 1),
 ('bar', 1),
 ('mar', 1),
 ('ope', 1),
 ('err', 1),
 ('1e5', 1),
 ('hum', 1),
 ('1e4', 1),
 ('20%', 1),
 ('dye', 1),
 ('809', 1),
 ('gum', 1),
 ('jaw', 1),
 ('1f1', 1),
 ('yaw', 1),
 ('pat', 1),
 ('‘if', 1),
 ('801', 1),
 ('llc', 1),
 ('that', 402),
 ('this', 347),
 ('with', 317),
 ('your', 251),
 ('lord', 220),
 ('what', 204),
 ('king', 194),
 ('have', 187),
 ('will', 176),
 ('from', 111),
 ('come', 106),
 ('they', 105),
 ('good', 105),
 ('thou', 102),
 ('more', 99),
 ('most', 87),
 ('like', 80),
 ('them', 75),
 ('know', 75),
 ('well', 74),
 ('’tis', 73),
 ('must', 69),
 ('very', 66),
 ('love', 66),
 ('hath', 64),
 ('then', 63),
 ('give', 63),
 ('such', 61),
 ('thee', 58),
 ('make', 58),
 ('i’ll', 57),
 ('upon', 56),
 ('when', 56),
 ('some', 55),
 ('work', 55),
 ('here', 53),
 ('than', 53),
 ('much', 50),
 ('time', 43),
 ('tell', 42),
 ('thus', 40),
 ('exit', 38),
 ('look', 37),
 ('play', 35),
 ('take', 35),
 ('hear', 34),
 ('soul', 34),
 ('life', 33),
 ('into', 32),
 ('dead', 31),
 ('mine', 31),
 ('part', 30),
 ('both', 30),
 ('made', 29),
 ('pray', 29),
 ('room', 28),
 ('down', 28),
 ('dear', 28),
 ('hold', 28),
 ('been', 28),
 ('away', 27),
 ('were', 27),
 ('doth', 27),
 ('does', 27),
 ('head', 26),
 ('even', 25),
 ('full', 24),
 ('show', 23),
 ('true', 23),
 ('eyes', 23),
 ('seen', 22),
 ('call', 22),
 ('fair', 21),
 ('done', 21),
 ('fear', 20),
 ('poor', 20),
 ('till', 20),
 ('hand', 20),
 ('many', 20),
 ('keep', 20),
 ('once', 19),
 ('long', 18),
 ('find', 17),
 ('is’t', 17),
 ('body', 17),
 ('form', 17),
 ('else', 17),
 ('hast', 16),
 ('free', 16),
 ('live', 15),
 ('said', 15),
 ('help', 15),
 ('last', 15),
 ('rest', 15),
 ('sent', 14),
 ('gone', 14),
 ('mark', 14),
 ('lady', 14),
 ('dost', 14),
 ('bear', 14),
 ('o’er', 14),
 ('duty', 13),
 ('same', 13),
 ('kind', 13),
 ('none', 13),
 ('ever', 13),
 ('goes', 13),
 ('e’en', 12),
 ('mind', 12),
 ('fire', 12),
 ('copy', 12),
 ('name', 12),
 ('each', 12),
 ('word', 12),
 ('stay', 11),
 ('late', 11),
 ('foul', 11),
 ('face', 11),
 ('wind', 11),
 ('deed', 11),
 ('best', 11),
 ('alas', 11),
 ('mean', 10),
 ('meet', 10),
 ('in’t', 10),
 ('hell', 10),
 ('seem', 10),
 ('ears', 10),
 ('fine', 10),
 ('he’s', 10),
 ('days', 10),
 ('news', 10),
 ('laws', 10),
 ('near', 10),
 ('lost', 9),
 ('do’t', 9),
 ('arms', 9),
 ('draw', 9),
 ('fall', 9),
 ('sure', 9),
 ('hope', 9),
 ('seek', 9),
 ('wife', 9),
 ('walk', 9),
 ('fell', 9),
 ('came', 9),
 ('soft', 9),
 ('unto', 8),
 ('on’t', 8),
 ('only', 8),
 ('dies', 8),
 ('shot', 8),
 ('pale', 8),
 ('to’t', 8),
 ('read', 8),
 ('gave', 8),
 ('hour', 8),
 ('grow', 8),
 ('wilt', 8),
 ('back', 8),
 ('says', 8),
 ('less', 8),
 ('paid', 7),
 ('obey', 7),
 ('lose', 7),
 ('dust', 7),
 ('whom', 7),
 ('turn', 7),
 ('rank', 7),
 ('list', 7),
 ('fool', 7),
 ('dane', 6),
 ('feed', 6),
 ('anon', 6),
 ('dumb', 6),
 ('lies', 6),
 ('hall', 6),
 ('door', 6),
 ('vile', 6),
 ('home', 6),
 ('cock', 6),
 ('dull', 6),
 ('lack', 6),
 ('dare', 6),
 ('cold', 6),
 ('laid', 6),
 ('maid', 6),
 ('move', 6),
 ('fare', 6),
 ('pass', 6),
 ('note', 6),
 ('vows', 6),
 ('cast', 6),
 ('four', 5),
 ('hard', 5),
 ('wide', 5),
 ('in’s', 5),
 ('save', 5),
 ('need', 5),
 ('knew', 5),
 ('next', 5),
 ('sing', 5),
 ('half', 5),
 ('writ', 5),
 ('told', 5),
 ('send', 5),
 ('mass', 5),
 ('cost', 5),
 ('moon', 5),
 ('stir', 5),
 ('star', 5),
 ('land', 5),
 ('base', 5),
 ('went', 5),
 ('hide', 5),
 ('fast', 5),
 ('year', 5),
 ('main', 5),
 ('bore', 4),
 ('heat', 4),
 ('wits', 4),
 ('tend', 4),
 ('pity', 4),
 ('over', 4),
 ('wish', 4),
 ('took', 4),
 ('hang', 4),
 ('glad', 4),
 ('snow', 4),
 ('ease', 4),
 ('sits', 4),
 ('died', 4),
 ('used', 4),
 ('weep', 4),
 ('pate', 4),
 ('fain', 4),
 ('lead', 4),
 ('brow', 4),
 ('lest', 4),
 ('ways', 4),
 ('salt', 4),
 ('mock', 4),
 ('stop', 4),
 ('sigh', 4),
 ('evil', 4),
 ('rose', 4),
 ('puts', 4),
 ('lend', 4),
 ('tale', 4),
 ('deep', 4),
 ('kill', 4),
 ('wear', 4),
 ('kept', 4),
 ('sick', 4),
 ('site', 4),
 ('wild', 4),
 ('fees', 4),
 ('wont', 3),
 ('user', 3),
 ('slow', 3),
 ('left', 3),
 ('also', 3),
 ('loud', 3),
 ('hits', 3),
 ('uses', 3),
 ('rich', 3),
 ('high', 3),
 ('card', 3),
 ('morn', 3),
 ('idle', 3),
 ('pure', 3),
 ('odds', 3),
 ('sort', 3),
 ('clay', 3),
 ('tame', 3),
 ('jest', 3),
 ('lets', 3),
 ('neck', 3),
 ('pipe', 3),
 ('vice', 3),
 ('bend', 3),
 ('worm', 3),
 ('sail', 3),
 ('mere', 3),
 ('suit', 3),
 ('flat', 3),
 ('easy', 3),
 ('pith', 3),
 ('rage', 3),
 ('nine', 3),
 ('bold', 3),
 ('rash', 3),
 ('five', 3),
 ('seal', 3),
 ('past', 3),
 ('if’t', 3),
 ('hill', 3),
 ('foot', 3),
 ('sore', 3),
 ('city', 3),
 ('blow', 3),
 ('edge', 3),
 ('gods', 3),
 ('date', 3),
 ('open', 2),
 ('gait', 2),
 ('loam', 2),
 ('holy', 2),
 ('sirs', 2),
 ('wipe', 2),
 ('join', 2),
 ('jump', 2),
 ('by’r', 2),
 ('wars', 2),
 ('ones', 2),
 ('quit', 2),
 ('view', 2),
 ('farm', 2),
 ('wait', 2),
 ('sets', 2),
 ('lock', 2),
 ('palm', 2),
 ('heel', 2),
 ('pair', 2),
 ('bell', 2),
 ('curb', 2),
 ('jove', 2),
 ('fish', 2),
 ('bird', 2),
 ('fits', 2),
 ('born', 2),
 ('keen', 2),
 ('lust', 2),
 ('hart', 2),
 ('eats', 2),
 ('term', 2),
 ('want', 2),
 ('soil', 2),
 ('lips', 2),
 ('just', 2),
 ('food', 2),
 ('line', 2),
 ('runs', 2),
 ('foil', 2),
 ('held', 2),
 ('nose', 2),
 ('rise', 2),
 ('ha’t', 2),
 ('sir—', 2),
 ('lieu', 2),
 ('oath', 2),
 ('step', 2),
 ('wash', 2),
 ('wise', 2),
 ('tush', 2),
 ('heed', 2),
 ('file', 2),
 ('dove', 2),
 ('ring', 2),
 ('tune', 2),
 ('ours', 2),
 ('acts', 2),
 ('melt', 2),
 ('page', 2),
 ('dirt', 2),
 ('west', 2),
 ('peep', 2),
 ('gain', 2),
 ('envy', 2),
 ('rate', 2),
 ('joys', 2),
 ('ends', 2),
 ('wary', 2),
 ('buzz', 2),
 ('feet', 2),
 ('fame', 2),
 ('pace', 2),
 ('talk', 2),
 ('ship', 2),
 ('calm', 2),
 ('rude', 2),
 ('guts', 2),
 ('roar', 2),
 ('book', 2),
 ('weak', 2),
 ('sith', 2),
 ('bent', 2),
 ('it’s', 2),
 ('tear', 2),
 ('bulk', 2),
 ('safe', 2),
 ('gall', 2),
 ('side', 2),
 ('hurt', 2),
 ('mole', 2),
 ('fate', 2),
 ('diet', 2),
 ('owns', 2),
 ('crew', 2),
 ('pole', 2),
 ('deny', 2),
 ('burn', 2),
 ('baby', 2),
 ('seat', 2),
 ('bean', 2),
 ('you—', 2),
 ('amen', 1),
 ('week', 1),
 ('mood', 1),
 ('wore', 1),
 ('robe', 1),
 ('cart', 1),
 ('ache', 1),
 ('1524', 1),
 ('wing', 1),
 ('data', 1),
 ('ass—', 1),
 ('dish', 1),
 ('jade', 1),
 ('yawn', 1),
 ('firm', 1),
 ('thin', 1),
 ('sees', 1),
 ('lids', 1),
 ('seed', 1),
 ('airy', 1),
 ('wrap', 1),
 ('cool', 1),
 ('bugs', 1),
 ('hawk', 1),
 ('meed', 1),
 ('shut', 1),
 ('sage', 1),
 ('defy', 1),
 ('but’', 1),
 ('bout', 1),
 ('sere', 1),
 ('song', 1),
 ('dark', 1),
 ('bare', 1),
 ('ills', 1),
 ('ruin', 1),
 ('all—', 1),
 ('omen', 1),
 ('gore', 1),
 ('adam', 1),
 ('mars', 1),
 ('digs', 1),
 ('able', 1),
 ('wine', 1),
 ('peal', 1),
 ('jaws', 1),
 ('carp', 1),
 ('deal', 1),
 ('mess', 1),
 ('lief', 1),
 ('pall', 1),
 ('calf', 1),
 ('curd', 1),
 ('case', 1),
 ('lain', 1),
 ('halt', 1),
 ('why—', 1),
 ('yond', 1),
 ('op’d', 1),
 ('prey', 1),
 ('drum', 1),
 ('undo', 1),
 ('hate', 1),
 ('spur', 1),
 ('loan', 1),
 ('loss', 1),
 ('rack', 1),
 ('tent', 1),
 ('grew', 1),
 ('fill', 1),
 ('nill', 1),
 ('vial', 1),
 ('turf', 1),
 ('grey', 1),
 ('heir', 1),
 ('lank', 1),
 ('nero', 1),
 ('buys', 1),
 ('pile', 1),
 ('1500', 1),
 ('poem', 1),
 ('beck', 1),
 ('roof', 1),
 ('deer', 1),
 ('bait', 1),
 ('‘the', 1),
 ('worn', 1),
 ('zone', 1),
 ('cure', 1),
 ('sans', 1),
 ('bawd', 1),
 ('pays', 1),
 ('coil', 1),
 ('rede', 1),
 ('gulf', 1),
 ('dole', 1),
 ('army', 1),
 ('hark', 1),
 ('ayry', 1),
 ('wall', 1),
 ('fail', 1),
 ('sir’', 1),
 ('huge', 1),
 ('wart', 1),
 ('rule', 1),
 ('debt', 1),
 ('sold', 1),
 ('clad', 1),
 ('bank', 1),
 ('disk', 1),
 ('sins', 1),
 ('seas', 1),
 ('‘man', 1),
 ('wood', 1),
 ('haps', 1),
 ('womb', 1),
 ('feel', 1),
 ('knee', 1),
 ('hush', 1),
 ('dews', 1),
 ('1998', 1),
 ('fond', 1),
 ('afar', 1),
 ('bark', 1),
 ('drop', 1),
 ('lash', 1),
 ('‘and', 1),
 ('nave', 1),
 ('rite', 1),
 ('damn', 1),
 ('desk', 1),
 ('hies', 1),
 ('illo', 1),
 ('dram', 1),
 ('hams', 1),
 ('herb', 1),
 ('nods', 1),
 ('i’ve', 1),
 ('2017', 1),
 ('mope', 1),
 ('sons', 1),
 ('whet', 1),
 ('loth', 1),
 ('blue', 1),
 ('idol', 1),
 ('2001', 1),
 ('peak', 1),
 ('pour', 1),
 ('boys', 1),
 ('rear', 1),
 ('inky', 1),
 ('hung', 1),
 ('mute', 1),
 ('owed', 1),
 ('’one', 1),
 ('ugly', 1),
 ('gold', 1),
 ('load', 1),
 ('s/he', 1),
 ('crib', 1),
 ('vain', 1),
 ('flaw', 1),
 ('bier', 1),
 ('lads', 1),
 ('say—', 1),
 ('moor', 1),
 ('she—', 1),
 ('fust', 1),
 ('ripe', 1),
 ('girl', 1),
 ('hole', 1),
 ('plot', 1),
 ('‘for', 1),
 ('sole', 1),
 ('inch', 1),
 ('dire', 1),
 ('cell', 1),
 ('self', 1),
 ('woos', 1),
 ('us’d', 1),
 ('task', 1),
 ('rain', 1),
 ('cups', 1),
 ('dild', 1),
 ('test', 1),
 ('lays', 1),
 ('beam', 1),
 ('rock', 1),
 ('skin', 1),
 ('toys', 1),
 ('one—', 1),
 ('ossa', 1),
 ('e’er', 1),
 ('miss', 1),
 ('kick', 1),
 ('bias', 1),
 ('rots', 1),
 ('poet', 1),
 ('pain', 1),
 ('saws', 1),
 ('whit', 1),
 ('hent', 1),
 ('folk', 1),
 ('mend', 1),
 ('trip', 1),
 ('rant', 1),
 ('liar', 1),
 ('beds', 1),
 ('turk', 1),
 ('void', 1),
 ('weal', 1),
 ('wick', 1),
 ('rend', 1),
 ('wake', 1),
 ('lean', 1),
 ('tree', 1),
 ('pooh', 1),
 ('lick', 1),
 ('thaw', 1),
 ('shoe', 1),
 ('sift', 1),
 ('wail', 1),
 ('path', 1),
 ('mote', 1),
 ('‘now', 1),
 ('babe', 1),
 ('bade', 1),
 ('host', 1),
 ('rome', 1),
 ('hire', 1),
 ('gets', 1),
 ('legs', 1),
 ('doom', 1),
 ('lisp', 1),
 ('weed', 1),
 ('dogs', 1),
 ('dido', 1),
 ('toil', 1),
 ('hems', 1),
 ('rare', 1),
 ('garb', 1),
 ('mart', 1),
 ('poll', 1),
 ('son—', 1),
 ('busy', 1),
 ('east', 1),
 ('pomp', 1),
 ('glow', 1),
 ('posy', 1),
 ('rags', 1),
 ('tomb', 1),
 ('caps', 1),
 ('‘not', 1),
 ('awry', 1),
 ('gape', 1),
 ('hail', 1),
 ('sate', 1),
 ('post', 1),
 ('film', 1),
 ('bevy', 1),
 ('milk', 1),
 ('bowl', 1),
 ('hair', 1),
 ('core', 1),
 ('rat’', 1),
 ('crab', 1),
 ('slay', 1),
 ('rood', 1),
 ('airs', 1),
 ('yard', 1),
 ('fret', 1),
 ('moan', 1),
 ('drab', 1),
 ('bray', 1),
 ('lake', 1),
 ('queen', 117),
 ('shall', 116),
 ('would', 79),
 ('there', 78),
 ('which', 70),
 ('enter', 68),
 ('first', 63),
 ('speak', 62),
 ...]

How to picture a possible relationship between word length and word frequency?

In [ ]:
# keep only the tokens that do not contain the substring 'http'
words = [w for w in words if 'http' not in w]
In [50]:
# turn every (word, count) pair into (word length, count)
fl = [(len(word), count) for word, count in wcl]
fl[:5]
Out[50]:
[(1, 607), (1, 545), (1, 110), (1, 6), (1, 4)]
In [53]:
# Scatter plot of word length (x) against log10 of the word count (y).
l = [len(word) for word, count in wcl]
f = [count for word, count in wcl]
# alpha=0.1 makes the markers mostly transparent
plt.scatter(l, log10(f), alpha=0.1)
plt.xlabel('word length')
plt.ylabel('log word frequency')
# also write the figure to foo.png in the working directory
plt.savefig('foo.png')
In [ ]:
# length of every word in wcl, in list order
l = [len(pair[0]) for pair in wcl]

Random text

In [54]:
from numpy import *
In [71]:
# Symbols to draw from. A symbol that is listed several times (including the
# space, which acts as the word separator) is proportionally more likely to be
# picked.
alphabet = list('aaaaeeeeioutzrfk     ')
# `random` is numpy.random here (it comes in through `from numpy import *`).
# Fix the seed so that re-running the notebook reproduces the same text.
random.seed(0)
# 5,000,000 independent draws, glued together into one long string
text = ''.join(random.choice(alphabet, 5000000))
In [72]:
# look at the first 500 characters only; the full string is far too long to display
text[:500]
Out[72]:
' eaae  eaeaaaeziaez eieu oe eri eakfeko e e ua a oeae ee aea   aeeaikfoef  f arttteoikkuaee a tzo i  e k  uo u ttft  euuaa zaoefeza  aitaz t ea  a f  eaeffaeueoe eferraeea azir zie eaekeoafe aezaaef k foe  farfazez  i   taeeeeazeiefauo aaa  izffe eeata aa aa atearakuzeor  f aot zaee fefuae rz ak o ae e  t a uu reirrez fze kkeoa  ef a ktet aai eo ei eikkura zaaa aata eeifezaeeeeeee u eie  a azuer ka ue ke eaaoe eo za ef etaa etetuieeff e  a tizza  e t e aeza a reekue f f  reekik  k riaaea tzi i e'
In [73]:
# replace multiple spaces by single space:
# split() without an argument cuts on runs of whitespace and never yields empty
# pieces, so joining the pieces with ' ' leaves exactly one space between words
# and none at the start or end
text = ' '.join(text.split())
text[:500]
Out[73]:
'eaae eaeaaaeziaez eieu oe eri eakfeko e e ua a oeae ee aea aeeaikfoef f arttteoikkuaee a tzo i e k uo u ttft euuaa zaoefeza aitaz t ea a f eaeffaeueoe eferraeea azir zie eaekeoafe aezaaef k foe farfazez i taeeeeazeiefauo aaa izffe eeata aa aa atearakuzeor f aot zaee fefuae rz ak o ae e t a uu reirrez fze kkeoa ef a ktet aai eo ei eikkura zaaa aata eeifezaeeeeeee u eie a azuer ka ue ke eaaoe eo za ef etaa etetuieeff e a tizza e t e aeza a reekue f f reekik k riaaea tzi i e ee r ia rer ritoazaetot'
In [74]:
# Count how often each whitespace-separated "word" of the random text occurs.
d = {}
for w in text.split():
    d[w] = d.get(w, 0) + 1  # a word not seen before starts from 0

# (word, count) pairs ordered by count, most frequent first
wc = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
freqs = [count for word, count in wc]
ranks = arange(1, len(freqs) + 1)  # rank 1 = most frequent word

# rank-frequency plot with logarithmic scales on both axes
plt.loglog(ranks, freqs)
plt.xlabel('rank')
plt.ylabel('frequency');  # trailing ; keeps the returned Text object out of the cell output
In [75]:
# str.join: the string it is called on is inserted between consecutive items of the list
' '.join(['a','happy','day'])
Out[75]:
'a happy day'
In [63]:
# the separator may be any string, not just a single character
'!!!'.join(['a','happy','day'])
Out[63]:
'a!!!happy!!!day'