In [1]:
import pandas as pd
In [2]:
import numpy as np
In [4]:
# Build a small synthetic table: five random integer codes (labelled FIPS)
# paired with five random percentages (labelled unemployment rate).
N_ROWS = 5
fipslist = np.random.randint(0, 1000000, N_ROWS)   # integers in [0, 1000000)
unemp = 100 * np.random.rand(N_ROWS)               # floats in [0, 100)
records = list(zip(fipslist, unemp))               # one (code, rate) tuple per row
df = pd.DataFrame(records, columns=['FIPS', 'unemployment rate'])
df
Out[4]:
FIPS unemployment rate
0 707376 48.184493
1 997153 25.298947
2 9673 99.912666
3 312952 38.540949
4 170831 97.723062

Option 1: get out of pandas

Make a dict

In [5]:
# Plain-Python lookup table: FIPS code -> unemployment rate.
# Built by zipping the two columns rather than iterating over `.values`:
# `.values` on a frame that mixes an integer and a float column yields a single
# float64 array, which silently turns every FIPS key into a float.
ulookup = dict(zip(df['FIPS'], df['unemployment rate']))
In [6]:
# Look up one rate by FIPS code. The code is taken from fipslist instead of
# being typed in: the codes are drawn at random, so a literal copied from one
# run (e.g. 170831) raises KeyError the next time the notebook is executed.
ulookup[fipslist[4]]
Out[6]:
97.72306236691202
In [8]:
# Iterating over a 2-D array walks its first axis, so each step hands back one
# row. Only the first row is needed here, so take it and stop.
row_iter = iter(np.random.rand(5, 2))
item = next(row_iter)
print(item)
[0.26716316 0.53187329]
In [11]:
# zip pairs elements position by position and stops at the shorter input,
# so the trailing 7 never appears in a pair.
letters = 'abc'
numbers = [4, 5, 6, 7]
for item in zip(letters, numbers):
    print(item)
('a', 4)
('b', 5)
('c', 6)

Option 2: use pandas directly by re-indexing the df

In [12]:
# Show the frame as built: default integer row index, FIPS still a regular column.
df
Out[12]:
FIPS unemployment rate
0 707376 48.184493
1 997153 25.298947
2 9673 99.912666
3 312952 38.540949
4 170831 97.723062
In [15]:
# Promote FIPS from a regular column to the row index so rows can be fetched
# by FIPS code with .loc.
# Guarded and rebinding instead of inplace=True so the cell can be re-run:
# a second in-place call raised KeyError because 'FIPS' was no longer a column.
if 'FIPS' in df.columns:
    df = df.set_index('FIPS')
In [16]:
# Show the frame again: FIPS is now the row index rather than a column.
df
Out[16]:
unemployment rate
FIPS
707376 48.184493
997153 25.298947
9673 99.912666
312952 38.540949
170831 97.723062
In [17]:
# Two-step form: .loc[label] returns the whole row as a Series, then the
# column name picks one value out of that row.
# The label comes from fipslist rather than a typed-in number because the FIPS
# codes are random; a literal copied from an earlier run raises KeyError.
df.loc[fipslist[4]]['unemployment rate']
Out[17]:
97.72306236691202
In [18]:
# One-step form: give .loc the row label and the column name together.
# The label comes from fipslist rather than a typed-in number because the FIPS
# codes are random; a literal copied from an earlier run raises KeyError.
df.loc[fipslist[4], 'unemployment rate']
Out[18]:
97.72306236691202
In [19]:
# Label-based row lookup: returns the entire row for this FIPS code as a Series.
# The label comes from fipslist rather than a typed-in number because the FIPS
# codes are random; a literal copied from an earlier run raises KeyError.
df.loc[fipslist[4]]
Out[19]:
unemployment rate    97.723062
Name: 170831, dtype: float64
In [20]:
# Position-based row lookup: .iloc ignores the FIPS labels and returns the row
# at integer position 4, i.e. the fifth row of the frame.
df.iloc[4]
Out[20]:
unemployment rate    97.723062
Name: 170831, dtype: float64

Membership in a set can be tested MUCH faster than membership in a list.

In [21]:
from time import time
In [29]:
# One million random integers below 10**12, held as a plain Python list of
# numpy scalars, plus the same values collected into a set.
N_VALUES = 1000000
UPPER_BOUND = 1000000000000
draws = np.random.randint(0, UPPER_BOUND, N_VALUES, dtype='uint64')
mylist = list(draws)
print(type(mylist[0]))
myset = set(mylist)
# 0 means every draw was distinct; a negative result counts the duplicates dropped by the set.
len(myset) - len(mylist)
<class 'numpy.uint64'>
Out[29]:
0
In [33]:
# Time one membership test against the list and one against the set.
# perf_counter is used instead of time(): it is a monotonic, high-resolution
# clock, whereas time() can be too coarse to register the set lookup at all,
# leaving tset == 0 and making the ratio below raise ZeroDivisionError.
from time import perf_counter

# The lookups are timed on their own; printing happens after the clock stops
# so console I/O does not inflate either measurement.
tic = perf_counter()
found_in_list = 0 in mylist
toc = perf_counter()
tlist = toc-tic
print(found_in_list)

tic = perf_counter()
found_in_set = 0 in myset
toc = perf_counter()
tset = toc-tic
print(found_in_set)

print('set lookup is',tlist/tset,'times faster')
False
False
set lookup is 3424.831359545239 times faster

Back to NHTSA complaint database

In [34]:
# Count NHTSA complaints per model year for one make/model and chart them.
import json

import altair as alt
import pandas as pd
import requests

alt.renderers.enable('notebook')

# URL template; the three {} slots are filled with model year, make and model.
# NOTE(review): confirm this endpoint is still served before relying on the cell.
url0 = 'http://www.nhtsa.gov/webapi/api/Complaints/vehicle/modelyear/{}/make/{}/model/{}?format=json'

make,model = 'Chevrolet','Cobalt'
d = {'number of complaints':[],'model year':[]}

# One Session for all nineteen requests so the connection is reused.
with requests.Session() as session:
    for year in range(2000,2019):
        url = url0.format(year,make,model)
        print(str(year)+'\r',end='')   # overwrite the same output line as a progress indicator
        # A timeout keeps a stalled server from hanging the kernel indefinitely.
        response = session.get(url, timeout=30)
        # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
        response.raise_for_status()
        complaints = json.loads(response.text)
        d['number of complaints'].append(complaints['Count'])
        d['model year'].append(year)

df = pd.DataFrame.from_dict(d)
alt.Chart(df,title=make+' '+model).mark_bar().encode( x='model year:O', y='number of complaints')
2018
/usr/local/lib/python3.6/dist-packages/altair/utils/core.py:294: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly.
  attrs['type'] = infer_vegalite_type(data[attrs['field']])
Out[34]:

Iterating over a dictionary

When you iterate over a dictionary, you get only the keys:

In [35]:
# A two-entry dictionary with unrelated value types.
d = {
    55: [1, 2, 3],
    777: 'hello',
}
# Looping over the dictionary object itself produces its keys, one per pass.
for item in d:
    print(item)
55
777

If you need the values too, you can do this:

In [36]:
# .items() pairs every key with its value; each pass yields one (key, value) tuple.
pairs = d.items()
for item in pairs:
    print(item)
(55, [1, 2, 3])
(777, 'hello')
In [ ]: