In [1]:
import requests
import json
import pandas as pd

url0 = 'http://www.nhtsa.gov/webapi/api/Complaints/vehicle/modelyear/{}/make/{}/model/{}?format=json'

def get(make,model,searchstring):
        d = {'other':[],searchstring:[],'model year':[]}
        for year in range(2004,2011):

                print(str(year)+'\r',end='')
                # download the complaints for this make,model,year
                url = url0.format(year,make,model)
                s = requests.get(url).text
                complaints = json.loads(s)

                # count complaints whose Summary contains search string
                nc = 0
                for result in complaints['Results']:
                    if searchstring in result['Summary'].lower(): nc += 1

                d['model year'].append(year)
                d[searchstring].append(nc)
                d['other'].append(complaints['Count']-nc)

        df = pd.DataFrame.from_dict(d)
        #display(df)
        df = df[['model year',searchstring,'other']]   # reorder columns
        return df

df = get('Chevrolet','Cobalt','steer')
display(df)
2010
model year steer other
0 2004 0 0
1 2005 1184 423
2 2006 1314 922
3 2007 694 765
4 2008 291 431
5 2009 192 377
6 2010 226 214
In [2]:
df.melt( id_vars=['model year'], value_vars=['other','steer'])
Out[2]:
model year variable value
0 2004 other 0
1 2005 other 423
2 2006 other 922
3 2007 other 765
4 2008 other 431
5 2009 other 377
6 2010 other 214
7 2004 steer 0
8 2005 steer 1184
9 2006 steer 1314
10 2007 steer 694
11 2008 steer 291
12 2009 steer 192
13 2010 steer 226
In [6]:
# perhaps nice to rename those columns
ldf = df.melt( id_vars=['model year'], value_vars=['other','steer'], var_name='kind of complaint', value_name='count')
ldf
Out[6]:
model year kind of complaint count
0 2004 other 0
1 2005 other 423
2 2006 other 922
3 2007 other 765
4 2008 other 431
5 2009 other 377
6 2010 other 214
7 2004 steer 0
8 2005 steer 1184
9 2006 steer 1314
10 2007 steer 694
11 2008 steer 291
12 2009 steer 192
13 2010 steer 226
In [11]:
import altair as alt
alt.renderers.enable('notebook')
alt.Chart(ldf).mark_bar().encode(
    x='model year:O',
    y=alt.Y('count'),
    color='kind of complaint'
)
Out[11]:

In [13]:
from vega_datasets import data
data.barley()
Out[13]:
yield variety year site
0 27.00000 Manchuria 1931 University Farm
1 48.86667 Manchuria 1931 Waseca
2 27.43334 Manchuria 1931 Morris
3 39.93333 Manchuria 1931 Crookston
4 32.96667 Manchuria 1931 Grand Rapids
... ... ... ... ...
115 58.16667 Wisconsin No. 38 1932 Waseca
116 47.16667 Wisconsin No. 38 1932 Morris
117 35.90000 Wisconsin No. 38 1932 Crookston
118 20.66667 Wisconsin No. 38 1932 Grand Rapids
119 29.33333 Wisconsin No. 38 1932 Duluth

120 rows × 4 columns

Datetimes

In [14]:
from datetime import datetime
datetime.fromtimestamp( 1527825600 )
Out[14]:
datetime.datetime(2018, 6, 1, 0, 0)
In [16]:
birthday = datetime(1999,5,7,14,25)
birthday
Out[16]:
datetime.datetime(1999, 5, 7, 14, 25)
In [24]:
now = datetime.now() #(2019,10,2,14,25)
now
Out[24]:
datetime.datetime(2019, 10, 2, 14, 27, 11, 302454)
In [25]:
age = now-birthday
age
Out[25]:
datetime.timedelta(7453, 131, 302454)
In [23]:
age.days
Out[23]:
7453

List of datetimes of all the complaint filings for one make,model,year

In [32]:
from datetime import datetime
def get(make,model,year):

        dates = []
        url = url0.format(year,make,model)
        s = requests.get(url).text
        complaints = json.loads(s)

        for result in complaints['Results']:
            date = datetime.fromtimestamp( int( result['DateComplaintFiled'][6:16] ) )
            dates.append(date)
            #break
        return dates

dates = pd.DataFrame( get('Chevrolet','Cobalt',2005), columns=['date filed'] )
display(dates)
date filed
0 2010-02-19 00:00:00
1 2009-07-15 00:00:00
2 2011-03-21 00:00:00
3 2010-02-26 00:00:00
4 2010-09-30 00:00:00
... ...
1602 2016-10-24 00:00:00
1603 2016-10-24 00:00:00
1604 2016-11-08 08:42:48
1605 2016-11-08 09:04:03
1606 2016-11-08 00:00:00

1607 rows × 1 columns

In [36]:
alt.Chart(dates).mark_point().encode(x=alt.X('date filed',bin=True))
Out[36]:

In [41]:
alt.Chart(dates).mark_point().encode(x=alt.X('date filed',bin=alt.Bin(maxbins=5)))
Out[41]:

In [42]:
alt.Chart(dates).mark_point().encode(x=alt.X('date filed',bin=alt.Bin(maxbins=5)),y='count()')
Out[42]:

In [65]:
alt.Chart(dates).mark_bar().encode(x=alt.X('date filed',bin=alt.Bin(maxbins=50)),y='count()')
Out[65]:

In [44]:
alt.Chart(dates).mark_bar().encode(x=alt.X('date filed',bin=alt.Bin(maxbins=50)),y='count()')
Out[44]:

In [64]:
# This doesn't work
#alt.Chart(dates).mark_bar().encode(x=alt.X('date filed',timeUnit='yearmonthdate',bin=alt.Bin(maxbins=50)),y='count()') # ?????

Airports

In [46]:
ap = pd.read_csv('airports.csv')
ap.head()
Out[46]:
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords
0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NaN US US-PA Bensalem no 00A NaN 00A NaN NaN NaN
1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NaN US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN
2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN
3 6525 00AL small_airport Epps Airpark 34.864799 -86.770302 820.0 NaN US US-AL Harvest no 00AL NaN 00AL NaN NaN NaN
4 6526 00AR closed Newport Hospital & Clinic Heliport 35.608700 -91.254898 237.0 NaN US US-AR Newport no NaN NaN NaN NaN NaN 00AR
In [48]:
ap.tail()
Out[48]:
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords
55453 317861 ZYYK medium_airport Yingkou Lanqi Airport 40.542524 122.358600 0.0 AS CN CN-21 Yingkou yes ZYYK YKH NaN NaN https://en.wikipedia.org/wiki/Yingkou_Lanqi_Ai... NaN
55454 32753 ZYYY medium_airport Shenyang Dongta Airport 41.784401 123.496002 NaN AS CN CN-21 Shenyang no ZYYY NaN NaN NaN NaN NaN
55455 46378 ZZ-0001 heliport Sealand Helipad 51.894444 1.482500 40.0 EU GB GB-ENG Sealand no NaN NaN NaN http://www.sealandgov.org/ http://en.wikipedia.org/wiki/Principality_of_S... Roughs Tower Helipad
55456 307326 ZZ-0002 small_airport Glorioso Islands Airstrip -11.584278 47.296389 11.0 AF TF TF-U-A Grande Glorieuse no NaN NaN NaN NaN NaN NaN
55457 313629 ZZZZ small_airport Satsuma Iōjima Airport 30.784722 130.270556 338.0 AS JP JP-46 Mishima-Mura no RJX7 NaN NaN NaN http://wikimapia.org/6705190/Satsuma-Iwo-jima-... SATSUMA,IWOJIMA,RJX7
In [47]:
len(ap)
Out[47]:
55458
In [50]:
ap = pd.read_csv('airports.csv',converters={ 'continent':lambda x:x  })
ap.head()
Out[50]:
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords
0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NA US US-PA Bensalem no 00A NaN 00A NaN NaN NaN
1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NA US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN
2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NA US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN
3 6525 00AL small_airport Epps Airpark 34.864799 -86.770302 820.0 NA US US-AL Harvest no 00AL NaN 00AL NaN NaN NaN
4 6526 00AR closed Newport Hospital & Clinic Heliport 35.608700 -91.254898 237.0 NA US US-AR Newport no NaN NaN NaN NaN NaN 00AR
In [53]:
ilo = ap['elevation_ft'].idxmin()
ilo
Out[53]:
32094
In [54]:
ap.loc[ilo]
Out[54]:
id                                                               4421
ident                                                            LLMZ
type                                                   medium_airport
name                                              Bar Yehuda Airfield
latitude_deg                                                  31.3282
longitude_deg                                                 35.3886
elevation_ft                                                    -1266
continent                                                          AS
iso_country                                                        IL
iso_region                                                       IL-D
municipality                                                   Masada
scheduled_service                                                  no
gps_code                                                         LLMZ
iata_code                                                         MTZ
local_code                                                        NaN
home_link                                                         NaN
wikipedia_link       http://en.wikipedia.org/wiki/Bar_Yehuda_Airfield
keywords                                              Masada Airfield
Name: 32094, dtype: object
In [55]:
ap['elevation_ft'].min()
Out[55]:
-1266.0
In [56]:
# most northerly
ap.loc[ ap['latitude_deg'].idxmax() ]
Out[56]:
id                                           320326
ident                                       CA-0605
type                                         closed
name                 Disraeli Inlet Water Aerodrome
latitude_deg                                  82.75
longitude_deg                                   -73
elevation_ft                                    NaN
continent                                        NA
iso_country                                      CA
iso_region                                    CA-NU
municipality                         Disraeli Inlet
scheduled_service                                no
gps_code                                        NaN
iata_code                                       NaN
local_code                                      SW6
home_link                                       NaN
wikipedia_link                                  NaN
keywords                                        NaN
Name: 13639, dtype: object
In [58]:
ap.sort_values(by='elevation_ft',ascending=False).head()
Out[58]:
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords
24218 35129 IN-0001 heliport Siachen Glacier AFS Airport 35.500000 77.000000 22000.0 AS IN IN-JK NaN no NaN NaN NaN NaN NaN NaN
24220 42716 IN-0003 small_airport Daulat Beg Oldi Advanced Landing Ground 35.396467 77.928965 16200.0 AS IN IN-JK NaN no NaN NaN NaN NaN http://en.wikipedia.org/wiki/Daulat_Beg_Oldi_A... NaN
46016 39635 SPNH small_airport Laguna Choclococha Airport -13.165700 -75.071999 14965.0 SA PE PE-HUV Choclococha no SPNH NaN NaN NaN NaN NaN
45967 39624 SPFA small_airport Fausa Airport -14.709400 -71.731102 14809.0 SA PE PE-CUS Fausa no SPFA NaN NaN NaN NaN NaN
55337 327348 ZUDC medium_airport Daocheng Yading Airport 29.323056 100.053333 14472.0 AS CN CN-51 Daocheng County yes ZUDC DCY NaN NaN https://en.wikipedia.org/wiki/Daocheng_Yading_... NaN
In [59]:
ap['elevation_ft'].median()
Out[59]:
720.0

preview of grouping and aggregation

In [60]:
ap['continent'].value_counts()
Out[60]:
NA    27877
EU     7980
SA     7838
AS     5337
AF     3351
OC     3047
AN       28
Name: continent, dtype: int64
In [ ]: