In [1]:
import pandas
In [3]:
df = pandas.read_csv('NYSDOH_CancerMapping_Data_2005_2009.csv')
df.head()
Out[3]:
geoid10 observed_Bladder observed_Bone observed_Brain observed_Breast observed_Colorectal observed_Esophagus observed_Kidney observed_Larynx observed_Leukemia ... highlight_Nasal highlight_Oral highlight_Ovary highlight_Pancreas highlight_Prostate highlight_Soft_Tissue highlight_Stomach highlight_Testis highlight_Thyroid highlight_Uterus
0 360010001001 4 0 0 0 1 0 1 0 1 ... 0 0 0 0 0 0 0 0 0 0
1 360010001002 1 0 0 2 2 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 360010002001 1 0 1 9 2 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
3 360010002002 0 0 0 3 2 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 360010003001 2 0 0 3 3 0 2 1 2 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 74 columns

In [4]:
for c in df.columns:
    print(c)
geoid10
observed_Bladder
observed_Bone
observed_Brain
observed_Breast
observed_Colorectal
observed_Esophagus
observed_Kidney
observed_Larynx
observed_Leukemia
observed_Liver
observed_Lung
observed_Mesothelioma
observed_NHL
observed_Nasal
observed_Oral
observed_Other
observed_Ovary
observed_Pancreas
observed_Prostate
observed_Soft_Tissue
observed_Stomach
observed_Testis
observed_Thyroid
observed_Uterus
observed_Total
expected_Bladder
expected_Bone
expected_Brain
expected_Breast
expected_Colorectal
expected_Esophagus
expected_Kidney
expected_Larynx
expected_Leukemia
expected_Liver
expected_Lung
expected_Mesothelioma
expected_NHL
expected_Nasal
expected_Oral
expected_Other
expected_Ovary
expected_Pancreas
expected_Prostate
expected_Soft_Tissue
expected_Stomach
expected_Testis
expected_Thyroid
expected_Uterus
expected_Total
highlight_Bladder
highlight_Bone
highlight_Brain
highlight_Breast
highlight_Colorectal
highlight_Esophagus
highlight_Kidney
highlight_Larynx
highlight_Leukemia
highlight_Liver
highlight_Lung
highlight_Mesothelioma
highlight_NHL
highlight_Nasal
highlight_Oral
highlight_Ovary
highlight_Pancreas
highlight_Prostate
highlight_Soft_Tissue
highlight_Stomach
highlight_Testis
highlight_Thyroid
highlight_Uterus
In [8]:
ndf = df[['geoid10']]
ndf['lungrate'] = df['observed_Lung']/df['expected_Lung']
ndf.head()
/home/ringland/.local/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
Out[8]:
geoid10 lungrate
0 360010001001 2.231812
1 360010001002 2.007116
2 360010002001 1.962593
3 360010002002 1.129135
4 360010003001 1.001943
In [9]:
ndf['lungrate'].median()
Out[9]:
0.9463346745180691
In [11]:
ndf.to_csv('lungrate.csv',index=False)
In [ ]: