In [1]:
import pandas
In [2]:
mycons = {'Total Charges':lambda x:float(x.replace('$','')),
          'Length of Stay':lambda x:int(x.replace('+',''))}
s = pandas.read_csv('sparcs2014.csv',converters=mycons)
s.head(2)
Out[2]:
Health Service Area Hospital County Operating Certificate Number Facility Id Facility Name Age Group Zip Code - 3 digits Gender Race Ethnicity ... Payment Typology 2 Payment Typology 3 Attending Provider License Number Operating Provider License Number Other Provider License Number Birth Weight Abortion Edit Indicator Emergency Department Indicator Total Charges Total Costs
0 Western NY Allegany 226700 37 Cuba Memorial Hospital Inc 30 to 49 147 F White Not Span/Hispanic ... NaN NaN 90335341 NaN NaN 0 N Y 9546.85 $12303.20
1 Western NY Allegany 226700 37 Cuba Memorial Hospital Inc 50 to 69 147 F White Not Span/Hispanic ... NaN NaN 90335341 NaN NaN 0 N Y 11462.75 $10298.32

2 rows × 39 columns

In [3]:
for col in s.columns:
    print(col)
Health Service Area
Hospital County
Operating Certificate Number
Facility Id
Facility Name
Age Group
Zip Code - 3 digits
Gender
Race
Ethnicity
Length of Stay
Admit Day of Week
Type of Admission
Patient Disposition
Discharge Year
Discharge Day of Week
CCS Diagnosis Code
CCS Diagnosis Description
CCS Procedure Code
CCS Procedure Description
APR DRG Code
APR DRG Description
APR MDC Code
APR MDC Description
APR Severity of Illness Code
APR Severity of Illness Description
APR Risk of Mortality
APR Medical Surgical Description
Payment Typology 1
Payment Typology 2
Payment Typology 3
Attending Provider License Number
Operating Provider License Number
Other Provider License Number
Birth Weight
Abortion Edit Indicator
Emergency Department Indicator
Total Charges
Total Costs
In [4]:
mmc = s[ s['Facility Name']=='Maimonides Medical Center' ]
len(mmc)
Out[4]:
43112
In [5]:
mmcbabies = mmc[ mmc['CCS Diagnosis Description'] == 'LIVEBORN' ]
len(mmcbabies)
Out[5]:
8507
In [6]:
from numpy import *
vstack([ones(5),zeros(5)])
Out[6]:
array([[ 1.,  1.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.]])
In [7]:
babylogcharges = log10(mmcbabies['Total Charges'])
babymax = babylogcharges.max()
babymin = babylogcharges.min()
In [8]:
from histogram1d import histogram1d
In [ ]:
 
In [9]:
import matplotlib.pyplot as pl
In [10]:
lefts, counts = histogram1d(babylogcharges, babymin,babymax, 10000, rawcounts=True)
w = lefts[1]-lefts[0]
values = lefts + w/2
x = vstack([values,values])
y = vstack([zeros_like(counts), counts])
pl.figure(figsize=(12,6))
pl.plot(x,y,'b')
pl.show()
In [11]:
s['Patient Disposition'].value_counts()
Out[11]:
Home or Self Care                        1581401
Home w/ Home Health Services              317027
Skilled Nursing Home                      223621
Expired                                    50913
Left Against Medical Advice                47012
Inpatient Rehabilitation Facility          41524
Short-term Hospital                        40817
Psychiatric Hospital or Unit of Hosp       12565
Hospice - Medical Facility                 12513
Hospice - Home                             10540
Another Type Not Listed                     8441
Facility w/ Custodial/Supportive Care       6268
Medicare Cert Long Term Care Hospital       3789
Court/Law Enforcement                       3731
Cancer Center or Children's Hospital        2504
Hosp Basd Medicare Approved Swing Bed       1544
Federal Health Care Facility                 725
Medicaid Cert Nursing Facility               152
Critical Access Hospital                     121
Name: Patient Disposition, dtype: int64
In [12]:
s['CCS Diagnosis Description'].value_counts()
Out[12]:
LIVEBORN                   231293
SEPTICEMIA                  96517
OSTEOARTHRITIS              59366
MOOD DISORDERS              58624
CHF                         56848
PNEUMONIA                   46113
SCHIZ/OTH PSYCH DISORDR     43908
DYSRHYTHMIA                 43628
ALCOHOL-RELATD DISORDER     41824
OTH COMP BIRTH/PUERPRM      39211
DEVICE/IMPLNT/GRFT COMP     38947
SKIN/SUBCUT TISS INFECT     38645
SUBSTANCE-RLTD DISORDER     36649
DIABETES W/COMPL            35084
ACUTE CVD                   34874
ASTHMA                      34609
BACK PROBLEM                34541
ACUTE MYOCARDL INFARCT      33357
OTHR PREGNANCY COMPL        32975
COPD                        32330
PROLONGED PREGNANCY         32292
CORONARY ATHEROSCLER        31813
CHEST PAIN                  31438
REHAB/PROSTH FIT/ADJUST     30756
SURGCL/MED CARE COMPL       30731
URINARY TRACT INFECTION     30244
PREVIOUS C-SECTION          28651
OB PERINEAL/VULV TRAUMA     26319
FLUID/ELECTRLYT DISORDR     26041
EPILEPSY/CONVULSIONS        25750
                            ...  
MENOPAUSAL DISORDER           350
SEXUAL INFECTIONS             348
SHORT GEST/LOW BRTHWT         347
RETINAL PROBLEMS              339
ACQUIRD FOOT DEFORMITY        331
OTHER URINARY CANCER          292
OTHR HEMATOLOGIC COND         286
MEDICAL EXAM/EVALUATN         268
SKIN MELANOMAS                235
POSTABORTION COMPL            233
RESP DISTRSS SYNDROME         231
VARICOSE VEIN                 225
OTHR RESPIRATRY CANCER        192
DEVELOPMENTAL DISORDERS       167
FORCEPS DELIVERY              156
ADMIN/SOCIAL ADMISSION        149
IMMUNITY DISORDER             140
TESTICULAR CANCER             130
IMMUNIZATION/SCREENING         84
SHOCK                          84
BIRTH ASPHYXIA                 79
SUICIDE/SELF-INFLCT INJ        77
GLAUCOMA                       69
MALE GENITAL CANCER            66
HYPERLIPIDEMIA                 59
BIRTH TRAUMA                   33
CATARACT                       26
CONTRCPTV/PROCRTV MGT          26
OSTEOPOROSIS                   21
FEMALE INFERTILITY             10
Name: CCS Diagnosis Description, dtype: int64
In [13]:
sc = s[ s['CCS Diagnosis Description'].str.contains('CANCER') ]
sc['CCS Diagnosis Description'].value_counts()
Out[13]:
BRONCHIAL/LUNG CANCER      9447
COLON CANCER               5902
BREAST CANCER              5736
PROSTATE CANCER            4334
KIDNEY/RENAL CANCER        3322
PANCREAS CANCER            2859
BRAIN/NERV SYST CANCER     2703
RECTAL/ANAL CANCER         2700
UTERINE CANCER             2426
BLADDER CANCER             2384
HEAD/NECK CANCER           2368
LIVER/BILE DUCT CANCER     2167
STOMACH CANCER             2019
THYROID CANCER             1717
OTHR GI/PERITONL CANCER    1629
OVARIAN CANCER             1618
BONE/CONN TISSU CANCER     1226
ESOPHAGEAL CANCER           879
CERVICAL CANCER             838
OTHER PRIMARY CANCER        607
NON-EPITHELIAL CANCER       474
FEMALE GENITAL CANCER       418
OTHER URINARY CANCER        292
OTHR RESPIRATRY CANCER      192
TESTICULAR CANCER           130
MALE GENITAL CANCER          66
Name: CCS Diagnosis Description, dtype: int64
In [14]:
g = s.groupby('Hospital County')
for name,group in g:
    print(name,len(group))
Albany 63461
Allegany 2052
Bronx 183847
Broome 28213
Cattaraugus 5262
Cayuga 4984
Chautauqua 11132
Chemung 15803
Chenango 1761
Clinton 9868
Columbia 5528
Cortland 3745
Delaware 762
Dutchess 30932
Erie 121301
Essex 410
Franklin 4710
Fulton 3076
Genesee 4412
Herkimer 748
Jefferson 11092
Kings 258712
Lewis 1641
Livingston 2023
Madison 4729
Manhattan 412188
Monroe 104802
Montgomery 6500
Nassau 180741
Niagara 17400
Oneida 29735
Onondaga 76495
Ontario 11487
Orange 39572
Orleans 1372
Oswego 5092
Otsego 12449
Putnam 6650
Queens 199374
Rensselaer 12476
Richmond 58876
Rockland 32211
Saratoga 9167
Schenectady 21963
Schoharie 562
Schuyler 651
St Lawrence 10705
Steuben 6438
Suffolk 159765
Sullivan 4442
Tompkins 7226
Ulster 11537
Warren 13939
Wayne 5315
Westchester 119384
Wyoming 2256
Yates 759
In [15]:
import numpy as np
g.agg({'Total Charges':['median',np.mean]})
Out[15]:
Total Charges
median mean
Hospital County
Albany 24941.200 42824.678678
Allegany 6864.775 9447.230804
Bronx 20387.020 40888.858943
Broome 14152.400 23337.190761
Cattaraugus 8897.075 12605.036923
Cayuga 12146.000 15864.603355
Chautauqua 7425.375 10905.151219
Chemung 12829.090 19567.670297
Chenango 14201.500 19882.085014
Clinton 19832.975 29965.544606
Columbia 12700.515 18099.838461
Cortland 7211.710 9157.265872
Delaware 8687.275 10802.212703
Dutchess 24442.405 37470.757036
Erie 15248.480 28356.993957
Essex 7799.880 8424.805878
Franklin 11561.195 15563.635943
Fulton 16380.865 20709.369467
Genesee 10601.320 14939.391514
Herkimer 8466.495 9332.398008
Jefferson 11143.970 15556.550593
Kings 20466.985 35827.915035
Lewis 7115.270 9301.480792
Livingston 7820.470 11787.964726
Madison 13774.910 17657.331191
Manhattan 31191.865 57623.191722
Monroe 13732.395 24275.150814
Montgomery 12375.345 15840.527629
Nassau 33457.220 56269.047370
Niagara 9362.760 13473.650586
Oneida 14747.810 21626.534473
Onondaga 18779.020 32415.260638
Ontario 10369.530 14767.910544
Orange 27844.385 38516.229516
Orleans 8696.010 11967.996764
Oswego 10178.085 14588.237247
Otsego 12328.140 20015.032696
Putnam 25818.580 33308.905686
Queens 20376.820 36483.546173
Rensselaer 13882.760 20164.968626
Richmond 21551.195 36616.813506
Rockland 26242.630 39691.695802
Saratoga 21055.910 28686.658518
Schenectady 14969.120 25815.983324
Schoharie 9193.440 10310.857011
Schuyler 5909.140 6972.520369
St Lawrence 9022.900 11782.562507
Steuben 9896.750 13170.891367
Suffolk 31143.000 48742.260524
Sullivan 18411.120 26597.025905
Tompkins 8004.660 11897.976759
Ulster 18330.850 26800.900193
Warren 15494.660 23637.492441
Wayne 10740.780 15697.965737
Westchester 22476.255 47804.177764
Wyoming 6592.915 8619.636201
Yates 8105.930 9788.401818
In [16]:
g.agg({'Total Charges':'median'}).sort_values('Total Charges',ascending=False)
Out[16]:
Total Charges
Hospital County
Nassau 33457.220
Manhattan 31191.865
Suffolk 31143.000
Orange 27844.385
Rockland 26242.630
Putnam 25818.580
Albany 24941.200
Dutchess 24442.405
Westchester 22476.255
Richmond 21551.195
Saratoga 21055.910
Kings 20466.985
Bronx 20387.020
Queens 20376.820
Clinton 19832.975
Onondaga 18779.020
Sullivan 18411.120
Ulster 18330.850
Fulton 16380.865
Warren 15494.660
Erie 15248.480
Schenectady 14969.120
Oneida 14747.810
Chenango 14201.500
Broome 14152.400
Rensselaer 13882.760
Madison 13774.910
Monroe 13732.395
Chemung 12829.090
Columbia 12700.515
Montgomery 12375.345
Otsego 12328.140
Cayuga 12146.000
Franklin 11561.195
Jefferson 11143.970
Wayne 10740.780
Genesee 10601.320
Ontario 10369.530
Oswego 10178.085
Steuben 9896.750
Niagara 9362.760
Schoharie 9193.440
St Lawrence 9022.900
Cattaraugus 8897.075
Orleans 8696.010
Delaware 8687.275
Herkimer 8466.495
Yates 8105.930
Tompkins 8004.660
Livingston 7820.470
Essex 7799.880
Chautauqua 7425.375
Cortland 7211.710
Lewis 7115.270
Allegany 6864.775
Wyoming 6592.915
Schuyler 5909.140
In [ ]: