import pandas
mycons = {'Total Charges':lambda x:float(x.replace('$','')),
'Length of Stay':lambda x:int(x.replace('+',''))}
s = pandas.read_csv('sparcs2014.csv',converters=mycons)
s.head(2)
Health Service Area | Hospital County | Operating Certificate Number | Facility Id | Facility Name | Age Group | Zip Code - 3 digits | Gender | Race | Ethnicity | ... | Payment Typology 2 | Payment Typology 3 | Attending Provider License Number | Operating Provider License Number | Other Provider License Number | Birth Weight | Abortion Edit Indicator | Emergency Department Indicator | Total Charges | Total Costs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Western NY | Allegany | 226700 | 37 | Cuba Memorial Hospital Inc | 30 to 49 | 147 | F | White | Not Span/Hispanic | ... | NaN | NaN | 90335341 | NaN | NaN | 0 | N | Y | 9546.85 | $12303.20 |
1 | Western NY | Allegany | 226700 | 37 | Cuba Memorial Hospital Inc | 50 to 69 | 147 | F | White | Not Span/Hispanic | ... | NaN | NaN | 90335341 | NaN | NaN | 0 | N | Y | 11462.75 | $10298.32 |
2 rows × 39 columns
for col in s.columns:
print(col)
Health Service Area Hospital County Operating Certificate Number Facility Id Facility Name Age Group Zip Code - 3 digits Gender Race Ethnicity Length of Stay Admit Day of Week Type of Admission Patient Disposition Discharge Year Discharge Day of Week CCS Diagnosis Code CCS Diagnosis Description CCS Procedure Code CCS Procedure Description APR DRG Code APR DRG Description APR MDC Code APR MDC Description APR Severity of Illness Code APR Severity of Illness Description APR Risk of Mortality APR Medical Surgical Description Payment Typology 1 Payment Typology 2 Payment Typology 3 Attending Provider License Number Operating Provider License Number Other Provider License Number Birth Weight Abortion Edit Indicator Emergency Department Indicator Total Charges Total Costs
mmc = s[ s['Facility Name']=='Maimonides Medical Center' ]
len(mmc)
43112
mmcbabies = mmc[ mmc['CCS Diagnosis Description'] == 'LIVEBORN' ]
len(mmcbabies)
8507
from numpy import *
vstack([ones(5),zeros(5)])
array([[ 1., 1., 1., 1., 1.], [ 0., 0., 0., 0., 0.]])
babylogcharges = log10(mmcbabies['Total Charges'])
babymax = babylogcharges.max()
babymin = babylogcharges.min()
from histogram1d import histogram1d
import matplotlib.pyplot as pl
lefts, counts = histogram1d(babylogcharges, babymin,babymax, 10000, rawcounts=True)
w = lefts[1]-lefts[0]
values = lefts + w/2
x = vstack([values,values])
y = vstack([zeros_like(counts), counts])
pl.figure(figsize=(12,6))
pl.plot(x,y,'b')
pl.show()
s['Patient Disposition'].value_counts()
Home or Self Care 1581401 Home w/ Home Health Services 317027 Skilled Nursing Home 223621 Expired 50913 Left Against Medical Advice 47012 Inpatient Rehabilitation Facility 41524 Short-term Hospital 40817 Psychiatric Hospital or Unit of Hosp 12565 Hospice - Medical Facility 12513 Hospice - Home 10540 Another Type Not Listed 8441 Facility w/ Custodial/Supportive Care 6268 Medicare Cert Long Term Care Hospital 3789 Court/Law Enforcement 3731 Cancer Center or Children's Hospital 2504 Hosp Basd Medicare Approved Swing Bed 1544 Federal Health Care Facility 725 Medicaid Cert Nursing Facility 152 Critical Access Hospital 121 Name: Patient Disposition, dtype: int64
s['CCS Diagnosis Description'].value_counts()
LIVEBORN 231293 SEPTICEMIA 96517 OSTEOARTHRITIS 59366 MOOD DISORDERS 58624 CHF 56848 PNEUMONIA 46113 SCHIZ/OTH PSYCH DISORDR 43908 DYSRHYTHMIA 43628 ALCOHOL-RELATD DISORDER 41824 OTH COMP BIRTH/PUERPRM 39211 DEVICE/IMPLNT/GRFT COMP 38947 SKIN/SUBCUT TISS INFECT 38645 SUBSTANCE-RLTD DISORDER 36649 DIABETES W/COMPL 35084 ACUTE CVD 34874 ASTHMA 34609 BACK PROBLEM 34541 ACUTE MYOCARDL INFARCT 33357 OTHR PREGNANCY COMPL 32975 COPD 32330 PROLONGED PREGNANCY 32292 CORONARY ATHEROSCLER 31813 CHEST PAIN 31438 REHAB/PROSTH FIT/ADJUST 30756 SURGCL/MED CARE COMPL 30731 URINARY TRACT INFECTION 30244 PREVIOUS C-SECTION 28651 OB PERINEAL/VULV TRAUMA 26319 FLUID/ELECTRLYT DISORDR 26041 EPILEPSY/CONVULSIONS 25750 ... MENOPAUSAL DISORDER 350 SEXUAL INFECTIONS 348 SHORT GEST/LOW BRTHWT 347 RETINAL PROBLEMS 339 ACQUIRD FOOT DEFORMITY 331 OTHER URINARY CANCER 292 OTHR HEMATOLOGIC COND 286 MEDICAL EXAM/EVALUATN 268 SKIN MELANOMAS 235 POSTABORTION COMPL 233 RESP DISTRSS SYNDROME 231 VARICOSE VEIN 225 OTHR RESPIRATRY CANCER 192 DEVELOPMENTAL DISORDERS 167 FORCEPS DELIVERY 156 ADMIN/SOCIAL ADMISSION 149 IMMUNITY DISORDER 140 TESTICULAR CANCER 130 IMMUNIZATION/SCREENING 84 SHOCK 84 BIRTH ASPHYXIA 79 SUICIDE/SELF-INFLCT INJ 77 GLAUCOMA 69 MALE GENITAL CANCER 66 HYPERLIPIDEMIA 59 BIRTH TRAUMA 33 CATARACT 26 CONTRCPTV/PROCRTV MGT 26 OSTEOPOROSIS 21 FEMALE INFERTILITY 10 Name: CCS Diagnosis Description, dtype: int64
sc = s[ s['CCS Diagnosis Description'].str.contains('CANCER') ]
sc['CCS Diagnosis Description'].value_counts()
BRONCHIAL/LUNG CANCER 9447 COLON CANCER 5902 BREAST CANCER 5736 PROSTATE CANCER 4334 KIDNEY/RENAL CANCER 3322 PANCREAS CANCER 2859 BRAIN/NERV SYST CANCER 2703 RECTAL/ANAL CANCER 2700 UTERINE CANCER 2426 BLADDER CANCER 2384 HEAD/NECK CANCER 2368 LIVER/BILE DUCT CANCER 2167 STOMACH CANCER 2019 THYROID CANCER 1717 OTHR GI/PERITONL CANCER 1629 OVARIAN CANCER 1618 BONE/CONN TISSU CANCER 1226 ESOPHAGEAL CANCER 879 CERVICAL CANCER 838 OTHER PRIMARY CANCER 607 NON-EPITHELIAL CANCER 474 FEMALE GENITAL CANCER 418 OTHER URINARY CANCER 292 OTHR RESPIRATRY CANCER 192 TESTICULAR CANCER 130 MALE GENITAL CANCER 66 Name: CCS Diagnosis Description, dtype: int64
g = s.groupby('Hospital County')
for name,group in g:
print(name,len(group))
Albany 63461 Allegany 2052 Bronx 183847 Broome 28213 Cattaraugus 5262 Cayuga 4984 Chautauqua 11132 Chemung 15803 Chenango 1761 Clinton 9868 Columbia 5528 Cortland 3745 Delaware 762 Dutchess 30932 Erie 121301 Essex 410 Franklin 4710 Fulton 3076 Genesee 4412 Herkimer 748 Jefferson 11092 Kings 258712 Lewis 1641 Livingston 2023 Madison 4729 Manhattan 412188 Monroe 104802 Montgomery 6500 Nassau 180741 Niagara 17400 Oneida 29735 Onondaga 76495 Ontario 11487 Orange 39572 Orleans 1372 Oswego 5092 Otsego 12449 Putnam 6650 Queens 199374 Rensselaer 12476 Richmond 58876 Rockland 32211 Saratoga 9167 Schenectady 21963 Schoharie 562 Schuyler 651 St Lawrence 10705 Steuben 6438 Suffolk 159765 Sullivan 4442 Tompkins 7226 Ulster 11537 Warren 13939 Wayne 5315 Westchester 119384 Wyoming 2256 Yates 759
import numpy as np
g.agg({'Total Charges':['median',np.mean]})
Total Charges | ||
---|---|---|
median | mean | |
Hospital County | ||
Albany | 24941.200 | 42824.678678 |
Allegany | 6864.775 | 9447.230804 |
Bronx | 20387.020 | 40888.858943 |
Broome | 14152.400 | 23337.190761 |
Cattaraugus | 8897.075 | 12605.036923 |
Cayuga | 12146.000 | 15864.603355 |
Chautauqua | 7425.375 | 10905.151219 |
Chemung | 12829.090 | 19567.670297 |
Chenango | 14201.500 | 19882.085014 |
Clinton | 19832.975 | 29965.544606 |
Columbia | 12700.515 | 18099.838461 |
Cortland | 7211.710 | 9157.265872 |
Delaware | 8687.275 | 10802.212703 |
Dutchess | 24442.405 | 37470.757036 |
Erie | 15248.480 | 28356.993957 |
Essex | 7799.880 | 8424.805878 |
Franklin | 11561.195 | 15563.635943 |
Fulton | 16380.865 | 20709.369467 |
Genesee | 10601.320 | 14939.391514 |
Herkimer | 8466.495 | 9332.398008 |
Jefferson | 11143.970 | 15556.550593 |
Kings | 20466.985 | 35827.915035 |
Lewis | 7115.270 | 9301.480792 |
Livingston | 7820.470 | 11787.964726 |
Madison | 13774.910 | 17657.331191 |
Manhattan | 31191.865 | 57623.191722 |
Monroe | 13732.395 | 24275.150814 |
Montgomery | 12375.345 | 15840.527629 |
Nassau | 33457.220 | 56269.047370 |
Niagara | 9362.760 | 13473.650586 |
Oneida | 14747.810 | 21626.534473 |
Onondaga | 18779.020 | 32415.260638 |
Ontario | 10369.530 | 14767.910544 |
Orange | 27844.385 | 38516.229516 |
Orleans | 8696.010 | 11967.996764 |
Oswego | 10178.085 | 14588.237247 |
Otsego | 12328.140 | 20015.032696 |
Putnam | 25818.580 | 33308.905686 |
Queens | 20376.820 | 36483.546173 |
Rensselaer | 13882.760 | 20164.968626 |
Richmond | 21551.195 | 36616.813506 |
Rockland | 26242.630 | 39691.695802 |
Saratoga | 21055.910 | 28686.658518 |
Schenectady | 14969.120 | 25815.983324 |
Schoharie | 9193.440 | 10310.857011 |
Schuyler | 5909.140 | 6972.520369 |
St Lawrence | 9022.900 | 11782.562507 |
Steuben | 9896.750 | 13170.891367 |
Suffolk | 31143.000 | 48742.260524 |
Sullivan | 18411.120 | 26597.025905 |
Tompkins | 8004.660 | 11897.976759 |
Ulster | 18330.850 | 26800.900193 |
Warren | 15494.660 | 23637.492441 |
Wayne | 10740.780 | 15697.965737 |
Westchester | 22476.255 | 47804.177764 |
Wyoming | 6592.915 | 8619.636201 |
Yates | 8105.930 | 9788.401818 |
g.agg({'Total Charges':'median'}).sort_values('Total Charges',ascending=False)
Total Charges | |
---|---|
Hospital County | |
Nassau | 33457.220 |
Manhattan | 31191.865 |
Suffolk | 31143.000 |
Orange | 27844.385 |
Rockland | 26242.630 |
Putnam | 25818.580 |
Albany | 24941.200 |
Dutchess | 24442.405 |
Westchester | 22476.255 |
Richmond | 21551.195 |
Saratoga | 21055.910 |
Kings | 20466.985 |
Bronx | 20387.020 |
Queens | 20376.820 |
Clinton | 19832.975 |
Onondaga | 18779.020 |
Sullivan | 18411.120 |
Ulster | 18330.850 |
Fulton | 16380.865 |
Warren | 15494.660 |
Erie | 15248.480 |
Schenectady | 14969.120 |
Oneida | 14747.810 |
Chenango | 14201.500 |
Broome | 14152.400 |
Rensselaer | 13882.760 |
Madison | 13774.910 |
Monroe | 13732.395 |
Chemung | 12829.090 |
Columbia | 12700.515 |
Montgomery | 12375.345 |
Otsego | 12328.140 |
Cayuga | 12146.000 |
Franklin | 11561.195 |
Jefferson | 11143.970 |
Wayne | 10740.780 |
Genesee | 10601.320 |
Ontario | 10369.530 |
Oswego | 10178.085 |
Steuben | 9896.750 |
Niagara | 9362.760 |
Schoharie | 9193.440 |
St Lawrence | 9022.900 |
Cattaraugus | 8897.075 |
Orleans | 8696.010 |
Delaware | 8687.275 |
Herkimer | 8466.495 |
Yates | 8105.930 |
Tompkins | 8004.660 |
Livingston | 7820.470 |
Essex | 7799.880 |
Chautauqua | 7425.375 |
Cortland | 7211.710 |
Lewis | 7115.270 |
Allegany | 6864.775 |
Wyoming | 6592.915 |
Schuyler | 5909.140 |