In [1]:
import pandas
In [4]:
def _parse_dollars(text):
    """Convert a currency string (e.g. '$12303.20') to a float by dropping '$'."""
    return float(text.replace('$',''))


def _parse_stay(text):
    """Convert a length-of-stay string to an int after dropping any '+' characters."""
    return int(text.replace('+',''))


# Per-column parsers applied by read_csv while the file is being loaded.
mycons = {
    'Total Charges': _parse_dollars,
    'Length of Stay': _parse_stay,
}
s = pandas.read_csv('sparcs2014.csv', converters=mycons)
s.head(2)
Out[4]:
Health Service Area Hospital County Operating Certificate Number Facility Id Facility Name Age Group Zip Code - 3 digits Gender Race Ethnicity ... Payment Typology 2 Payment Typology 3 Attending Provider License Number Operating Provider License Number Other Provider License Number Birth Weight Abortion Edit Indicator Emergency Department Indicator Total Charges Total Costs
0 Western NY Allegany 226700.0 37.0 Cuba Memorial Hospital Inc 30 to 49 147 F White Not Span/Hispanic ... NaN NaN 90335341.0 NaN NaN 0 N Y 9546.85 $12303.20
1 Western NY Allegany 226700.0 37.0 Cuba Memorial Hospital Inc 50 to 69 147 F White Not Span/Hispanic ... NaN NaN 90335341.0 NaN NaN 0 N Y 11462.75 $10298.32

2 rows × 39 columns

In [9]:
# Total charges divided by a population figure, truncated to a whole number.
# NOTE(review): pop2014 is presumably the 2014 population of New York State
# (the records list NY health service areas) -- confirm the source of 19.75e6.
pop2014 = 19.75e6
int(s['Total Charges'].sum()/pop2014)
Out[9]:
4812
In [10]:
# Largest value in the 'Total Charges' column.
s['Total Charges'].max()
Out[10]:
8593455.8800000008
In [11]:
# Show the full record with the largest total charge.
# idxmax() returns the index *label* in every pandas version, so it is paired
# with label-based .loc. (Series.argmax() returned a label in older pandas and
# a position in newer ones; feeding it to .iloc is only right for a default
# RangeIndex.)
itcmax = s['Total Charges'].idxmax()
s.loc[itcmax]
Out[11]:
Health Service Area                                                        New York City
Hospital County                                                                    Bronx
Operating Certificate Number                                                 7.00001e+06
Facility Id                                                                         1169
Facility Name                          Montefiore Medical Center - Henry & Lucy Moses...
Age Group                                                                        0 to 17
Zip Code - 3 digits                                                                  104
Gender                                                                                 M
Race                                                              Black/African American
Ethnicity                                                              Not Span/Hispanic
Length of Stay                                                                       120
Admit Day of Week                                                                    FRI
Type of Admission                                                              Emergency
Patient Disposition                                         Home w/ Home Health Services
Discharge Year                                                                      2014
Discharge Day of Week                                                                TUE
CCS Diagnosis Code                                                                    63
CCS Diagnosis Description                                        WHITE BLOODCELL DISEASE
CCS Procedure Code                                                                    64
CCS Procedure Description                                         BONE MARROW TRANSPLANT
APR DRG Code                                                                           3
APR DRG Description                                               BONE MARROW TRANSPLANT
APR MDC Code                                                                          16
APR MDC Description                    Diseases and Disorders of Blood, Blood Forming...
APR Severity of Illness Code                                                           4
APR Severity of Illness Description                                              Extreme
APR Risk of Mortality                                                            Extreme
APR Medical Surgical Description                                                Surgical
Payment Typology 1                                              Private Health Insurance
Payment Typology 2                                                              Self-Pay
Payment Typology 3                                                                   NaN
Attending Provider License Number                                                 198304
Operating Provider License Number                                                 229870
Other Provider License Number                                                        NaN
Birth Weight                                                                           0
Abortion Edit Indicator                                                                N
Emergency Department Indicator                                                         N
Total Charges                                                                8.59346e+06
Total Costs                                                                  $2961423.61
Name: 965564, dtype: object
In [12]:
# Show the full record with the smallest total charge.
# idxmin() returns the index *label* in every pandas version, so it is paired
# with label-based .loc. (Series.argmin() returned a label in older pandas and
# a position in newer ones; feeding it to .iloc is only right for a default
# RangeIndex.)
itcmin = s['Total Charges'].idxmin()
s.loc[itcmin]
Out[12]:
Health Service Area                                                       New York City
Hospital County                                                               Manhattan
Operating Certificate Number                                                  7.002e+06
Facility Id                                                                        1439
Facility Name                                                   Mount Sinai Beth Israel
Age Group                                                                   70 or Older
Zip Code - 3 digits                                                                 100
Gender                                                                                M
Race                                                                              White
Ethnicity                                                             Not Span/Hispanic
Length of Stay                                                                        1
Admit Day of Week                                                                   WED
Type of Admission                                                                Urgent
Patient Disposition                                                             Expired
Discharge Year                                                                     2014
Discharge Day of Week                                                               THU
CCS Diagnosis Code                                                                  108
CCS Diagnosis Description                                                           CHF
CCS Procedure Code                                                                    0
CCS Procedure Description                                                       NO PROC
APR DRG Code                                                                        194
APR DRG Description                                                       HEART FAILURE
APR MDC Code                                                                          5
APR MDC Description                    Diseases and Disorders of the Circulatory System
APR Severity of Illness Code                                                          1
APR Severity of Illness Description                                               Minor
APR Risk of Mortality                                                          Moderate
APR Medical Surgical Description                                                Medical
Payment Typology 1                                             Private Health Insurance
Payment Typology 2                                                             Self-Pay
Payment Typology 3                                                                  NaN
Attending Provider License Number                                                251948
Operating Provider License Number                                                   NaN
Other Provider License Number                                                       NaN
Birth Weight                                                                          0
Abortion Edit Indicator                                                               N
Emergency Department Indicator                                                        N
Total Charges                                                                       0.5
Total Costs                                                                       $0.13
Name: 1424707, dtype: object
In [13]:
# Arithmetic mean of 'Total Charges'; kept in meantc and displayed.
meantc = s['Total Charges'].mean()
meantc
Out[13]:
40184.170849815331
In [14]:
# Median of 'Total Charges'; kept in mediantc and displayed.
mediantc = s['Total Charges'].median()
mediantc
Out[14]:
21872.93
In [20]:
from histogram1d import histogram1d
In [17]:
# Number of rows in the loaded table.
len(s)
Out[17]:
2365208
In [22]:
from numpy import *
def histogram1d(xall,xmin,xmax,nbinsx=10,rawcounts=False):
    """Histogram the samples of ``xall`` lying in the half-open range [xmin, xmax).

    Parameters
    ----------
    xall : array-like
        Sample values: an ndarray, a pandas Series, or a plain sequence.
    xmin, xmax : scalar
        Lower (inclusive) and upper (exclusive) limits of the binned range.
        Samples outside the range are ignored; NaN samples are ignored too,
        because both comparisons evaluate to False for them.
    nbinsx : int, optional
        Number of equal-width bins spanning [xmin, xmax).
    rawcounts : bool, optional
        If True return integer counts per bin; otherwise return a density.

    Returns
    -------
    left : ndarray
        Left edge of each bin (length ``nbinsx``).
    values : ndarray
        Counts per bin when ``rawcounts`` is True. Otherwise
        ``counts / (dx * len(xall))``: the normalisation uses *all* samples,
        including those outside the range, so the density integrates to the
        in-range fraction rather than to 1.
    """
    # Accept any array-like (a list would fail on the comparisons below).
    values = asarray(xall)
    xfac = float(nbinsx)/float(xmax-xmin)
    inbox = logical_and(values>=xmin, values<xmax)
    x = values[inbox]
    ix = array((x-xmin)*xfac, dtype=int)
    # Floating-point round-off can map a sample just below xmax to index
    # nbinsx; keep such samples in the last bin instead of indexing past it.
    ix = minimum(ix, nbinsx-1)
    # bincount is the vectorised equivalent of accumulating 1 at each index.
    counts = bincount(ix, minlength=nbinsx)
    dx = (xmax-xmin)/float(nbinsx)
    left = linspace(xmin,xmax,nbinsx+1)[:-1] # left ends of bins
    if rawcounts:
        return left, counts
    # Probability density samples.
    return left, counts/(dx*float(len(values)))
In [23]:
# Bin the charges into 100 equal-width bins spanning the observed range.
charges = s['Total Charges']
mincharge, maxcharge = charges.min(), charges.max()
lefts, p = histogram1d(charges, mincharge, maxcharge, nbinsx=100)
In [28]:
%pylab inline
bar(lefts,p,lefts[1]-lefts[0],color='m',alpha=0.4)
Populating the interactive namespace from numpy and matplotlib
Out[28]:
<Container object of 100 artists>
In [29]:
# Same histogram restricted to the lowest 1% of the charge range.
lefts, p = histogram1d(s['Total Charges'], mincharge, maxcharge/100, nbinsx=100)
In [30]:
# lefts holds the LEFT edge of each bin, so the bars must be edge-aligned.
# matplotlib >= 2.0 centres bars on x by default, which would shift every
# bar half a bin to the left.
bar(lefts,p,lefts[1]-lefts[0],color='m',alpha=0.4,align='edge')
Out[30]:
<Container object of 100 artists>
In [31]:
# Histogram of log10(charges) over the full observed range.
lefts,p = histogram1d(log10(s['Total Charges']),log10(mincharge),log10(maxcharge),100)
# lefts holds the LEFT edge of each bin, so the bars must be edge-aligned.
# matplotlib >= 2.0 centres bars on x by default.
bar(lefts,p,lefts[1]-lefts[0],color='m',alpha=0.4,align='edge')
Out[31]:
<Container object of 100 artists>
In [32]:
# Histogram of log10(charges) restricted to [3, 6), i.e. charges from 1e3 up to 1e6.
lefts,p = histogram1d(log10(s['Total Charges']),3,6,100)
# lefts holds the LEFT edge of each bin, so the bars must be edge-aligned.
# matplotlib >= 2.0 centres bars on x by default.
bar(lefts,p,lefts[1]-lefts[0],color='m',alpha=0.4,align='edge')
Out[32]:
<Container object of 100 artists>
In [33]:
def gaussian(x,mu,sigma):
    """Normal probability density with mean ``mu`` and standard deviation ``sigma``, evaluated at ``x``."""
    norm = 1/sqrt(2*pi)/sigma
    exponent = -(x-mu)**2/2/sigma**2
    return norm*exp(exponent)
In [37]:
# Compare the distribution of log10(charges) with a normal curve that has the
# same mean and standard deviation.
lc = log10(s['Total Charges'])
lefts,p = histogram1d(lc,3,6,100)
w = lefts[1]-lefts[0]
# lefts holds the LEFT edge of each bin, so the bars must be edge-aligned;
# otherwise (matplotlib >= 2.0 centres bars by default) the histogram is
# drawn half a bin to the left of the curve, which is sampled at bin centres.
bar(lefts,p,w,color='m',alpha=0.4,align='edge')
mu = lc.mean()
sigma = lc.std()
plot(lefts+w/2,gaussian(lefts+w/2,mu,sigma),'r',lw=3,alpha=0.5)
Out[37]:
[<matplotlib.lines.Line2D at 0x7f4373c6abe0>]
In [ ]: