In [2]:
import pandas
In [3]:
f = pandas.read_csv('fruit_sales_log.csv')
f.head()
Out[3]:
Date Seller Item Amount
0 07/01/17 Yiqing watermelons 6.21
1 07/01/17 Sakar watermelons 12.33
2 07/01/17 Yiqing apples 18.02
3 07/02/17 Sakar watermelons 1.95
4 07/04/17 Yiqing watermelons 14.88

Having selected and copied the data in a spreadsheet, for example:

In [5]:
f = pandas.read_clipboard()
f.head()
Out[5]:
Date Seller Item Amount
0 07/01/17 Yiqing watermelons 6.21
1 07/01/17 Sakar watermelons 12.33
2 07/01/17 Yiqing apples 18.02
3 07/02/17 Sakar watermelons 1.95
4 07/04/17 Yiqing watermelons 14.88
In [7]:
from numpy import *
In [8]:
v = vander([1,2,3,4,5])
v
Out[8]:
array([[  1,   1,   1,   1,   1],
       [ 16,   8,   4,   2,   1],
       [ 81,  27,   9,   3,   1],
       [256,  64,  16,   4,   1],
       [625, 125,  25,   5,   1]])
In [9]:
pv = pandas.DataFrame(v)
pv
Out[9]:
0 1 2 3 4
0 1 1 1 1 1
1 16 8 4 2 1
2 81 27 9 3 1
3 256 64 16 4 1
4 625 125 25 5 1
In [10]:
pv.index
Out[10]:
Int64Index([0, 1, 2, 3, 4], dtype='int64')
In [11]:
pv.columns
Out[11]:
Int64Index([0, 1, 2, 3, 4], dtype='int64')
In [12]:
f.index
Out[12]:
Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
           dtype='int64')
In [13]:
f.columns
Out[13]:
Index(['Date ', 'Seller ', 'Item ', 'Amount'], dtype='object')
In [14]:
pv.index = [20,99,134,777,0]
In [15]:
pv
Out[15]:
0 1 2 3 4
20 1 1 1 1 1
99 16 8 4 2 1
134 81 27 9 3 1
777 256 64 16 4 1
0 625 125 25 5 1
In [16]:
# fix headers
f.columns = [item.strip() for item in f.columns]
In [17]:
f[['Seller','Amount']]
Out[17]:
Seller Amount
0 Yiqing 6.21
1 Sakar 12.33
2 Yiqing 18.02
3 Sakar 1.95
4 Yiqing 14.88
5 Samuel 11.73
6 Jonathan 5.11
7 Anthony 14.68
8 Samuel 4.79
9 Jonathan 13.06
10 Jonathan 3.61
11 Yiqing 4.54
12 Sakar 5.16
13 Samuel 3.17
14 Anthony 11.75
15 Yiqing 11.19
16 Katherine 2.93
17 Samuel 1.91
18 Samuel 1.89
19 Sakar 7.59
20 Anthony 2.85
21 Sakar 15.35
22 Katherine 13.92
23 Yiqing 17.35
24 Yiqing 11.87
25 Yiqing 2.14
26 Jonathan 12.57
27 Jonathan 11.68
28 Yiqing 13.83
29 Anthony 7.26
30 Megan 7.07
31 Jonathan 18.64
32 Samuel 17.73
33 Samuel 2.86
34 Megan 4.99
35 Samuel 18.06
36 Sakar 10.53
37 Anthony 6.15
38 Sakar 11.17
39 Anthony 14.95
40 Megan 13.39
41 Anthony 9.58
42 Yiqing 14.23
43 Anthony 19.06
44 Megan 16.69
45 Megan 14.89
46 Jonathan 6.96
47 Sakar 8.68
48 Jonathan 1.29
49 Samuel 18.20
In [20]:
s  = pandas.read_clipboard()
s
Out[20]:
aa b c d z
Maggie 81 27 9 3 1
Edward 625 125 25 5 1
Sanjeevani 1296 216 36 6 1
Michael 256 64 16 4 1
Robert 2401 343 49 7 1

Accessing rows

loc provided access to rows via index value

In [21]:
s.loc['Robert']
Out[21]:
aa    2401
b      343
c       49
d        7
z        1
Name: Robert, dtype: int64

iloc provides access to rows via row number

In [22]:
s.iloc[2]
Out[22]:
aa    1296
b      216
c       36
d        6
z        1
Name: Sanjeevani, dtype: int64

ix accepts either index values or row numbers. Dangerous?

In [23]:
s.ix['Michael']
Out[23]:
aa    256
b      64
c      16
d       4
z       1
Name: Michael, dtype: int64
In [24]:
s.ix[2]
Out[24]:
aa    1296
b      216
c       36
d        6
z        1
Name: Sanjeevani, dtype: int64

What happens with an integer that could be either an index value or a row number?

In [25]:
l = list(s.index)
l[1]=4
s.index = l
In [26]:
s
Out[26]:
aa b c d z
Maggie 81 27 9 3 1
4 625 125 25 5 1
Sanjeevani 1296 216 36 6 1
Michael 256 64 16 4 1
Robert 2401 343 49 7 1
In [27]:
s.ix[4]
Out[27]:
aa    2401
b      343
c       49
d        7
z        1
Name: Robert, dtype: int64

Answer: ix treats it as a row number.

In [30]:
#s.ix['4']
In [31]:
s.loc['Maggie':'Michael']
Out[31]:
aa b c d z
Maggie 81 27 9 3 1
4 625 125 25 5 1
Sanjeevani 1296 216 36 6 1
Michael 256 64 16 4 1
In [32]:
f.head()
Out[32]:
Date Seller Item Amount
0 07/01/17 Yiqing watermelons 6.21
1 07/01/17 Sakar watermelons 12.33
2 07/01/17 Yiqing apples 18.02
3 07/02/17 Sakar watermelons 1.95
4 07/04/17 Yiqing watermelons 14.88
In [33]:
f.pivot_table(values='Amount',index='Seller',columns='Item',aggfunc=sum,margins=True).fillna(0)
Out[33]:
Item apples peaches watermelons All
Seller
Anthony 34.64 17.90 33.74 86.28
Jonathan 5.11 12.57 55.24 72.92
Katherine 2.93 0.00 13.92 16.85
Megan 12.06 16.69 28.28 57.03
Sakar 18.12 16.33 38.31 72.76
Samuel 6.66 29.46 44.22 80.34
Yiqing 58.43 34.74 21.09 114.26
All 137.95 127.69 234.80 500.44
In [ ]: