In [1]:
import numpy as np
import pandas as pd
import altair as alt
alt.renderers.enable('notebook');

Markdown $e^x$

In [5]:
import string
letters = string.ascii_lowercase
letters
Out[5]:
'abcdefghijklmnopqrstuvwxyz'
In [7]:
# Tally table: one entry per character of `letters`, each starting at zero.
counts = dict.fromkeys(letters, 0)
counts
Out[7]:
{'a': 0,
 'b': 0,
 'c': 0,
 'd': 0,
 'e': 0,
 'f': 0,
 'g': 0,
 'h': 0,
 'i': 0,
 'j': 0,
 'k': 0,
 'l': 0,
 'm': 0,
 'n': 0,
 'o': 0,
 'p': 0,
 'q': 0,
 'r': 0,
 's': 0,
 't': 0,
 'u': 0,
 'v': 0,
 'w': 0,
 'x': 0,
 'y': 0,
 'z': 0}

Web scraping

In [8]:
import requests
In [10]:
r = requests.get('http://nytimes.com')
type(r)
Out[10]:
requests.models.Response
In [12]:
r.text[:100]
Out[12]:
'<!DOCTYPE html>\n<html lang="en" xmlns:og="http://opengraphprotocol.org/schema/">\n  <head>\n    <title'
In [13]:
r = requests.get('http://nytimes.com')
'Florida' in r.text
Out[13]:
True
In [15]:
'Brexit' in r.text
Out[15]:
True
In [16]:
def innyt(word, url='http://nytimes.com', timeout=10):
    """Report whether `word` occurs in the page downloaded from `url`.

    The check is a plain, case-sensitive substring test against the full
    response text, so a match inside markup counts as well as a match in
    visible text.

    Parameters
    ----------
    word : str
        Text to look for.
    url : str, optional
        Page to download. Defaults to the New York Times address used
        throughout this notebook.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout a stalled connection would block the cell forever.

    Returns
    -------
    bool
        True if `word` is a substring of the downloaded text.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status; searching an error
        page would give a misleading answer.
    requests.exceptions.RequestException
        For connection problems and timeouts.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of searching its body.
    r.raise_for_status()
    return word in r.text
In [17]:
innyt('Ireland')
Out[17]:
False
In [ ]:
# split-and-select for extracting pieces of strings
In [19]:
s = 'some word(s) or other stuff'
# Split once on '(' and keep the list of pieces ...
parts = s.split('(')
# ... then select the piece that follows the '('.
parts[1]
Out[19]:
's) or other stuff'
In [21]:
s = 'some word(s or more) or other stuff'
# Everything after the '(' ...
after_open = s.split('(')[1]
# ... cut off at the ')' leaves just the text between the parentheses.
after_open.split(')')[0]
Out[21]:
's or more'
In [24]:
x = 'priceBlockBuyingPriceString">'
url = 'https://www.amazon.com/Vollrath-47934-4-Quart-Economy-Stainless/dp/B000CC1ECE'
r = requests.get(url)
len(r.text)
r
Out[24]:
<Response [503]>
In [25]:
requests.get('http://blue.math.buffalo.edu/hello')
Out[25]:
<Response [404]>
In [27]:
uas = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
requests.get('http://blue.math.buffalo.edu/hello',
            headers={'User-Agent':uas})
Out[27]:
<Response [404]>
In [31]:
# Retry the Amazon product page, this time sending a browser-style User-Agent
# header; the earlier request to the same URL without one came back as
# <Response [503]>.
uas = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
# Marker text that the following cells split the page on to locate the price.
x = 'priceBlockBuyingPriceString">'
url = 'https://www.amazon.com/Vollrath-47934-4-Quart-Economy-Stainless/dp/B000CC1ECE'
r = requests.get(url,headers={'User-Agent':uas})
# Display the response object so its status code is visible.
r
Out[31]:
<Response [200]>
In [32]:
'7.92' in r.text
Out[32]:
True
In [33]:
x = 'priceBlockBuyingPriceString">'
len(r.text.split(x))
Out[33]:
2
In [34]:
r.text.split(x)[1].split('<')[0]
Out[34]:
'$7.92'
In [35]:
r.text.split(x)[1].split('<')[0][1:]
Out[35]:
'7.92'
In [38]:
float(r.text.split(x)[1].split('<')[0][1:].replace(',',''))
Out[38]:
7.92
In [44]:
def getprice(productid, timeout=10):
    """Return the price shown on an Amazon product page as a float.

    Downloads ``https://www.amazon.com/dp/<productid>`` with a browser-style
    User-Agent header, takes the text that follows the
    ``priceBlockBuyingPriceString">`` marker up to the next ``<``, drops its
    first character (the ``$`` sign) and any commas, and converts the rest
    to a number.

    Parameters
    ----------
    productid : str
        Identifier appended to the ``/dp/`` URL, e.g. ``'B000CC1ECE'``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout a stalled connection would block the cell forever.

    Returns
    -------
    float
        The parsed price.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status (e.g. 503).
    requests.exceptions.RequestException
        For connection problems and timeouts.
    ValueError
        If the price marker is not present in the page, or the text after
        it cannot be converted to a float.
    """
    uas = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    x = 'priceBlockBuyingPriceString">'
    url = 'https://www.amazon.com/dp/' + productid
    r = requests.get(url, headers={'User-Agent': uas}, timeout=timeout)
    # An error page will not contain the price; report the HTTP failure itself.
    r.raise_for_status()
    pieces = r.text.split(x)
    if len(pieces) < 2:
        # Marker absent: say so, rather than a bare "list index out of range".
        raise ValueError(
            'price marker %r not found in page for product %r' % (x, productid)
        )
    price_text = pieces[1].split('<')[0]
    # Skip the leading currency symbol and remove thousands separators.
    return float(price_text[1:].replace(',', ''))
In [40]:
getprice('B000CC1ECE')
Out[40]:
7.92
In [41]:
getprice('B07GD4CMDH')
Out[41]:
725.99
In [45]:
getprice('B07MW5WKC1')
Out[45]:
2742.21
In [46]:
'mississippi'.replace('s','')
Out[46]:
'miiippi'
In [47]:
'buffalo'.replace('s','')
Out[47]:
'buffalo'
In [48]:
import pandas as pd
pd.__version__
Out[48]:
'0.25.1'
In [ ]: