## Methods vs. functions
- Methods (and the `del` statement) act on a list in place:
  - append: `L.append(x)`
  - del: `del L[3]`
  - sort: `L.sort()`
- Functions and operators create a new list, leaving the original one alone:
  - `S = L + [x]`
  - `Y = sorted(L)`
# list.append changes the list in place and returns None.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
new = primes.append(23)
print('returned value = ', new, 'original = ', primes)

# The + operator builds a brand-new list; the original is left untouched.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
new = primes + [23]
print('returned value = ', new, 'original = ', primes)

# del removes the element at index 3 from the existing list.
del primes[3]
primes

# list.sort reorders the list in place and returns None.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
new = primes.sort(reverse=True)
print('returned value = ', new, 'original = ', primes)

# sorted returns a new, sorted list and does not modify its argument.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
new = sorted(primes, reverse=True)
print('returned value = ', new, 'original = ', primes)

# Plain assignment does not copy: S and primes name the same list object,
# so the append made through primes is also visible through S.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
S = primes
primes.append(23)
print(S)
taking a slice is one way to force a copy
# primes[:] is a full slice, which builds a new list object, so the later
# append to primes does not show up in S.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
S = primes[:]
primes.append(23)
print(S)
so is using the .copy() method
# list.copy() also returns a new list object, independent of the original.
primes = [2, 3, 5, 7, 11, 13, 17, 19]
S = primes.copy()
primes.append(23)
print(S, primes)
# str.join glues the items of a list together using the separator string.
','.join(['a', 'b', 'c'])
# list() applied to a string breaks it into single characters.
list('ABCDEF')
commas = ','.join(list('ABCDEF'))
print(commas)
# str.split goes the other way: it cuts a string at every separator.
commas.split(',')
data = ".5,.2,1.8,17,21"
data.split(',')
# Split on commas, then re-join the same fields with tabs.
print('\t'.join(data.split(',')))
# The `for` keyword is the fundamental iterator in Python.
# Every statement indented under the `for` line runs once per item.
for x in [2, 3, 5, 7, 11, 13, 17, 19]:
    print(x, 'is a prime')
    print(x**2, 'is its square')

# NOTE(review): the original indentation was lost. This second loop is
# assumed to contrast with the first by leaving the last print outside the
# loop body, so it runs only once, after the loop, using the final x.
for x in [2, 3, 5, 7, 11, 13, 17, 19]:
    print(x, 'is a prime')
print(x**2, 'is its square')
See Tabs vs Spaces
# range(10) yields the integers 0 through 9.
for x in range(10):
    print(x)

# Build a list one element at a time.
# NOTE(review): indentation was lost in the original; the print is assumed
# to come after the loop rather than inside it.
L = []
for x in range(10):
    L.append(x)
print(L)

# range(start, stop, step) with a negative step counts down: 10, 9, ..., 1.
for x in range(10, 0, -1):
    print(x)
import numpy as np
# np.arange accepts a fractional step; print each x next to sin(x).
for x in np.arange(1, 5, .1):
    print(x, np.sin(x))
# Accumulate 0 + 1 + ... + 19. The running total is named `total` rather
# than `sum` so that the built-in sum() function is not shadowed.
# NOTE(review): indentation was lost in the original; each final print is
# assumed to run once, after its loop.
total = 0
for x in range(20):
    total = total + x
print(total)

sentence = 'Now is the time for all good me to come to the aid of their party'
words = sentence.split()
# Add up the lengths of the individual words, printing each as we go.
total = 0
for x in words:
    print(x, len(x))
    total = total + len(x)
print('Total length is', total)
# iterating over files
import pandas as pd
# For each listed file, load it indexed by country and print the index label
# (country) holding the smallest value in the 'gdpPercap_1957' column.
for file in ['../data/gapminder_gdp_africa.csv',
             '../data/gapminder_gdp_asia.csv']:
    data = pd.read_csv(file, index_col='country')
    print(file, data['gdpPercap_1957'].idxmin())
glob gives access to UNIX wild cards
from glob import glob
# glob expands the wildcard pattern into the list of matching file paths.
for file in glob('../data/*gdp*.csv'):
    data = pd.read_csv(file, index_col='country')
    print(file, data['gdpPercap_1957'].idxmin())

# Same scan, but label each report with the continent taken from the name.
for file in glob('../data/*gdp*.csv'):
    data = pd.read_csv(file, index_col='country')
    # e.g. '../data/gapminder_gdp_africa.csv' -> 'africa.csv' -> 'AFRICA'
    continent = file.split('_')[2].split('.')[0].upper()
    print(
        continent,
        '\nPoorest in 1957:', data['gdpPercap_1957'].idxmin(),
        '\nRichest in 1957', data['gdpPercap_1957'].idxmax(),
        '\n\n',
    )
# Nested loops: the inner loop runs in full for every item of the outer
# loop, so every (fruit, color) combination is printed.
for fruit in ['apple', 'pear', 'grape']:
    for color in ['green', 'orange', 'yellow']:
        print('I wish I had a ', color, fruit)
- string operations to extract the continent from the file name
- making a column name
# For every matching file, report the poorest and richest country in each
# of the listed years.
for file in glob('../data/*gdp*.csv'):
    # The file contents and the continent name do not depend on the year, so
    # load and compute them once per file rather than once per (file, year).
    data = pd.read_csv(file, index_col='country')
    # e.g. '../data/gapminder_gdp_africa.csv' -> 'africa.csv' -> 'AFRICA'
    continent = file.split('_')[2].split('.')[0].upper()
    for year in ['1957', '2002']:
        # Column names have the form 'gdpPercap_<year>'.
        key = 'gdpPercap_' + year
        print(continent, '\nPoorest in ' + year + ':', data[key].idxmin(),
              '\nRichest in ' + year + ':', data[key].idxmax(), '\n\n')
import glob
import pandas as pd
import matplotlib.pyplot as plt
# Draw, on one shared set of axes, the per-column mean of every region file.
fig, ax = plt.subplots(1, 1)
for filename in glob.glob('../data/gapminder_gdp*.csv'):
    dataframe = pd.read_csv(filename)
    # extract <region> from the filename, expected to be in the format 'data/gapminder_gdp_<region>.csv'.
    # we will split the string using the split method and `_` as our separator,
    # retrieve the last string in the list that split returns (`<region>.csv`),
    # and then remove the `.csv` extension from that string.
    region = filename.split('_')[-1][:-4]
    # The file is read without index_col, so the string 'country' column is
    # still part of the frame. numeric_only=True restricts the mean to the
    # numeric columns; without it pandas >= 2.0 raises a TypeError.
    dataframe.mean(numeric_only=True).plot(ax=ax, label=region)
plt.legend()
plt.show()
We have already used many existing functions (`print`, the math functions, `read_csv`, etc. are all functions). You can also define your own function, which takes certain inputs (arguments) and returns a value.
def print_date(year, month, day):
    """Print the date in the form 'year/month/day'; returns None."""
    print('/'.join(str(part) for part in (year, month, day)))


print_date(2007, 11, 31)
def format_date(year, month, day):
    """Return the date as the string 'year/month/day'."""
    pieces = [str(year), str(month), str(day)]
    return '/'.join(pieces)


format_date(2007, 12, 10)
# Collect a formatted date string for every combination of
# year 2007-2009, month 5-7 and day 12-19.
dates = []
for year in range(2007, 2010):
    for month in range(5, 8):
        for day in range(12, 20):
            dates.append(format_date(year, month, day))
dates
Recall that we worked with the pandas dataframe loaded from `gapminder_data.csv`.
import matplotlib.pyplot as plt
plt.style.use('ggplot')
data = pd.read_csv('../data/gapminder_data.csv', index_col='country')
# Reshape to one row per country and one column per year, holding gdpPercap.
percapita = pd.pivot_table(
    data,
    index='country',
    columns='year',
    values='gdpPercap',
)
percapita.loc['Afghanistan'].plot(title='Afghanistan GDP Per capita over Time')
def country_gdp(country):
    """Plot the GDP-per-capita row of one country.

    Looks up `country` in the module-level `percapita` table and plots that
    row on an 8x8 figure, with a legend and the fixed title 'GDP Per Capita'.

    Args:
        country: row label to select from `percapita`.
    """
    # The original also built a per-country title string that was never
    # passed to plot(); that dead local has been removed.
    percapita.loc[country].plot(figsize=(8, 8), legend=True, title='GDP Per Capita')
# Plot a few countries with individual calls ...
country_gdp("United States")
country_gdp("Germany")
country_gdp("Switzerland")

# ... and then a whole list of countries with a loop.
for country in ['United States', 'China', 'Germany', 'Australia', 'Brazil']:
    country_gdp(country)
import numpy.random as rnd
# One random step: rnd.choice picks either -1 or +1 from the list.
x = rnd.choice([-1, 1])
def random_walk(N):
    """Simulate an N-step random walk on the integers, starting from 0.

    Each step adds -1 or +1, drawn with rnd.choice. Returns the list of
    positions reached after each step (length N); the starting 0 itself is
    not included.
    """
    position = 0
    path = []
    for _ in range(N):
        position += rnd.choice([-1, 1])
        path.append(position)
    return path
# Simulate 200 walks of 100 steps each: draw every walk and record where
# it ended.
end_spots = []
for i in range(200):
    walk = random_walk(100)
    plt.plot(range(100), walk)
    end_spots.append(walk[-1])
# Histogram of the final positions, in 10 bins.
s = plt.hist(end_spots, bins=10)
variables inside functions are "local" to the function and changes to them don't last after the function ends
def lister(x):
    """Print a list holding ten copies of x.

    L and v are assigned inside the function, so they are local names that
    exist only for the duration of the call.
    """
    L = [x] * 10  # local L, separate from any L defined outside
    v = 47  # local as well; never visible outside the function
    print(L)


# NOTE(review): indentation was lost in the original; the three statements
# below are assumed to be at top level, showing that the outer L is the same
# before and after the call.
print(L)
lister(4)
print(L)
There are subtleties to variable scope. If a name is used inside a function but is not one of its arguments and is never assigned there, Python looks it up "outside" the function.
L = ['a', 'b', 'c']


def j(x):
    """Append x to the module-level list L, changing it in place."""
    L.append(x)


j('u')
L
And structures (like lists, and arrays) CAN be modified inside a function EVEN IF they are given as an argument.
L = ['a', 'b', 'c']


def j(x, L):
    """Append x to the list passed as L; the caller's list itself changes."""
    L.append(x)


j('x', L)
print(L)
The full set of rules is a bit complicated and we won't go into all the details.
def my_abs(x):
    """Return -x when x is negative, otherwise return x unchanged."""
    return -x if x < 0 else x


my_abs(-1)
This function returns a "tuple" which is a pair of lists. You can get the two lists with subscripts or with the A, B = construction
def split_threshold(threshold, L):
    """Partition the items of L around threshold.

    Returns a tuple (low, high): low holds the items that are less than
    threshold, high holds all the others, each in their original order.
    """
    below, at_or_above = [], []
    for item in L:
        target = below if item < threshold else at_or_above
        target.append(item)
    return below, at_or_above
# Keep the returned tuple whole, then pick out its two lists by index.
T = split_threshold(0, [-1, -5, 2, -13, 11, 100])
print(T)
print(T[0])
print(T[1])
This is the syntax for unpacking a tuple
# Unpack the returned pair directly into two names.
Low, High = split_threshold(0, [-1, -5, 2, -13, 11, 100])
Low
High
Python provides the `and` and `or` operators for combining conditions.
# Preview the first rows of the 'pop' column of data.
data['pop'].head()
# Print only the characters that compare between 'r' and 'u' inclusive;
# `and` requires both comparisons to hold.
for x in 'Jeremy Teitelbaum':
    if x >= 'r' and x <= 'u':
        print(x)
# NOTE(review): population_size is not defined in the visible part of this
# file; it is assumed to be defined in an earlier cell.
population_size(300000000)
# Classify every row by applying population_size to its 'pop' value.
data['pop_class'] = data['pop'].apply(population_size)
data.head()
# Mean life expectancy per population class, using the 2002 rows only,
# drawn as a bar chart.
(
    data[data['year'] == 2002]
    .groupby('pop_class')['lifeExp']
    .mean()
    .plot(kind='bar')
)
def life_exp_by_class_and_year(year):
    """Bar-plot the mean life expectancy per population class for one year.

    Selects the rows of the module-level `data` frame whose 'year' column
    equals `year`, groups them by 'pop_class', and plots the mean 'lifeExp'
    of each group as a bar chart.

    Args:
        year: value to match against data['year'].
    """
    # Filter on the requested year; the original hard-coded 2002 here and
    # ignored the argument.
    rows = data[data['year'] == year]
    rows.groupby('pop_class')['lifeExp'].mean().plot(kind='bar')


life_exp_by_class_and_year(1957)
# Rows whose year is 2002 AND whose pop_class is 'large'; `&` combines the
# two boolean masks element by element.
data[(data['year']==2002) & (data['pop_class'] == 'large')]
def threshold(x, L):
    """Split L into the items below x and the items at or above x.

    Returns a tuple (Low, High): Low lists the elements of L that are less
    than x, and High lists those greater than or equal to x.
    """
    smaller, rest = [], []
    for item in L:
        if item < x:
            smaller.append(item)
            continue
        rest.append(item)
    return smaller, rest
# IPython/Jupyter help query (not plain Python syntax): displays the
# signature and docstring of threshold.
?threshold
def threshold(L, x=0):
    """Split L into the items below x and the items at or above x.

    Returns a tuple (Low, High): Low lists the elements of L that are less
    than x, and High lists those greater than or equal to x. When x is not
    given it defaults to zero.
    """
    low, high = [], []
    for item in L:
        (low if item < x else high).append(item)
    return low, high
# Call with only the list; x takes its default value of 0.
threshold([1,-3,2,5])
# IPython/Jupyter help queries (not plain Python syntax) for the
# user-defined threshold and for the built-in print.
?threshold
?print