import numpy as np
import pandas as pd
Overview of linear algebra operations in python
The numpy library is the main tool for linear algebra in Python.
Creating and shaping arrays
# Build a 1-D array of eight integers; its shape is (8,).
m = np.array([1, 2, 3, 4, 5, 6, 7, 8])
print("Array {} with shape {} (a row vector)".format(m, m.shape))
# reshape lays the same eight values out as 2 rows x 4 columns ...
print("Reshaped array to shape (2,4):\n {}".format(m.reshape((2, 4))))
# ... or as a single column of 8 rows. m is never reassigned, so it keeps
# its original (8,) shape throughout.
print("Reshaped array to column vector:\n {}".format(m.reshape(8, 1)))
Array [1 2 3 4 5 6 7 8] with shape (8,) (a row vector)
Reshaped array to shape (2,4):
[[1 2 3 4]
[5 6 7 8]]
Reshaped array to column vector:
[[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]]
Some special arrays
# A 3x4 matrix filled with ones (floating point by default).
one = np.ones(shape=(3, 4))
print(one)
# A 3x3 matrix filled with zeros.
zero = np.zeros(shape=(3, 3))
print(zero)
# A 4x4 matrix with 1, 2, 3, 4 on the main diagonal and zeros elsewhere.
d = np.diag([1, 2, 3, 4])
print(d)
[[1. 1. 1. 1.]
[1. 1. 1. 1.]
[1. 1. 1. 1.]]
[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
[[1 0 0 0]
[0 2 0 0]
[0 0 3 0]
[0 0 0 4]]
Addition and scalar multiplication
# Two length-4 vectors of standard-normal draws add element by element.
x = np.random.normal(size=(4,))
y = np.random.normal(size=(4,))
print(x + y)
# A length-2 vector cannot be broadcast against a length-4 vector, so the
# addition raises ValueError. Catch only that exception so that any other,
# unexpected error still surfaces instead of being silently swallowed.
u = np.random.normal(size=(2,))
try:
    print(x + u)
except ValueError:
    print("Cant Mix These")
[-0.80263053 0.2517913 1.11840438 -0.72227525]
Cant Mix These
Broadcasting
# Subtracting a scalar applies it to every entry of the 2x2 matrix.
x = np.array([[1, 2], [3, 4]])
print("x={}".format(x))
print("x-1={}".format(x - 1))
# A shape-(2,) vector is matched against the last axis: it is subtracted
# from each row of x.
z = np.array([1, 2])
print("z={}".format(z))
print("x-z={}".format(x - z))
# A shape-(2, 1) column is stretched across the columns instead: entry i of
# z is subtracted from every element of row i.
z = np.array([[3], [4]])
print("z={}".format(z))
print("x-z={}".format(x - z))
x=[[1 2]
[3 4]]
x-1=[[0 1]
[2 3]]
z=[1 2]
x-z=[[0 0]
[2 2]]
z=[[3]
[4]]
x-z=[[-2 -1]
[-1 0]]
Element by Element
# Arithmetic operators and numpy functions act on each entry independently:
# 1 / x is the entry-wise reciprocal (not the matrix inverse) and np.log is
# the entry-wise natural logarithm.
x = np.array([[1, 2, 3], [2, 3, 4], [4, 5, 6]])
print(1 / x)
print(np.log(x))
[[1. 0.5 0.33333333]
[0.5 0.33333333 0.25 ]
[0.25 0.2 0.16666667]]
[[0. 0.69314718 1.09861229]
[0.69314718 1.09861229 1.38629436]
[1.38629436 1.60943791 1.79175947]]
Multiplication
# x is 3x4 and y is 4x1, so the inner dimensions agree and the matrix
# product x @ y is 3x1.
x = np.random.normal(size=(3, 4))
print(x)
y = np.random.normal(size=(4, 1))
print(y)
print(x @ y)
[[-0.38282693 0.56666355 -1.23844588 0.28654749]
[ 0.64545868 1.18922632 -0.52113292 0.16461292]
[-0.54573844 0.82154508 1.10311255 0.20158659]]
[[ 0.85713165]
[-0.41036715]
[ 0.06549535]
[-0.7014138 ]]
[[-0.84277399]
[-0.08436991]
[-0.87405168]]
Transpose
# transpose() swaps rows and columns. This particular x is symmetric, so the
# transposed matrix prints identically to x itself.
x = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]])
print(x)
print(x.transpose())
[[1 2 3]
[2 3 4]
[3 4 5]]
[[1 2 3]
[2 3 4]
[3 4 5]]
Norm
x = np.array([[1, 2], [3, 4]])
print(x)
# With no axis given, norm returns one number for the whole matrix
# (the Frobenius norm: square root of the sum of squared entries).
y = np.linalg.norm(x)
print(y)
# axis=0 reduces down the rows, leaving one norm per COLUMN:
# sqrt(1 + 9) and sqrt(4 + 16).
y = np.linalg.norm(x, axis=0)
print("axis=0 yields column norms: {}".format(y))
# axis=1 reduces across the columns, leaving one norm per ROW:
# sqrt(1 + 4) and sqrt(9 + 16).
y = np.linalg.norm(x, axis=1)
print("axis=1 yields row norms: {}".format(y))
[[1 2]
[3 4]]
5.477225575051661
axis=0 yields column norms: [3.16227766 4.47213595]
axis=1 yields row norms: [2.23606798 5. ]
Rank
# The two rows of x are linearly independent, so the matrix has full rank 2.
x = np.array([[1, 2], [3, 4]])
# Print the result explicitly so it is shown when run as a plain script too,
# not only when a notebook echoes the last expression of a cell.
print(np.linalg.matrix_rank(x))
2
Dataframes and matrices
# Load the penguins table from a CSV file path relative to the working
# directory.
data = pd.read_csv("data/penguins-raw.csv")
# Selecting with a list of column names keeps the result a one-column
# DataFrame rather than a Series.
print(data[["Body Mass (g)"]])
# to_numpy() returns the underlying values as a numpy array; because the
# selection is a one-column DataFrame, the array is 2-D with a single column.
print(data[["Body Mass (g)"]].to_numpy())
Body Mass (g)
0 3750.0
1 3800.0
2 3250.0
3 NaN
4 3450.0
.. ...
339 4000.0
340 3400.0
341 3775.0
342 4100.0
343 3775.0
[344 rows x 1 columns]
[[3750.]
[3800.]
[3250.]
[ nan]
[3450.]
[3650.]
[3625.]
[4675.]
[3475.]
[4250.]
[3300.]
[3700.]
[3200.]
[3800.]
[4400.]
[3700.]
[3450.]
[4500.]
[3325.]
[4200.]
[3400.]
[3600.]
[3800.]
[3950.]
[3800.]
[3800.]
[3550.]
[3200.]
[3150.]
[3950.]
[3250.]
[3900.]
[3300.]
[3900.]
[3325.]
[4150.]
[3950.]
[3550.]
[3300.]
[4650.]
[3150.]
[3900.]
[3100.]
[4400.]
[3000.]
[4600.]
[3425.]
[2975.]
[3450.]
[4150.]
[3500.]
[4300.]
[3450.]
[4050.]
[2900.]
[3700.]
[3550.]
[3800.]
[2850.]
[3750.]
[3150.]
[4400.]
[3600.]
[4050.]
[2850.]
[3950.]
[3350.]
[4100.]
[3050.]
[4450.]
[3600.]
[3900.]
[3550.]
[4150.]
[3700.]
[4250.]
[3700.]
[3900.]
[3550.]
[4000.]
[3200.]
[4700.]
[3800.]
[4200.]
[3350.]
[3550.]
[3800.]
[3500.]
[3950.]
[3600.]
[3550.]
[4300.]
[3400.]
[4450.]
[3300.]
[4300.]
[3700.]
[4350.]
[2900.]
[4100.]
[3725.]
[4725.]
[3075.]
[4250.]
[2925.]
[3550.]
[3750.]
[3900.]
[3175.]
[4775.]
[3825.]
[4600.]
[3200.]
[4275.]
[3900.]
[4075.]
[2900.]
[3775.]
[3350.]
[3325.]
[3150.]
[3500.]
[3450.]
[3875.]
[3050.]
[4000.]
[3275.]
[4300.]
[3050.]
[4000.]
[3325.]
[3500.]
[3500.]
[4475.]
[3425.]
[3900.]
[3175.]
[3975.]
[3400.]
[4250.]
[3400.]
[3475.]
[3050.]
[3725.]
[3000.]
[3650.]
[4250.]
[3475.]
[3450.]
[3750.]
[3700.]
[4000.]
[4500.]
[5700.]
[4450.]
[5700.]
[5400.]
[4550.]
[4800.]
[5200.]
[4400.]
[5150.]
[4650.]
[5550.]
[4650.]
[5850.]
[4200.]
[5850.]
[4150.]
[6300.]
[4800.]
[5350.]
[5700.]
[5000.]
[4400.]
[5050.]
[5000.]
[5100.]
[4100.]
[5650.]
[4600.]
[5550.]
[5250.]
[4700.]
[5050.]
[6050.]
[5150.]
[5400.]
[4950.]
[5250.]
[4350.]
[5350.]
[3950.]
[5700.]
[4300.]
[4750.]
[5550.]
[4900.]
[4200.]
[5400.]
[5100.]
[5300.]
[4850.]
[5300.]
[4400.]
[5000.]
[4900.]
[5050.]
[4300.]
[5000.]
[4450.]
[5550.]
[4200.]
[5300.]
[4400.]
[5650.]
[4700.]
[5700.]
[4650.]
[5800.]
[4700.]
[5550.]
[4750.]
[5000.]
[5100.]
[5200.]
[4700.]
[5800.]
[4600.]
[6000.]
[4750.]
[5950.]
[4625.]
[5450.]
[4725.]
[5350.]
[4750.]
[5600.]
[4600.]
[5300.]
[4875.]
[5550.]
[4950.]
[5400.]
[4750.]
[5650.]
[4850.]
[5200.]
[4925.]
[4875.]
[4625.]
[5250.]
[4850.]
[5600.]
[4975.]
[5500.]
[4725.]
[5500.]
[4700.]
[5500.]
[4575.]
[5500.]
[5000.]
[5950.]
[4650.]
[5500.]
[4375.]
[5850.]
[4875.]
[6000.]
[4925.]
[ nan]
[4850.]
[5750.]
[5200.]
[5400.]
[3500.]
[3900.]
[3650.]
[3525.]
[3725.]
[3950.]
[3250.]
[3750.]
[4150.]
[3700.]
[3800.]
[3775.]
[3700.]
[4050.]
[3575.]
[4050.]
[3300.]
[3700.]
[3450.]
[4400.]
[3600.]
[3400.]
[2900.]
[3800.]
[3300.]
[4150.]
[3400.]
[3800.]
[3700.]
[4550.]
[3200.]
[4300.]
[3350.]
[4100.]
[3600.]
[3900.]
[3850.]
[4800.]
[2700.]
[4500.]
[3950.]
[3650.]
[3550.]
[3500.]
[3675.]
[4450.]
[3400.]
[4300.]
[3250.]
[3675.]
[3325.]
[3950.]
[3600.]
[4050.]
[3350.]
[3450.]
[3250.]
[4050.]
[3800.]
[3525.]
[3950.]
[3650.]
[3650.]
[4000.]
[3400.]
[3775.]
[4100.]
[3775.]]
Datatypes
# The same values stored once as integers and once as floats.
x = np.array([[1, 2, 3], [2, 3, 4]], dtype=int)
print(x)
y = np.array([[1, 2, 3], [2, 3, 4]], dtype=float)
print(y)
# Mixing the two promotes the result to float, as the trailing dots in the
# printed output show.
print(x + y)
[[1 2 3]
[2 3 4]]
[[1. 2. 3.]
[2. 3. 4.]]
[[2. 4. 6.]
[4. 6. 8.]]