import plainEnglishCoding
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import ipywidgets as widgets
M = np.array(
[
[3,6,2],
[10,3,8],
[1,7,5],
]
)
costs = np.array([2,3,5])
totals = np.dot(M,costs)
eigvals,eigvec = np.linalg.eig(M)
print(eigvals)
print(totals)
[15.16204133 2.41626789 -6.57830922] [34 69 48]
Systems of Linear Equations¶
The last module taught us linear algebra, vectors, and matrices from a mathematical perspective.
Now let's see how to use these concepts in data science to work with data and solve a real problem.
Let's say that the matrix
$ M = \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 1 & 7 & 5 \\ \end{bmatrix} $
represents the quantities of apples, oranges, and bread (i.e., the columns) purchased by Bob, Alice, and Tim (i.e., the rows).
This matrix is storing data about individuals' purchasing behavior.
Now, let's say we have the final receipts of each person's purchase (in row order):
- Bob paid $34
- Alice paid $69
- Tim paid $48
We can represent this information as a cost vector $\hspace{1cm}\vec{c} = \begin{bmatrix} 34 \\ 69 \\ 48 \\ \end{bmatrix}$.
Now the question is: how much does each grocery item cost?
Let's use $\vec{p}=\begin{bmatrix} \color{red}{p_1} \\ \color{orange}{p_2} \\ \color{blue}{p_3} \end{bmatrix}$ to represent the vector of grocery item prices (i.e., $\color{red}{p_1}$ is the price of apples, $\color{orange}{p_2}$ is the price of oranges, etc).
We have $\hspace{1cm}M\cdot\vec{p} = \vec{c}\hspace{1cm}$ and we want to know what is in $\vec{p}$.
$ M\cdot\vec{p} = \vec{c} \hspace{1cm}\Rightarrow\hspace{1cm} \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 1 & 7 & 5 \\ \end{bmatrix} \cdot \begin{bmatrix} \color{red}{p_1} \\ \color{orange}{p_2} \\ \color{blue}{p_3} \end{bmatrix} =\begin{bmatrix} 34 \\ 69 \\ 48 \\ \end{bmatrix} \hspace{2cm} $
which yields a system of linear equations based on each individual's purchases (i.e., each row of $M$).
$ \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 1 & 7 & 5 \\ \end{bmatrix} \cdot \begin{bmatrix} \color{red}{p_1} \\ \color{orange}{p_2} \\ \color{blue}{p_3} \end{bmatrix} =\begin{bmatrix} 34 \\ 69 \\ 48 \\ \end{bmatrix} $
becomes
$ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ 10\color{red}{p_1} + 3\color{orange}{p_2} + 8\color{blue}{p_3} &= 69 \\ 1\color{red}{p_1} + 7\color{orange}{p_2} + 5\color{blue}{p_3} &= 48 \\ \end{align} $
This is a system of three equations containing three unknown variables (i.e., $\color{red}{p_1},\,\color{orange}{p_2},\,\color{blue}{p_3}$) and our task is to find solutions for the unknowns.
The strategy is to take one variable at a time (let's start with $\color{red}{p_1}$) and use the system of equations to find a formula for that variable that uses the other variables.
$ 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} = 34 \hspace{1cm}\Rightarrow\hspace{1cm} \color{red}{p_1} =\frac{34}{3}-2\color{orange}{p_2}-\frac{2}{3}\color{blue}{p_3} $
Next, we replace $\color{red}{p_1}$ with this formula throughout the system of equations and then group like terms:
$ \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ 10\color{red}{p_1} + 3\color{orange}{p_2} + 8\color{blue}{p_3} &= 69 \\ 1\color{red}{p_1} + 7\color{orange}{p_2} + 5\color{blue}{p_3} &= 48 \\ \end{align} }$ $\hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ 10\left(\frac{34}{3}-2\color{orange}{p_2}-\frac{2}{3}\color{blue}{p_3}\right) + 3\color{orange}{p_2} + 8\color{blue}{p_3} &= 69 \\ 1\left(\frac{34}{3}-2\color{orange}{p_2}-\frac{2}{3}\color{blue}{p_3}\right) + 7\color{orange}{p_2} + 5\color{blue}{p_3} &= 48\\ \end{align} } $ $\hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ 5\color{orange}{p_2} + \frac{13}{3}\color{blue}{p_3} &= \frac{110}{3} \\ \end{align} } $
Now we repeat the process by finding a formula for $\color{orange}{p_2}$ using the remaining unknown variables
$ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} = \frac{-133}{3} \hspace{1cm}\Rightarrow\hspace{1cm} \color{orange}{p_2} = \frac{133}{17\cdot3}+\frac{4}{17\cdot3}\color{blue}{p_3} = \frac{133}{51}+\frac{4}{51}\color{blue}{p_3} $
and replace $\color{orange}{p_2}$ throughout the system of equations:
$ \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ 5\color{orange}{p_2} + \frac{13}{3}\color{blue}{p_3} &= \frac{110}{3} \\ \end{align} } $ $ \hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ 5\left(\frac{133}{51}+\frac{4}{51}\color{blue}{p_3} \right) + \frac{13}{3}\color{blue}{p_3} &= \frac{110}{3} \\ \end{align} } $ $ \hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ \frac{723}{153}\color{blue}{p_3} &= \frac{3615}{153} \\ \end{align} } $
Or, simplifying a bit more, $\color{blue}{p_3} = \frac{3615}{153}\cdot\frac{153}{723} = 5$.
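As a quick aside, we can verify this fraction arithmetic exactly with Python's built-in fractions module (a minimal sketch):
from fractions import Fraction
# confirm that (3615/153) * (153/723) reduces exactly to 5
print(Fraction(3615, 153) * Fraction(153, 723))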
We know one of the prices! $\color{blue}{p_3} = 5$, which we can substitute back up into the system of equations:
$ \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ \color{blue}{p_3} &= 5 \\ \end{align} } $ $ \hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\cdot\color{blue}{5} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\cdot\color{blue}{5} &= \frac{-133}{3} \\ \color{blue}{p_3} &= 5 \\ \end{align} } $ $ \hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\cdot\color{blue}{5} &= 34 \\ \color{orange}{p_2} &= \frac{1}{-17}\left(\frac{-133}{3}-\frac{4}{3}\cdot\color{blue}{5}\right) = 3 \\ \color{blue}{p_3} &= 5 \\ \end{align} } $
So now we know $\color{blue}{p_3}=5$ and $\color{orange}{p_2}=3$. Once more...
$ \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\cdot\color{blue}{5} &= 34 \\ \color{orange}{p_2} &= 3 \\ \color{blue}{p_3} &= 5 \\ \end{align} } $ $ \hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} 3\color{red}{p_1} + 6\cdot\color{orange}{3} + 2\cdot\color{blue}{5} &= 34 \\ \color{orange}{p_2} &= 3 \\ \color{blue}{p_3} &= 5 \\ \end{align} } $ $ \hspace{1cm}\Rightarrow\hspace{1cm} \bbox[border: 1pt solid black]{ \begin{align} \color{red}{p_1} &= \frac{1}{3}\left(34-\color{orange}{18}-\color{blue}{10}\right) = 2 \\ \color{orange}{p_2} &= 3 \\ \color{blue}{p_3} &= 5 \\ \end{align} } $
Finally, let's check our solution: $ \hspace{1cm} M\cdot\vec{p} = \vec{c} \hspace{1cm}\Rightarrow\hspace{1cm} \begin{align} 3\cdot\color{red}{2} + 6\cdot\color{orange}{3} + 2\cdot\color{blue}{5} &= 34 \\ 10\cdot\color{red}{2} + 3\cdot\color{orange}{3} + 8\cdot\color{blue}{5} &= 69 \\ 1\cdot\color{red}{2} + 7\cdot\color{orange}{3} + 5\cdot\color{blue}{5} &= 48 \\ \end{align} $
M = np.array(
[
[3,6,2],
[10,3,8],
[1,7,5],
]
)
p = np.array([2,3,5])
np.dot(M,p)
array([34, 69, 48])
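We can also ask SymPy to solve the whole system in one step (a quick sketch using sympy.linsolve; SymPy is covered in more detail below):
import sympy
p1, p2, p3 = sympy.symbols("p1 p2 p3")
eqs = [
sympy.Eq(3*p1 + 6*p2 + 2*p3, 34),
sympy.Eq(10*p1 + 3*p2 + 8*p3, 69),
sympy.Eq(1*p1 + 7*p2 + 5*p3, 48),
]
# returns the solution set {(2, 3, 5)}
sympy.linsolve(eqs, [p1, p2, p3])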
Backsolving and Inverting Matrices¶
In the last video, we solved $ \hspace{1cm} M\cdot\vec{p}=\vec{c} \hspace{1cm} \Rightarrow \hspace{1cm} \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 1 & 7 & 5 \\ \end{bmatrix} \cdot \begin{bmatrix} \color{red}{p_1} \\ \color{orange}{p_2} \\ \color{blue}{p_3} \end{bmatrix} =\begin{bmatrix} 34 \\ 69 \\ 48 \\ \end{bmatrix} \hspace{1cm} $ finding $\hspace{1cm}\vec{p} = \begin{bmatrix} \color{red}{2} \\ \color{orange}{3} \\ \color{blue}{5} \end{bmatrix}$
The algorithm we used, finding a formula for one variable at a time and substituting it into the remaining equations, is called Gaussian elimination, row reduction, or backsolving.
Effectively, if we think about $M$ acting on $\vec{p}$ as a function, then, implicitly, we calculated the matrix that undoes the action of $M$.
This matrix that can act on $\vec{c}$ and give us $\vec{p}$ is called $M$'s inverse and we denote it as $M^{-1}$ because $$M^{-1}M\vec{p}=M^{-1}\vec{c}\hspace{1cm}\Rightarrow\hspace{1cm}\vec{p}=M^{-1}\vec{c}$$
If $M$ were a number instead of a matrix, then we would have $M^{-1}\cdot M = M/M = 1$, which explains the notation; indeed, the matrix version satisfies $M^{-1}M = I$ (the identity matrix).
To find $M^{-1}$, we can represent the problem from before using a special matrix representation called an augmented matrix where we concatenate the identity matrix onto the original matrix.
$ \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 1 & 7 & 5 \\ \end{bmatrix} \hspace{1cm} \Rightarrow \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 3 & 6 & 2 & 1 & 0 & 0\\ 10 & 3 & 8 & 0 & 1 & 0\\ 1 & 7 & 5 & 0 & 0 & 1\\ \end{array} \end{bmatrix} $
We now perform linear operations on the rows of the augmented matrix until we transform the original matrix on the left of the line into the identity matrix. The matrix on the right of the line will then be $M^{-1}$.
Starting with $ \begin{bmatrix} \begin{array}{ccc|ccc} 3 & 6 & 2 & 1 & 0 & 0\\ 10 & 3 & 8 & 0 & 1 & 0\\ 1 & 7 & 5 & 0 & 0 & 1\\ \end{array} \end{bmatrix} $
We first divide the first row ($R_1$) by 3 to make the left side look more like the identity matrix.
$ \begin{bmatrix} \begin{array}{ccc|ccc} 3 & 6 & 2 & 1 & 0 & 0\\ 10 & 3 & 8 & 0 & 1 & 0\\ 1 & 7 & 5 & 0 & 0 & 1\\ \end{array} \end{bmatrix} \hspace{1cm} \Rightarrow R_1 / 3 \Rightarrow \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 10 & 3 & 8 & 0 & 1 & 0\\ 1 & 7 & 5 & 0 & 0 & 1\\ \end{array} \end{bmatrix} $
Then, for each remaining row, we subtract $R_1$ multiplied by the value in the first column.
Remember, our goal is to make the left of the line look like the identity matrix.
$ \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 10 & 3 & 8 & 0 & 1 & 0\\ 1 & 7 & 5 & 0 & 0 & 1\\ \end{array} \end{bmatrix} \hspace{1cm} \Rightarrow R_2-10\cdot R_1,\,R_3-1\cdot R_1 \Rightarrow \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 0 & -17 & 4/3 & -10/3 & 1 & 0\\ 0 & 5 & 13/3 & -1/3 & 0 & 1\\ \end{array} \end{bmatrix} $
Next, we divide $R_2$ by $-17$ and subtract the appropriate multiple of $R_2$ from $R_3$.
$ \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 0 & -17 & 4/3 & -10/3 & 1 & 0\\ 0 & 5 & 13/3 & -1/3 & 0 & 1\\ \end{array} \end{bmatrix} \hspace{1cm} \Rightarrow R_2 / (-17),\, R_3-5\cdot R_2 \Rightarrow \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 0 & 1 & -4/51 & 10/51 & -1/17 & 0\\ 0 & 0 & 241/51 & -67/51 & 5/17 & 1\\ \end{array} \end{bmatrix} $
Already, we are starting to see some of the same fractions that we encountered when we solved the system of equations before.
By multiplying the last row by $51/241$, we get
$ \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 0 & 1 & -4/51 & 10/51 & -1/17 & 0\\ 0 & 0 & 1 & -67/241 & 15/241 & 51/241\\ \end{array} \end{bmatrix} $
Now, we subtract multiples of $R_3$ from $R_2$ and $R_1$
$ \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 2/3 & 1/3 & 0 & 0\\ 0 & 1 & -4/51 & 10/51 & -1/17 & 0\\ 0 & 0 & 1 & -67/241 & 15/241 & 51/241\\ \end{array} \end{bmatrix} \hspace{1cm} \Rightarrow R_2-(-4/51)R_3,\, R_1-(2/3)R_3 \Rightarrow \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 0 & \frac{125}{241} & \frac{-10}{241} & \frac{-34}{241}\\ 0 & 1 & 0 & \frac{42}{241} & \frac{-13}{241} & \frac{4}{241}\\ 0 & 0 & 1 & \frac{-67}{241} & \frac{15}{241} & \frac{51}{241}\\ \end{array} \end{bmatrix} $
and, finally, subtracting multiples of $R_2$
$ \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & 0 & \frac{125}{241} & \frac{-10}{241} & \frac{-34}{241}\\ 0 & 1 & 0 & \frac{42}{241} & \frac{-13}{241} & \frac{4}{241}\\ 0 & 0 & 1 & \frac{-67}{241} & \frac{15}{241} & \frac{51}{241}\\ \end{array} \end{bmatrix} \hspace{1cm} \Rightarrow R_1-2R_2 \Rightarrow \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 0 & 0 & \frac{41}{241} & \frac{16}{241} & - \frac{42}{241}\\ 0 & 1 & 0 & \frac{42}{241} & - \frac{13}{241} & \frac{4}{241} \\ 0 & 0 & 1 & - \frac{67}{241} & \frac{15}{241} & \frac{51}{241} \\ \end{array} \end{bmatrix} $
This gives us $\hspace{1cm}M^{-1}=\begin{bmatrix} \frac{41}{241} & \frac{16}{241} & - \frac{42}{241}\\ \frac{42}{241} & - \frac{13}{241} & \frac{4}{241} \\ - \frac{67}{241} & \frac{15}{241} & \frac{51}{241} \\ \end{bmatrix}\hspace{1cm}$ (the block to the right of the line in the fully reduced augmented matrix).
We can test it out by computing
$ M^{-1}\vec{c} \hspace{1cm}= \hspace{1cm} \begin{bmatrix} \frac{41}{241} & \frac{16}{241} & - \frac{42}{241}\\ \frac{42}{241} & - \frac{13}{241} & \frac{4}{241} \\ - \frac{67}{241} & \frac{15}{241} & \frac{51}{241} \\ \end{bmatrix}\cdot\begin{bmatrix} 34 \\ 69 \\ 48 \\ \end{bmatrix} \hspace{1cm}= \hspace{1cm} \begin{bmatrix} \color{red}{2}\\ \color{orange}{3}\\ \color{blue}{5}\\ \end{bmatrix}=\vec{p} $
M = np.array(
[
[3,6,2],
[10,3,8],
[1,7,5],
]
)
M1 = np.array(
[
[41/241,16/241,-42/241],
[42/241,-13/241,4/241],
[-67/241,15/241,51/241],
]
)
c = np.array([34,69,48])
np.dot(M1,c)
array([2., 3., 5.])
We can also check that $M^{-1}M=I$:
$ \begin{bmatrix} \frac{41}{241} & \frac{16}{241} & - \frac{42}{241}\\ \frac{42}{241} & - \frac{13}{241} & \frac{4}{241} \\ - \frac{67}{241} & \frac{15}{241} & \frac{51}{241} \\ \end{bmatrix} \cdot \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 1 & 7 & 5 \\ \end{bmatrix} = \begin{bmatrix} 1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1 \\ \end{bmatrix} $
# np.dot(M1,M)
np.round(np.dot(M1,M),decimals=10)
array([[ 1., -0., 0.], [-0., 1., -0.], [-0., -0., 1.]])
# using symbolic programming to check my work throughout
from sympy import *
M = Matrix(
[
[3,6,2,1,0,0],
[10,3,8,0,1,0],
[1,7,5,0,0,1]
])
M[0,:] /= 3
M[1,:] = M[1,:]-10*M[0,:]
M[2,:] = M[2,:]-1*M[0,:]
M[1,:] /= -17
M[2,:] = M[2,:]-5*M[1,:]
M[2,:] /= M[2,2]
M[1,:] -= M[1,2]*M[2,:]
M[0,:] -= M[0,2]*M[2,:]
M[0,:] -= M[0,1]*M[1,:]
# M
print(latex(M))
print(M[:,3:]*Matrix([34,69,48]))
M[:,3:]*Matrix([[3,6,2],[10,3,8],[1,7,5]])
\left[\begin{matrix}1 & 0 & 0 & \frac{41}{241} & \frac{16}{241} & - \frac{42}{241}\\0 & 1 & 0 & \frac{42}{241} & - \frac{13}{241} & \frac{4}{241}\\0 & 0 & 1 & - \frac{67}{241} & \frac{15}{241} & \frac{51}{241}\end{matrix}\right] Matrix([[2], [3], [5]])
Perils in Backsolving¶
Backsolving gives us exact solutions to unknown variables.
But backsolving only works for specific types of matrices.
Let's cover some of the restrictions.
$ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ 10\color{red}{p_1} + 3\color{orange}{p_2} + 8\color{blue}{p_3} &= 69 \\ 1\color{red}{p_1} + 7\color{orange}{p_2} + 5\color{blue}{p_3} &= 48 \\ \end{align} $
This is a system of three equations containing three unknown variables.
But what if we ignore the last equation, leaving two equations and three unknown variables?
In solving this smaller system of equations,
$ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ 10\color{red}{p_1} + 3\color{orange}{p_2} + 8\color{blue}{p_3} &= 69 \\ \end{align} $
we follow the same process as in the original case.
Eventually, we arrive at the step where we attempt to solve for $\color{blue}{p_3}$.
Instead of
$ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ \frac{723}{153}\color{blue}{p_3} &= \frac{3615}{153} \\ \end{align} $
we have
$ \begin{align} 3\color{red}{p_1} + 6\color{orange}{p_2} + 2\color{blue}{p_3} &= 34 \\ -17\color{orange}{p_2} + \frac{4}{3}\color{blue}{p_3} &= \frac{-133}{3} \\ \end{align} $
and we are stuck!
Instead of a unique solution for $\color{orange}{p_2}$ and $\color{blue}{p_3}$, we end up with infinitely many possible solutions.
Each equation defines a plane in 3-dimensional space, and every point on the line where these two planes intersect is a possible solution.
To pin down a unique solution to a system of linear equations, you must have at least as many (independent) equations as unknown variables.
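We can see this infinite solution set explicitly with SymPy: given only the first two equations, linsolve returns a solution parameterized by the free variable $\color{blue}{p_3}$ (a small sketch):
import sympy
p1, p2, p3 = sympy.symbols("p1 p2 p3")
eqs = [
sympy.Eq(3*p1 + 6*p2 + 2*p3, 34),
sympy.Eq(10*p1 + 3*p2 + 8*p3, 69),
]
# p3 remains free: every choice of p3 gives a valid (p1, p2)
sympy.linsolve(eqs, [p1, p2, p3])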
But, even with three equations, we can still run into problems if one equation is a "linear combination" of the other equations.
For example, in the following system of linear equations, the coefficients in the third equation are double the coefficients in the second equation (including the RHS):
$ M\cdot\vec{p}=\vec{c} \hspace{1cm} \Rightarrow \hspace{1cm} \begin{bmatrix} 3 & 6 & 2 \\ 10 & 3 & 8 \\ 20 & 6 & 16 \\ \end{bmatrix} \cdot \begin{bmatrix} \color{red}{p_1} \\ \color{orange}{p_2} \\ \color{blue}{p_3} \end{bmatrix} =\begin{bmatrix} 34 \\ 69 \\ 138 \\ \end{bmatrix} $
Here again, we have three equations and three unknown variables.
Let's try to invert the matrix $M$ (remember, this is equivalent to solving the system of linear equations).
Following the same process from before (since the first two rows of $M$ are unchanged), we get to
$ \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & \frac{2}{3} & \frac{1}{3} & 0 & 0\\0 & 1 & - \frac{4}{51} & \frac{10}{51} & - \frac{1}{17} & 0\\0 & 0 & 0 & 0 & -2 & 1 \end{array} \end{bmatrix} $
and now we are stuck again!
Our ability to solve for $\color{blue}{p_3}$ using the third row has disappeared!
This result indicates that $M$ is not invertible; that is, $M^{-1}$ does not exist.
We can actually test for this by checking that $\hspace{1cm}\det\left(M\right)=0\hspace{1cm}$ which indicates that the volume of the 3-dimensional parallelepiped produced by $M$ acting on the unit cube is $0$.
This is a problem because multiple points in the unit cube map to the same point in the flattened parallelepiped.
If $M^{-1}$ existed, then $M^{-1}$ acting on the parallelepiped would have to recover the unit cube.
But a single point cannot map back to multiple different points when $M^{-1}$ acts on the parallelepiped, so the $M$ transformation cannot be inverted.
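NumPy reflects this failure directly: the determinant is (numerically) zero, and asking for the inverse raises an error (a quick sketch):
M = np.array(
[
[3,6,2],
[10,3,8],
[20,6,16],
]
)
print(np.linalg.det(M))  # 0, up to floating-point error
try:
    np.linalg.inv(M)
except np.linalg.LinAlgError as err:
    print("Not invertible:", err)  # "Singular matrix"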
# using symbolic programming to check my work throughout
from sympy import *
M = Matrix(
[
[3,6,2,1,0,0],
[10,3,8,0,1,0],
[20,6,16,0,0,1]
])
M[0,:] /= M[0,0]
M[1,:] = M[1,:]-M[1,0]*M[0,:]
M[2,:] = M[2,:]-M[2,0]*M[0,:]
# M[3,:] = M[3,:]-M[3,0]*M[0,:]
M[1,:] /= M[1,1]
M[2,:] = M[2,:]-M[2,1]*M[1,:]
print(latex(M))
M
# print(M[:,3:]*Matrix([34,69,48]))
# M[:,3:]*Matrix([[3,6,2],[10,3,8],[1,7,5]])
\left[\begin{matrix}1 & 2 & \frac{2}{3} & \frac{1}{3} & 0 & 0\\0 & 1 & - \frac{4}{51} & \frac{10}{51} & - \frac{1}{17} & 0\\0 & 0 & 0 & 0 & -2 & 1\end{matrix}\right]
M = Matrix(
[
[3,6,2,1,0,0],
[10,3,8,0,1,0],
[20,6,16,0,0,1]
])
M = M[:,:3]
M.det()
However, if we add the original third equation back in as a fourth equation, we can still solve for the three unknowns.
$ \begin{bmatrix} \begin{array}{ccc|ccc} 3 & 6 & 2 & 1 & 0 & 0\\10 & 3 & 8 & 0 & 1 & 0\\20 & 6 & 16 & 0 & 0 & 1\\1 & 7 & 5 & 0 & 0 & 1 \end{array} \end{bmatrix} \hspace{1cm} $ we arrive at $ \hspace{1cm} \begin{bmatrix} \begin{array}{ccc|ccc} 1 & 2 & \frac{2}{3} & \frac{1}{3} & 0 & 0\\0 & 1 & - \frac{4}{51} & \frac{10}{51} & - \frac{1}{17} & 0\\0 & 0 & 0 & 0 & -2 & 1\\0 & 0 & 1 & - \frac{67}{241} & \frac{15}{241} & \frac{51}{241} \end{array} \end{bmatrix} $
Here, the problematic third row is zeroed out on the left side of the line.
And we can still solve for $\color{blue}{p_3}$ in the fourth row, which we can then use to solve for the other unknown variables following our original process.
M = Matrix(
[
[3,6,2,1,0,0],
[10,3,8,0,1,0],
[20,6,16,0,0,1],
[1,7,5,0,0,1],
])
print(latex(M))
M[0,:] /= M[0,0]
M[1,:] = M[1,:]-M[1,0]*M[0,:]
M[2,:] = M[2,:]-M[2,0]*M[0,:]
M[3,:] = M[3,:]-M[3,0]*M[0,:]
M[1,:] /= M[1,1]
M[2,:] = M[2,:]-M[2,1]*M[1,:]
M[3,:] -= M[3,1]*M[1,:]
M[3,:] /= M[3,2]
# M[2,:] /= M[2,2]
# M[1,:] -= M[1,2]*M[2,:]
# M[0,:] -= M[0,2]*M[2,:]
# M[0,:] -= M[0,1]*M[1,:]
print(latex(M))
M
\left[\begin{matrix}3 & 6 & 2 & 1 & 0 & 0\\10 & 3 & 8 & 0 & 1 & 0\\20 & 6 & 16 & 0 & 0 & 1\\1 & 7 & 5 & 0 & 0 & 1\end{matrix}\right] \left[\begin{matrix}1 & 2 & \frac{2}{3} & \frac{1}{3} & 0 & 0\\0 & 1 & - \frac{4}{51} & \frac{10}{51} & - \frac{1}{17} & 0\\0 & 0 & 0 & 0 & -2 & 1\\0 & 0 & 1 & - \frac{67}{241} & \frac{15}{241} & \frac{51}{241}\end{matrix}\right]
A third consideration is that we are assuming each grocery item has a single price.
However, in the real world, the price of an apple might vary from store to store.
In this case, the true price of an apple is actually a distribution of prices.
Backsolving is too rigid to handle this scenario and probabilistic approaches are required instead.
We will cover this later in the course.
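As a small preview of that idea (a hedged sketch using np.linalg.lstsq, not the probabilistic machinery itself), adding extra receipts makes the system overdetermined, and least squares finds the prices that best fit all of them:
# a hypothetical fourth shopper's quantities make the system overdetermined
M4 = np.array([[3,6,2],[10,3,8],[1,7,5],[2,2,2]])
rng = np.random.default_rng(0)
# receipts with small store-to-store price noise added
noisy_c = M4 @ np.array([2,3,5]) + rng.normal(0, 0.5, size=4)
# no exact solution exists; lstsq returns the best compromise
p_fit, *_ = np.linalg.lstsq(M4, noisy_c, rcond=None)
print(p_fit)  # close to [2, 3, 5], but not exact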
Python Programming and Inverting Matrices¶
We can use Numpy to invert matrices.
# problem setup
# matrix of individuals' purchasing quantities
M = np.array(
[
[3,6,2],
[10,3,8],
[1,7,5],
]
)
# prices per item
prices = np.array([2,3,5])
# calculate total costs per individual
totalCosts = np.dot(M,prices)
totalCosts
array([34, 69, 48])
# using Numpy to invert a matrix
Minv = np.linalg.inv(M)
print(Minv)
# test that Minv*M yields identity matrix
# note '@' is an alias for np.dot
# so A@B is equivalent to np.dot(A,B)
print(Minv@M)
# same thing but round resulting matrix components
print(np.round(Minv@M,decimals=10))
# use Minv to solve for grocery prices
prices2 = np.dot(Minv,totalCosts)
prices2
[[ 0.17012448 0.06639004 -0.17427386] [ 0.17427386 -0.05394191 0.01659751] [-0.2780083 0.06224066 0.21161826]] [[ 1.00000000e+00 1.66533454e-16 5.55111512e-17] [-2.39391840e-16 1.00000000e+00 -1.14491749e-16] [ 1.66533454e-16 -2.77555756e-16 1.00000000e+00]] [[ 1. 0. 0.] [-0. 1. -0.] [ 0. -0. 1.]]
array([2., 3., 5.])
# another option is the Numpy solve() function to
# treat the same problem in its original form.
# Behind the scenes, this approach factorizes M (via LU decomposition)
# rather than explicitly forming M's inverse.
prices3 = np.linalg.solve(M,totalCosts)
prices3
array([2., 3., 5.])
Numpy is highly optimized from a computational perspective and is ideal for solving these linear algebra problems on computers.
However, the Python package SymPy is more useful for understanding these linear algebra problems from a human-readable perspective.
SymPy does symbolic arithmetic. This type of computing is not a focus for this course, so I will not cover too many details. But, it's essentially a tool for performing human-readable math.
# Numpy (like the CPU) does calculations on binary representations
a,b = 10,2
print("a+b = %d" % int(a+b))
# equivalent calculation for simple unsigned bit strings
import bitstring
bit_a = bitstring.Bits(uint=a,length=16)
bit_b = bitstring.Bits(uint=b,length=16)
print("bit_a: %s" % bit_a.b)
print("bit_b: %s" % bit_b.b)
def addBitStrings(b1,b2):
    # function for adding two binary strings
    # using bitwise operations.
    # This mimics how CPUs perform calculations.
    carry = 0
    Z = bitstring.Bits(uint=0,length=16)
    while b2 != Z:
        carry = b1 & b2   # positions where both bits are 1 generate a carry
        b1 = b1 ^ b2      # XOR adds the bits without the carry
        b2 = carry << 1   # shift the carry into the next higher bit
    return b1
bit_c = addBitStrings(bit_a,bit_b)
print("bit_c: %s" % bit_c.b)
print("bit_c as an integer: %d" % bit_c.i)
a+b = 12 bit_a: 0000000000001010 bit_b: 0000000000000010 bit_c: 0000000000001100 bit_c as an integer: 12
# normal calculations with fractions produce floating-point decimals
a = 1/2
b = 2/3
print(a+b)
# import SymPy (used below) and the standard-library fractions module
import sympy,fractions
# define exact fractions instead of floating-point variables
x = fractions.Fraction(1,2)
y = fractions.Fraction(2,3)
e = x+y
print(e)
print(7/6)
1.1666666666666665 7/6 1.1666666666666667
import pprint
def symbolicGaussianElimination(M):
    n = M.shape[0]
    aM = M.row_join(sympy.eye(n))
    # From top to bottom
    print("Going down the rows!")
    for i in range(n):
        # normalize the ith row to create 1 in the (i,i) position
        aM[i,:] /= aM[i,i]
        for j in range(i+1,n):
            # remove multiple of row i from all subsequent rows
            aM[j,:] -= aM[j,i]*aM[i,:]
        # print current step
        pprint.pp(aM)
    print("Going back up the rows!")
    for i in range(n-1,-1,-1):
        for j in range(0,i):
            # for each row above the ith row, subtract a multiple of row i
            aM[j,:] -= aM[j,i]*aM[i,:]
        # print current step
        pprint.pp(aM)
    return aM
M = sympy.Matrix(
[
[3,6,2],
[10,3,8],
[1,7,5],
]
)
symbolicGaussianElimination(M);
Going down the rows! Matrix([ [1, 2, 2/3, 1/3, 0, 0], [0, -17, 4/3, -10/3, 1, 0], [0, 5, 13/3, -1/3, 0, 1]]) Matrix([ [1, 2, 2/3, 1/3, 0, 0], [0, 1, -4/51, 10/51, -1/17, 0], [0, 0, 241/51, -67/51, 5/17, 1]]) Matrix([ [1, 2, 2/3, 1/3, 0, 0], [0, 1, -4/51, 10/51, -1/17, 0], [0, 0, 1, -67/241, 15/241, 51/241]]) Going back up the rows! Matrix([ [1, 2, 0, 125/241, -10/241, -34/241], [0, 1, 0, 42/241, -13/241, 4/241], [0, 0, 1, -67/241, 15/241, 51/241]]) Matrix([ [1, 0, 0, 41/241, 16/241, -42/241], [0, 1, 0, 42/241, -13/241, 4/241], [0, 0, 1, -67/241, 15/241, 51/241]]) Matrix([ [1, 0, 0, 41/241, 16/241, -42/241], [0, 1, 0, 42/241, -13/241, 4/241], [0, 0, 1, -67/241, 15/241, 51/241]])
# Use Sympy to represent a symbolic matrix
m11,m12,m21,m22 = sympy.symbols("m11 m12 m21 m22")
M = sympy.Matrix(
[
[m11,m12],
[m21,m22]
]
)
M
# use Sympy to symbolically invert 2x2 matrix
M1 = M.inv()
M1
f = sympy.lambdify([m11,m12,m21,m22],M1)
f(5,7,2,3)
array([[ 3., -7.], [-2., 5.]])
# the symbolic solution is nice enough for 2x2 matrices, but what about 3x3?
m11,m12,m13,m21,m22,m23,m31,m32,m33 = sympy.symbols("m11 m12 m13 m21 m22 m23 m31 m32 m33")
M = sympy.Matrix(
[
[m11,m12,m13],
[m21,m22,m23],
[m31,m32,m33]
]
)
M
# craziness
M.inv()
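Even for the symbolic 3x3 case, we can confirm that this monstrous expression really is the inverse: multiplying it by $M$ and simplifying should return the identity matrix (a quick, if slow, check):
# the symbolic inverse times M should simplify to the 3x3 identity
sympy.simplify(M.inv()*M)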
Backsolving Example: Gravitational Lensing¶
The science of gravitational lensing is based on the theory of general relativity, in which massive objects curve the spacetime around them.
When light passes near a massive object, the curved spacetime bends the path of that light. This phenomenon is referred to as gravitational lensing.
The simplest gravitational lens models, such as a point mass or the Singular Isothermal Sphere (SIS) model, describe a single compact mass lensing light from a distant galaxy.
In this case, the lens equation can often be represented as the following linear equation:
$A\cdot X=Y$
where $X$ and $Y$ are matrices representing the source and image positions on the sky, respectively, and
$A$ is a 2x2 matrix that represents the lensing effect, which depends on the mass and distance of the lensing object.
$A$ encodes both the rotation and tidal forces of the gravitational lensing.
To find the source position given the image position (i.e., where the galaxy would appear without the lensing mass bending its light), we need to invert the matrix $A$.
That is, if we estimate the size and mass of the object causing the gravitational lensing (maybe a black hole is the lens!), then we can estimate $A$ and calculate its inverse $A^{-1}$.
By multiplying the inverted matrix with image position $Y$, we find the source position $X$.
$ X = A^{-1}\cdot Y $
Knowing the source position and the mass lets us measure the deformation of the image, effectively using a massive object as a natural telescope.
This "simple" case has many assumptions - for example, light is diverted only slightly by the gravitational field and the sizes of the objects involved are smaller than the distances between them. But it shows that gravitational lensing can be described (and inverted) through matrix algebra.
# using code from https://github.com/adamdempsey90/gravitational-lensing/tree/master
from matplotlib import cm
import matplotlib.pyplot as plt
# from scipy.misc import imread
from ipywidgets import interact,fixed,Checkbox,Text
import IPython.display as disp
import dm_funcs as dm
global x,y,galaxy_properties,gal
@interact(n=(10,400,50),sigma=(.01,.5,.01),xcen=(-1,1,.1),ycen=(-1,1,.1),axratio=(.2,4,.1),posangle=(0,180,5))
def set_number_of_points(n=100,sigma=0.2,xcen=0,ycen=0,axratio=2.0,posangle=30):
    """Set up a mock galaxy with the input parameters.
    n: sets the number of points in each direction.
    sigma: sets the 'width' of the galaxy.
    xcen & ycen: set the center of the galaxy.
    axratio: sets the ellipticity of the galaxy.
    posangle: sets the orientation of the galaxy. """
    global x,y,galaxy_properties,gal
    (x,y) = dm.load_grid(n)
    galaxy_properties={'peak':1,'sigma':sigma,'xcen':xcen,'ycen':ycen,'axratio':axratio,'posangle':posangle}
    gal = dm.create_galaxy(x,y,**galaxy_properties)
    plt.imshow(gal);
def compute_deflection2(x,y,**kwargs):
    """ Compute the deflection and shear from a singular isothermal ellipsoid potential """
    mass = kwargs['Mass']
    xcen = kwargs['xcen']
    ycen = kwargs['ycen']
    axratio = np.abs(kwargs['axratio'])
    posangle = kwargs['posangle']
    tol = .001
    if axratio>1:
        axratio = 1.0/axratio
        posangle += 90
    phi = np.deg2rad(posangle)
    def rotate(x,y,phi,clockwise=False):
        # rotation matrix
        if clockwise:
            R = np.array(
                [
                    [np.cos(phi),np.sin(phi)],
                    [-np.sin(phi),np.cos(phi)]
                ]
            )
        else:
            # counterclockwise rotation uses the transposed matrix
            R = np.array(
                [
                    [np.cos(phi),-np.sin(phi)],
                    [np.sin(phi),np.cos(phi)]
                ]
            )
        P = np.stack((x.ravel(),y.ravel()))
        P = R@P
        x_new = P[0,:].reshape(x.shape)
        y_new = P[1,:].reshape(y.shape)
        return x_new,y_new
    x_new,y_new = rotate(x-xcen,y-ycen,phi,clockwise=True)
    r = np.sqrt(axratio*x_new*x_new + y_new*y_new/axratio)
    q = np.sqrt(1./axratio - axratio)
    softening = (r == 0).astype(float)
    fac_x = x_new/(r + softening)
    fac_y = y_new/(r + softening)
    if q>=tol:
        x_def = (mass/q)*np.arctan(q*fac_x)
        y_def = (mass/q)*np.arctanh(q*fac_y)
    else:
        x_def = mass*fac_x
        y_def = mass*fac_y
    x_out,y_out = rotate(x_def,y_def,phi,clockwise=False)
    return x_out,y_out
@interact(
DarkMatterSize=(0,3,.1),
DarkMatterXCenter=(-1,1,.01),
DarkMatterYCenter=(-1,1,.01),
DarkMatterAxisRatio=(.01,4,.03),
DarkMatterPositionAngle=(0,180,5)
)
def plot_galaxy(DarkMatterSize=.1,DarkMatterXCenter=0,DarkMatterYCenter=0,DarkMatterAxisRatio=1,DarkMatterPositionAngle=0):
    global x,y,galaxy_properties,gal
    plt.close();
    # xl,yl = dm.compute_deflection(x,y,Mass=DarkMatterSize,xcen=DarkMatterXCenter,
    #                               ycen=DarkMatterYCenter,axratio=DarkMatterAxisRatio,
    #                               posangle=DarkMatterPositionAngle)
    xl,yl = compute_deflection2(x,y,Mass=DarkMatterSize,xcen=DarkMatterXCenter,
                                ycen=DarkMatterYCenter,axratio=DarkMatterAxisRatio,
                                posangle=DarkMatterPositionAngle)
    gal = dm.create_galaxy(x-xl,y-yl,**galaxy_properties)
    plt.imshow(gal)