Fastai Linear Algebra - chapter 1

Image background removal with SGD
Published

November 11, 2021

# !pip install imageio-ffmpeg moviepy imageio

import imageio
import moviepy.editor as mpe
import numpy as np
import scipy
from pathlib import Path
from PIL import Image

%matplotlib inline
import matplotlib.pyplot as plt
 
np.set_printoptions(precision=4, suppress=True)
# download video from https://github.com/momonala/image-processing-projects/blob/master/background_removal/video/Video_003/Video_003.avi
!curl -o Video_003.avi https://raw.githubusercontent.com/momonala/image-processing-projects/master/background_removal/video/Video_003/Video_003.avi
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1241k  100 1241k    0     0  3505k      0 --:--:-- --:--:-- --:--:-- 3495k
video = mpe.VideoFileClip("Video_003.avi")
print(f'fps - {video.fps} and duration is {video.duration}')
fps - 7.0 and duration is 113.57
clip = video.subclip(0, 50).ipython_display(width=300)
clip
Moviepy - Building video __temp__.mp4.
Moviepy - Writing video __temp__.mp4
                                                                
Moviepy - Done !
Moviepy - video ready __temp__.mp4

Helper Methods

def resize(img_array, dims):
    return np.array(Image.fromarray(img_array).resize(size=dims))



def rgb2gray(rgb):
    # return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])
    return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140])

Format the Data

An image from 1 moment in time is 120 pixels by 160 pixels (when scaled). We can unroll that picture into a single tall column. So instead of having a 2D picture that is 120×160, we have a 1×19,200 column

This isn’t very human-readable, but it’s handy because it lets us stack the images from different times on top of one another, to put a video all into 1 matrix. If we took the video image every hundredth of a second for 100 seconds (so 10,000 different images, each from a different point in time), we’d have a 10,000×19,200 matrix, representing the video!

scale = 0.5 # adjust scale to change resolution of image
dims = (int(240 * scale), int(320 * scale))
print(f'dims - {dims}')
fps = video.fps
fps
dims - (120, 160)
7.0
clip = video.subclip(0, 1000)
frame = clip.get_frame(100/fps)
# frame[..., :3]
gray = rgb2gray(frame)
gray.shape
resized = resize(gray, dims)
print(resized.shape)
(160, 120)
%%time
def create_data_matrix_from_video(clip, fps=5, scale=0.5):
    # get dimension of each frame
    dims = clip.get_frame(0).shape[:2]
    
    # get scaled dimensions
    dims = [int(o*scale) for o in dims]
    print(dims)
    return np.vstack([resize(rgb2gray(clip.get_frame(i/float(fps))), dims).astype(int).flatten() for i in range(int(fps) * int(clip.duration))]).T

M = create_data_matrix_from_video(video.subclip(0,100), fps, scale)
print(M.shape, M.dtype)
[120, 160]
(19200, 700) int64
CPU times: user 2.21 s, sys: 163 ms, total: 2.38 s
Wall time: 2.86 s

SVD

SVD for a matrix A is given by

\[ A_{(m,n)} = U_{(m,m)}{\cdot}Sigma_{(m,n)}{\cdot}V^T_{(n,n)} \]

  • U is called left singular matrix
  • Sigma is called sing

Sigma is a diagonal matrix. Lets check with an example

from scipy.linalg import svd
from typing import Tuple

def compute_svd(shape: Tuple, full_matrices=True):
    A = np.random.randn(*shape)
    U, s, VT = svd(A, full_matrices=full_matrices)
    print(f'{A.shape} Matrix with full_matrices {full_matrices}, decomposes into', U.shape, s.shape, VT.shape)
    return U, s, VT

def reconstruct(U, s, VT):
    m = U.shape[0]
    n = VT.shape[0]
    
    sigma = np.zeros((m,n))
    if m > n:
        sigma[:n, :] = np.diag(s) 
    else: 
        sigma[:, :m] = np.diag(s)
    return np.linalg.multi_dot([U, sigma, VT])
        
    

_, _, _ = compute_svd((2,3), full_matrices=True)
_, _, _ = compute_svd((3,2), full_matrices=True)


_, _, _ = compute_svd((3,10), full_matrices=True)
_, _, _ = compute_svd((3,10), full_matrices=False)
(2, 3) Matrix with full_matrices True, decomposes into (2, 2) (2,) (3, 3)
(3, 2) Matrix with full_matrices True, decomposes into (3, 3) (2,) (2, 2)
(3, 10) Matrix with full_matrices True, decomposes into (3, 3) (3,) (10, 10)
(3, 10) Matrix with full_matrices False, decomposes into (3, 3) (3,) (3, 10)

Check that we are able to reconstruct the matrix

A = np.random.randn(4, 3)
U, s, VT = svd(A, full_matrices=full_matrices)
A_recons = reconstruct(U, s, VT)
np.allclose(A, A_recons)
True

Pseudoinverse

Matrix inversion is not defined for matrices that are not square. […] When A has more columns than rows, then solving a linear equation using the pseudoinverse provides one of the many possible solutions.

%%time
U, s, V = np.linalg.svd(M, full_matrices=False)
print(M.shape, U.shape, s.shape, V.shape)
(3, 2) (3, 2) (2,) (2, 2)
CPU times: user 358 µs, sys: 468 µs, total: 826 µs
Wall time: 810 µs
%%time
from scipy.linalg import svd
U, s, VT = svd(M)
print(M.shape, U.shape, s.shape, VT.shape)
(19200, 700) (19200, 19200) (700,) (700, 700)
CPU times: user 2min 6s, sys: 2.86 s, total: 2min 8s
Wall time: 1min 11s
M = np.random.randn(3,2)
U, s, VT = svd(M)
print(M.shape, U.shape, s.shape, VT.shape)
(3, 2) (3, 3) (2,) (2, 2)