# !pip install imageio-ffmpeg moviepy imageio
import imageio
import moviepy.editor as mpe
import numpy as np
import scipy
from pathlib import Path
from PIL import Image
%matplotlib inline
import matplotlib.pyplot as plt
# Print arrays with 4 decimal places and without scientific notation.
np.set_printoptions(precision=4, suppress=True)
# download video from https://github.com/momonala/image-processing-projects/blob/master/background_removal/video/Video_003/Video_003.avi
!curl -o Video_003.avi https://raw.githubusercontent.com/momonala/image-processing-projects/master/background_removal/video/Video_003/Video_003.avi
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1241k 100 1241k 0 0 3505k 0 --:--:-- --:--:-- --:--:-- 3495k
# Open the downloaded clip and report its frame rate and length.
video = mpe.VideoFileClip("Video_003.avi")
print(f'fps - {video.fps} and duration is {video.duration}')
fps - 7.0 and duration is 113.57
# Render the first 50 seconds inline; the bare name displays the result.
clip = video.subclip(0, 50).ipython_display(width=300)
clip
Moviepy - Building video __temp__.mp4.
Moviepy - Writing video __temp__.mp4
Moviepy - Done !
Moviepy - video ready __temp__.mp4
Helper Methods
def resize(img_array, dims):
    """Resize a 2-D image array with PIL and return the result as an array.

    Args:
        img_array: Image data accepted by ``PIL.Image.fromarray``.
        dims: Target size, passed unchanged as PIL's ``size`` argument.

    Returns:
        ``np.ndarray`` holding the resized image.

    Note:
        PIL interprets ``size`` as ``(width, height)``, so the returned
        array has shape ``(dims[1], dims[0])`` -- e.g. ``dims=(120, 160)``
        yields a ``(160, 120)`` array. NOTE(review): callers that build
        ``dims`` as ``(height, width)`` get a transposed aspect ratio;
        confirm whether that is intended before changing it.
    """
    return np.array(Image.fromarray(img_array).resize(size=dims))
def rgb2gray(rgb):
    """Convert an RGB(A) array to grayscale by a weighted channel sum.

    Only the first three entries of the last axis are used, so an alpha
    channel (if present) is ignored.

    Args:
        rgb: Array whose last axis holds at least the R, G, B channels.

    Returns:
        Array with the last axis reduced away, i.e. shape ``rgb.shape[:-1]``.
    """
    # Weights look like the rounded BT.601 luma coefficients
    # (0.299, 0.587, 0.114) -- presumably the MATLAB rgb2gray variant.
    return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140])
Format the Data
An image from 1 moment in time is 120 pixels by 160 pixels (when scaled). We can unroll that picture into a single tall column. So instead of having a 2D picture that is 120×160, we have a 19,200×1 column.
This isn’t very human-readable, but it’s handy because it lets us place the columns for different times side by side, to put a video all into 1 matrix. If we took the video image every hundredth of a second for 100 seconds (so 10,000 different images, each from a different point in time), we’d have a 19,200×10,000 matrix, representing the video!
scale = 0.5  # adjust scale to change resolution of image
# Scaled frame size derived from the hard-coded 240 x 320 source size.
dims = (int(240 * scale), int(320 * scale))
print(f'dims - {dims}')
fps = video.fps
fps
dims - (120, 160)
7.0
# Grab one frame, convert it to grayscale and resize it as a sanity check.
clip = video.subclip(0, 1000)
frame = clip.get_frame(100/fps)
# frame[..., :3]
gray = rgb2gray(frame)
gray.shape
resized = resize(gray, dims)
print(resized.shape)
(160, 120)
%%time
def create_data_matrix_from_video(clip, fps=5, scale=0.5):
    """Build a data matrix with one flattened grayscale frame per column.

    Frames are sampled at times ``i / fps`` for
    ``i in range(int(fps) * int(clip.duration))``; each frame is converted
    with ``rgb2gray``, resized with ``resize``, cast to ``int`` and flattened.

    Args:
        clip: Object exposing ``get_frame(t)`` and ``duration``.
        fps: Number of frames sampled per second of video.
        scale: Factor applied to the first two frame dimensions.

    Returns:
        Integer ``np.ndarray`` with one row per pixel and one column per
        sampled frame.

    Side effects:
        Prints the scaled dimensions.
    """
    # get dimension of each frame
    dims = clip.get_frame(0).shape[:2]

    # get scaled dimensions
    dims = [int(o * scale) for o in dims]
    print(dims)

    n_frames = int(fps) * int(clip.duration)
    frames = [
        resize(rgb2gray(clip.get_frame(i / float(fps))), dims).astype(int).flatten()
        for i in range(n_frames)
    ]
    # vstack puts one frame per row; transpose so each frame is a column.
    return np.vstack(frames).T
# Data matrix for the first 100 seconds of the video.
M = create_data_matrix_from_video(video.subclip(0,100), fps, scale)
print(M.shape, M.dtype)
[120, 160]
(19200, 700) int64
CPU times: user 2.21 s, sys: 163 ms, total: 2.38 s
Wall time: 2.86 s
SVD
SVD for a matrix A is given by
\[ A_{(m,n)} = U_{(m,m)} \cdot \Sigma_{(m,n)} \cdot V^T_{(n,n)} \]
- U is called the left singular matrix
- Sigma is called the singular value matrix
- V^T is called the right singular matrix (transposed)
Sigma is a diagonal matrix. Let's check with an example.
from scipy.linalg import svd
from typing import Tuple
def compute_svd(shape: Tuple, full_matrices=True):
    """Decompose a random matrix of the given shape and report factor shapes.

    Args:
        shape: Dimensions of the random matrix drawn with ``np.random.randn``.
        full_matrices: Forwarded to ``scipy.linalg.svd``.

    Returns:
        The tuple ``(U, s, VT)`` returned by ``svd``.

    Side effects:
        Prints the shapes of the input matrix and of each factor.
    """
    A = np.random.randn(*shape)
    U, s, VT = svd(A, full_matrices=full_matrices)
    print(f'{A.shape} Matrix with full_matrices {full_matrices}, decomposes into', U.shape, s.shape, VT.shape)
    return U, s, VT
def reconstruct(U, s, VT):
    """Rebuild a matrix from its SVD factors as ``U @ Sigma @ VT``.

    Works for both full (``full_matrices=True``) and reduced
    (``full_matrices=False``) decompositions.

    Args:
        U: Left singular vectors, 2-D array.
        s: 1-D array of singular values.
        VT: Right singular vectors (already transposed), 2-D array.

    Returns:
        The product ``U @ Sigma @ VT`` where ``Sigma`` carries ``s`` on its
        leading diagonal and zeros elsewhere.
    """
    # Sigma must be conformable with both factors: (columns of U) x (rows
    # of VT). Sizing it from U.shape[0] breaks for a reduced SVD of a tall
    # matrix, where U is (m, k) rather than (m, m).
    sigma = np.zeros((U.shape[1], VT.shape[0]))
    k = len(s)
    sigma[:k, :k] = np.diag(s)
    return np.linalg.multi_dot([U, sigma, VT])
# Factor shapes for wide and tall inputs with full matrices.
_, _, _ = compute_svd((2,3), full_matrices=True)
_, _, _ = compute_svd((3,2), full_matrices=True)

# Same wide input, full versus reduced decomposition.
_, _, _ = compute_svd((3,10), full_matrices=True)
_, _, _ = compute_svd((3,10), full_matrices=False)
(2, 3) Matrix with full_matrices True, decomposes into (2, 2) (2,) (3, 3)
(3, 2) Matrix with full_matrices True, decomposes into (3, 3) (2,) (2, 2)
(3, 10) Matrix with full_matrices True, decomposes into (3, 3) (3,) (10, 10)
(3, 10) Matrix with full_matrices False, decomposes into (3, 3) (3,) (3, 10)
Check that we are able to reconstruct the matrix
A = np.random.randn(4, 3)
# `full_matrices` is not defined at module scope, so pass the value explicitly.
U, s, VT = svd(A, full_matrices=True)
A_recons = reconstruct(U, s, VT)
np.allclose(A, A_recons)
True
Pseudoinverse
Matrix inversion is not defined for matrices that are not square. […] When A has more columns than rows, then solving a linear equation using the pseudoinverse provides one of the many possible solutions.
%%time
# Reduced SVD of the data matrix with NumPy.
U, s, V = np.linalg.svd(M, full_matrices=False)
print(M.shape, U.shape, s.shape, V.shape)
(3, 2) (3, 2) (2,) (2, 2)
CPU times: user 358 µs, sys: 468 µs, total: 826 µs
Wall time: 810 µs
%%time
from scipy.linalg import svd
# Full SVD of the data matrix with SciPy (default full_matrices=True).
U, s, VT = svd(M)
print(M.shape, U.shape, s.shape, VT.shape)
(19200, 700) (19200, 19200) (700,) (700, 700)
CPU times: user 2min 6s, sys: 2.86 s, total: 2min 8s
Wall time: 1min 11s
# Small random example: full SVD of a 3 x 2 matrix.
M = np.random.randn(3,2)
U, s, VT = svd(M)
print(M.shape, U.shape, s.shape, VT.shape)
(3, 2) (3, 3) (2,) (2, 2)