Movie Recommender using PyTorch

Simple implementation
Published

January 16, 2022

import torch
import torch.nn as nn
import torch.nn.functional as F

import pytorch_lightning as pl
import numpy as np
import pandas as pd
# !wget https://files.grouplens.org/datasets/movielens/ml-100k.zip
# !unzip ml-100k.zip
# Load the MovieLens u1 train/test rating splits.
# Both files are read as tab-separated with header=None, so the column
# labels are attached explicitly from ratings_cols.
ratings_cols = ['user_id', 'item_id', 'rating', 'timestamp']

ratings_train_df = pd.read_csv(
    'ml-100k/u1.base', sep='\t', header=None
).set_axis(ratings_cols, axis=1)
print(f'ratings_train_df.shape \t - {ratings_train_df.shape}')

ratings_test_df = pd.read_csv(
    'ml-100k/u1.test', sep='\t', header=None
).set_axis(ratings_cols, axis=1)
print(f'ratings_test_shape \t - {ratings_test_df.shape}')

# Notebook cell result: preview the first two training rows.
ratings_train_df.head(2)
ratings_train_df.shape   - (80000, 4)
ratings_test_shape   - (20000, 4)
user_id item_id rating timestamp
0 1 1 5 874965758
1 1 2 3 876893171
# Load the movie catalogue (pipe-separated, read with header=None).
# NOTE(review): the upstream u.item file is commonly reported to contain
# non-UTF-8 bytes in some titles; if read_csv raises UnicodeDecodeError here,
# pass encoding='latin-1' -- confirm against your copy of the dataset.
movies_df = pd.read_csv('ml-100k/u.item', sep='|', header=None)
# Label corrected: this reports the movies frame (it previously said users_df).
print(f'movies_df.shape - {movies_df.shape}')
# Five descriptive fields followed by 19 genre columns (0/1 in the sample rows).
movie_cols = ['movie_id','title', 'release_date', 'video_release_date', 'imdb_url', 'unknown',
             'action', 'adventure', 'animation', 'childrens', 'comedy', 'crime', 'documentary', 
             'drama', 'fantasy','film-noir', 'horror', 'musical','mystery', 'romance', 
             'scifi', 'thriller', 'war', 'western']
movies_df.columns = movie_cols
# Notebook cell result: preview the first two movies.
movies_df.head(2)
movies_df.shape - (1682, 24)
movie_id title release_date video_release_date imdb_url unknown action adventure animation childrens ... fantasy film-noir horror musical mystery romance scifi thriller war western
0 1 Toy Story (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
1 2 GoldenEye (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... 0 0 0 0 0 0 0 1 0 0

2 rows × 24 columns

# Load the user table (pipe-separated, read with header=None) and attach
# the column labels in the same expression.
user_cols = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
users_df = pd.read_csv(
    'ml-100k/u.user', sep='|', header=None
).set_axis(user_cols, axis=1)
# Notebook cell result: preview the first two users.
users_df.head(2)
user_id age gender occupation zip_code
0 1 24 M technician 85711
1 2 53 F other 94043

References

https://yonigottesman.github.io/recsys/pytorch/elasticsearch/2020/02/18/fm-torch-to-recsys.html