TMDB Movie DB Analysis
I used SQL to performs opertions on the Database to get better insights on the movie database.
SQL Operations performed
--HIGHEST GROSSING FILM
SELECT film_id, title, (revenue-budget) as profit
FROM movie
ORDER by profit DESC

--SELECT all Movies with the name 'Avatar' or 'Avengers' in them
SELECT film_id, title
FROM movie
WHERE title LIKE '%Avatar%' OR title LIKE '%Avengers%

--Finding the count of movies under of a particular Movie collection
SELECT m.collection_id, c.name, COUNT(m.film_id) as movie_count
FROM movie as m
JOIN collection as c
ON m.collection_id = c.collection_id
WHERE c.name LIKE '%Star War%' or c.name LIKE '%Mission%'
GROUP BY m.collection_id,c.name

--List all films where Pixar company was involved in creating the film.
----using WINDOWS function & string_agg function and since its a Integer had to cast the column to a STRING(text)
WITH X as (SELECT m.film_id, m.title, string_agg(mc.company_id::text, ',') as company_id,string_agg(c.name, ',') as company_names
from movie as m
JOIN movie_company as mc
ON m.film_id = mc.film_id
JOIN company as c
ON mc.company_id = c.company_id
GROUP BY m.film_id, m.title
)
SELECT X.film_id, X.title, X.company_names
FROM X
WHERE X.company_names LIKE '%Pixar%'

-- Find out all movies of 'TOM CRUISE' where the rating of the movie is above 7
We first find Tom cruise ID number in the DB
-- Show all movies released in Oct 2020 where rating is greater than 6
SELECT film_id, title, release_date, rating from movie
WHERE DATE_PART('year', release_date)=2020 and date_part('month', release_date)=10 and rating >6.0
ELECT person_id, name from crew_credit
WHERE name LIKE '%Tom Cru%'
--USING CTE we created a movies table having rating more than 7 and then an actor TABLE containing all TOM Cruise's movies.
WITH movies as (
SELECT filmid, title, rating
FROM movie
WHERE rating > 7.0
),
actor as (
SELECT film_id, person_id, name
FROM crew_credit
WHERE person_id = 500
)
SELECT movies.film_id, actor.person_id, actor.name, movies.title, movies.rating
FROM movies
JOIN actor
ON movies.film_id = actor.film_id

-- Show all movies released in Oct 2020 where rating is greater than 6
SELECT film_id, title, release_date, rating from movie
WHERE DATE_PART('year', release_date)=2020 and date_part('month', release_date)=10 and rating >6.0

--SHOW the Directors who have the BEST RATED MOVIES
---clause : director should have directed 3 or more movies and each movie has to have more than 5000 votes to qualify
WITH X as (
SELECT cc.person_id, cc.name as director, string_agg(m.film_id::text,',') as film_ids, string_agg(m.title,',') as film_titles, ROUND(AVG(m.rating),2) as avg_rating, SUM(m.vote_count) as total_votes
FROM crew_credit as cc
JOIN movie as m
ON cc.film_id = m.film_id
WHERE cc.department_id = 2 and m.vote_count > 5000
GROUP BY CC.person_id, cc.name
ORDER BY avg_rating DESC
)
SELECT *, SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) as total_films
FROM X
GROUP BY X.person_id,X.director, X.film_ids, X.film_titles, X.avg_rating, X.total_votes
HAVING SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) > 2
ORDER BY avg_rating DESC
--SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1 counts the number of commas in the string field

--SHOW me the avg rating of the movies of the 4 actors along with the total count of movies.
WITH X as (
SELECT cc.person_id, cc.name as actor, string_agg(m.film_id::text,',') as film_ids, string_agg(m.title,',') as film_titles, ROUND(AVG(m.rating),2) as avg_rating, SUM(m.vote_count) as total_votes
FROM crew_credit as cc
JOIN movie as m
ON cc.film_id = m.film_id
WHERE cc.name IN('Tom Cruise', 'Brad Pitt', 'Morgan Freeman', 'Leonardo DiCaprio')
GROUP BY CC.person_id, cc.name
ORDER BY avg_rating DESC
)
SELECT *, SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) as total_films
FROM X
GROUP BY X.person_id,X.actor, X.film_ids, X.film_titles, X.avg_rating, X.total_votes
HAVING SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) > 4 -- SELECTING actors only who have acted in 5 movies and more
ORDER BY avg_rating DESC
