• GitHub
  • -->
    2024

    TMDB Movie DB Analysis

    POWER BI Dashboard

    I used SQL to performs opertions on the Database to get better insights on the movie database.

    SQL Operations performed

    --HIGHEST GROSSING FILM
    SELECT film_id, title, (revenue-budget) as profit
    FROM movie
    ORDER by profit DESC

    --SELECT all Movies with the name 'Avatar' or 'Avengers' in them
    SELECT film_id, title FROM movie
    WHERE title LIKE '%Avatar%' OR title LIKE '%Avengers%

    --Finding the count of movies under of a particular Movie collection
    SELECT m.collection_id, c.name, COUNT(m.film_id) as movie_count
    FROM movie as m
    JOIN collection as c
    ON m.collection_id = c.collection_id
    WHERE c.name LIKE '%Star War%' or c.name LIKE '%Mission%'
    GROUP BY m.collection_id,c.name

    --List all films where Pixar company was involved in creating the film.
    ----using WINDOWS function & string_agg function and since its a Integer had to cast the column to a STRING(text)
    WITH X as (SELECT m.film_id, m.title, string_agg(mc.company_id::text, ',') as company_id,string_agg(c.name, ',') as company_names
    from movie as m
    JOIN movie_company as mc
    ON m.film_id = mc.film_id
    JOIN company as c
    ON mc.company_id = c.company_id
    GROUP BY m.film_id, m.title
    )
    SELECT X.film_id, X.title, X.company_names
    FROM X
    WHERE X.company_names LIKE '%Pixar%'

    -- Find out all movies of 'TOM CRUISE' where the rating of the movie is above 7
    We first find Tom cruise ID number in the DB -- Show all movies released in Oct 2020 where rating is greater than 6 SELECT film_id, title, release_date, rating from movie WHERE DATE_PART('year', release_date)=2020 and date_part('month', release_date)=10 and rating >6.0 ELECT person_id, name from crew_credit
    WHERE name LIKE '%Tom Cru%'
    --USING CTE we created a movies table having rating more than 7 and then an actor TABLE containing all TOM Cruise's movies.
    WITH movies as (
    SELECT filmid, title, rating
    FROM movie
    WHERE rating > 7.0
    ),
    actor as (
    SELECT film_id, person_id, name
    FROM crew_credit
    WHERE person_id = 500
    )
    SELECT movies.film_id, actor.person_id, actor.name, movies.title, movies.rating
    FROM movies
    JOIN actor
    ON movies.film_id = actor.film_id

    -- Show all movies released in Oct 2020 where rating is greater than 6
    SELECT film_id, title, release_date, rating from movie
    WHERE DATE_PART('year', release_date)=2020 and date_part('month', release_date)=10 and rating >6.0

    --SHOW the Directors who have the BEST RATED MOVIES
    ---clause : director should have directed 3 or more movies and each movie has to have more than 5000 votes to qualify
    WITH X as (
    SELECT cc.person_id, cc.name as director, string_agg(m.film_id::text,',') as film_ids, string_agg(m.title,',') as film_titles, ROUND(AVG(m.rating),2) as avg_rating, SUM(m.vote_count) as total_votes
    FROM crew_credit as cc
    JOIN movie as m
    ON cc.film_id = m.film_id
    WHERE cc.department_id = 2 and m.vote_count > 5000
    GROUP BY CC.person_id, cc.name
    ORDER BY avg_rating DESC
    )
    SELECT *, SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) as total_films
    FROM X
    GROUP BY X.person_id,X.director, X.film_ids, X.film_titles, X.avg_rating, X.total_votes
    HAVING SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) > 2
    ORDER BY avg_rating DESC
    --SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1 counts the number of commas in the string field

    --SHOW me the avg rating of the movies of the 4 actors along with the total count of movies.
    WITH X as (
    SELECT cc.person_id, cc.name as actor, string_agg(m.film_id::text,',') as film_ids, string_agg(m.title,',') as film_titles, ROUND(AVG(m.rating),2) as avg_rating, SUM(m.vote_count) as total_votes
    FROM crew_credit as cc
    JOIN movie as m
    ON cc.film_id = m.film_id
    WHERE cc.name IN('Tom Cruise', 'Brad Pitt', 'Morgan Freeman', 'Leonardo DiCaprio')
    GROUP BY CC.person_id, cc.name
    ORDER BY avg_rating DESC
    )
    SELECT *, SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) as total_films
    FROM X
    GROUP BY X.person_id,X.actor, X.film_ids, X.film_titles, X.avg_rating, X.total_votes
    HAVING SUM(length(x.film_ids)-length(replace(X.film_ids, ',',''))+1) > 4 -- SELECTING actors only who have acted in 5 movies and more
    ORDER BY avg_rating DESC

    Open Power BI Interactive Dashboard

    -->