From audio cassettes to vinyl records, from iPods to iTunes: as media products have shifted from physical goods to bits, the way people explore and perceive new media has also changed through the years.
The amount of data people consume every day makes it a necessity for digital companies to fine-tune their offerings for each user, creating a hyper-personalized environment that keeps users engaged.
Spotify, an online music streaming platform, puts user personalization at the forefront through recommended playlists. Rather than just listening to a full-length album, users can control their experience by curating playlists that reflect their emotions, activities, and memories. Spotify can enhance this personalization by assisting users in their music discovery: discovering new tracks is made easier by offering recommendations relevant to the user.
What sets Spotify apart from other music streaming platforms like Amazon Music, Pandora, Apple Music, and YouTube Music is precisely the spot-on recommendations it offers users for their playlists. Through this project, we aim to see how any platform can improve its user personalization, and what that implies for the future of targeted marketing.
For this project we will build a recommendation system that recommends 20 songs a user might add to their playlist, based on the first song the user adds. Platforms can leverage such a system to further refine the precision of their recommendations and ensure that customers stay loyal to the platform.
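As a rough sketch of this idea, a recommender of this kind can rank candidate tracks by how similar their audio-feature vectors are to the seed song. The feature values and the `recommend` helper below are purely illustrative, not the final system built later in this report:

```python
import numpy as np
from scipy import spatial

def recommend(seed, candidates, k=3):
    """Return the indices of the k candidates most similar to the seed,
    using cosine similarity over audio-feature vectors."""
    scores = [1 - spatial.distance.cosine(seed, vec) for vec in candidates]
    order = np.argsort(scores)[::-1]  # highest similarity first
    return order[:k].tolist()

# Toy feature vectors: [danceability, energy, valence]
seed = [0.90, 0.80, 0.75]
candidates = [
    [0.88, 0.82, 0.70],  # close to the seed
    [0.10, 0.20, 0.15],  # pointing in a different direction
    [0.85, 0.75, 0.80],  # close to the seed
]
print(recommend(seed, candidates, k=2))  # candidates 0 and 2 rank highest
```

In the full system, the feature vectors would come from Spotify's per-track audio metrics, which we gather later in this report.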
In order to achieve this, we will analyze the data to find trends in users' listening habits. We will also identify the top artists and the top genres, and what sets them apart from the rest. These analyses will not only help us optimize our recommendation system but can also help all sides of the platform in the following ways:
Our analysis was done on an existing dataset obtained from a Spotify Case Challenge - Million Playlist Dataset Challenge, hosted by AIcrowd in JSON format. This dataset consists of 1 million Spotify playlists and is sampled from over 4 billion public playlists on Spotify. This sampled dataset further consists of over 2 million unique tracks by nearly 300,000 artists and represents the largest public dataset of music playlists in the world. The dataset includes public playlists created by US Spotify users between January 2010 and November 2017. Playlists are sampled with simple randomization, are manually filtered for playlist quality and to remove offensive content and have some dithering and fictitious tracks added to them. As such, the dataset is not representative of the true distribution of playlists on the Spotify platform and must not be interpreted as such in any research or analysis performed on the dataset. The data is anonymized to protect user privacy by Spotify.
However, our analysis was done only on the first 1,000 playlists and their respective songs, which serves the purpose of this project. The dataset contains the following attributes:
As we wanted to dig deeper to find out what separates each track from the rest, and to optimize the recommendation system, we decided to pull data for each individual track by making REST API calls to the Spotify data catalogue. This dataset was acquired in JSON format and contains the following attributes:
Sources : Spotify for Developers
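Each Spotify URI in the dataset (e.g. `spotify:track:<id>`) embeds the resource ID needed for the corresponding Web API endpoint. As a small sketch (the `endpoint_from_uri` helper name is ours), the mapping looks like this; the actual GET request would also carry an OAuth bearer token in the `Authorization` header:

```python
BASE_URL = 'https://api.spotify.com/v1'

def endpoint_from_uri(uri, path):
    """Map a Spotify URI such as 'spotify:track:<id>' to a Web API endpoint URL."""
    resource_id = uri.split(':')[-1]  # the ID is the last colon-separated field
    return f'{BASE_URL}/{path}/{resource_id}'

# 'audio-features' serves the per-track metrics; 'artists' serves genre information
print(endpoint_from_uri('spotify:track:0UaMYEvWZi0ZqiDOoHU3YI', 'audio-features'))
```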
Apart from the standard libraries like pandas, numpy, datetime, matplotlib, seaborn, and plotly, we will be using the following additional resources for processing the data:
import requests
import json
import pandas as pd
import time
import os
import numpy as np
import ast
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
from datetime import datetime
from IPython.display import display
from scipy import spatial
Before analysing the data, we will prepare our data in order to answer our questions of interest. The data processing will be divided into 4 stages:
We will be explaining each stage in detail as we go about it.
The Million Playlist Challenge dataset consists of playlists and their respective tracks in JSON format. We will load this data into a dataframe, setting a unique identifier for each playlist as its index. Since the tracks are included as lists within each playlist, we will extract them later in order to display the track information separately before we dig deeper into the track metadata.
# Reading and loading the json file into a dataframe
playlist_file = 'playlist_data.json'
with open(playlist_file,'r') as j:
playlist_df = json.loads(j.read())
# flattening the list data
playlist_df = pd.json_normalize(playlist_df)['playlists']
playlist_df = pd.DataFrame(playlist_df[0])
# Setting playlist id as index
playlist_df.set_index('pid',inplace = True)
# Displaying the dataframe
playlist_df.head(10)
name | collaborative | modified_at | num_tracks | num_albums | num_followers | tracks | num_edits | duration_ms | num_artists | description | |
---|---|---|---|---|---|---|---|---|---|---|---|
pid | |||||||||||
0 | Throwbacks | false | 1493424000 | 52 | 47 | 1 | [{'pos': 0, 'artist_name': 'Missy Elliott', 't... | 6 | 11532414 | 37 | NaN |
1 | Awesome Playlist | false | 1506556800 | 39 | 23 | 1 | [{'pos': 0, 'artist_name': 'Survivor', 'track_... | 5 | 11656470 | 21 | NaN |
2 | korean | false | 1505692800 | 64 | 51 | 1 | [{'pos': 0, 'artist_name': 'Hoody', 'track_uri... | 18 | 14039958 | 31 | NaN |
3 | mat | false | 1501027200 | 126 | 107 | 1 | [{'pos': 0, 'artist_name': 'Camille Saint-Saën... | 4 | 28926058 | 86 | NaN |
4 | 90s | false | 1401667200 | 17 | 16 | 2 | [{'pos': 0, 'artist_name': 'The Smashing Pumpk... | 7 | 4335282 | 16 | NaN |
5 | Wedding | false | 1430956800 | 80 | 71 | 1 | [{'pos': 0, 'artist_name': 'Cali Swag District... | 3 | 19156557 | 56 | NaN |
6 | I Put A Spell On You | false | 1477094400 | 16 | 15 | 1 | [{'pos': 0, 'artist_name': 'Creedence Clearwat... | 2 | 3408479 | 13 | NaN |
7 | 2017 | false | 1509321600 | 53 | 52 | 1 | [{'pos': 0, 'artist_name': 'Fink', 'track_uri'... | 38 | 12674796 | 48 | NaN |
8 | BOP | false | 1508976000 | 46 | 37 | 2 | [{'pos': 0, 'artist_name': 'Catfish and the Bo... | 21 | 9948921 | 23 | NaN |
9 | old country | false | 1501804800 | 21 | 20 | 1 | [{'pos': 0, 'artist_name': 'Willie Nelson', 't... | 10 | 4297488 | 18 | NaN |
In order to get the best results, we will transform and map the data from its original form into our desired format, with the intent of making it more appropriate and valuable for our analysis. To achieve this, we will convert the playlist-modification timestamp from Unix epoch format into datetime format, extracting only the year.
# Converting UTC Unix Epoch format into datetime format
playlist_df['modified_year'] = playlist_df['modified_at'].apply(lambda x: datetime.fromtimestamp(x))
# Converting datetime into string and extracting the year
playlist_df['modified_year'] = playlist_df['modified_year'].astype(str)
playlist_df['modified_year'] = playlist_df['modified_year'].str.split("-").str[0]
# Displaying the data
playlist_df.head(10)
name | collaborative | modified_at | num_tracks | num_albums | num_followers | tracks | num_edits | duration_ms | num_artists | description | modified_year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
pid | ||||||||||||
0 | Throwbacks | false | 1493424000 | 52 | 47 | 1 | [{'pos': 0, 'artist_name': 'Missy Elliott', 't... | 6 | 11532414 | 37 | NaN | 2017 |
1 | Awesome Playlist | false | 1506556800 | 39 | 23 | 1 | [{'pos': 0, 'artist_name': 'Survivor', 'track_... | 5 | 11656470 | 21 | NaN | 2017 |
2 | korean | false | 1505692800 | 64 | 51 | 1 | [{'pos': 0, 'artist_name': 'Hoody', 'track_uri... | 18 | 14039958 | 31 | NaN | 2017 |
3 | mat | false | 1501027200 | 126 | 107 | 1 | [{'pos': 0, 'artist_name': 'Camille Saint-Saën... | 4 | 28926058 | 86 | NaN | 2017 |
4 | 90s | false | 1401667200 | 17 | 16 | 2 | [{'pos': 0, 'artist_name': 'The Smashing Pumpk... | 7 | 4335282 | 16 | NaN | 2014 |
5 | Wedding | false | 1430956800 | 80 | 71 | 1 | [{'pos': 0, 'artist_name': 'Cali Swag District... | 3 | 19156557 | 56 | NaN | 2015 |
6 | I Put A Spell On You | false | 1477094400 | 16 | 15 | 1 | [{'pos': 0, 'artist_name': 'Creedence Clearwat... | 2 | 3408479 | 13 | NaN | 2016 |
7 | 2017 | false | 1509321600 | 53 | 52 | 1 | [{'pos': 0, 'artist_name': 'Fink', 'track_uri'... | 38 | 12674796 | 48 | NaN | 2017 |
8 | BOP | false | 1508976000 | 46 | 37 | 2 | [{'pos': 0, 'artist_name': 'Catfish and the Bo... | 21 | 9948921 | 23 | NaN | 2017 |
9 | old country | false | 1501804800 | 21 | 20 | 1 | [{'pos': 0, 'artist_name': 'Willie Nelson', 't... | 10 | 4297488 | 18 | NaN | 2017 |
After extracting and munging the data, we realize that it needs cleaning, as it includes data points that are out of the scope of our analysis. We will clean the data by dropping redundant columns and retaining those that will be useful for our analysis.
# Dropping irrelevant columns
playlist_df.drop(columns=['description', 'modified_at'], inplace = True)
playlist_df.head(10)
name | collaborative | num_tracks | num_albums | num_followers | tracks | num_edits | duration_ms | num_artists | modified_year | |
---|---|---|---|---|---|---|---|---|---|---|
pid | ||||||||||
0 | Throwbacks | false | 52 | 47 | 1 | [{'pos': 0, 'artist_name': 'Missy Elliott', 't... | 6 | 11532414 | 37 | 2017 |
1 | Awesome Playlist | false | 39 | 23 | 1 | [{'pos': 0, 'artist_name': 'Survivor', 'track_... | 5 | 11656470 | 21 | 2017 |
2 | korean | false | 64 | 51 | 1 | [{'pos': 0, 'artist_name': 'Hoody', 'track_uri... | 18 | 14039958 | 31 | 2017 |
3 | mat | false | 126 | 107 | 1 | [{'pos': 0, 'artist_name': 'Camille Saint-Saën... | 4 | 28926058 | 86 | 2017 |
4 | 90s | false | 17 | 16 | 2 | [{'pos': 0, 'artist_name': 'The Smashing Pumpk... | 7 | 4335282 | 16 | 2014 |
5 | Wedding | false | 80 | 71 | 1 | [{'pos': 0, 'artist_name': 'Cali Swag District... | 3 | 19156557 | 56 | 2015 |
6 | I Put A Spell On You | false | 16 | 15 | 1 | [{'pos': 0, 'artist_name': 'Creedence Clearwat... | 2 | 3408479 | 13 | 2016 |
7 | 2017 | false | 53 | 52 | 1 | [{'pos': 0, 'artist_name': 'Fink', 'track_uri'... | 38 | 12674796 | 48 | 2017 |
8 | BOP | false | 46 | 37 | 2 | [{'pos': 0, 'artist_name': 'Catfish and the Bo... | 21 | 9948921 | 23 | 2017 |
9 | old country | false | 21 | 20 | 1 | [{'pos': 0, 'artist_name': 'Willie Nelson', 't... | 10 | 4297488 | 18 | 2017 |
In order to dig deeper into the tracks within the playlists, we will iterate through each playlist to get the track data, which is stored as a list of dictionaries. We will save this track data into a new dataframe.
# Appending the track information from every playlist into one dataframe
# (pd.concat is used here; DataFrame.append was deprecated and removed in pandas 2.0)
track_df = pd.concat(
    [pd.DataFrame.from_dict(tracks) for tracks in playlist_df['tracks']])
track_df.head(10)
pos | artist_name | track_uri | artist_uri | track_name | album_uri | duration_ms | album_name | |
---|---|---|---|---|---|---|---|---|
0 | 0 | Missy Elliott | spotify:track:0UaMYEvWZi0ZqiDOoHU3YI | spotify:artist:2wIVse2owClT7go1WT98tk | Lose Control (feat. Ciara & Fat Man Scoop) | spotify:album:6vV5UrXcfyQD1wu4Qo2I9K | 226863 | The Cookbook |
1 | 1 | Britney Spears | spotify:track:6I9VzXrHxO9rA9A5euc8Ak | spotify:artist:26dSoYclwsYLMAKD3tpOr4 | Toxic | spotify:album:0z7pVBGOD7HCIB7S8eLkLI | 198800 | In The Zone |
2 | 2 | Beyoncé | spotify:track:0WqIKmW4BTrj3eJFmnCKMv | spotify:artist:6vWDO969PvNqNYHIOW5v0m | Crazy In Love | spotify:album:25hVFAxTlDvXbx2X2QkUkE | 235933 | Dangerously In Love (Alben für die Ewigkeit) |
3 | 3 | Justin Timberlake | spotify:track:1AWQoqb9bSvzTjaLralEkT | spotify:artist:31TPClRtHm23RisEBtV3X7 | Rock Your Body | spotify:album:6QPkyl04rXwTGlGlcYaRoW | 267266 | Justified |
4 | 4 | Shaggy | spotify:track:1lzr43nnXAijIGYnCT8M8H | spotify:artist:5EvFsr3kj42KNv97ZEnqij | It Wasn't Me | spotify:album:6NmFmPX56pcLBOFMhIiKvF | 227600 | Hot Shot |
5 | 5 | Usher | spotify:track:0XUfyU2QviPAs6bxSpXYG4 | spotify:artist:23zg3TcAtWQy7J6upgbUnj | Yeah! | spotify:album:0vO0b1AvY49CPQyVisJLj0 | 250373 | Confessions |
6 | 6 | Usher | spotify:track:68vgtRHr7iZHpzGpon6Jlo | spotify:artist:23zg3TcAtWQy7J6upgbUnj | My Boo | spotify:album:1RM6MGv6bcl6NrAG8PGoZk | 223440 | Confessions |
7 | 7 | The Pussycat Dolls | spotify:track:3BxWKCI06eQ5Od8TY2JBeA | spotify:artist:6wPhSqRtPu1UhRCDX5yaDJ | Buttons | spotify:album:5x8e8UcCeOgrOzSnDGuPye | 225560 | PCD |
8 | 8 | Destiny's Child | spotify:track:7H6ev70Weq6DdpZyyTmUXk | spotify:artist:1Y8cdNmUJH7yBTd9yOvr5i | Say My Name | spotify:album:283NWqNsCA9GwVHrJk59CG | 271333 | The Writing's On The Wall |
9 | 9 | OutKast | spotify:track:2PpruBYCo4H7WOBJ7Q2EwM | spotify:artist:1G9G7WwrXka3Z1r7aIDjI7 | Hey Ya! - Radio Mix / Club Mix | spotify:album:1UsmQ3bpJTyK6ygoOOjG1r | 235213 | Speakerboxxx/The Love Below |
In order to answer our research questions, we realize that the data from the Million Playlist Dataset Challenge is not sufficient: we need additional metrics that quantify each individual song. We will extract these metrics by making API calls to Spotify following REST principles.
To deal with the rate limit (a mechanism Spotify uses to restrict network traffic), we will define a counter variable and load all the playlist data into a new variable. We will then iterate through all the playlists and their respective tracks, calling the API and appending the metadata to a list while incrementing the counter. Even if we exceed the rate limit, we will not have to rerun the code from scratch, because the counter stores the last completed iteration. We will also be able to refresh the access token before it expires.
By doing this, we will be saving a lot of time extracting the metadata.
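The waiting logic can be kept separate from the extraction loop. As a sketch (the `seconds_to_wait` helper below is our own, not part of the pipeline), Spotify signals rate limiting with HTTP status 429 and a `Retry-After` response header giving the number of seconds to wait before retrying:

```python
def seconds_to_wait(status_code, headers, default=1.0):
    """Return how long to sleep before retrying an API call.
    On HTTP 429 (rate limited), honour the Retry-After header;
    any other status requires no wait."""
    if status_code == 429:
        return float(headers.get('Retry-After', default))
    return 0.0

# A rate-limited response asking us to wait 3 seconds:
print(seconds_to_wait(429, {'Retry-After': '3'}))  # 3.0
print(seconds_to_wait(200, {}))                    # 0.0
```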
# Defining variables to store track data
all_play = [] # list to append data
counter = 0
# Defining paths to acquire data
metric_path = 'audio-features' # path to acquire track metrics
track_path = 'tracks' # path to acquire track popularity
genre_path = 'artists' # path to acquire genre information
# Authorizing API calls with a bearer access token
# (placeholder: generate a fresh token from the Spotify developer dashboard, as tokens expire after an hour)
access_token = '<YOUR_ACCESS_TOKEN>'
# Checking if a file named track_data.csv exists; if not, make API calls to get the data and store it in a list
# If the file exists, we already have the required data and there is no need to make the API calls
# Reloading the raw playlist JSON so that we can iterate over the nested track lists
with open(playlist_file, 'r') as j:
    playlist_data = json.loads(j.read())
# Iterate through all the playlists and increment the counter after every playlist
# This way, even if we hit the rate limit, the data gathered so far is not lost:
# the loop can resume from the last successful iteration stored in the counter
if not os.path.exists(r'track_data.csv'):
    headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'application/json'}
    for playlist in range(counter, len(playlist_data['playlists'])):
        for track in playlist_data['playlists'][playlist]['tracks']:
            # Audio features for the track
            uri = track['track_uri'].split(':')[-1]
            url = f'https://api.spotify.com/v1/{metric_path}/{uri}'
            metrics = requests.get(url, headers=headers).json()
            for feature in ['danceability', 'energy', 'loudness', 'speechiness',
                            'valence', 'liveness', 'acousticness', 'key',
                            'instrumentalness']:
                track[feature] = metrics[feature]
            track['temp'] = metrics['tempo']
            track['time signature'] = metrics['time_signature']
            # Track popularity
            url = f'https://api.spotify.com/v1/{track_path}/{uri}'
            track['popularity'] = requests.get(url, headers=headers).json()['popularity']
            # Artist genres (the artists endpoint returns them under the 'genres' key)
            uri = track['artist_uri'].split(':')[-1]
            url = f'https://api.spotify.com/v1/{genre_path}/{uri}'
            track['genre'] = requests.get(url, headers=headers).json()['genres']
        all_play.append(playlist_data['playlists'][playlist])
        counter += 1
print('Data Extracted Successfully')
Data Extracted Successfully
Now that we have the metadata for each track, we will be dumping it into a file so that we don’t have to make API calls every time we execute the code. Firstly, we will check if any file named track_data already exists to avoid overwriting it, and if not, we will create the file, dumping the track metadata. After that, we will load the track metadata into a dataframe and create a link between track data and playlist data by setting pid (playlist id) as the index.
# If the file 'track_data.csv' does not exist, flatten the extracted playlists and save the track metadata
# (each playlist's pid is kept so that tracks can later be linked back to their playlist)
if not os.path.exists(r'track_data.csv'):
    final_playlist_df = pd.json_normalize(all_play, record_path='tracks', meta=['pid'])
    final_playlist_df.to_csv(r'track_data.csv')
# Loading and displaying the dataframe
track_file = r'track_data.csv'
track_df = pd.read_csv(track_file)
track_df.drop(columns='Unnamed: 0', inplace = True)
track_df.set_index('pid', inplace = True)
track_df.head(10)
artist_uri | pos | artist_name | track_uri | track_name | album_uri | duration_ms | album_name | danceability | energy | ... | speechiness | valence | temp | liveness | acousticness | key | instrumentalness | time signature | popularity | genre | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pid | |||||||||||||||||||||
0 | spotify:artist:2wIVse2owClT7go1WT98tk | 0.0 | Missy Elliott | spotify:track:0UaMYEvWZi0ZqiDOoHU3YI | Lose Control (feat. Ciara & Fat Man Scoop) | spotify:album:6vV5UrXcfyQD1wu4Qo2I9K | 226863.0 | The Cookbook | 0.904 | 0.813 | ... | 0.1210 | 0.810 | 125.461 | 0.0471 | 0.03110 | 4.0 | 0.006970 | 4.0 | 67.0 | ['dance pop', 'hip hop', 'hip pop', 'pop', 'po... |
0 | spotify:artist:26dSoYclwsYLMAKD3tpOr4 | 1.0 | Britney Spears | spotify:track:6I9VzXrHxO9rA9A5euc8Ak | Toxic | spotify:album:0z7pVBGOD7HCIB7S8eLkLI | 198800.0 | In The Zone | 0.774 | 0.838 | ... | 0.1140 | 0.924 | 143.040 | 0.2420 | 0.02490 | 5.0 | 0.025000 | 4.0 | 82.0 | ['dance pop', 'pop', 'post-teen pop'] |
0 | spotify:artist:6vWDO969PvNqNYHIOW5v0m | 2.0 | Beyoncé | spotify:track:0WqIKmW4BTrj3eJFmnCKMv | Crazy In Love | spotify:album:25hVFAxTlDvXbx2X2QkUkE | 235933.0 | Dangerously In Love (Alben für die Ewigkeit) | 0.664 | 0.758 | ... | 0.2100 | 0.701 | 99.259 | 0.0598 | 0.00238 | 2.0 | 0.000000 | 4.0 | 24.0 | ['dance pop', 'pop', 'r&b'] |
0 | spotify:artist:31TPClRtHm23RisEBtV3X7 | 3.0 | Justin Timberlake | spotify:track:1AWQoqb9bSvzTjaLralEkT | Rock Your Body | spotify:album:6QPkyl04rXwTGlGlcYaRoW | 267266.0 | Justified | 0.892 | 0.714 | ... | 0.1410 | 0.817 | 100.972 | 0.0521 | 0.20100 | 4.0 | 0.000234 | 4.0 | 77.0 | ['dance pop', 'pop'] |
0 | spotify:artist:5EvFsr3kj42KNv97ZEnqij | 4.0 | Shaggy | spotify:track:1lzr43nnXAijIGYnCT8M8H | It Wasn't Me | spotify:album:6NmFmPX56pcLBOFMhIiKvF | 227600.0 | Hot Shot | 0.853 | 0.606 | ... | 0.0713 | 0.654 | 94.759 | 0.3130 | 0.05610 | 0.0 | 0.000000 | 4.0 | 3.0 | ['pop rap', 'reggae fusion'] |
0 | spotify:artist:23zg3TcAtWQy7J6upgbUnj | 5.0 | Usher | spotify:track:0XUfyU2QviPAs6bxSpXYG4 | Yeah! | spotify:album:0vO0b1AvY49CPQyVisJLj0 | 250373.0 | Confessions | 0.881 | 0.788 | ... | 0.1680 | 0.592 | 104.997 | 0.0377 | 0.02120 | 2.0 | 0.000000 | 4.0 | 0.0 | ['atl hip hop', 'dance pop', 'pop', 'r&b', 'so... |
0 | spotify:artist:23zg3TcAtWQy7J6upgbUnj | 6.0 | Usher | spotify:track:68vgtRHr7iZHpzGpon6Jlo | My Boo | spotify:album:1RM6MGv6bcl6NrAG8PGoZk | 223440.0 | Confessions | 0.662 | 0.507 | ... | 0.1180 | 0.676 | 86.412 | 0.0465 | 0.25700 | 5.0 | 0.000000 | 4.0 | 78.0 | ['atl hip hop', 'dance pop', 'pop', 'r&b', 'so... |
0 | spotify:artist:6wPhSqRtPu1UhRCDX5yaDJ | 7.0 | The Pussycat Dolls | spotify:track:3BxWKCI06eQ5Od8TY2JBeA | Buttons | spotify:album:5x8e8UcCeOgrOzSnDGuPye | 225560.0 | PCD | 0.570 | 0.821 | ... | 0.2670 | 0.408 | 210.857 | 0.2890 | 0.17800 | 2.0 | 0.000000 | 4.0 | 65.0 | ['dance pop', 'girl group', 'hip pop', 'pop', ... |
0 | spotify:artist:1Y8cdNmUJH7yBTd9yOvr5i | 8.0 | Destiny's Child | spotify:track:7H6ev70Weq6DdpZyyTmUXk | Say My Name | spotify:album:283NWqNsCA9GwVHrJk59CG | 271333.0 | The Writing's On The Wall | 0.713 | 0.678 | ... | 0.1020 | 0.734 | 138.009 | 0.1490 | 0.27300 | 5.0 | 0.000000 | 4.0 | 76.0 | ['dance pop', 'girl group', 'hip pop', 'pop', ... |
0 | spotify:artist:1G9G7WwrXka3Z1r7aIDjI7 | 9.0 | OutKast | spotify:track:2PpruBYCo4H7WOBJ7Q2EwM | Hey Ya! - Radio Mix / Club Mix | spotify:album:1UsmQ3bpJTyK6ygoOOjG1r | 235213.0 | Speakerboxxx/The Love Below | 0.727 | 0.974 | ... | 0.0664 | 0.965 | 79.526 | 0.1740 | 0.10300 | 4.0 | 0.000532 | 4.0 | 80.0 | ['atl hip hop', 'dirty south rap', 'hip hop', ... |
10 rows × 21 columns
Now that we have prepared our data, we are all set to start our analysis. In order to build our recommendation system, we will look for trends in users' listening habits and for factors that quantify each song. These analyses will give us a fuller understanding of the music industry, which could further help all the sides involved.
We will start off our analysis by understanding the attributes of the tracks that can quantify each individual track and separate it from the rest. Our aim is to see if these attributes are dependent on each other or not, and if a particular attribute has any relation with the track being popular or not.
We believe that the best way to relate all the attributes is by making a correlation between them and visualizing it in a Heatmap.
# Taking only attribute columns from track data
heatmap_df = track_df.loc[:,'danceability':'popularity']
# Formatting font, size, title and ticks
font = {'fontname' : 'Monospace', 'fontsize' : '20', 'pad' : '25'}
sns.set(font_scale = 1.25)
plt.figure(figsize=(20, 10))
plt.title('Correlation Between Track Attributes', **font)
# Plotting the heatmap with annotations
sns.heatmap(heatmap_df.corr(), annot = True, cmap = "Greens")
plt.show()
From the above heatmap, we can observe the following:
Significant Positive Correlation
Significant Negative Correlation
What is surprising is that not a single attribute strongly correlates with popularity. Maybe one attribute alone isn't enough to affect the popularity of a song; it could be that multiple attributes together do. We will dig deeper into this next.
From the above analysis, we found that no single track attribute majorly affects the popularity of a track. So now we will look for common attributes, if any, that make an artist popular. We will compare the attributes of the 5 most popular and 5 least popular artists in the form of a radar chart.
We will achieve this by following the steps below:
# Creating the labels for radar chart
labels = ['danceability', 'energy', 'speechiness', 'valence', 'liveness', 'acousticness']
# Finding the mean values of the labels created above for the 5 most and 5 least popular artists
popular_artist_df = track_df.groupby('artist_name').mean(numeric_only=True).sort_values('popularity', ascending = False).head(5)[labels].mean().to_list()
least_popular_artist_df = track_df.groupby('artist_name').mean(numeric_only=True).sort_values('popularity', ascending = False).tail(5)[labels].mean().to_list()
# Scaling each attribute of all tracks to [0, 1] and taking the mean across all tracks
minmax_scaler = MinMaxScaler()
all_artist_df = track_df[labels]
df_scaled = pd.DataFrame(minmax_scaler.fit_transform(all_artist_df.to_numpy()))
all_artist_df = df_scaled.mean().to_list()
# Creating radar chart
angles = np.linspace(0, 2 * np.pi, len(labels), endpoint = False)
fig = plt.figure(figsize = (15,15))
# Plotting songs characteristic (mean) of songs by popular artists
ax = fig.add_subplot(221, polar = True)
ax.plot(angles, popular_artist_df, 'o-', linewidth = 2, label = "Most Popular Artists", color = 'blue')
ax.fill(angles, popular_artist_df, alpha = 0.25, facecolor = 'blue')
# Setting characteristics name on the plot
ax.set_thetagrids(angles * 180/np.pi, ['Danceability','Energy','Speechiness','Valence','Liveness','Acousticness'], fontsize=14)
ax.xaxis.set_tick_params(pad = 30)
# Creating labels for plot
ax.set_rlabel_position(250)
plt.yticks([0.1 , 0.2 , 0.3 , 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], ["0.1",'0.2', "0.3", "0.4", "0.5", "0.6", '0.7', '0.8', '0.9'], size=12)
plt.ylim(0,0.9)
# Plotting songs characteristic (mean) of songs by all artists
ax.plot(angles, least_popular_artist_df, 'o-', linewidth = 2, label = "Least Popular Artists", color = 'green')
ax.fill(angles, least_popular_artist_df, alpha = 0.25, facecolor = 'green')
ax.grid(True)
# Displaying the plot
hfont = {'fontname':'Monospace','fontsize': 15}
ax.set_title('Comparison of Track Attributes Between Most Popular and Least Popular Artists', **hfont, pad = 60)
plt.legend(loc = 'best', bbox_to_anchor = (1.5, 1), fontsize = 14)
plt.show()
We can observe that people tend to love artists who produce songs with more energy and danceability, which may hint at a preference for party music. We can also see that the energy and danceability of all songs, regardless of artist popularity, are comparable; these attributes could be a common trait across the music industry. Liveness and speechiness do not appear to influence the audience's preferences. One major difference between the most popular and least popular artists is valence, which captures the positivity of a song: people tend to love songs that make them feel positive and happy. These observations could act as a formula for new musicians trying to build a name in the industry; they should consider making songs that are both positive and energetic.
Now that we have a good understanding of the song attributes that people tend to prefer, we would like to see if these attributes result in a specific sound (genre) by analyzing the popularity of different genres over the years. We will determine the popularity of a genre by taking the average track popularity within that genre, and we will then look at how genre popularity has evolved over the years. For this analysis, we will consider the 3 most popular and 3 least popular genres.
# Load the dataframe
track_df = pd.read_csv('track_data.csv')
# Copying the track dataframe to a genre dataframe
genre_df = track_df.copy()
# Determining the average popularity of all the tracks in the dataset
average_popularity = genre_df['popularity'].mean()
# Dropping null values and converting the genre column from string to list
genre_df.dropna(inplace=True)
genre_df['genre'] = genre_df['genre'].apply(lambda x: ast.literal_eval(x))
# Drop all duplicate tracks in a dataframe
genre_df.drop_duplicates(subset='track_uri', inplace=True)
genre_df = genre_df.explode(column='genre')
# If a track's popularity is greater than the average popularity of all tracks, label it 1 ('Hit Song'), otherwise -1 ('Flop Song')
genre_df['Hit or Flop'] = genre_df['popularity'].apply(lambda x: 1 if x>average_popularity else -1)
# Creating dataframes with the top 3 and bottom 3 genres by hit/flop balance
top_3_genre_df = pd.DataFrame(genre_df.groupby('genre')['Hit or Flop'].sum()).sort_values(by='Hit or Flop', ascending=False).head(3)
bottom_3_genre_df = pd.DataFrame(genre_df.groupby('genre')['Hit or Flop'].sum()).sort_values(by='Hit or Flop', ascending=False).tail(3)
# Displaying top 3 genres
top_3_genre_df
Hit or Flop | |
---|---|
genre | |
rap | 902 |
hip hop | 675 |
pop | 616 |
# Displaying bottom 3 genres
bottom_3_genre_df
Hit or Flop | |
---|---|
genre | |
indie folk | -452 |
stomp and holler | -456 |
indie rock | -476 |
# Merging the playlist dataframe and track dataframe on playlist id to populate tracks with the year they were added to the playlist and drop redundant columns
track_df = track_df.merge(playlist_df, on='pid')
track_df.drop(columns=['tracks','num_tracks','num_albums','num_followers','num_edits','name','collaborative','num_artists','duration_ms_y','duration_ms_x'],inplace=True)
# Dropping the rows with any null values and type-casting the genre column from string to list
track_df.dropna(inplace=True)
track_df['genre'] = track_df['genre'].apply(lambda x: ast.literal_eval(x))
# For every genre of a track, repeat the tracks individually for each genre
track_df= track_df.explode(column='genre')
# Counting the total number of genres that were in all the songs for all playlist every year
total_genre = pd.DataFrame(track_df.groupby('modified_year')['genre'].count())
total_genre.reset_index(inplace=True)
# Counting the individual genres by year
gen_count_year = pd.DataFrame(track_df.groupby('modified_year')['genre'].value_counts())
gen_count_year.rename(columns={'genre':'genre count'},inplace=True)
gen_count_year.reset_index(inplace=True)
# Merging the dataframes of individual genre count and total genre count by year
gen_count_year = gen_count_year.merge(total_genre, on='modified_year')
# Calculating the percentage of each genre as a proportion of total genre in a particular year
gen_count_year['percentage'] = (gen_count_year['genre count']/gen_count_year['genre_y'])*100
gen_count_year = gen_count_year[gen_count_year.modified_year != '2011']
gen_count_year = gen_count_year[gen_count_year.modified_year != '2012']
# Dropping columns which are not needed
gen_count_year.drop(columns=['genre count','genre_y'], inplace=True)
gen_count_year
# Creating individual dataframe for each genre that we want to visualize
indierock = gen_count_year[gen_count_year['genre_x'] == 'indie rock']
stompandhaller = gen_count_year[gen_count_year['genre_x'] == 'stomp and holler']
indiefolk = gen_count_year[gen_count_year['genre_x'] == 'indie folk']
rap = gen_count_year[gen_count_year['genre_x'] == 'rap']
hiphop = gen_count_year[gen_count_year['genre_x'] == 'hip hop']
pop = gen_count_year[gen_count_year['genre_x'] == 'pop']
# Plotting the line chart for the genres over the past 5 years to see if there are any trends
plt.figure(figsize=(15,8))
plt.plot(indierock['modified_year'], indierock['percentage'], label = "Indie Rock")
plt.plot(stompandhaller['modified_year'], stompandhaller['percentage'], label = "Stomp And Holler")
plt.plot(indiefolk['modified_year'], indiefolk['percentage'], label = "Indie Folk")
plt.plot(rap['modified_year'], rap['percentage'], label = "Rap")
plt.plot(hiphop['modified_year'], hiphop['percentage'], label = "Hip Hop")
plt.plot(pop['modified_year'], pop['percentage'], label = "Pop")
plt.legend(bbox_to_anchor=(1.1, 1.05))
hfont = {'fontname':'Monospace', 'fontsize':15,}
vfont = {'fontname':'Monospace', 'fontsize':25}
from matplotlib.ticker import PercentFormatter
plt.gca().yaxis.set_major_formatter(PercentFormatter())
plt.xlabel('Years', **hfont)
plt.ylabel('Genre popularity', **hfont)
plt.title('Growth of Genres over the Years', **vfont)
plt.show()
From the above line chart, we can observe that the sound hasn't changed much over the years. This also tells us that users remain true to the genres they listen to; perhaps users are not able to explore new genres and discover new music. Streaming platforms should make music discovery easier so that users can develop a more diverse taste in music.
Genres like Indie Rock, Stomp and Holler, and Indie Folk are relatively niche and have been declining in popularity over the years. The Pop genre, however, has seen an incredible amount of growth starting from 2013, which may reflect the wave of mainstream pop catalogues and listeners moving onto streaming platforms around that time.
Taking into consideration the song attributes and artist popularity, we decided to look how this fits into the classification and rankings of artists. Using Billboard's Top Artists of the decade (2010 - 2020), we decided to filter the dataframe of our tracks to analyze how artists compare in popularity.
The Top Artists of the Decade were:
Using Seaborn's Violinplot, we can visualize how Artist's compare in track popularity.
Source : Billboard : Artist of the decade
# Grouping tracks by playlist ids (pid) and take the count
track_df['num_of_tracks'] = track_df.groupby('pid')['pid'].transform('count')
# Filling NA values by 0
track_df = track_df.fillna(0)
# Dropping duplicate tracks from the track dataframe
track_df.drop_duplicates(subset='track_uri', inplace=True)
# #Determining the average popularity of artist and sorting the tracks by playlist id
track_df['artist_pop'] = track_df.groupby('artist_name')['popularity'].transform('mean')
track_df = track_df.sort_values(by = 'pid').set_index('pid')
playlist_df = playlist_df.add_prefix('playlist_')
# Merge all songs df with playlist df
full_df = pd.merge(track_df, playlist_df, how = 'left', on = 'pid').drop(columns = ['playlist_tracks'])
# List of Billboard's Top Five Artist of the Decade
top_artists = ['Drake',
'Taylor Swift',
'Bruno Mars',
'Rihanna',
'Ed Sheeran']
# Filter songs df with only top artists
top_artists_full_df = full_df[full_df['artist_name'].isin(top_artists)].sort_values(by = 'num_of_tracks',
ascending = False)
# New dataframe with average values of top artists
avg_top_artists_full_df = top_artists_full_df.groupby('artist_name').mean().reset_index()
top_artists_full_df.rename(columns={'artist_name':'Artist Name','popularity':'Popularity'},inplace=True)
# Set style and graph dimensions
sns.set_style('whitegrid')
plt.figure(figsize = (20,10))
hfont = {'fontname':'Monospace','fontsize':22}
plt.tight_layout()
# Create violinplot for the song popularity of top artists
viol_plot = sns.violinplot(x ='Artist Name',
y ='Popularity',
data = top_artists_full_df,
palette = 'winter',
bw = .3,
cut = 1,
).set(title = "How Popular Are Billboard's Top Artists Of The Decade?")
sns.set(font_scale = 2)
# Remove spines on x and y axis
sns.despine(left=True, bottom=True)
plt.tight_layout()
The top artist of decade, according to Billboard, was Drake. At first glance we can observe that Drake has popular tracks that are lower than some of the top 5 artists. We can see the Bruno Mars and Taylor Swift had high track popularity, and few tracks that were unpopular. Rihanna and Drake had more tracks that can be considered unpopular.
Based on these results, we can only assume that institutions such as Billboard use other metrics other than song popularity. It is possible that external metrics, such as a album sales or number of times a track has been on the charts could be considered. In recent years, Drake has become more prominent in the music industry, which would imply that relevance is considered. However, from a purely musical standpoint we say that the top artists were actually Bruno Mars.
We not only have a clear understanding of how we can differentiate each song from the rest by quantifying its attributes but we also know what sound users are looking for and how that sound is changing every year. We have also analyzed attributes of artists who are on top of their game and are preferred by users based on number of times they have been added by them in their playlists. These analysis gives us enough knowledge to build a recommendation system to personalize user experience.
To achieve this, we will first remove all the duplicate tracks from the dataset and convert the genres from string to arrays in order to analyze genres. We will match the genre of the input track and compare it to the tracks with similar genre and then find a cosine similarity on track attributes which will give us a list of similar sounding tracks of the same genre.
Cosine similarity will be used to find similarity between two non-zero vectors. Our two non-zero vectors will be the songs metadata (input song and target song). We find this similarity by calculating the angle between these two vectors.
Mathematically, Cosθ = A . B / ||A| . ||B||
We will also be using hamilton similarity to make our recommendations even more accurate. We will also be taking care of the edge case testing.
# Dropping duplicates and converting genre string into array of genres
track_df = pd.read_csv(r'track_data.csv')
track_df.drop_duplicates(subset='track_uri', keep='first', inplace=True)
track_df.dropna(inplace=True)
track_df['genre'] = track_df['genre'].apply(lambda x: ast.literal_eval(x))
def cosine_similarity(row, song_row):
''' Cosine similarity will help us find a similarity matrix between our song metadata and give us a similarity score '''
if row['genre']:
if not set(song_row['genre'].iloc[0]).isdisjoint(row['genre']):
song_metadata = song_row.loc[:, 'danceability':'popularity'].values.reshape(-1, 1)
cs_song_metadata = row['danceability':'popularity'].values.reshape(-1, 1)
return 1 - spatial.distance.cosine(song_metadata, cs_song_metadata)
return -1
def hamilton_similarity(row, song_row):
''' Hamilton similarity like cosine similarity will help us find a matrix between our song metadata
and give us a similarity score. This will be an added comparison on top of cosine similarity '''
if row['genre']:
if not set(song_row['genre'].iloc[0]).isdisjoint(row['genre']):
song_metadata = song_row.loc[:, 'danceability':'popularity'].values.reshape(-1, 1)
hs_song_metadata = row['danceability':'popularity'].values.reshape(-1, 1)
return spatial.distance.hamming(song_metadata, hs_song_metadata)
return -1
def song_input(recommend_df):
''' Taking input of the first song and its respective artist and iterating our dataframe to match the input '''
song_name = input('Enter the Song Name ')
artist_name = input('Enter the Artist Name ')
song_row = recommend_df.loc[
(recommend_df['track_name'].str.lower() == song_name.lower()) & (recommend_df['artist_name'].str.lower() == artist_name.lower())]
if song_row.empty:
print("The Song-Artist Combination does not exist. Please input again\n")
song_input(recommend_df)
recommendation(song_row)
def recommendation(song_row):
''' our dataset will first be sorted on Cosine and then on hamilton similarity '''
track_df['cosine_similarity'] = track_df.apply(cosine_similarity, args=(song_row.head(1),), axis=1)
track_df['hamilton_similarity'] = track_df.apply(hamilton_similarity, args=(song_row.head(1),), axis=1)
track_df.sort_values(['cosine_similarity', 'hamilton_similarity'], inplace=True, ascending=[False, False])
pd.set_option('display.max_columns', None)
top_songs = track_df.head(20)
print('Your recommended playlist based on your song choice: \n')
display(top_songs.reset_index()[['track_name', 'artist_name']])
try:
song_input(track_df)
except Exception as Fault:
error = Fault
Your recommended playlist based on your song choice:
track_name | artist_name | |
---|---|---|
0 | Enter Sandman | Metallica |
1 | Afraid To Shoot Strangers - 1998 Remastered Ve... | Iron Maiden |
2 | The Unforgiven | Metallica |
3 | Merry Xmas Everybody | Slade |
4 | No Particular Place To Go | Chuck Berry |
5 | The Power Of Love | Huey Lewis & The News |
6 | Do It Again | The Kinks |
7 | Baba O'Riley | The Who |
8 | With A Little Help From My Friends - Take 1 / ... | The Beatles |
9 | Come Dancing | The Kinks |
10 | He's Evil | The Kinks |
11 | Congratulations | MGMT |
12 | Runnin' Down A Dream | Tom Petty |
13 | Honky Tonk Women - Mono Version | The Rolling Stones |
14 | Tongues | Joywave |
15 | Like A Friend | Pulp |
16 | Come Together - Remastered | The Beatles |
17 | Who Are You | The Who |
18 | Anything You Want | Spoon |
19 | Run | AWOLNATION |
We can further optimize our recommendation system based on the user listening history and not just the playlist data. We can also optimize this recommendation system by using similar playlists of different users.
To wrap it up, We have analyzed how we could quantify the tracks based on different metrics. Through this, we found out that these metrics are not individually related to the popularity of the song. However, from our second analysis we can see that the artists who are popular tend to have specific attributes in their song. These attributes are:
After analyzing the song attributes, we decided to analyze the sound (genre) preferred by users over the years. We can see that the popularity of the sound remains more or less same over the years. We can infer that users prefer only some genres and want to stick to them. The other possible reason could be lack of diversed in-house curated playlists by Spotify. The platform should not only focus on users' listening habits but also promote niche and new genres to diversify the music taste.
We then analyzed the data for top Billboard artists to see if the music industry is relying completely on spotify streams to judge the performance of an artist. From the analysis, we can infer that even though everything is going digital, artist popularity defined by Spotify is not the only factor to determine artist's popularity. Maybe Billboard are considering other factors like tour ticket sale, physical distribution of music, streams from other platforms in order to calculate the popularity of the artist.
After having enough knowledge about the music industry and attributes, we decided to make our own recommendation system to personalize user experience. This recommendation system can be used by any digital platform who wishes to provide a hyper personalized environment to their users.