billboard
year | artist | track | time | genre | week | rank | date | |
---|---|---|---|---|---|---|---|---|
246 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 1 | 87 | 2000-02-26 |
563 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 2 | 82 | 2000-03-04 |
880 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 3 | 72 | 2000-03-11 |
1197 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 4 | 77 | 2000-03-18 |
1514 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 5 | 87 | 2000-03-25 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
10793 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 35 | 33 | 2000-12-23 |
11110 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 36 | 37 | 2000-12-30 |
11427 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 37 | 38 | 2001-01-06 |
11744 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 38 | 38 | 2001-01-13 |
12061 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 39 | 48 | 2001-01-20 |
5307 rows × 8 columns
songs_cols = ["year", "artist", "track", "time", "genre"]
songs = billboard[songs_cols].drop_duplicates()
songs = songs.reset_index(drop=True)
songs["song_id"] = songs.index
songs
year | artist | track | time | genre | song_id | |
---|---|---|---|---|---|---|
0 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 0 |
1 | 2000 | 2Ge+her | The Hardest Part Of Breaking Up (Is Getting Ba... | 3:15 | R&B | 1 |
2 | 2000 | 3 Doors Down | Kryptonite | 3:53 | Rock | 2 |
3 | 2000 | 3 Doors Down | Loser | 4:24 | Rock | 3 |
4 | 2000 | 504 Boyz | Wobble Wobble | 3:35 | Rap | 4 |
... | ... | ... | ... | ... | ... | ... |
312 | 2000 | Yankee Grey | Another Nine Minutes | 3:10 | Country | 312 |
313 | 2000 | Yearwood, Trisha | Real Live Woman | 3:55 | Country | 313 |
314 | 2000 | Ying Yang Twins | Whistle While You Twurk | 4:19 | Rap | 314 |
315 | 2000 | Zombie Nation | Kernkraft 400 | 3:30 | Rock | 315 |
316 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 316 |
317 rows × 6 columns
# merge song id to billboard records
ranks = pd.merge(billboard, songs, on=["year","artist", "track", "time", "genre"])
ranks
year | artist | track | time | genre | week | rank | date | song_id | |
---|---|---|---|---|---|---|---|---|---|
0 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 1 | 87 | 2000-02-26 | 0 |
1 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 2 | 82 | 2000-03-04 | 0 |
2 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 3 | 72 | 2000-03-11 | 0 |
3 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 4 | 77 | 2000-03-18 | 0 |
4 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 5 | 87 | 2000-03-25 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5302 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 35 | 33 | 2000-12-23 | 316 |
5303 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 36 | 37 | 2000-12-30 | 316 |
5304 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 37 | 38 | 2001-01-06 | 316 |
5305 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 38 | 38 | 2001-01-13 | 316 |
5306 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 39 | 48 | 2001-01-20 | 316 |
5307 rows × 9 columns
ranks = ranks[["song_id", "date","rank"]]
ranks
song_id | date | rank | |
---|---|---|---|
0 | 0 | 2000-02-26 | 87 |
1 | 0 | 2000-03-04 | 82 |
2 | 0 | 2000-03-11 | 72 |
3 | 0 | 2000-03-18 | 77 |
4 | 0 | 2000-03-25 | 87 |
... | ... | ... | ... |
5302 | 316 | 2000-12-23 | 33 |
5303 | 316 | 2000-12-30 | 37 |
5304 | 316 | 2001-01-06 | 38 |
5305 | 316 | 2001-01-13 | 38 |
5306 | 316 | 2001-01-20 | 48 |
5307 rows × 3 columns