billboard
| year | artist | track | time | genre | week | rank | date | |
|---|---|---|---|---|---|---|---|---|
| 246 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 1 | 87 | 2000-02-26 |
| 563 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 2 | 82 | 2000-03-04 |
| 880 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 3 | 72 | 2000-03-11 |
| 1197 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 4 | 77 | 2000-03-18 |
| 1514 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 5 | 87 | 2000-03-25 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10793 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 35 | 33 | 2000-12-23 |
| 11110 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 36 | 37 | 2000-12-30 |
| 11427 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 37 | 38 | 2001-01-06 |
| 11744 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 38 | 38 | 2001-01-13 |
| 12061 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 39 | 48 | 2001-01-20 |
5307 rows × 8 columns
songs_cols = ["year", "artist", "track", "time", "genre"]
songs = billboard[songs_cols].drop_duplicates()
songs = songs.reset_index(drop=True)
songs["song_id"] = songs.index
songs
| year | artist | track | time | genre | song_id | |
|---|---|---|---|---|---|---|
| 0 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 0 |
| 1 | 2000 | 2Ge+her | The Hardest Part Of Breaking Up (Is Getting Ba... | 3:15 | R&B | 1 |
| 2 | 2000 | 3 Doors Down | Kryptonite | 3:53 | Rock | 2 |
| 3 | 2000 | 3 Doors Down | Loser | 4:24 | Rock | 3 |
| 4 | 2000 | 504 Boyz | Wobble Wobble | 3:35 | Rap | 4 |
| ... | ... | ... | ... | ... | ... | ... |
| 312 | 2000 | Yankee Grey | Another Nine Minutes | 3:10 | Country | 312 |
| 313 | 2000 | Yearwood, Trisha | Real Live Woman | 3:55 | Country | 313 |
| 314 | 2000 | Ying Yang Twins | Whistle While You Twurk | 4:19 | Rap | 314 |
| 315 | 2000 | Zombie Nation | Kernkraft 400 | 3:30 | Rock | 315 |
| 316 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 316 |
317 rows × 6 columns
# merge song id to billboard records
ranks = pd.merge(billboard, songs, on=["year","artist", "track", "time", "genre"])
ranks
| year | artist | track | time | genre | week | rank | date | song_id | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 1 | 87 | 2000-02-26 | 0 |
| 1 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 2 | 82 | 2000-03-04 | 0 |
| 2 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 3 | 72 | 2000-03-11 | 0 |
| 3 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 4 | 77 | 2000-03-18 | 0 |
| 4 | 2000 | 2 Pac | Baby Don't Cry (Keep Ya Head Up II) | 4:22 | Rap | 5 | 87 | 2000-03-25 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5302 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 35 | 33 | 2000-12-23 | 316 |
| 5303 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 36 | 37 | 2000-12-30 | 316 |
| 5304 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 37 | 38 | 2001-01-06 | 316 |
| 5305 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 38 | 38 | 2001-01-13 | 316 |
| 5306 | 2000 | matchbox twenty | Bent | 4:12 | Rock | 39 | 48 | 2001-01-20 | 316 |
5307 rows × 9 columns
ranks = ranks[["song_id", "date","rank"]]
ranks
| song_id | date | rank | |
|---|---|---|---|
| 0 | 0 | 2000-02-26 | 87 |
| 1 | 0 | 2000-03-04 | 82 |
| 2 | 0 | 2000-03-11 | 72 |
| 3 | 0 | 2000-03-18 | 77 |
| 4 | 0 | 2000-03-25 | 87 |
| ... | ... | ... | ... |
| 5302 | 316 | 2000-12-23 | 33 |
| 5303 | 316 | 2000-12-30 | 37 |
| 5304 | 316 | 2001-01-06 | 38 |
| 5305 | 316 | 2001-01-13 | 38 |
| 5306 | 316 | 2001-01-20 | 48 |
5307 rows × 3 columns