In [8]:
billboard
Out[8]:
year artist track time genre week rank date
246 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 1 87 2000-02-26
563 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 2 82 2000-03-04
880 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 3 72 2000-03-11
1197 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 4 77 2000-03-18
1514 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 5 87 2000-03-25
... ... ... ... ... ... ... ... ...
10793 2000 matchbox twenty Bent 4:12 Rock 35 33 2000-12-23
11110 2000 matchbox twenty Bent 4:12 Rock 36 37 2000-12-30
11427 2000 matchbox twenty Bent 4:12 Rock 37 38 2001-01-06
11744 2000 matchbox twenty Bent 4:12 Rock 38 38 2001-01-13
12061 2000 matchbox twenty Bent 4:12 Rock 39 48 2001-01-20

5307 rows × 8 columns

In [9]:
songs_cols = ["year", "artist", "track", "time", "genre"]
songs = billboard[songs_cols].drop_duplicates()
songs = songs.reset_index(drop=True)
songs["song_id"] = songs.index
songs
Out[9]:
year artist track time genre song_id
0 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 0
1 2000 2Ge+her The Hardest Part Of Breaking Up (Is Getting Ba... 3:15 R&B 1
2 2000 3 Doors Down Kryptonite 3:53 Rock 2
3 2000 3 Doors Down Loser 4:24 Rock 3
4 2000 504 Boyz Wobble Wobble 3:35 Rap 4
... ... ... ... ... ... ...
312 2000 Yankee Grey Another Nine Minutes 3:10 Country 312
313 2000 Yearwood, Trisha Real Live Woman 3:55 Country 313
314 2000 Ying Yang Twins Whistle While You Twurk 4:19 Rap 314
315 2000 Zombie Nation Kernkraft 400 3:30 Rock 315
316 2000 matchbox twenty Bent 4:12 Rock 316

317 rows × 6 columns

In [10]:
# merge song id to billboard records
ranks = pd.merge(billboard, songs, on=["year","artist", "track", "time", "genre"])
ranks
Out[10]:
year artist track time genre week rank date song_id
0 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 1 87 2000-02-26 0
1 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 2 82 2000-03-04 0
2 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 3 72 2000-03-11 0
3 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 4 77 2000-03-18 0
4 2000 2 Pac Baby Don't Cry (Keep Ya Head Up II) 4:22 Rap 5 87 2000-03-25 0
... ... ... ... ... ... ... ... ... ...
5302 2000 matchbox twenty Bent 4:12 Rock 35 33 2000-12-23 316
5303 2000 matchbox twenty Bent 4:12 Rock 36 37 2000-12-30 316
5304 2000 matchbox twenty Bent 4:12 Rock 37 38 2001-01-06 316
5305 2000 matchbox twenty Bent 4:12 Rock 38 38 2001-01-13 316
5306 2000 matchbox twenty Bent 4:12 Rock 39 48 2001-01-20 316

5307 rows × 9 columns

In [11]:
ranks = ranks[["song_id", "date","rank"]]
ranks
Out[11]:
song_id date rank
0 0 2000-02-26 87
1 0 2000-03-04 82
2 0 2000-03-11 72
3 0 2000-03-18 77
4 0 2000-03-25 87
... ... ... ...
5302 316 2000-12-23 33
5303 316 2000-12-30 37
5304 316 2001-01-06 38
5305 316 2001-01-13 38
5306 316 2001-01-20 48

5307 rows × 3 columns