In [3]:
df
Out[3]:
country year m014 m1524 m2534 m3544 m4554 m5564 m65 mu f014
0 AD 2000 0.0 0.0 1.0 0.0 0 0 0.0 NaN NaN
1 AE 2000 2.0 4.0 4.0 6.0 5 12 10.0 NaN 3.0
2 AF 2000 52.0 228.0 183.0 149.0 129 94 80.0 NaN 93.0
3 AG 2000 0.0 0.0 0.0 0.0 0 0 1.0 NaN 1.0
4 AL 2000 2.0 19.0 21.0 14.0 24 19 16.0 NaN 3.0
5 AM 2000 2.0 152.0 130.0 131.0 63 26 21.0 NaN 1.0
6 AN 2000 0.0 0.0 1.0 2.0 0 0 0.0 NaN 0.0
7 AO 2000 186.0 999.0 1003.0 912.0 482 312 194.0 NaN 247.0
8 AR 2000 97.0 278.0 594.0 402.0 419 368 330.0 NaN 121.0
9 AS 2000 NaN NaN NaN NaN 1 1 NaN NaN NaN
In [4]:
melted = pd.melt(df, id_vars=["country","year"], value_name="cases", 
                 var_name="sex_and_age")
melted
Out[4]:
country year sex_and_age cases
0 AD 2000 m014 0.0
1 AE 2000 m014 2.0
2 AF 2000 m014 52.0
3 AG 2000 m014 0.0
4 AL 2000 m014 2.0
... ... ... ... ...
85 AM 2000 f014 1.0
86 AN 2000 f014 0.0
87 AO 2000 f014 247.0
88 AR 2000 f014 121.0
89 AS 2000 f014 NaN

90 rows × 4 columns

In [7]:
tmp_df = melted["sex_and_age"].str.extract(
    "(?P<sex>\D)(?P<age_lower>\d+)(?P<age_upper>\d{2})")
tmp_df
Out[7]:
sex age_lower age_upper
0 m 0 14
10 m 15 24
20 m 25 34
30 m 35 44
40 m 45 54
50 m 55 64
60 NaN NaN NaN
81 f 0 14
In [10]:
# Merge 
melted = pd.concat([melted, tmp_df], axis=1)
melted
Out[10]:
country year sex_and_age cases sex age_lower age_upper
0 AD 2000 m014 0.0 m 0 14
33 AG 2000 m3544 0.0 m 35 44
30 AD 2000 m3544 0.0 m 35 44
43 AG 2000 m4554 0.0 m 45 54
46 AN 2000 m4554 0.0 m 45 54
... ... ... ... ... ... ... ...
47 AO 2000 m4554 482.0 m 45 54
28 AR 2000 m2534 594.0 m 25 34
37 AO 2000 m3544 912.0 m 35 44
17 AO 2000 m1524 999.0 m 15 24
27 AO 2000 m2534 1003.0 m 25 34

64 rows × 7 columns

In [11]:
melted["age"] = melted["age_lower"] + "-" + melted["age_upper"]
melted
Out[11]:
country year sex_and_age cases sex age_lower age_upper age
0 AD 2000 m014 0.0 m 0 14 0-14
33 AG 2000 m3544 0.0 m 35 44 35-44
30 AD 2000 m3544 0.0 m 35 44 35-44
43 AG 2000 m4554 0.0 m 45 54 45-54
46 AN 2000 m4554 0.0 m 45 54 45-54
... ... ... ... ... ... ... ... ...
47 AO 2000 m4554 482.0 m 45 54 45-54
28 AR 2000 m2534 594.0 m 25 34 25-34
37 AO 2000 m3544 912.0 m 35 44 35-44
17 AO 2000 m1524 999.0 m 15 24 15-24
27 AO 2000 m2534 1003.0 m 25 34 25-34

64 rows × 8 columns

In [12]:
melted = melted.drop(['sex_and_age',"age_lower","age_upper"], axis=1)
melted = melted.dropna()
melted = melted.sort_values(by=["country", "year", "sex", "age"])
melted
Out[12]:
country year cases sex age
0 AD 2000 0.0 m 0-14
10 AD 2000 0.0 m 15-24
20 AD 2000 1.0 m 25-34
30 AD 2000 0.0 m 35-44
40 AD 2000 0.0 m 45-54
... ... ... ... ... ...
38 AR 2000 402.0 m 35-44
48 AR 2000 419.0 m 45-54
58 AR 2000 368.0 m 55-64
49 AS 2000 1.0 m 45-54
59 AS 2000 1.0 m 55-64

64 rows × 5 columns