In 2022, there were 771 million people aged 65 and over worldwide, almost 10% of the global population.
A country is commonly regarded as having entered an ageing society once the share of its population aged 65 and above reaches 7%. Population ageing is becoming an increasingly serious issue, especially for developed countries.
The phenomenon is driven mainly by rising life expectancy and declining fertility. Exploring the factors that affect life expectancy and fertility with machine learning methods can help us predict a country's future population ageing and prepare for the challenge.
This project uses datasets from https://databank.worldbank.org/ to illustrate world population ageing trends over the past 60 years.
One goal is to uncover the relationships among economic development, educational attainment, health status, fertility, life expectancy, and the percentage of the population aged 65 and over.
Another goal is to predict the percentage of the population aged 65 and over and to identify which countries will be the next to become ageing societies.
Data: I use four datasets covering different aspects of the question: population, economic growth, education, and health.
Regression model: select an appropriate model that minimizes the median absolute error.
Classification model: train a decision tree that predicts whether or not a country is likely to become an ageing society.
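The regression criterion in the plan is worth pinning down: median absolute error is the median of the absolute residuals, and model selection means choosing the candidate that minimizes it. A minimal pure-Python sketch (the toy values below are invented for illustration; scikit-learn provides an equivalent `median_absolute_error`):

```python
# Median absolute error: a robust alternative to mean squared error,
# less sensitive to outlier countries.
def median_absolute_error(y_true, y_pred):
    residuals = sorted(abs(t - p) for t, p in zip(y_true, y_pred))
    n = len(residuals)
    mid = n // 2
    # Even count: average the two middle residuals; odd: take the middle one.
    return residuals[mid] if n % 2 else (residuals[mid - 1] + residuals[mid]) / 2

# Toy example: true pop_over_65 shares vs. two candidate models' predictions.
y_true = [5.0, 10.0, 15.0, 20.0]
model_a = [6.0, 9.0, 16.0, 30.0]   # one large miss
model_b = [7.0, 12.0, 17.0, 22.0]  # consistently off by 2

print(median_absolute_error(y_true, model_a))  # 1.0
print(median_absolute_error(y_true, model_b))  # 2.0
```

Model A wins here despite one large miss: the median ignores the outlier, which is exactly why this metric is attractive for skewed cross-country data.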
Population ageing is a global concern that nearly all countries have to face now or in the future.
Increasing life expectancy and decreasing fertility are two major driving forces of this phenomenon.
We can use economic growth, educational attainment, and health status to predict whether a country is at risk of becoming an ageing society.
# Clone my repository, change to right directory, and import libraries.
#%cd /content
!git clone https://github.com/kellyyliu/Datasets.git
#%cd /content/cmps3160/
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Cloning into 'Datasets'...
remote: Enumerating objects: 24, done.
remote: Counting objects: 100% (24/24), done.
remote: Compressing objects: 100% (17/17), done.
remote: Total 24 (delta 5), reused 24 (delta 5), pack-reused 0
Receiving objects: 100% (24/24), 11.69 MiB | 10.35 MiB/s, done.
Resolving deltas: 100% (5/5), done.
# Import population data
wb_population = pd.read_csv('../Datasets/WB_Population.csv') # read the 'csv' file
wb_population = wb_population.drop(columns=['Country Code','Series Code']) # drop columns I don't need
wb_population = pd.melt(wb_population, id_vars=['Country Name', 'Series Name'], var_name='year', value_name='value') # melt the year columns into rows
wb_population = wb_population.dropna(subset=['Country Name','year']) # drop NaN for index columns
wb_population['year'] = wb_population['year'].str.replace(r'\s*\[.*\]', '', regex=True) # delete the bracketed suffix in the 'year' labels (raw string avoids an invalid-escape warning)
# Transform to a pivot table and keep key variables
wb_population = wb_population.pivot(index=['Country Name','year'], columns='Series Name', values='value') # transform the dataframe to be a pivot table so that we can have each variables in a column
wb_population = wb_population.rename(columns = {"Population ages 65 and above (% of total population)": "pop_over_65",
"Fertility rate, total (births per woman)":"fertility",
"Life expectancy at birth, total (years)":"life_expectancy",
"Mortality rate, infant (per 1,000 live births)":"mortality_infant",
"Death rate, crude (per 1,000 people)": "death_rate",
"Birth rate, crude (per 1,000 people)": "birth_rate"
}) # rename key variables
wb_population = wb_population[['pop_over_65','fertility','life_expectancy','mortality_infant','death_rate','birth_rate']] # keep key variables
# Transform to a dataframe and make the dtypes correctly
wb_population = wb_population.reset_index()
wb_population = wb_population.rename(columns = {"Country Name":"country"})
wb_population['year'] = pd.to_numeric(wb_population['year'], errors='coerce').astype(int) # change the dtype from object to int (astype's errors='ignore' is deprecated)
wb_population['pop_over_65'] = pd.to_numeric(wb_population['pop_over_65'], errors='coerce')
wb_population['fertility'] = pd.to_numeric(wb_population['fertility'], errors='coerce')
wb_population['life_expectancy'] = pd.to_numeric(wb_population['life_expectancy'], errors='coerce')
wb_population['mortality_infant'] = pd.to_numeric(wb_population['mortality_infant'], errors='coerce')
wb_population['death_rate'] = pd.to_numeric(wb_population['death_rate'], errors='coerce')
wb_population['birth_rate'] = pd.to_numeric(wb_population['birth_rate'], errors='coerce')
wb_population.dtypes
wb_population
Series Name | country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate |
---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 1960 | 2.833029 | 7.282 | 32.535 | NaN | 31.921 | 50.340 |
1 | Afghanistan | 1961 | 2.817674 | 7.284 | 33.068 | NaN | 31.349 | 50.443 |
2 | Afghanistan | 1962 | 2.799055 | 7.292 | 33.547 | NaN | 30.845 | 50.570 |
3 | Afghanistan | 1963 | 2.778968 | 7.302 | 34.016 | 228.9 | 30.359 | 50.703 |
4 | Afghanistan | 1964 | 2.758929 | 7.304 | 34.494 | 225.1 | 29.867 | 50.831 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
17147 | Zimbabwe | 2019 | 3.345781 | 3.599 | 61.292 | 37.1 | 8.043 | 31.518 |
17148 | Zimbabwe | 2020 | 3.376262 | 3.545 | 61.124 | 36.6 | 8.132 | 31.009 |
17149 | Zimbabwe | 2021 | 3.363343 | 3.491 | 59.253 | 35.7 | 9.057 | 30.537 |
17150 | Zimbabwe | 2022 | 3.321845 | NaN | NaN | NaN | NaN | NaN |
17151 | Zimbabwe | 2023 | 3.295719 | NaN | NaN | NaN | NaN | NaN |
17152 rows × 8 columns
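The year cleanup in the cell above hinges on one regex: World Bank exports label year columns like `1960 [YR1960]`, and the bracketed suffix must be stripped before casting to int. A stdlib-only sketch of that step (using a raw-string pattern to avoid escape warnings):

```python
import re

# World Bank year headers look like '1960 [YR1960]'; keep only the year digits.
pattern = re.compile(r"\s*\[.*\]")

def clean_year(label):
    return int(pattern.sub("", label))

print(clean_year("1960 [YR1960]"))  # 1960
print(clean_year("2023 [YR2023]"))  # 2023
```

In the notebook, `Series.str.replace(..., regex=True)` applies this same substitution vectorized over the whole column.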
# Import GDP data
wb_gdp = pd.read_csv('../Datasets/WB_GDP.csv')
wb_gdp = wb_gdp.drop(columns=['Country Code','Series Code']) # drop columns I don't need
wb_gdp = pd.melt(wb_gdp, id_vars=['Country', 'Series'], var_name='year', value_name='value') # melt the year columns into rows
wb_gdp = wb_gdp.dropna(subset=['Country','year']) # drop NaN for index columns
wb_gdp = wb_gdp.drop_duplicates(subset=['Country','year'])
wb_gdp['year'] = wb_gdp['year'].str.replace(r'\s*\[.*\]', '', regex=True) # delete redundant parts in the 'year' column
# Transform to a pivot table and keep key variables
wb_gdp = wb_gdp.pivot(index=['Country','year'], columns='Series', values='value') # transform the dataframe to be a pivot table so that we can have each variables in a column
wb_gdp = wb_gdp.rename(columns = {"GDP,constant 2010 US$,millions,seas. adj.,": "gdp_2010"}) # rename key variables
wb_gdp = wb_gdp[['gdp_2010']] # keep key variables
# Transform to a dataframe and make the dtypes correctly
wb_gdp = wb_gdp.reset_index()
wb_gdp = wb_gdp.rename(columns = {"Country":"country"})
wb_gdp['year'] = pd.to_numeric(wb_gdp['year'], errors='coerce').astype(int) # change the dtype from object to int
wb_gdp['gdp_2010'] = pd.to_numeric(wb_gdp['gdp_2010'], errors='coerce')
wb_gdp.dtypes
wb_gdp.describe()
Series | year | gdp_2010 |
---|---|---|
count | 8214.000000 | 4.144000e+03 |
mean | 2005.000000 | 2.564769e+06 |
std | 10.677728 | 8.162975e+06 |
min | 1987.000000 | 0.000000e+00 |
25% | 1996.000000 | 7.188595e+03 |
50% | 2005.000000 | 1.476338e+05 |
75% | 2014.000000 | 1.046619e+06 |
max | 2023.000000 | 8.428075e+07 |
# Import education data
wb_education = pd.read_csv('../Datasets/WB_Education.csv')
wb_education = wb_education.drop(columns=['Country Code','Series Code']) # drop columns I don't need
wb_education = pd.melt(wb_education, id_vars=['Country Name', 'Series'], var_name='year', value_name='value') # melt the year columns into rows
wb_education = wb_education.dropna(subset=['Country Name','year']) # drop NaN for index columns
wb_education['year'] = wb_education['year'].str.replace(r'\s*\[.*\]', '', regex=True) # delete redundant parts in the 'year' column
# Transform to a pivot table and keep key variables
wb_education = wb_education.pivot(index=['Country Name','year'], columns='Series', values='value') # transform the dataframe to be a pivot table so that we can have each variables in a column
wb_education = wb_education.rename(columns = {"Government expenditure on education as % of GDP (%)":"education_expenditure",
"Out-of-school children of primary school age, both sexes (number)":"drop_out"}) # rename key variables
wb_education = wb_education[["education_expenditure","drop_out"]] # keep key variables
# Transform to a dataframe and make the dtypes correctly
wb_education = wb_education.reset_index()
wb_education = wb_education.rename(columns = {"Country Name":"country"})
wb_education['year'] = pd.to_numeric(wb_education['year'], errors='coerce').astype(int) # change the dtype from object to int
wb_education = wb_education.replace('..', np.nan) # World Bank marks missing values as '..'; replace before converting
wb_education['education_expenditure'] = pd.to_numeric(wb_education['education_expenditure'], errors='coerce')
wb_education['drop_out'] = pd.to_numeric(wb_education['drop_out'], errors='coerce')
wb_education.describe()
Series | year | education_expenditure | drop_out |
---|---|---|---|
count | 17536.00000 | 4372.000000 | 6.542000e+03 |
mean | 1991.50000 | 4.349850 | 7.867921e+06 |
std | 18.47348 | 1.852381 | 1.923585e+07 |
min | 1960.00000 | 0.000000 | 0.000000e+00 |
25% | 1975.75000 | 3.134660 | 1.045600e+04 |
50% | 1991.50000 | 4.260500 | 1.281455e+05 |
75% | 2007.25000 | 5.317721 | 2.618750e+06 |
max | 2023.00000 | 44.333980 | 1.308965e+08 |
# Import health data
wb_health = pd.read_csv('../Datasets/WB_Health.csv')
wb_health = wb_health.drop(columns=['Country Code','Series Code']) # drop columns I don't need
wb_health = pd.melt(wb_health, id_vars=['Country Name', 'Series Name'], var_name='year', value_name='value') # melt the year columns into rows
wb_health = wb_health.dropna(subset=['Country Name','year']) # drop NaN for index columns
wb_health['year'] = wb_health['year'].str.replace(r'\s*\[.*\]', '', regex=True) # delete redundant parts in the 'year' column
# Transform to a pivot table and keep key variables
wb_health = wb_health.pivot(index=['Country Name','year'], columns='Series Name', values='value') # transform the dataframe to be a pivot table so that we can have each variables in a column
wb_health = wb_health.rename(columns = {'Current health expenditure (% of GDP)':'health_expenditure',
'Hospital beds (per 1,000 people)':'hospital_beds'}) # rename key variables
wb_health = wb_health[['health_expenditure','hospital_beds']] # keep key variables
# Transform to a dataframe and make the dtypes correctly
wb_health = wb_health.reset_index()
wb_health = wb_health.rename(columns = {"Country Name":"country"})
wb_health['year'] = pd.to_numeric(wb_health['year'], errors='coerce').astype(int) # change the dtype from object to int
wb_health = wb_health.replace('..', np.nan) # World Bank marks missing values as '..'; replace before converting
wb_health['health_expenditure'] = pd.to_numeric(wb_health['health_expenditure'], errors='coerce')
wb_health['hospital_beds'] = pd.to_numeric(wb_health['hospital_beds'], errors='coerce')
wb_health.dtypes
wb_health.describe()
Series Name | year | health_expenditure | hospital_beds |
---|---|---|---|
count | 16758.000000 | 4863.000000 | 4837.000000 |
mean | 1991.000000 | 6.191735 | 4.391141 |
std | 18.184785 | 2.756636 | 3.357402 |
min | 1960.000000 | 1.263576 | 0.100000 |
25% | 1975.000000 | 4.275008 | 1.770000 |
50% | 1991.000000 | 5.457591 | 3.410000 |
75% | 2007.000000 | 7.809764 | 6.283300 |
max | 2022.000000 | 24.230680 | 40.315456 |
# Merge population data and GDP data
df = wb_population.merge(wb_gdp, on=["country", "year"], how="outer")
df
country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate | gdp_2010 | |
---|---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 1960 | 2.833029 | 7.282 | 32.535 | NaN | 31.921 | 50.340 | NaN |
1 | Afghanistan | 1961 | 2.817674 | 7.284 | 33.068 | NaN | 31.349 | 50.443 | NaN |
2 | Afghanistan | 1962 | 2.799055 | 7.292 | 33.547 | NaN | 30.845 | 50.570 | NaN |
3 | Afghanistan | 1963 | 2.778968 | 7.302 | 34.016 | 228.9 | 30.359 | 50.703 | NaN |
4 | Afghanistan | 1964 | 2.758929 | 7.304 | 34.494 | 225.1 | 29.867 | 50.831 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
18886 | Yemen Rep. | 2019 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
18887 | Yemen Rep. | 2020 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
18888 | Yemen Rep. | 2021 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
18889 | Yemen Rep. | 2022 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
18890 | Yemen Rep. | 2023 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
18891 rows × 9 columns
# Merge education data
df = df.merge(wb_education, on=["country", "year"], how="outer")
df
country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate | gdp_2010 | education_expenditure | drop_out | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 1960 | 2.833029 | 7.282 | 32.535 | NaN | 31.921 | 50.340 | NaN | NaN | NaN |
1 | Afghanistan | 1961 | 2.817674 | 7.284 | 33.068 | NaN | 31.349 | 50.443 | NaN | NaN | NaN |
2 | Afghanistan | 1962 | 2.799055 | 7.292 | 33.547 | NaN | 30.845 | 50.570 | NaN | NaN | NaN |
3 | Afghanistan | 1963 | 2.778968 | 7.302 | 34.016 | 228.9 | 30.359 | 50.703 | NaN | NaN | NaN |
4 | Afghanistan | 1964 | 2.758929 | 7.304 | 34.494 | 225.1 | 29.867 | 50.831 | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19553 | Tokelau | 2019 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19554 | Tokelau | 2020 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19555 | Tokelau | 2021 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19556 | Tokelau | 2022 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19557 | Tokelau | 2023 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19558 rows × 11 columns
# Merge health data
df = df.merge(wb_health, on=["country", "year"], how="outer")
df = df.replace('..', np.nan)
df
country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate | gdp_2010 | education_expenditure | drop_out | health_expenditure | hospital_beds | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 1960 | 2.833029 | 7.282 | 32.535 | NaN | 31.921 | 50.340 | NaN | NaN | NaN | NaN | 0.170627 |
1 | Afghanistan | 1961 | 2.817674 | 7.284 | 33.068 | NaN | 31.349 | 50.443 | NaN | NaN | NaN | NaN | NaN |
2 | Afghanistan | 1962 | 2.799055 | 7.292 | 33.547 | NaN | 30.845 | 50.570 | NaN | NaN | NaN | NaN | NaN |
3 | Afghanistan | 1963 | 2.778968 | 7.302 | 34.016 | 228.9 | 30.359 | 50.703 | NaN | NaN | NaN | NaN | NaN |
4 | Afghanistan | 1964 | 2.758929 | 7.304 | 34.494 | 225.1 | 29.867 | 50.831 | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19553 | Tokelau | 2019 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19554 | Tokelau | 2020 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19555 | Tokelau | 2021 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19556 | Tokelau | 2022 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19557 | Tokelau | 2023 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
19558 rows × 13 columns
# Check duplicates and NaN
df = df.drop_duplicates()
df.describe()
year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate | gdp_2010 | education_expenditure | drop_out | health_expenditure | hospital_beds | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 19558.000000 | 16704.000000 | 15631.000000 | 15618.000000 | 12468.000000 | 15771.000000 | 15789.000000 | 4.144000e+03 | 4372.000000 | 6.542000e+03 | 4863.000000 | 4837.000000 |
mean | 1992.674813 | 6.639269 | 3.902895 | 64.377915 | 48.556515 | 10.452507 | 28.071645 | 2.564769e+06 | 4.349850 | 7.867921e+06 | 6.191735 | 4.391141 |
std | 18.329510 | 4.733198 | 1.963936 | 11.113930 | 43.602631 | 5.363457 | 12.871899 | 8.162975e+06 | 1.852381 | 1.923585e+07 | 2.756636 | 3.357402 |
min | 1960.000000 | 0.171770 | 0.772000 | 11.995000 | 1.000000 | 0.795000 | 5.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | 1.263576 | 0.100000 |
25% | 1977.000000 | 3.235235 | 2.093951 | 57.053500 | 14.300000 | 6.940500 | 16.339000 | 7.188595e+03 | 3.134660 | 1.045600e+04 | 4.275008 | 1.770000 |
50% | 1993.000000 | 4.584949 | 3.431000 | 67.017000 | 34.796448 | 9.170000 | 26.745000 | 1.476338e+05 | 4.260500 | 1.281455e+05 | 5.457591 | 3.410000 |
75% | 2008.000000 | 9.051967 | 5.738000 | 72.625024 | 70.400000 | 12.339500 | 39.620000 | 1.046619e+06 | 5.317721 | 2.618750e+06 | 7.809764 | 6.283300 |
max | 2023.000000 | 35.970125 | 8.864000 | 85.497561 | 278.200000 | 103.534000 | 58.121000 | 8.428075e+07 | 44.333980 | 1.308965e+08 | 24.230680 | 40.315456 |
# Check missing data patterns_gdp
missing_counts_gdp = df[['gdp_2010']].isna().groupby(df['year']).sum() # count missing GDP values per year
missing_counts_gdp[missing_counts_gdp['gdp_2010']==213].sort_values('year', ascending=True)
gdp_2010 | |
---|---|
year | |
1987 | 213 |
1988 | 213 |
1989 | 213 |
1990 | 213 |
1991 | 213 |
1992 | 213 |
1993 | 213 |
1994 | 213 |
1995 | 213 |
1996 | 213 |
1997 | 213 |
1998 | 213 |
1999 | 213 |
2000 | 213 |
2001 | 213 |
2002 | 213 |
2003 | 213 |
2004 | 213 |
2005 | 213 |
2006 | 213 |
2007 | 213 |
2008 | 213 |
2009 | 213 |
2010 | 213 |
2011 | 213 |
2012 | 213 |
2013 | 213 |
2014 | 213 |
2015 | 213 |
2016 | 213 |
2017 | 213 |
2018 | 213 |
2019 | 213 |
2020 | 213 |
2021 | 213 |
2022 | 213 |
2023 | 213 |
The number of missing values is the same (213) for every year from 1987 onward, which means the same number of countries lack GDP data throughout 1987-2023. (I checked the csv file and validated that the missing values come from the same countries.) Therefore, I drop the countries without GDP data and keep the most recent 11 years (2010-2020) for model analysis.
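The claim that the gaps come from the same countries every year can also be verified in code instead of by inspecting the csv. A sketch on a tiny synthetic frame (the column names mirror the project's merged `df`; the data are made up):

```python
import pandas as pd
import numpy as np

# Tiny stand-in for the merged data: country B never reports GDP.
toy_df = pd.DataFrame({
    "country":  ["A", "B", "A", "B", "A", "B"],
    "year":     [2019, 2019, 2020, 2020, 2021, 2021],
    "gdp_2010": [1.0, np.nan, 2.0, np.nan, 3.0, np.nan],
})

# For each year, collect the set of countries with missing GDP.
missing_by_year = (
    toy_df[toy_df["gdp_2010"].isna()]
    .groupby("year")["country"]
    .apply(set)
)

# If every year yields the identical set, the gaps come from the same countries.
same_countries = len(set(map(frozenset, missing_by_year))) == 1
print(same_countries)  # True
```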
# Drop missing countries and years will not be used for model analysis
years_to_keep = [2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020]
df2 = df[(df['year'].isin(years_to_keep)) & (~df['gdp_2010'].isna()) & (~df['fertility'].isna())].copy() # .copy() avoids SettingWithCopyWarning when adding columns later
df2.head()
df2.describe()
year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate | gdp_2010 | education_expenditure | drop_out | health_expenditure | hospital_beds | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 902.000000 | 902.000000 | 902.000000 | 902.000000 | 902.000000 | 902.000000 | 902.000000 | 9.020000e+02 | 477.000000 | 5.800000e+02 | 898.000000 | 606.000000 |
mean | 2015.000000 | 11.821964 | 2.008339 | 76.253307 | 10.889690 | 8.054777 | 14.980883 | 1.700829e+06 | 4.933531 | 1.117164e+06 | 7.242571 | 3.731646 |
std | 3.164032 | 6.390963 | 0.715555 | 5.805234 | 11.975713 | 3.202098 | 6.312993 | 8.082314e+06 | 1.338148 | 7.603072e+06 | 2.621943 | 2.513211 |
min | 2010.000000 | 0.175989 | 1.100000 | 50.945000 | 1.700000 | 0.795000 | 6.800000 | 0.000000e+00 | 1.496170 | 0.000000e+00 | 1.599962 | 0.440000 |
25% | 2012.000000 | 6.096221 | 1.530000 | 73.287250 | 3.425000 | 6.120250 | 10.200000 | 4.756168e+04 | 4.007890 | 1.585250e+03 | 5.231700 | 1.935000 |
50% | 2015.000000 | 12.600528 | 1.805000 | 76.922195 | 6.500000 | 7.495509 | 12.539500 | 1.837965e+05 | 4.909180 | 8.566500e+03 | 7.144506 | 2.990000 |
75% | 2018.000000 | 17.563674 | 2.306000 | 81.087195 | 14.300000 | 9.860000 | 18.812000 | 5.362975e+05 | 5.620230 | 5.845850e+04 | 9.157427 | 5.250000 |
max | 2020.000000 | 29.583178 | 5.980000 | 84.560000 | 84.400000 | 18.000000 | 42.094000 | 7.994263e+07 | 8.559550 | 6.062076e+07 | 18.815826 | 13.510000 |
An advantage of this approach is that it keeps a balanced dataset without imputation. A limitation worth noting, however, is that missing values remain in the education and health measures.
# Plot the time trends for population ageing of the average level in the world
ax = df.groupby('year').pop_over_65.mean().plot(label = 'Percentage of population ages over 65', ylabel = "%", legend=True, figsize=(12, 8))
plt.show()
From 1960 to 2020, the average percentage of population aged 65 and over across all countries increased from about 5% to 10%, a roughly 100% relative increase.
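The "100% increase" is a relative change, not a percentage-point difference, and the distinction is easy to blur when the quantity itself is a percentage. A quick sketch (5% and 10% are the approximate endpoints read off the plot):

```python
def relative_change_pct(old, new):
    # Relative change expressed in percent: (new - old) / old * 100.
    return (new - old) / old * 100

old_share, new_share = 5.0, 10.0  # approx. world-average pop_over_65 in 1960 and 2020
print(relative_change_pct(old_share, new_share))  # 100.0 (relative increase, %)
print(new_share - old_share)                      # 5.0 (percentage points)
```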
# plot the time trends for fertility and life expectancy of the average level in the world
ax = df.groupby('year').fertility.mean().plot(label = 'Fertility', ylabel = "Number of children/woman", legend=True, figsize=(12, 8))
df.groupby('year').life_expectancy.mean().plot(label = 'Life Expectancy', secondary_y = True,legend=True, figsize=(12, 8))
ax.set_ylim(0, 7)
ax.right_ax.set_ylim(50, 75)
plt.show()
Over the past 60 years, average fertility decreased from 5.5 births per woman in 1960 to below 3 in 2020, while average life expectancy rose from below 55 years in 1960 to over 70 in 2020. These trends are closely consistent with increasing population ageing.
# Draw the Population Ageing Distribution World Map in 2020
import plotly.express as px
fig2 = px.choropleth(df[df['year'] == 2020],
locations='country',
locationmode="country names",
scope="world",
color='pop_over_65',
color_continuous_scale='Blues')
fig2.update_layout(
title_text = 'Population Ageing, 2020',
title_font_family="Times New Roman",
title_font_size = 25,
title_font_color="black",
title_x=0.5)
The degree of population ageing in 2020 is not evenly distributed across countries. Japan and many European countries have a relatively high percentage of population aged 65 and over, followed by North America and East Asia. African countries generally have a lower degree of population ageing.
# Count the number of countries that entered population ageing society in 1960 and in 2020
count_1960 = len(wb_population[(wb_population['year'] == 1960) & (wb_population['pop_over_65'] >= 7)])
count_2020 = len(wb_population[(wb_population['year'] == 2020) & (wb_population['pop_over_65'] >= 7)])
print("The number of countries that are regarded as population ageing society in 1960 is:", count_1960)
print("The number of countries that are regarded as population ageing society in 2020 is:", count_2020)
The number of countries that are regarded as population ageing society in 1960 is: 54 The number of countries that are regarded as population ageing society in 2020 is: 138
# Show the top 5 countries with highest fraction of population ages 65 and above in 1960
wb_population_1960 = wb_population[wb_population['year']==1960].sort_values(by='pop_over_65', ascending=False)
wb_population_1960.head(5)
Series Name | country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate |
---|---|---|---|---|---|---|---|---|
10624 | Monaco | 1960 | 18.912719 | NaN | NaN | NaN | NaN | NaN |
7488 | Isle of Man | 1960 | 17.686963 | 2.875 | 64.409000 | NaN | 16.917000 | 14.959000 |
2816 | Channel Islands | 1960 | 13.533266 | 2.270 | 71.470000 | NaN | 12.355064 | 14.892019 |
896 | Austria | 1960 | 12.210282 | 2.690 | 68.585610 | 37.3 | 12.700000 | 17.900000 |
1344 | Belgium | 1960 | 11.987119 | 2.540 | 69.701951 | 29.4 | 12.400000 | 16.800000 |
# Show the bottom 5 countries with lowest fraction of population ages 65 and above in 1960
wb_population_1960 = wb_population[wb_population['year']==1960].sort_values(by='pop_over_65', ascending=True)
wb_population_1960.head(5)
Series Name | country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate |
---|---|---|---|---|---|---|---|---|
12352 | Papua New Guinea | 1960 | 1.094746 | 6.018 | 45.679000 | 133.0 | 16.888 | 44.713 |
11456 | Niger | 1960 | 1.122767 | 7.530 | 36.404000 | NaN | 27.566 | 58.121 |
6080 | Guam | 1960 | 1.284983 | 5.906 | 60.897000 | NaN | 6.443 | 33.021 |
13696 | Singapore | 1960 | 1.627747 | 5.760 | 64.694683 | 35.5 | 6.200 | 37.500 |
5952 | Greenland | 1960 | 1.975159 | NaN | NaN | NaN | NaN | NaN |
# Show the top 5 countries with highest fraction of population ages 65 and above in 2020
wb_population_2020 = wb_population[wb_population['year']==2020].sort_values(by='pop_over_65', ascending=False)
wb_population_2020.head(5)
Series Name | country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate |
---|---|---|---|---|---|---|---|---|
10684 | Monaco | 2020 | 35.849900 | NaN | NaN | 1.0 | NaN | NaN |
7804 | Japan | 2020 | 29.583178 | 1.33 | 84.560000 | 1.8 | 11.1 | 6.8 |
7676 | Italy | 2020 | 23.372071 | 1.24 | 82.195122 | 2.4 | 12.5 | 6.8 |
5308 | Finland | 2020 | 22.490093 | 1.37 | 81.931707 | 1.8 | 10.0 | 8.4 |
12732 | Portugal | 2020 | 22.296726 | 1.41 | 80.975610 | 2.7 | 12.0 | 8.2 |
# Show the bottom 5 countries with lowest fraction of population ages 65 and above in 2020
wb_population_2020 = wb_population[wb_population['year']==2020].sort_values(by='pop_over_65', ascending=True)
wb_population_2020.head(5)
Series Name | country | year | pop_over_65 | fertility | life_expectancy | mortality_infant | death_rate | birth_rate |
---|---|---|---|---|---|---|---|---|
12988 | Qatar | 2020 | 1.255821 | 1.816 | 79.099 | 4.8 | 1.219 | 10.895 |
16252 | United Arab Emirates | 2020 | 1.653976 | 1.460 | 78.946 | 5.6 | 1.766 | 10.620 |
16124 | Uganda | 2020 | 1.664816 | 4.693 | 62.851 | 32.1 | 5.852 | 37.252 |
17084 | Zambia | 2020 | 1.729860 | 4.379 | 62.380 | 41.1 | 6.602 | 34.953 |
2812 | Chad | 2020 | 2.032638 | 6.346 | 52.777 | 67.6 | 12.486 | 43.849 |
The number of countries regarded as ageing societies increased from 54 in 1960 to 138 in 2020. Monaco had the highest percentage of population aged 65 and over in both 1960 and 2020.
According to demographic transition theory, every country passes through the following stages of population growth rates (source: https://statchatva.org/2019/04/26/demographic-transition-theory-in-a-nutshell/):
# Compare the birth rates and death rates in Japan
ax = df[df['country'] == 'Japan'].groupby('year')['birth_rate'].mean().plot(label='Birth Rate', ylabel="per 1,000 people", legend=True, figsize=(12, 8))
ax = df[df['country'] == 'Japan'].groupby('year')['death_rate'].mean().plot(label='Death Rate', ylabel="per 1,000 people", legend=True, figsize=(12, 8))
plt.show()
# Compare the birth rates and death rates in Papua New Guinea
ax = df[df['country'] == 'Papua New Guinea'].groupby('year')['birth_rate'].mean().plot(label='Birth Rate', ylabel="per 1,000 people", legend=True, figsize=(12, 8))
ax = df[df['country'] == 'Papua New Guinea'].groupby('year')['death_rate'].mean().plot(label='Death Rate', ylabel="per 1,000 people", legend=True, figsize=(12, 8))
plt.show()
Comparing birth and death rates in Japan and Papua New Guinea gives a better sense of the demographic transition stages: in 2020 Japan is in stage 5, with both low birth and death rates, while Papua New Guinea is in stage 2 or 3, with relatively high birth rates and declining death rates over the period. This evidence supports better predictions of which countries will age next.
# Generate the correlation matrix
correlation_matrix1 = df2[['pop_over_65','fertility','life_expectancy','gdp_2010']].corr()
import seaborn as sns
import matplotlib.pyplot as plt
# Visualize the correlation matrix with a heatmap
sns.heatmap(correlation_matrix1, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
# Display the plot
plt.show()
# Generate the correlation matrix
correlation_matrix2 = df2[['fertility','education_expenditure','drop_out']].corr()
# Visualize the correlation matrix with a heatmap
sns.heatmap(correlation_matrix2, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
# Display the plot
plt.show()
# Generate the correlation matrix
correlation_matrix3 = df2[['life_expectancy','health_expenditure','hospital_beds']].corr()
# Visualize the correlation matrix with a heatmap
sns.heatmap(correlation_matrix3, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
# Display the plot
plt.show()
Summary: the heatmaps above quantify how pop_over_65 relates to fertility, life expectancy, and GDP; how fertility relates to the education measures; and how life expectancy relates to the health measures.
# Use Scikit-Learn to predict the ratio of population aged over 65
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
# Define the features.
features = ['fertility','life_expectancy','gdp_2010']
# Define the training data.
# Represent the features as a list of dicts.
X_train_dict = df2[features].to_dict(orient="records")
X_new_dict = [{
'fertility': 2,
'life_expectancy': 75,
'gdp_2010': 8}]
y_train = df2["pop_over_65"]
# Dummy encoding
vec = DictVectorizer(sparse=False)
vec.fit(X_train_dict)
X_train = vec.transform(X_train_dict)
X_new = vec.transform(X_new_dict)
# Standardization
scaler = StandardScaler()
scaler.fit(X_train)
X_train_sc = scaler.transform(X_train)
X_new_sc = scaler.transform(X_new)
# K-Nearest Neighbors Model
model = KNeighborsRegressor(n_neighbors=30)
model.fit(X_train_sc, y_train)
model.predict(X_new_sc)
array([7.44168589])
I use GDP, fertility, and life expectancy to predict the percentage of population aged 65 and over. A hypothetical country with fertility of 2 births per woman, life expectancy of 75 years, and GDP of 8 (in the units of gdp_2010, millions of constant 2010 US$) is predicted to have about 7.4% of its population aged 65 and over.
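Mechanically, the KNN prediction above is an average of the `pop_over_65` values of the k training rows nearest to the query in feature space (standardized first, as in the pipeline above). A minimal pure-Python sketch of that mechanic on invented toy data, not the fitted model:

```python
def knn_predict(X_train, y_train, x_new, k):
    # Euclidean distance in feature space (a real pipeline standardizes first,
    # as the StandardScaler step above does).
    dists = [sum((a - b) ** 2 for a, b in zip(row, x_new)) ** 0.5 for row in X_train]
    nearest = sorted(range(len(dists)), key=lambda i: dists[i])[:k]
    # Prediction = average target over the k nearest neighbours.
    return sum(y_train[i] for i in nearest) / k

# Toy training set: (fertility, life_expectancy) -> pop_over_65
X = [(1.5, 82.0), (1.8, 78.0), (4.5, 60.0), (5.5, 55.0)]
y = [20.0, 15.0, 3.0, 2.0]
print(knn_predict(X, y, (1.6, 80.0), k=2))  # 17.5 (average of the two low-fertility rows)
```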
# Add a binary 'pop_ageing' label based on the 7% threshold of 'pop_over_65'
df2['pop_ageing'] = (df2['pop_over_65'] >= 7).astype(int)
df2.dtypes
country                   object
year                       int64
pop_over_65              float64
fertility                float64
life_expectancy          float64
mortality_infant         float64
death_rate               float64
birth_rate               float64
gdp_2010                 float64
education_expenditure    float64
drop_out                 float64
health_expenditure       float64
hospital_beds            float64
pop_ageing                 int64
dtype: object
# SKLEARN stuff
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics
plt.style.use('fivethirtyeight')
# Make the fonts a little bigger in our graphs.
font = {'size' : 20}
plt.rc('font', **font)
plt.rcParams['mathtext.fontset'] = 'cm'
plt.rcParams['pdf.fonttype'] = 42
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
def fit_and_report(df, features, target):
    train, test = train_test_split(df, test_size=0.4, stratify=df[target])
    X_train = train[features]
    y_train = train[target]
    X_test = test[features]
    y_test = test[target]
    mod_dt = DecisionTreeClassifier(max_depth=3, random_state=1)
    mod_dt.fit(X_train, y_train)
    prediction = mod_dt.predict(X_test)
    ConfusionMatrixDisplay.from_estimator(mod_dt, X_test, y_test,
                                          display_labels=mod_dt.classes_,
                                          cmap=plt.cm.Blues, normalize='all')
    plt.figure(figsize=(15, 8))
    plot_tree(mod_dt, feature_names=features,
              class_names=["not yet pop ageing", "pop ageing"],  # classes in ascending order: 0, 1
              filled=True)
    # sklearn metric functions take y_true first, then the predictions
    print(f"The accuracy of the Decision Tree is {metrics.accuracy_score(y_test, prediction):.3f}")
    print(f"The precision of the Decision Tree is {metrics.precision_score(y_test, prediction, average='weighted'):.3f}")
    print(f"The recall of the Decision Tree is {metrics.recall_score(y_test, prediction, average='weighted'):.3f}")
fit_and_report(df2,
               ['fertility', 'life_expectancy', 'gdp_2010'],
               "pop_ageing")
The accuracy of the Decision Tree is 0.884 The Precision of the Decision Tree is 0.899 The Recall of the Decision Tree is 0.884
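A subtlety with scikit-learn's metric functions is that `y_true` comes first; swapping the arguments silently exchanges precision and recall for the positive class. The definitions are easy to verify by hand (toy labels, class 1 = ageing society):

```python
# Toy labels: class 1 = ageing society.
y_true = [1, 1, 1, 0, 0, 0]
y_pred = [1, 1, 1, 1, 0, 0]

tp = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 1)  # true positives
fp = sum(1 for t, p in zip(y_true, y_pred) if t == 0 and p == 1)  # false positives
fn = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 0)  # false negatives

precision = tp / (tp + fp)  # of predicted ageing societies, how many truly are
recall = tp / (tp + fn)     # of true ageing societies, how many were caught
print(precision, recall)  # 0.75 1.0
```

With the arguments swapped, the same binary metrics would come out as 1.0 for "precision" and 0.75 for "recall", i.e. the two exchange places.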
Population ageing is a global concern that nearly all countries have to face now or in the future.
Increasing life expectancy and decreasing fertility are two major driving forces of this phenomenon.
We can use economic growth, educational attainment, and health status to predict whether a country is at risk of becoming an ageing society, which helps a country prepare well in advance.
I could use imputation to deal with the missing-data issue, giving a larger sample size for training.
I could gather more measures to build the prediction model.
I could separate countries and build time-series prediction models for a single country or a group of countries.
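As one concrete way to realize the imputation idea above, missing values could be filled with each country's own median, which preserves cross-country level differences (synthetic frame for illustration; scikit-learn's `SimpleImputer` is a common alternative):

```python
import pandas as pd
import numpy as np

# Synthetic stand-in for an education column with per-country gaps.
edu = pd.DataFrame({
    "country": ["A", "A", "A", "B", "B", "B"],
    "education_expenditure": [4.0, np.nan, 6.0, 2.0, 3.0, np.nan],
})

# Fill each country's gaps with that country's own median,
# so cross-country level differences are preserved.
edu["education_expenditure"] = (
    edu.groupby("country")["education_expenditure"]
       .transform(lambda s: s.fillna(s.median()))
)
print(edu["education_expenditure"].tolist())  # [4.0, 5.0, 6.0, 2.0, 3.0, 2.5]
```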