In [1]:
%reload_ext autotime
import pandas as pd
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired
import plotly.io as pio
import plotly.express as px # Plotting
pio.renderers.default = 'notebook'
pd.options.plotting.backend = "plotly"
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
df = pd.read_csv("Funded projects – Table for Download - EuroQol.csv")
df.dropna(subset="Start Year", inplace=True)
df["text"] = df.Title.fillna("") + " " + df.Abstract.fillna("")
df
Out[1]:
Project Id | Title | Abstract | Project PI / Applicant Name | Working Group | Approved Budget (EUR) | Status | Start Year | End Year | text | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 20200060 | Grant to develop the outcomes-research component of the intended Capacity-2 study | Capacity-2 aims to extend the current multicenter Capacity-clinical registration with 6 month o... | F Asselbergs | Populations and Health Systems | 29500 | Ongoing | 2020.0 | NaN | Grant to develop the outcomes-research component of the intended Capacity-2 study Capacity-2 aim... |
1 | 20191010 | Psychometric properties, feasibility and usefulness of the extended EQ-5D-Y-5L in children with ... | Background: Type 1 Type-1 diabetes mellitus and heart diseases are the most common medical probl... | Abraham Gebregziabiher | Youth | 23550 | Completed | 2019.0 | 2020.0 | Psychometric properties, feasibility and usefulness of the extended EQ-5D-Y-5L in children with ... |
2 | 220-VS | The development of the national value set for the EQ-5D-Y-3L in Brazil | BACKGROUND: There is an increasing growth of interest in the development of generic preference-b... | Tie Parma Yamato | Valuation, Youth | 58204 | Completed | 2022.0 | 2023.0 | The development of the national value set for the EQ-5D-Y-3L in Brazil BACKGROUND: There is an i... |
3 | 20180290 | Generation of an EQ-5D-5L value set for the Mexican population | Aim: To develop an EQ-SD-SL value set for the Mexican general population. Background: There is s... | Cristina Guttierez | Valuation | 0 | Completed | 2018.0 | 2020.0 | Generation of an EQ-5D-5L value set for the Mexican population Aim: To develop an EQ-SD-SL value... |
4 | 20170520 | Development of health-related quality of life (EQ-5D-5L) value set for India | The present study aims to develop EuroQol five-dimensional (EQ-5D-5L) health states value set fo... | Shankar Prinja | Valuation | 0 | Completed | 2017.0 | 2019.0 | Development of health-related quality of life (EQ-5D-5L) value set for India The present study a... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
826 | 20190040 | __ | The results of the study were published here: Chemli J, Drira C, Felfel H, Roudijk B, Al Sayah F... | Hajer Falfel | Valuation | 5050 | Completed | 2019.0 | NaN | __ The results of the study were published here: Chemli J, Drira C, Felfel H, Roudijk B, Al Saya... |
827 | 2015060 | EuroQol past, present & future publication | NaN | Nancy Devlin | Education and Outreach | 13500 | Completed | 2015.0 | NaN | EuroQol past, present & future publication |
828 | 2015390 | Can social care needs and well-being be explained by EQ-5D? Analysis of the Health Survey for En... | Introduction: It is becoming increasingly important for decision makers to be able to measure an... | Jeshika Singh | Valuation | 11800 | Completed | 2015.0 | 2016.0 | Can social care needs and well-being be explained by EQ-5D? Analysis of the Health Survey for En... |
829 | 2013140 | A Japanese valuation study for the EQ-5D-5L | This survey was conducted as a s upplemental analy sis of "A Japanese valuati on study for the E... | Shunya Ikeda | Valuation | 73500 | Completed | 2013.0 | NaN | A Japanese valuation study for the EQ-5D-5L This survey was conducted as a s upplemental analy s... |
830 | 2015100 | The distribution of the EQ-5D-5L Index in patient populations | Background:EQ-5D data are often summarised by anEQ-5D index, whosedistribution foritsoriginal ve... | Nancy Devlin | Valuation | 9750 | Completed | 2015.0 | NaN | The distribution of the EQ-5D-5L Index in patient populations Background:EQ-5D data are often su... |
827 rows × 10 columns
In [2]:
df.groupby(['Start Year', 'Working Group'])["Approved Budget (EUR)"].sum().unstack().plot(kind='bar')
In [3]:
df.groupby("Start Year")["Approved Budget (EUR)"].sum().plot.bar()
In [4]:
df.groupby("Start Year").size().plot()
In [5]:
keywords = ["covid", "cancer", "leukemia", "heart", "stroke", "kidney", "bowel"]
for word in keywords:
df[word] = df.text.str.lower().str.contains(rf"\b{word}\b")
df
Out[5]:
Project Id | Title | Abstract | Project PI / Applicant Name | Working Group | Approved Budget (EUR) | Status | Start Year | End Year | text | covid | cancer | leukemia | heart | stroke | kidney | bowel | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20200060 | Grant to develop the outcomes-research component of the intended Capacity-2 study | Capacity-2 aims to extend the current multicenter Capacity-clinical registration with 6 month o... | F Asselbergs | Populations and Health Systems | 29500 | Ongoing | 2020.0 | NaN | Grant to develop the outcomes-research component of the intended Capacity-2 study Capacity-2 aim... | True | False | False | False | False | False | False |
1 | 20191010 | Psychometric properties, feasibility and usefulness of the extended EQ-5D-Y-5L in children with ... | Background: Type 1 Type-1 diabetes mellitus and heart diseases are the most common medical probl... | Abraham Gebregziabiher | Youth | 23550 | Completed | 2019.0 | 2020.0 | Psychometric properties, feasibility and usefulness of the extended EQ-5D-Y-5L in children with ... | False | False | False | True | False | False | False |
2 | 220-VS | The development of the national value set for the EQ-5D-Y-3L in Brazil | BACKGROUND: There is an increasing growth of interest in the development of generic preference-b... | Tie Parma Yamato | Valuation, Youth | 58204 | Completed | 2022.0 | 2023.0 | The development of the national value set for the EQ-5D-Y-3L in Brazil BACKGROUND: There is an i... | False | False | False | False | False | False | False |
3 | 20180290 | Generation of an EQ-5D-5L value set for the Mexican population | Aim: To develop an EQ-SD-SL value set for the Mexican general population. Background: There is s... | Cristina Guttierez | Valuation | 0 | Completed | 2018.0 | 2020.0 | Generation of an EQ-5D-5L value set for the Mexican population Aim: To develop an EQ-SD-SL value... | False | False | False | False | False | False | False |
4 | 20170520 | Development of health-related quality of life (EQ-5D-5L) value set for India | The present study aims to develop EuroQol five-dimensional (EQ-5D-5L) health states value set fo... | Shankar Prinja | Valuation | 0 | Completed | 2017.0 | 2019.0 | Development of health-related quality of life (EQ-5D-5L) value set for India The present study a... | False | False | False | False | False | False | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
826 | 20190040 | __ | The results of the study were published here: Chemli J, Drira C, Felfel H, Roudijk B, Al Sayah F... | Hajer Falfel | Valuation | 5050 | Completed | 2019.0 | NaN | __ The results of the study were published here: Chemli J, Drira C, Felfel H, Roudijk B, Al Saya... | False | False | False | False | False | False | False |
827 | 2015060 | EuroQol past, present & future publication | NaN | Nancy Devlin | Education and Outreach | 13500 | Completed | 2015.0 | NaN | EuroQol past, present & future publication | False | False | False | False | False | False | False |
828 | 2015390 | Can social care needs and well-being be explained by EQ-5D? Analysis of the Health Survey for En... | Introduction: It is becoming increasingly important for decision makers to be able to measure an... | Jeshika Singh | Valuation | 11800 | Completed | 2015.0 | 2016.0 | Can social care needs and well-being be explained by EQ-5D? Analysis of the Health Survey for En... | False | False | False | False | False | False | False |
829 | 2013140 | A Japanese valuation study for the EQ-5D-5L | This survey was conducted as a s upplemental analy sis of "A Japanese valuati on study for the E... | Shunya Ikeda | Valuation | 73500 | Completed | 2013.0 | NaN | A Japanese valuation study for the EQ-5D-5L This survey was conducted as a s upplemental analy s... | False | False | False | False | False | False | False |
830 | 2015100 | The distribution of the EQ-5D-5L Index in patient populations | Background:EQ-5D data are often summarised by anEQ-5D index, whosedistribution foritsoriginal ve... | Nancy Devlin | Valuation | 9750 | Completed | 2015.0 | NaN | The distribution of the EQ-5D-5L Index in patient populations Background:EQ-5D data are often su... | False | False | False | False | False | False | False |
827 rows × 17 columns
In [6]:
df[["covid", "cancer", "leukemia", "heart", "stroke", "kidney", "bowel"]].value_counts()
Out[6]:
covid cancer leukemia heart stroke kidney bowel False False False False False False False 727 True False False False False False False 41 False True False False False False False 27 False False True False False False 9 False True False False 6 True False False False False 4 False False False False True 2 True False 2 True False True False True False 2 True False False False False 2 False False True True False False 1 True False False False False True 1 True False 1 True False False 1 True False False False 1 Name: count, dtype: int64
In [7]:
df.groupby("Start Year")[keywords].sum().plot(height=800)
In [21]:
topic_model = BERTopic(representation_model=KeyBERTInspired(), min_topic_size=7, verbose=True)
topic_model.fit(df.text)
topic_model.get_topic_info()
2025-07-30 15:37:09,976 - BERTopic - Embedding - Transforming documents to embeddings.
Batches: 0%| | 0/26 [00:00<?, ?it/s]
2025-07-30 15:37:13,240 - BERTopic - Embedding - Completed ✓ 2025-07-30 15:37:13,241 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm 2025-07-30 15:37:14,368 - BERTopic - Dimensionality - Completed ✓ 2025-07-30 15:37:14,369 - BERTopic - Cluster - Start clustering the reduced embeddings 2025-07-30 15:37:14,393 - BERTopic - Cluster - Completed ✓ 2025-07-30 15:37:14,395 - BERTopic - Representation - Extracting topics from clusters using representation models. 2025-07-30 15:37:15,254 - BERTopic - Representation - Completed ✓
Out[21]:
Topic | Count | Name | Representation | Representative_Docs | |
---|---|---|---|---|---|
0 | -1 | 237 | -1_valuation_values_value_health | [valuation, values, value, health, respondents, eq5d, preference, survey, eq5d5l, utility] | [Revised title: Are there any challenges in valuing Y-5L arising from the descriptive system? A ... |
1 | 0 | 96 | 0_healthrelated_hrqol_health_eq5dy | [healthrelated, hrqol, health, eq5dy, eq5dy5l, eq5dy3l, pediatric, pedsql, psychometric, adolesc... | [Health-Related Quality of Life in children dependent on technology for breathing Objectives: Me... |
2 | 1 | 61 | 1_wellbeing_health_psychometric_caregivers | [wellbeing, health, psychometric, caregivers, assessment, eq, eqhwb, assess, dementia, measures] | [Investigating validity and responsiveness to change of the EQ-HWB-S in a sample of people livin... |
3 | 2 | 48 | 2_euroqol_eq5d_eq_hta | [euroqol, eq5d, eq, hta, asia, singapore, eqdaphnie, research, studies, agencies] | [A travel grant to support a clinical application of PROMs in pediatrics with a focus on EQ-5D-Y... |
4 | 3 | 32 | 3_health_healthrelated_covid_pandemic | [health, healthrelated, covid, pandemic, covid19, wellbeing, hrqol, disease, chronic, patients] | [Population health impact of the COVID-19 pandemic (POPCORN): third wave In 2020, the World Heal... |
5 | 4 | 31 | 4_psychometric_assessed_health_eq5d | [psychometric, assessed, health, eq5d, eq5d5l, eq5d3l, anxietydepression, measures, dimensions, ... | [Testing the development of a Dimension Specific Module using cognition – Extending the Deep Div... |
6 | 5 | 31 | 5_health_surveys_respondents_eq | [health, surveys, respondents, eq, measures, eq5d, euroqol, clinical, population, eq5d5l] | [Quantifying Health Inequality: Systematic literature review of the application of EuroQol instr... |
7 | 6 | 26 | 6_valuation_eq5d_eq5d5l_eq5d3l | [valuation, eq5d, eq5d5l, eq5d3l, eqvt, euroqol, value, qatar, economic, population] | [An EQ-5D-5L value set for the Swedish population The aim of this proposal is to develop a Swedi... |
8 | 7 | 25 | 7_health_valuation_adolescents_valuing | [health, valuation, adolescents, valuing, value, adolescent, qualitative, youth, children, age] | [Insight into the higher health state valuation for children compared to adults: effect of 3 val... |
9 | 8 | 22 | 8_eq5d_health_eq5d5l_values | [eq5d, health, eq5d5l, values, eq5d3l, value, eq, patients, respondents, data] | [Getting personal: scoping the potential of using OPUF to develop an EQ-5D-5L-based a decision a... |
10 | 9 | 18 | 9_cancers_oncology_cancer_health | [cancers, oncology, cancer, health, eq5d, eq5d5l, psychometric, eq5d3l, validity, colorectal] | [The Psychometric Properties of the EQ-5D-5L among Ethiopian Cervical Cancer Patients: A Longitu... |
11 | 10 | 18 | 10_value_valuation_values_health | [value, valuation, values, health, discounting, death, preference, severity, utility, comparison] | [States worse than Dead: Exposing the measurement properties of the Better than Dead preference ... |
12 | 11 | 18 | 11_cognition_cognitive_psychometric_impairment | [cognition, cognitive, psychometric, impairment, eq5d, descriptive, visual, eq5d5l, descriptors,... | [Development and testing of EQ-5D-5L bolt-on descriptors for hearing The EQ-5D descriptive syste... |
13 | 12 | 18 | 12_dce_valuation_duration_values | [dce, valuation, duration, values, dces, preference, design, efficiency, designs, value] | [Comparing DCE designs that can be used to value EQ-5D-5L The aim of this study was to compare a... |
14 | 13 | 18 | 13_eq5d5l_eq5dy_valuation_value | [eq5d5l, eq5dy, valuation, value, eqvt, values, respondents, euroqol, health, ctto] | [Resubmission of pre-approved EQ Project 20190450: Re-estimating the EQ-5D-5L value set for Chin... |
15 | 14 | 15 | 14_valuation_health_preference_values | [valuation, health, preference, values, value, respondents, heterogeneity, estimates, statistica... | [Preference heterogeneity in health valuation To better understand heterogeneity in health valua... |
16 | 15 | 14 | 15_outcomes_clinical_eq5d_proms | [outcomes, clinical, eq5d, proms, decisionmaking, clinicians, outcome, prom, eq5d5l, patients] | [Crafting and elaborating the potential of clinical dashboards incorporating PROMs Objectives: A... |
17 | 16 | 14 | 16_psoriasis_dermatitis_dermatology_dermatological | [psoriasis, dermatitis, dermatology, dermatological, skin, psychometric, eq5d, eq5d5l, atopic, s... | [Testing the skin irritation, self-confidence, social relationships, social participation and so... |
18 | 17 | 12 | 17_health_insurance_valuation_hrqol | [health, insurance, valuation, hrqol, costutility, survey, hta, guidelines, value, utility] | [Phase-2 study of the Global HTA Agency Survey project Objectives: Health technology assessment ... |
19 | 18 | 12 | 18_valuations_valuation_valuing_values | [valuations, valuation, valuing, values, valued, health, perspective, 10year, perspectives, psyc... | [The effect of perspective, duration and views on life after death on valuation of severe states... |
20 | 19 | 11 | 19_outcomes_knee_arthroplasty_decisions | [outcomes, knee, arthroplasty, decisions, orthopedic, outcome, preoperative, effectiveness, post... | [A PROMs based patient decision aid for patients considering total knee arthroplasty: developmen... |
21 | 20 | 11 | 20_psychometric_health_wellbeing_qaly | [psychometric, health, wellbeing, qaly, validity, efa, healthy, test, testing, quality] | [Psychometric assessment of the eQALY item pool in Australia The ‘Extending the QALY’ project ai... |
22 | 21 | 11 | 21_valuation_values_value_scaling | [valuation, values, value, scaling, health, eq5d, core, data, dimensions, models] | [Testing the partially-fixed model for bolt-on valuation: a multi-country study Introduction: Bo... |
23 | 22 | 10 | 22_valuation_ctto_tto_eq5d5l | [valuation, ctto, tto, eq5d5l, ptto, value, eqvt, ntto, values, health] | [Comparing a new TTO method with cTTO for valuation of EQ-5D-5L health states in a general popul... |
24 | 23 | 9 | 23_asthma_respiratory_asthmatic_breathing | [asthma, respiratory, asthmatic, breathing, eq5d, breath, psychometric, copd, eq5d5lr, eq5d5l] | [Investigating the aspects of HRQoL covered by the descriptive system and the added value of the... |
25 | 24 | 9 | 24_dementia_health_recall_recalled | [dementia, health, recall, recalled, chronic, eq5d5l, hrqol, questionnaires, assessing, measures] | [The performance of EQ-5D-5L in various disease groups with different durations The recall perio... |
In [22]:
topic_model.visualize_topics()
In [23]:
topics_over_time = topic_model.topics_over_time(df.text, df["Start Year"], nr_bins=10)
topics_over_time
0it [00:00, ?it/s]
10it [00:05, 1.76it/s]
Out[23]:
Topic | Words | Frequency | Timestamp | |
---|---|---|---|---|
0 | -1 | eq5d, eq5d5l, health, eqvt, valuations | 9 | 2011.986 |
1 | 0 | eq5dy, questionnairesby, hrqol, eq5dy5l, adolescents | 3 | 2011.986 |
2 | 1 | wellbeingmeasures, wellbeing, happiness, health, happinessis | 1 | 2011.986 |
3 | 4 | eq5d, psychometric, health, euroqol, cochrane | 1 | 2011.986 |
4 | 6 | eq5d5l, valuation, eqvt, assessing, euroqol | 5 | 2011.986 |
... | ... | ... | ... | ... |
188 | 18 | valuations, valuation, valuing, health, values | 1 | 2024.600 |
189 | 19 | visualization, visualizing, data, hip, arthroplasty | 1 | 2024.600 |
190 | 21 | valuation, value, scaling, health, data | 4 | 2024.600 |
191 | 23 | asthma, psychometric, assessing, validity, respiratory | 1 | 2024.600 |
192 | 24 | recall, questionnaires, health, wellbeing, questionnaire | 1 | 2024.600 |
193 rows × 4 columns
In [24]:
topic_model.visualize_topics_over_time(topics_over_time, top_n_topics=20, height=600)