In [1]:
%reload_ext autotime
import pandas as pd
import plotly.io as pio
import plotly.express as px # Plotting
from tqdm import tqdm
tqdm.pandas()
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestRegressor
pio.renderers.default = 'notebook'
pd.options.plotting.backend = "plotly"
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
def parse(number):
try:
return float(number)
except ValueError:
return None
time: 1.39 s (started: 2025-07-22 14:44:49 +12:00)
In [2]:
df = pd.read_excel("LLM_results.xlsx")
df = df[(df.timeLeft == "closed") & (df.goal > 0)]
df.sentiment = df.sentiment.astype(str).str.split(" ").str[0].astype(float)
df.truth = df.truth.astype(str).str.split(" ").str[0].astype(float)
df.urgency = df.urgency.astype(str).str.split(" ").str[0].astype(float)
df["Target_completion"] = df.amountRaised / df.goal * 100
df
Out[2]:
Index | title | pitch | hero | uri | amountRaised | goal | timeLeft | action | actionUri | start | end | n_questions | location | n_donors | description | use_of_funds | whos_involved | moderated | n_updates | updates | _21 | text | condition | ICD10 | ICD | name | gender | age | age_group | ethnicity | urgency | sentiment | truth | notes | smiling | deservingness | attractiveness | use | region | narrative_clarity | narrative_quality | emotional_tone | image_type | face_visible | facial_expression | image_quality | Target_completion | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
31 | 31 | Helping a person in need | Helping out a person in need that would drop his hat to help someone else | https://static.givealittle.co.nz/assets/hero/ed760e80-7e10-4078-b7a8-b2ec0104419b-320:False | https://givealittle.co.nz/cause/helping-a-person-in-need | 1700.0 | 2000.0 | closed | Read more | https://givealittle.co.nz/cause/helping-a-person-in-need | 29 May 2025 | 8 Jun 2025 | 0 | Blenheim, Marlborough | 54.0 | Tok had a serious injury while running — he tore a muscle, which led to internal bleeding and ev... | medical care where needed and general stuff around the house if needed also and utility's | Created by, and paying to a verified bank account of,\r\nMatthew bergman on behalf of Tok H... | Page Moderated | 3.0 | Big thanks\n7 June 2025\nHey team huge thanks to all that have donated Tok is humbled beyond wor... | NaN | Title: Helping a person in need\nPitch: Helping out a person in need that would drop his hat to ... | Muscle tear leading to internal bleeding and infection | S86.0 | Chapter S00-T88 - Injury, poisoning and certain other consequences of external causes | Tok | Male | Indeterminate | indeterminate/unknown | Māori (based on visual appearance) | 80.0 | 70.0 | 90.0 | Age was not explicitly mentioned in the text, so it is marked as indeterminate. Ethnicity was in... | True | 90 | 60 | medical expenses, travel expenses | Marlborough | 80 | 75 | grateful | portrait | True | smiling | medium | 85.000000 |
56 | 56 | Please help Walter 🙏 to get the veterinary care he needs. | Please help us 🙏 Walty needs bloodwork to rule out cancer 😪 | https://static.givealittle.co.nz/assets/hero/a5763829-1835-4780-adc3-b2e500bd5557-320:False | https://givealittle.co.nz/cause/please-help-walter-to-get-the-veterinary-care-he | 380.0 | 500.0 | closed | Read more | https://givealittle.co.nz/cause/please-help-walter-to-get-the-veterinary-care-he | 22 May 2025 | 1 Jun 2025 | 0 | Otago | 14.0 | Walter has been my life for 15 years, since he was a pup. It has always being just the two of us... | The money raised will go into Walts vet costsFull bloods, medication etc at the moment is quoted... | Created by, and paying to a verified bank account of,\r\nKerri Back | Page Moderated | 1.0 | Bloodwork done.\n23 May 2025\nThank you so much for your kind donations.\nWalty has had his bloo... | NaN | Title: Please help Walter 🙏 to get the veterinary care he needs.\nPitch: Please help us 🙏 Walty ... | Pancreatitis | K85 | Chapter K00-K95 - Diseases of the digestive system | Walter | Other/unknown | indeterminate/unknown | indeterminate/unknown | Unknown (animal) | 80.0 | 70.0 | 90.0 | The subject is a pet dog, not a human. Age and age group cannot be determined precisely. | True | 90 | 80 | medical expenses | Otago | 85 | 80 | grateful | portrait | True | smiling | high | 76.000000 |
59 | 59 | Medical Treatment for Atish Lal for Coronary Artery Disease | medical Treatment | https://static.givealittle.co.nz/assets/hero/0a81d0ab-2bfc-4890-8049-b2e3017fea04-320:False | https://givealittle.co.nz/cause/medical-treatment-for-atish-lal-for-coronary | 380.0 | 14900.0 | closed | Read more | https://givealittle.co.nz/cause/medical-treatment-for-atish-lal-for-coronary | 20 May 2025 | 6 Jun 2025 | 0 | Auckland | 6.0 | Dear Family and Friends,\nI hope this message finds you well.\nI am reaching out today with a hu... | Estimated costs for Treatment & Travel:- PTCA treatment package + home stay : $11,000 USD- Reser... | Created by\r\nShaneel Prasad \n\n\n\n\r\n Paying to a verified bank account of\r\nPriya Go... | Page Moderated | NaN | NaN | NaN | Title: Medical Treatment for Atish Lal for Coronary Artery Disease\nPitch: medical Treatment\nDe... | Coronary Artery Disease | I25.1 | Chapter I00-I99 - Diseases of the circulatory system | Atish Lal | Male | Not specified | indeterminate/unknown | Indian (assumed based on name and context) | 90.0 | 70.0 | 95.0 | Age was not explicitly mentioned in the text; ethnicity inferred from name. | False | 90 | 50 | medical expenses | Auckland | 80 | 75 | grateful | portrait | True | neutral | medium | 2.550336 |
74 | 74 | Help me to reclaim my freedom. | Help my to reclaim my freedom and contribute to my community. | https://static.givealittle.co.nz/assets/hero/0e91c689-ef0e-4c5c-aa9d-b2e0010f721b-320:False | https://givealittle.co.nz/cause/help-me-to-reclaim-my-freedom | 2999.0 | 6000.0 | closed | Read more | https://givealittle.co.nz/cause/help-me-to-reclaim-my-freedom | 17 May 2025 | 5 Jun 2025 | 0 | Bay of Plenty | 15.0 | Two years after my first Covid infection, I’m still living with debilitating Long Covid, Chronic... | The money will be spend on a mobility scooter that can be folded down and taken in the car so I ... | Created by, and paying to a verified bank account of,\r\nAmy Cole | Page Moderated | NaN | NaN | NaN | Title: Help me to reclaim my freedom.\nPitch: Help my to reclaim my freedom and contribute to my... | Long Covid, Chronic Fatigue Syndrome/Myalgic Encephalomyelitis (ME/CFS), Post-exertional Malaise... | G93.3 (Postviral fatigue syndrome) | Chapter G00-G99 - Diseases of the nervous system | Amy | Female | Indeterminate | indeterminate/unknown | Caucasian (assumed based on image) | 80.0 | 70.0 | 90.0 | Age was not explicitly mentioned in the text, so it is marked as indeterminate. Ethnicity was in... | True | 90 | 70 | medical expenses | Bay of Plenty | 85 | 80 | grateful | portrait | True | smiling | high | 49.983333 |
75 | 75 | Help My Mum Fight Cancer - Every Bit Counts | Rasing funds for my mum's cancer treatment in India - every donation brings us closer to saving ... | https://static.givealittle.co.nz/assets/hero/3b1e6715-fdbd-4e4c-b787-b2e20160c6d0-320:False | https://givealittle.co.nz/cause/help-my-mum-fight-cancer-every-bit-counts | 630.0 | 50000.0 | closed | Read more | https://givealittle.co.nz/cause/help-my-mum-fight-cancer-every-bit-counts | 17 May 2025 | 19 May 2025 | 0 | Otahuhu, Auckland | 26.0 | I'm reaching out with a heavy heart and hope in my hands. My mum, the strongest person I know, h... | Hospital bills, chemotherapy, travel expenses, medications, post-treatment care in India. | Created by, and paying to a verified bank account of,\r\nAlana Al Jebin on behalf of My mot... | Page Moderated | NaN | NaN | NaN | Title: Help My Mum Fight Cancer - Every Bit Counts\nPitch: Rasing funds for my mum's cancer trea... | Cancer | C00-C97 | Chapter C00-D49 - Neoplasms | Not explicitly mentioned | Female | Indeterminate | indeterminate/unknown | South Asian (based on name and context) | 90.0 | 70.0 | 95.0 | Age and specific ethnic details not provided in text; inferred ethnicity based on name. | False | 95 | 50 | medical expenses, travel expenses, medications, post-treatment care | Auckland | 85 | 80 | hopeful | portrait | True | neutral | medium | 1.260000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
11184 | 11192 | New Zealand MindBody Trust web-based 'whole person' resources for healthworkers and their patien... | The NZ MindBody Trust web-based project to support and educate clinicians and patients in a 'who... | https://static.givealittle.co.nz/assets/hero/158d0e80-2c47-449a-a05e-a603014e786e-320:False | https://givealittle.co.nz/cause/mindbodyresourcesforpatientsandhealthworkers | 6700.0 | 9000.0 | closed | Read more | https://givealittle.co.nz/cause/mindbodyresourcesforpatientsandhealthworkers | 9 May 2016 | 15 Jul 2016 | 0 | Auckland | 36.0 | The NZ MindBody Trust (2004) is concerned with 'whole person' and person-centred approaches to h... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n New Zealand... | Page Moderated | 2.0 | The Give-A-Little Campaign Result\n12 July 2016\nThe MindBody Trustees want to thank you all for... | NaN | Title: New Zealand MindBody Trust web-based 'whole person' resources for healthworkers and their... | Chronic illness | R53 | Chapter R00-R99 - Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere c... | Not specified | Other/unknown | Indeterminate | indeterminate/unknown | Not specified in text, inferred as likely New Zealand general population based on context | 70.0 | 80.0 | 90.0 | The text does not specify a particular individual, so details like name, gender, age, and ethnic... | False | 85 | 50 | medical expenses, experimental therapies, travel expenses, lost wages | Auckland | 90 | 85 | hopeful | symbolic | False | not_detectable | high | 74.444444 |
11186 | 11194 | Diane's cyst removal to continue my dream job | Large Radial Ganglion Cyst Removal | https://static.givealittle.co.nz/assets/hero/06af4a19-e239-44bb-a5a8-a60400ab4982-320:False | https://givealittle.co.nz/cause/wristoperation | 730.0 | 5000.0 | closed | Read more | https://givealittle.co.nz/cause/wristoperation | 9 May 2016 | 3 Jun 2016 | 0 | Northland | 11.0 | I am in need of a large ganglion cyst removed from my left wrist. It has entwined itself with 2 ... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n Diane Crocker | Page Moderated | NaN | NaN | NaN | Title: Diane's cyst removal to continue my dream job\nPitch: Large Radial Ganglion Cyst Removal... | Ganglion Cyst | M71.0 | Chapter M00-M99 - Diseases of the musculoskeletal system and connective tissue | Diane | Female | Indeterminate | indeterminate/unknown | Caucasian (assumed based on image) | 80.0 | 50.0 | 90.0 | Age was not specified in the text; ethnicity was inferred from the image. | False | 85 | 60 | medical expenses | Northland | 70 | 65 | desperate | portrait | True | neutral | medium | 14.600000 |
11194 | 11202 | A thank you to Waikato NICU for helping so many little babies and their families | Raising funds for Waikato NICU, and a portable cardiac monitor to support the miracle babies. | https://static.givealittle.co.nz/assets/hero/75b48fa1-0752-44b4-8484-a5fb015ecab2-320:False | https://givealittle.co.nz/cause/charlieandoliver | 8912.7 | 35000.0 | closed | Read more | https://givealittle.co.nz/cause/charlieandoliver | 3 May 2016 | 30 Jun 2017 | 0 | Waikato | 100.0 | This give-a-little page aims to raise funds to allow the NICU at Waikato Hospital to purchase a ... | NaN | Created by_x000D_\n _x000D_\n Rebecca Clews_x000D_\n \n\n\n\n\n\n\n_x000D_\n ... | Page Moderated | 4.0 | Quiz Night!\n7 November 2016\nGot no plans on the 17th of November? Come on down to the Speights... | NaN | Title: A thank you to Waikato NICU for helping so many little babies and their families\nPitch: ... | Premature birth | P07 | Chapter P00-P96 - Certain conditions originating in the perinatal period | Charlie and Oliver | Male | Newborn (preterm) | 0-14 | Asian (based on image) | 90.0 | 80.0 | 95.0 | The text describes a campaign for premature twins, focusing on the need for a portable cardiac m... | True | 95 | 70 | medical expenses | Waikato | 90 | 85 | grateful | portrait | True | smiling | medium | 25.464857 |
11195 | 11203 | Help restore David’s smile after being assaulted | Talented professional musician and entertainer David Shanhun needs your help to restore his teet... | https://static.givealittle.co.nz/assets/hero/ee0d17f1-b6a5-4d41-85e9-a5fa0166b741-320:False | https://givealittle.co.nz/cause/help-restore-davids-teeth-and-smile | 8048.0 | 54500.0 | closed | Read more | https://givealittle.co.nz/cause/help-restore-davids-teeth-and-smile | 3 May 2016 | 31 Jul 2016 | 0 | Auckland | 115.0 | David was assaulted on the 16th of April 2016. He had 2 teeth knocked out completely, and 10 tee... | NaN | Created by_x000D_\n _x000D_\n Fiona Shanhun_x000D_\n \n\n\n\n\n\n\n_x000D_\n ... | Page Moderated | 2.0 | Health/Teeth/Face Update\n24 May 2016\n***Message from David:***\nThank you all so much for your... | NaN | Title: Help restore David’s smile after being assaulted\nPitch: Talented professional musician a... | Dental Trauma | S02.5 | Chapter S00-T88 - Injury, poisoning and certain other consequences of external causes | David Shanhun | Male | Indeterminate/Unknown | Indeterminate/Unknown | Caucasian (assumed based on image) | 80.0 | 70.0 | 90.0 | Age and ethnicity inferred; no explicit details provided in text. | True | 90 | 75 | medical expenses | Auckland | 85 | 80 | grateful | portrait | True | smiling | high | 14.766972 |
11204 | 11212 | TRUS Biopsy Machine for Taranaki | The men of Taranaki need this TRUS Biopsy Machine to diagnose Prostate Cancer. Reduce waiting t... | https://static.givealittle.co.nz/assets/hero/c9169845-63b5-4c1e-bb50-a5fc00e5abd0-320:False | https://givealittle.co.nz/cause/prostatebiopsymachinenewplymouth | 1910.0 | 20000.0 | closed | Read more | https://givealittle.co.nz/cause/prostatebiopsymachinenewplymouth | 29 Apr 2016 | 31 Aug 2016 | 0 | Taranaki | 15.0 | Currently the TRUS (Trans Rectal Ultrasound) machine for diagnosing prostate cancer is only avai... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n Rotary Club... | Page Moderated | NaN | NaN | NaN | Title: TRUS Biopsy Machine for Taranaki\nPitch: The men of Taranaki need this TRUS Biopsy Machin... | Prostate Cancer | C61 | Chapter C00-D49 - Neoplasms | Not specified | Male | Indeterminate | indeterminate/unknown | Not specified in text; assumed to be New Zealand European/Pākehā based on context | 80.0 | 70.0 | 90.0 | The text does not specify a specific individual, so details like name, age, and ethnicity are in... | False | 90 | 50 | medical expenses | Taranaki | 80 | 75 | hopeful | symbolic | False | not_detectable | high | 9.550000 |
4499 rows × 48 columns
time: 10.2 s (started: 2025-07-22 14:44:50 +12:00)
In [3]:
df.gender.value_counts().head(20)
Out[3]:
gender Female 2276 Male 1648 Other/unknown 542 Non-binary 3 Male and Female 3 Mixed (Male and Female) 2 Female (target audience is primarily women, but includes non-binary and trans men) 1 Mixed (group of individuals) 1 Female (Lesa), Male (Victor) 1 Other/unknown (as they are twin babies) 1 Other/unknown (animal) 1 Transgender (Female) 1 Male (assumed based on image) 1 Female, Male 1 Transgender (Female to Male) 1 Female (assumed based on context) 1 Other/unknown (nonbinary) 1 Non-Binary 1 Trans Masc Leaning 1 Transmasculine 1 Name: count, dtype: int64
time: 6.86 ms (started: 2025-07-22 14:45:00 +12:00)
In [4]:
print("\n".join(df[df.gender == "Male and Female"].uri.to_list()))
https://givealittle.co.nz/cause/brook-smith-family https://givealittle.co.nz/cause/cancellation-of-second-street-appeal https://givealittle.co.nz/cause/help-matt-and-jiwoo-get-out-of-their-rough-patch time: 4.98 ms (started: 2025-07-22 14:45:00 +12:00)
In [5]:
df.gender = df.gender.str.lower().where(df.gender.str.lower().isin(['male', 'female']), 'other')
df.gender.value_counts()
Out[5]:
gender female 2276 male 1648 other 575 Name: count, dtype: int64
time: 14.9 ms (started: 2025-07-22 14:45:00 +12:00)
In [6]:
df.ethnicity.value_counts().head(20)
Out[6]:
ethnicity Caucasian (assumed based on image) 1499 European/Caucasian (based on image) 216 Caucasian (based on image) 213 European/New Zealander (based on image) 162 Caucasian (assumed from image) 69 Pacific Islander (based on image) 69 South Asian (based on image) 69 Filipino 68 South Asian 67 Caucasian 61 Asian (based on image) 56 Māori 53 European/New Zealander (assumed based on image) 53 European/Caucasian (assumed based on image) 53 Indian 49 European/New Zealand European (based on image) 45 Māori (based on visual appearance) 44 Asian 40 European/New Zealand (based on image) 39 Unknown (animal) 37 Name: count, dtype: int64
time: 7.58 ms (started: 2025-07-22 14:45:00 +12:00)
In [7]:
df.ethnicity = df.ethnicity.str.extract(r"(.+?)(?= \(|$)")
df.ethnicity.value_counts().head(20)
Out[7]:
ethnicity Caucasian 1886 European/New Zealander 298 Māori 289 European/Caucasian 281 South Asian 184 Asian 178 Pacific Islander 128 Unknown 82 Filipino 78 European/New Zealand 73 Indian 69 European/New Zealand European 54 Maori 47 African 40 Not specified 36 Not applicable 34 Indeterminate/Unknown 33 Mixed 32 Latin American/Hispanic 29 Māori/Pacific Islander 28 Name: count, dtype: int64
time: 21.7 ms (started: 2025-07-22 14:45:00 +12:00)
In [8]:
df.attractiveness = df.attractiveness.apply(parse)
df[df.attractiveness >= 90]
Out[8]:
Index | title | pitch | hero | uri | amountRaised | goal | timeLeft | action | actionUri | start | end | n_questions | location | n_donors | description | use_of_funds | whos_involved | moderated | n_updates | updates | _21 | text | condition | ICD10 | ICD | name | gender | age | age_group | ethnicity | urgency | sentiment | truth | notes | smiling | deservingness | attractiveness | use | region | narrative_clarity | narrative_quality | emotional_tone | image_type | face_visible | facial_expression | image_quality | Target_completion | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
169 | 169 | A Helmet for Amelia’s Bright Future | Helping baby Amelia get the orthotic helmet she needs — because every little head deserves a hea... | https://static.givealittle.co.nz/assets/hero/1767aff8-a54d-40d8-9566-b2c7017108a3-320:False | https://givealittle.co.nz/cause/a-helmet-for-amelias-bright-future | 2501.0 | 4200.0 | closed | Read more | https://givealittle.co.nz/cause/a-helmet-for-amelias-bright-future | 22 Apr 2025 | 13 May 2025 | 0 | Hamilton, Waikato | 75.0 | My bright baby girl, Amelia, has been diagnosed with cranial deformities. The recommended treatm... | Orthotic cranial helmet and therapy for Amelia at Orthotic House, NZ (https://www.orthotichouse.... | Created by, and paying to a verified bank account of,\r\n \r\n Dil Askarova\r\n ... | Page Moderated | NaN | NaN | NaN | Title: A Helmet for Amelia’s Bright Future\nPitch: Helping baby Amelia get the orthotic helmet s... | cranial deformities | Q75.3 | Chapter Q00-Q99 - Congenital malformations, deformations and chromosomal abnormalities | Amelia | female | infant | 0-14 | Asian | 90.0 | 85.0 | 95.0 | No specific warnings or errors detected. | True | 95 | 90.0 | medical expenses | Waikato | 90 | 85 | hopeful | portrait | True | smiling | high | 59.547619 |
3116 | 3118 | Isla’s Hip dysplasia journey - pavlik harness | Little Isla and her hip dysplasia journey 💕 | https://static.givealittle.co.nz/assets/hero/799380ba-e48c-4051-9e59-af9a016d59a4-320:False | https://givealittle.co.nz/cause/islas-hip-dysplasia-journey-pavlik-harness | 92.0 | 800.0 | closed | Read more | https://givealittle.co.nz/cause/islas-hip-dysplasia-journey-pavlik-harness | 30 Jan 2023 | 15 Feb 2023 | 0 | Otago | 7.0 | Isla is 11 weeks old and has just been diagnosed with hip dysplasia.\nShe is now in a pavlik har... | Clothing, Car seat, fuel to get to and from specialist appointments. | Created by, and paying to a verified bank account of,_x000D_\nKerra McLellan | Page Moderated | 1.0 | Islas journey\n14 February 2023\nWe have been gifted some clothing!\nShare this update\n0 commen... | NaN | Title: Isla’s Hip dysplasia journey - pavlik harness\nPitch: Little Isla and her hip dysplasia j... | Hip dysplasia | M00-M99 | Chapter M00-M99 - Diseases of the musculoskeletal system and connective tissue | Isla | female | 11 | 0-14 | Caucasian | 80.0 | 60.0 | 90.0 | No explicit mention of ethnicity in text; inferred from image. | True | 95 | 90.0 | medical expenses, travel expenses | Otago | 80 | 70 | hopeful | portrait | True | smiling | high | 11.500000 |
6455 | 6459 | 10for10 | We want you to donate $10 for the 10% of babies by doing 10 days of 10 exercises and then nomina... | https://static.givealittle.co.nz/assets/hero/4227fe37-03cb-4c72-ada5-ab9e00f2557c-320:False | https://givealittle.co.nz/cause/10for10 | 2325.0 | 5000.0 | closed | Read more | https://givealittle.co.nz/cause/10for10 | 15 Apr 2020 | 15 Jul 2020 | 0 | Nationwide | 102.0 | Each year 10% of babies are born prematurely or at full-term with significant ongoing medical is... | These funds will help us supply Essential Packs for families, Winter Warmer packs for vulnerable... | Created by The Little Miracles Trust, paying to a verified bank account of_x000D_\n _x000D_... | Page Moderated | NaN | NaN | NaN | Title: 10for10\nPitch: We want you to donate $10 for the 10% of babies by doing 10 days of 10 ex... | Premature birth or significant ongoing medical issues | P07.9 (Prematurity, unspecified) | Chapter P00-P96 - Certain conditions originating in the perinatal period | Not specified | other | Newborn | 0-14 | Not specified in text; guessing from image: Likely diverse | 80.0 | 85.0 | 95.0 | The text does not specify a particular individual, but rather focuses on supporting families of ... | False | 90 | 90.0 | medical expenses, experimental therapies, travel expenses, lost wages | Nationwide | 90 | 85 | hopeful | symbolic | False | not_detectable | high | 46.500000 |
8092 | 8097 | Our Harness Hero Princess | Zoe is going for Surgey on the 10th of december where she will be placed into a Spica Cast. Due ... | https://static.givealittle.co.nz/assets/hero/7a00f120-baa5-4374-91be-a9a800a80665-320:False | https://givealittle.co.nz/cause/our-harness-hero-princess | 700.0 | 400.0 | closed | Read more | https://givealittle.co.nz/cause/our-harness-hero-princess | 30 Nov 2018 | 1 Dec 2018 | 0 | Canterbury | 7.0 | Kia ora whanau and friends!\nA lot of you know that our precious girl, Zoe, has been through a b... | The funds will go directly towards her carseat :) | Created by Abbey Thorne, paying to a verified bank account of_x000D_\nAbbey Thorne (Chambers) | Page Moderated | 1.0 | Thank you!!\n30 November 2018\nWe are completely overwhelmed by the response we have recieved to... | NaN | Title: Our Harness Hero Princess\nPitch: Zoe is going for Surgey on the 10th of december where s... | Bilateral Developmental Dislocation of the Hips | M47.0 | Chapter M00-M99 - Diseases of the musculoskeletal system and connective tissue | Zoe | female | 4 months | 0-14 | Caucasian | 90.0 | 70.0 | 95.0 | The text provides detailed medical history and context, making it highly informative. The urgenc... | True | 95 | 90.0 | medical expenses | Canterbury | 90 | 85 | hopeful | portrait | True | smiling | high | 175.000000 |
10808 | 10816 | Help Effie | Raising funds for Effie's open heart surgery | https://static.givealittle.co.nz/assets/hero/35a23088-2eb6-41ba-99cc-a67800ed1dc3-320:False | https://givealittle.co.nz/cause/effie | 237.0 | 1000.0 | closed | Read more | https://givealittle.co.nz/cause/effie | 5 Sep 2016 | 8 Nov 2016 | 0 | Auckland | 9.0 | Little Effie was born needing open heart surgery and without it she faces a very uncertain futur... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n Peter Nicol... | Page Moderated | NaN | NaN | NaN | Title: Help Effie\nPitch: Raising funds for Effie's open heart surgery\nDescription: Little Effi... | Congenital Heart Defect | Q20 | Chapter Q00-Q99 - Congenital malformations, deformations and chromosomal abnormalities | Effie | other | indeterminate/unknown | 0-14 | Caucasian | 90.0 | 70.0 | 95.0 | Age is indeterminate but assumed to be in the 0-14 range due to mention of being a baby. Ethnici... | True | 95 | 90.0 | medical expenses, travel expenses, accommodation | Auckland | 80 | 75 | hopeful | portrait | True | smiling | high | 23.700000 |
time: 62 ms (started: 2025-07-22 14:45:00 +12:00)
In [9]:
df.ethnicity = df.ethnicity.replace({
"European/Caucasian": "Caucasian",
"European/White": "Caucasian",
"European/White": "Caucasian",
"European/New Zealand": "Caucasian",
"European/New Zealander": "Caucasian",
"European/New Zealand European": "Caucasian",
"Māori/Pacific Islander": "Māori",
"Asian/Pacific Islander": "Pacific Islander",
"Maori": "Māori"
})
top_20 = df.ethnicity.value_counts().head(20)
display(top_20)
df.ethnicity = df.ethnicity.where(df.ethnicity.isin(top_20.index), "other")
ethnicity Caucasian 2594 Māori 364 South Asian 184 Asian 178 Pacific Islander 153 Unknown 82 Filipino 78 Indian 69 African 40 Not specified 36 Not applicable 34 Indeterminate/Unknown 33 Mixed 32 Latin American/Hispanic 29 Fijian 24 Indian/Fijian 24 East Asian 22 Fijian/Pacific Islander 20 European/New Zealand White 19 Indeterminate 17 Name: count, dtype: int64
time: 18.7 ms (started: 2025-07-22 14:45:00 +12:00)
In [10]:
df.condition.str.lower().value_counts().head(20)
Out[10]:
condition cancer 185 breast cancer 143 gender dysphoria 137 stroke 75 multiple sclerosis 67 none specified 54 endometriosis 45 bowel cancer 39 type 1 diabetes 37 melanoma 37 mental health issues 36 cerebral palsy 33 terminal cancer 33 infertility 33 chronic kidney disease 31 obesity 31 heart attack 29 coronary artery disease 28 lung cancer 27 ovarian cancer 27 Name: count, dtype: int64
time: 14.3 ms (started: 2025-07-22 14:45:00 +12:00)
In [11]:
df.sentiment.value_counts()
Out[11]:
sentiment 70.0 1860 60.0 713 50.0 657 80.0 453 85.0 268 30.0 215 90.0 182 75.0 84 40.0 29 20.0 23 95.0 8 10.0 6 100.0 1 Name: count, dtype: int64
time: 6.56 ms (started: 2025-07-22 14:45:00 +12:00)
In [12]:
df.use.str.lower().value_counts().head(20)
Out[12]:
use medical expenses 2054 medical expenses, travel expenses 624 travel expenses 327 medical expenses, travel expenses, lost wages 306 medical expenses, experimental therapies 238 medical expenses, lost wages 221 medical expenses, experimental therapies, travel expenses 93 medical expenses, experimental therapies, travel expenses, lost wages 77 travel expenses, lost wages 43 experimental therapies 42 travel expenses, medical expenses 31 experimental therapies, travel expenses 27 medical expenses, travel expenses, accommodation 26 funeral expenses 21 lost wages 20 funeral costs 12 medical expenses, funeral expenses 8 educational expenses 6 travel expenses, accommodation 6 medical expenses, lost wages, travel expenses 5 Name: count, dtype: int64
time: 14.9 ms (started: 2025-07-22 14:45:00 +12:00)
In [13]:
df.age_group = df.age_group.str.extract(r"(.+?)(?= \(|$)")
df.age_group = df.age_group.str.lower().where(df.age_group.str.lower().isin(['0-14', '15-64', '65+']), 'indeterminate/unknown')
df.age_group.value_counts()
Out[13]:
age_group indeterminate/unknown 2671 15-64 1117 0-14 504 65+ 207 Name: count, dtype: int64
time: 31.3 ms (started: 2025-07-22 14:45:00 +12:00)
In [14]:
df.gender.value_counts()
Out[14]:
gender female 2276 male 1648 other 575 Name: count, dtype: int64
time: 10.6 ms (started: 2025-07-22 14:45:01 +12:00)
In [15]:
df["Charity"] = df.whos_involved.str.contains(r"(Charity)", regex=False)
time: 4.8 ms (started: 2025-07-22 14:45:01 +12:00)
In [16]:
df.to_excel("LLM_results_cleaned.xlsx", index=False)
time: 7.07 s (started: 2025-07-22 14:45:01 +12:00)
In [17]:
df["met_target"] = (df.amountRaised >= df.goal).astype(int)
time: 1.57 ms (started: 2025-07-22 14:45:08 +12:00)
In [18]:
df["parsed_age"] = df.age.apply(parse)
df.parsed_age.value_counts().head(20)
Out[18]:
parsed_age 25.0 52 30.0 51 35.0 47 17.0 43 20.0 41 28.0 39 23.0 37 6.0 34 40.0 33 27.0 33 18.0 32 4.0 32 13.0 30 3.0 28 26.0 28 38.0 27 19.0 27 14.0 27 2.0 26 50.0 26 Name: count, dtype: int64
time: 8.81 ms (started: 2025-07-22 14:45:08 +12:00)
In [19]:
df.met_target.value_counts()
Out[19]:
met_target 0 3859 1 640 Name: count, dtype: int64
time: 3.98 ms (started: 2025-07-22 14:45:08 +12:00)
In [20]:
df.deservingness = df.deservingness.apply(parse).astype(float)
df.deservingness.describe()
Out[20]:
count 4499.000000 mean 91.133585 std 4.859491 min 20.000000 25% 90.000000 50% 90.000000 75% 95.000000 max 100.000000 Name: deservingness, dtype: float64
time: 8.3 ms (started: 2025-07-22 14:45:08 +12:00)
In [21]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway, chi2_contingency, pearsonr
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
def analyze_feature_influence(df, target_col='met_target'):
results = {}
target = df[target_col]
is_target_numeric = np.issubdtype(target.dtype, np.number)
for col in df.columns:
if col == target_col:
continue
feature = df[col]
if feature.isnull().any() or target.isnull().any():
continue # skip missing for simplicity
if is_target_numeric:
# Target is numeric
if np.issubdtype(feature.dtype, np.number):
# Pearson correlation
corr, p_value = pearsonr(feature, target)
results[col] = {
'test': 'pearsonr',
'stat': corr,
'p_value': p_value
}
else:
# ANOVA (feature is categorical)
groups = [target[feature == val] for val in feature.unique()]
stat, p_value = f_oneway(*groups)
results[col] = {
'test': 'anova',
'stat': stat,
'p_value': p_value
}
else:
# Target is categorical
if not np.issubdtype(feature.dtype, np.number):
# Chi-squared test
contingency = pd.crosstab(df[col], target)
stat, p, _, _ = chi2_contingency(contingency)
results[col] = {
'test': 'chi2',
'stat': stat,
'p_value': p
}
return pd.DataFrame(results).T.sort_values('p_value')
# Usage:
# df = pd.read_csv('your_data.csv')
df.met_target = df.met_target.astype(bool)
analyze_feature_influence(df, target_col='met_target')
Out[21]:
test | stat | p_value | |
---|---|---|---|
image_quality | chi2 | 40.932891 | 0.0 |
emotional_tone | chi2 | 73.290279 | 0.0 |
age_group | chi2 | 21.177083 | 0.000097 |
facial_expression | chi2 | 26.442983 | 0.000184 |
ethnicity | chi2 | 48.378961 | 0.000376 |
end | chi2 | 2186.22469 | 0.215375 |
region | chi2 | 104.213501 | 0.290067 |
notes | chi2 | 2766.80732 | 0.292727 |
start | chi2 | 2427.549619 | 0.326723 |
whos_involved | chi2 | 4433.025955 | 0.417108 |
face_visible | chi2 | 0.606062 | 0.436275 |
pitch | chi2 | 4490.804466 | 0.489598 |
title | chi2 | 4473.047477 | 0.49277 |
text | chi2 | 4499.0 | 0.49299 |
actionUri | chi2 | 4499.0 | 0.49299 |
uri | chi2 | 4499.0 | 0.49299 |
hero | chi2 | 4499.0 | 0.49299 |
Charity | chi2 | 0.407019 | 0.523486 |
gender | chi2 | 1.054881 | 0.590114 |
image_type | chi2 | 7.374276 | 0.689701 |
moderated | chi2 | 0.0 | 1.0 |
action | chi2 | 0.0 | 1.0 |
timeLeft | chi2 | 0.0 | 1.0 |
time: 679 ms (started: 2025-07-22 14:45:08 +12:00)
In [22]:
df.smiling = df.smiling.astype(bool)
time: 854 μs (started: 2025-07-22 14:45:08 +12:00)
In [23]:
df["have_age"] = ~df.parsed_age.isna()
df[["met_target", "have_age"]].value_counts()
Out[23]:
met_target have_age False False 2509 True 1350 True False 440 True 200 Name: count, dtype: int64
time: 9.29 ms (started: 2025-07-22 14:45:08 +12:00)
In [24]:
df["log_amountRaised"] = np.log1p(df.amountRaised)
df["log_amountRaised"].describe()
Out[24]:
count 4499.000000 mean 7.422533 std 1.885026 min 0.000000 25% 6.208588 50% 7.476472 75% 8.764990 max 13.388103 Name: log_amountRaised, dtype: float64
time: 7.58 ms (started: 2025-07-22 14:45:08 +12:00)
In [25]:
df.start = pd.to_datetime(df.start, format="mixed")
df.end = pd.to_datetime(df.end, format="mixed")
df["days"] = (df.end - df.start).dt.days
df.days.describe()
Out[25]:
count 4499.000000 mean 131.302734 std 141.845040 min 0.000000 25% 32.000000 50% 90.000000 75% 182.000000 max 1755.000000 Name: days, dtype: float64
time: 274 ms (started: 2025-07-22 14:45:08 +12:00)
In [26]:
formula = "log_amountRaised ~ days + sentiment + smiling + gender + truth + urgency + parsed_age"
logit = smf.glm(formula=formula, data=df, family=sm.families.Gamma(link=sm.families.links.Log())).fit()
display(logit.summary())
display(f"AIC (Logit): {logit.aic:.2f}")
probit = smf.glm(formula=formula, data=df, family=sm.families.Gamma(link=sm.families.links.Log())).fit()
display(probit.summary())
display(f"AIC (Probit): {probit.aic:.2f}")
/home/ubuntu/givealittle/.venv/lib/python3.10/site-packages/statsmodels/genmod/families/family.py:812: RuntimeWarning: divide by zero encountered in log /home/ubuntu/givealittle/.venv/lib/python3.10/site-packages/statsmodels/genmod/generalized_linear_model.py:1891: RuntimeWarning: invalid value encountered in scalar subtract
Dep. Variable: | log_amountRaised | No. Observations: | 1550 |
---|---|---|---|
Model: | GLM | Df Residuals: | 1541 |
Model Family: | Gamma | Df Model: | 8 |
Link Function: | Log | Scale: | 0.055701 |
Method: | IRLS | Log-Likelihood: | inf |
Date: | Tue, 22 Jul 2025 | Deviance: | 239.92 |
Time: | 14:45:09 | Pearson chi2: | 85.8 |
No. Iterations: | 12 | Pseudo R-squ. (CS): | nan |
Covariance Type: | nonrobust |
coef | std err | z | P>|z| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Intercept | -0.2256 | 0.167 | -1.354 | 0.176 | -0.552 | 0.101 |
smiling[T.True] | 0.0688 | 0.013 | 5.362 | 0.000 | 0.044 | 0.094 |
gender[T.male] | 0.0359 | 0.013 | 2.858 | 0.004 | 0.011 | 0.060 |
gender[T.other] | -0.0900 | 0.029 | -3.095 | 0.002 | -0.147 | -0.033 |
days | 0.0002 | 3.91e-05 | 5.521 | 0.000 | 0.000 | 0.000 |
sentiment | 0.0035 | 0.001 | 6.577 | 0.000 | 0.002 | 0.005 |
truth | 0.0120 | 0.002 | 5.936 | 0.000 | 0.008 | 0.016 |
urgency | 0.0097 | 0.001 | 12.200 | 0.000 | 0.008 | 0.011 |
parsed_age | -0.0002 | 0.000 | -0.629 | 0.529 | -0.001 | 0.000 |
'AIC (Logit): -inf'
/home/ubuntu/givealittle/.venv/lib/python3.10/site-packages/statsmodels/genmod/families/family.py:812: RuntimeWarning: divide by zero encountered in log /home/ubuntu/givealittle/.venv/lib/python3.10/site-packages/statsmodels/genmod/generalized_linear_model.py:1891: RuntimeWarning: invalid value encountered in scalar subtract
Dep. Variable: | log_amountRaised | No. Observations: | 1550 |
---|---|---|---|
Model: | GLM | Df Residuals: | 1541 |
Model Family: | Gamma | Df Model: | 8 |
Link Function: | Log | Scale: | 0.055701 |
Method: | IRLS | Log-Likelihood: | inf |
Date: | Tue, 22 Jul 2025 | Deviance: | 239.92 |
Time: | 14:45:09 | Pearson chi2: | 85.8 |
No. Iterations: | 12 | Pseudo R-squ. (CS): | nan |
Covariance Type: | nonrobust |
coef | std err | z | P>|z| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Intercept | -0.2256 | 0.167 | -1.354 | 0.176 | -0.552 | 0.101 |
smiling[T.True] | 0.0688 | 0.013 | 5.362 | 0.000 | 0.044 | 0.094 |
gender[T.male] | 0.0359 | 0.013 | 2.858 | 0.004 | 0.011 | 0.060 |
gender[T.other] | -0.0900 | 0.029 | -3.095 | 0.002 | -0.147 | -0.033 |
days | 0.0002 | 3.91e-05 | 5.521 | 0.000 | 0.000 | 0.000 |
sentiment | 0.0035 | 0.001 | 6.577 | 0.000 | 0.002 | 0.005 |
truth | 0.0120 | 0.002 | 5.936 | 0.000 | 0.008 | 0.016 |
urgency | 0.0097 | 0.001 | 12.200 | 0.000 | 0.008 | 0.011 |
parsed_age | -0.0002 | 0.000 | -0.629 | 0.529 | -0.001 | 0.000 |
'AIC (Probit): -inf'
time: 118 ms (started: 2025-07-22 14:45:09 +12:00)
In [27]:
df.met_target = df.met_target.astype(int)
smf.ols(formula="Target_completion ~ days + sentiment + smiling + gender + truth + urgency + parsed_age", data=df).fit().summary()
Out[27]:
Dep. Variable: | Target_completion | R-squared: | 0.003 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | -0.002 |
Method: | Least Squares | F-statistic: | 0.6002 |
Date: | Tue, 22 Jul 2025 | Prob (F-statistic): | 0.778 |
Time: | 14:45:09 | Log-Likelihood: | -13128. |
No. Observations: | 1550 | AIC: | 2.627e+04 |
Df Residuals: | 1541 | BIC: | 2.632e+04 |
Df Model: | 8 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Intercept | -943.8363 | 816.545 | -1.156 | 0.248 | -2545.493 | 657.820 |
smiling[T.True] | 51.4457 | 62.877 | 0.818 | 0.413 | -71.888 | 174.780 |
gender[T.male] | 96.6374 | 61.509 | 1.571 | 0.116 | -24.012 | 217.287 |
gender[T.other] | 19.1091 | 142.511 | 0.134 | 0.893 | -260.427 | 298.645 |
days | -0.0727 | 0.192 | -0.379 | 0.705 | -0.449 | 0.303 |
sentiment | 0.4612 | 2.599 | 0.177 | 0.859 | -4.636 | 5.558 |
truth | 8.8228 | 9.898 | 0.891 | 0.373 | -10.592 | 28.237 |
urgency | 1.2515 | 3.907 | 0.320 | 0.749 | -6.412 | 8.915 |
parsed_age | 0.3826 | 1.567 | 0.244 | 0.807 | -2.690 | 3.455 |
Omnibus: | 4408.166 | Durbin-Watson: | 2.007 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 117341667.316 |
Skew: | 35.986 | Prob(JB): | 0.00 |
Kurtosis: | 1349.003 | Cond. No. | 6.70e+03 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.7e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
time: 48 ms (started: 2025-07-22 14:45:09 +12:00)
In [28]:
import seaborn as sns
sns.regplot(x="amountRaised", y="smiling", data=df, logistic=True)
Out[28]:
<Axes: xlabel='amountRaised', ylabel='smiling'>
time: 5.88 s (started: 2025-07-22 14:45:09 +12:00)
In [29]:
smf.ols(formula=formula, data=df).fit().summary()
Out[29]:
Dep. Variable: | log_amountRaised | R-squared: | 0.214 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.209 |
Method: | Least Squares | F-statistic: | 52.30 |
Date: | Tue, 22 Jul 2025 | Prob (F-statistic): | 3.16e-75 |
Time: | 14:45:15 | Log-Likelihood: | -3058.1 |
No. Observations: | 1550 | AIC: | 6134. |
Df Residuals: | 1541 | BIC: | 6182. |
Df Model: | 8 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Intercept | -7.7171 | 1.232 | -6.264 | 0.000 | -10.134 | -5.300 |
smiling[T.True] | 0.5030 | 0.095 | 5.302 | 0.000 | 0.317 | 0.689 |
gender[T.male] | 0.2312 | 0.093 | 2.491 | 0.013 | 0.049 | 0.413 |
gender[T.other] | -0.6517 | 0.215 | -3.031 | 0.002 | -1.074 | -0.230 |
days | 0.0017 | 0.000 | 5.995 | 0.000 | 0.001 | 0.002 |
sentiment | 0.0255 | 0.004 | 6.507 | 0.000 | 0.018 | 0.033 |
truth | 0.0735 | 0.015 | 4.923 | 0.000 | 0.044 | 0.103 |
urgency | 0.0730 | 0.006 | 12.390 | 0.000 | 0.061 | 0.085 |
parsed_age | -0.0010 | 0.002 | -0.404 | 0.686 | -0.006 | 0.004 |
Omnibus: | 59.312 | Durbin-Watson: | 1.962 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 66.951 |
Skew: | -0.460 | Prob(JB): | 2.90e-15 |
Kurtosis: | 3.436 | Cond. No. | 6.70e+03 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.7e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
time: 63.2 ms (started: 2025-07-22 14:45:15 +12:00)
In [30]:
model = RandomForestRegressor()
cols = "sentiment + smiling + gender + truth + urgency + ethnicity".split(" + ")
X = pd.get_dummies(df[cols])
y = df["met_target"]
model.fit(X, y)
Out[30]:
RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
n_estimators | 100 | |
criterion | 'squared_error' | |
max_depth | None | |
min_samples_split | 2 | |
min_samples_leaf | 1 | |
min_weight_fraction_leaf | 0.0 | |
max_features | 1.0 | |
max_leaf_nodes | None | |
min_impurity_decrease | 0.0 | |
bootstrap | True | |
oob_score | False | |
n_jobs | None | |
random_state | None | |
verbose | 0 | |
warm_start | False | |
ccp_alpha | 0.0 | |
max_samples | None | |
monotonic_cst | None |
time: 572 ms (started: 2025-07-22 14:45:15 +12:00)
In [31]:
list(zip(cols, model.feature_importances_))
Out[31]:
[('sentiment', np.float64(0.20735737033894144)), ('smiling', np.float64(0.11204050356227582)), ('gender', np.float64(0.09140590737411987)), ('truth', np.float64(0.215663147060733)), ('urgency', np.float64(0.049351978324802946)), ('ethnicity', np.float64(0.04748830954506516))]
time: 16.6 ms (started: 2025-07-22 14:45:16 +12:00)
In [32]:
df.region.value_counts().head(20)
Out[32]:
region Auckland 1331 Canterbury 555 Wellington 489 Waikato 385 Bay of Plenty 293 Nationwide 210 Otago 168 Northland 161 Hawke's Bay 134 International 107 Taranaki 100 Southland 71 Manawatu-Wanganui 63 Nelson / Tasman 62 Manawatu / Whanganui 51 Manawatu-Whanganui 51 Marlborough 37 Gisborne 34 Nelson 28 West Coast 24 Name: count, dtype: int64
time: 8.73 ms (started: 2025-07-22 14:45:16 +12:00)