In [1]:
%reload_ext autotime
import pandas as pd
import plotly.io as pio
import plotly.express as px # Plotting
from tqdm import tqdm
tqdm.pandas()
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestRegressor
pio.renderers.default = 'notebook'
pd.options.plotting.backend = "plotly"
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
def parse(number):
try:
return float(number)
except ValueError:
return None
time: 1.9 s (started: 2025-11-10 10:48:22 +13:00)
In [2]:
df = pd.read_excel("LLM_results.xlsx")
df = df[(df.timeLeft == "closed") & (df.goal > 0)]
df.sentiment = df.sentiment.astype(str).str.split(" ").str[0].astype(float)
df.truth = df.truth.astype(str).str.split(" ").str[0].astype(float)
df.urgency = df.urgency.astype(str).str.split(" ").str[0].astype(float)
df["Target_completion"] = df.amountRaised / df.goal * 100
df
Out[2]:
| Index | title | pitch | hero | uri | amountRaised | goal | timeLeft | action | actionUri | start | end | n_questions | location | n_donors | description | use_of_funds | whos_involved | moderated | n_updates | updates | _21 | text | condition | ICD10 | ICD | name | gender | age | age_group | ethnicity | urgency | sentiment | truth | notes | smiling | deservingness | attractiveness | use | region | narrative_clarity | narrative_quality | emotional_tone | image_type | face_visible | facial_expression | image_quality | progression | treatment | treatment_effectiveness | treatment_side_effects | site | stage | reason | narrative_clarity | emotional_tone | facial_expression | progression | treatment | treatment_effectiveness | treatment_side_effects | site | stage | reason | Target_completion | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 21 | 21 | Need roughly $30.000 for unfunded cancer treatment (Abiraterone Acetate+Methylprednisolone) | Your support would mean the world, helping cover these costs so I can focus on cherishing every ... | https://static.givealittle.co.nz/assets/hero/e7f518e2-a54e-4fa7-9763-b37c00fedf16-320:False | https://givealittle.co.nz/cause/need-dollar30000-for-unfunded-cancer-treatment | 265.0 | 35000.0 | closed | Read more | https://givealittle.co.nz/cause/need-dollar30000-for-unfunded-cancer-treatment | 20 Oct 2025 | 24 Oct 2025 | 0 | Nationwide | 10.0 | In July 2024 I was diagnosed with Metastatic prostate cancer. Metastatic as it has spread extens... | My Cancer treatment (Abiraterone Acetate+Methylprednisolone) which isnt funded in New Zealand | Created by, and paying to a verified bank account of,\r\nBruce Alpine | Page Moderated | NaN | NaN | NaN | Title: Need roughly $30.000 for unfunded cancer treatment (Abiraterone Acetate+Methylprednisolon... | Metastatic prostate cancer | C61 | Chapter C00-D49 - Neoplasms | Unknown | Male | 65 | 65+ | Caucasian | 95.0 | 65.0 | 90.0 | The person is likely of Caucasian ethnicity based on appearance in the image. The campaign is fo... | 0 | 90.0 | 70.0 | medical expenses | Nationwide | 90.0 | 85.0 | desperate | portrait | 1 | serious | high | 95 | 70 | 40 | 60 | prostate | IV | The individual is seeking funds to cover the cost of unfunded cancer treatment (Abiraterone Acet... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.757143 |
| 87 | 87 | Support Mum’s Urgent Heart Surgery | To raise funds for our mum’s open-heart surgery. Any contribution is deeply appreciated as we st... | https://static.givealittle.co.nz/assets/hero/d4a8d0e0-f9f9-4040-9f7a-b36301448e77-320:False | https://givealittle.co.nz/cause/help-save-our-mums-heart | 8520.0 | 15000.0 | closed | Read more | https://givealittle.co.nz/cause/help-save-our-mums-heart | 25 Sep 2025 | 3 Oct 2025 | 0 | Auckland | 44.0 | On Monday, September 15th, our mum suffered a heart attack. Doctors discovered three major arter... | Procedure and hospital expenses | Created by, and paying to a verified bank account of,\r\nCielo Tionko on behalf of My Mum's... | Page Moderated | NaN | NaN | NaN | Title: Support Mum’s Urgent Heart Surgery\nPitch: To raise funds for our mum’s open-heart surger... | Heart attack | I21 | Chapter I00-I99 - Diseases of the circulatory system | Cielo Tionko's mum | Female | 65 | 65+ | Asian | 95.0 | 70.0 | 90.0 | The text is clear and heartfelt, with a strong narrative about a heart attack and urgent surgery... | 1 | 95.0 | 75.0 | medical expenses | Auckland | 90.0 | 85.0 | desperate | portrait | 1 | smiling | high | 85 | 0 | 0 | 0 | NaN | NaN | To raise funds for urgent open-heart triple bypass surgery due to a heart attack with blocked ar... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 56.800000 |
| 90 | 90 | Please Help Mum Drive Her Car | Please help mum to modify her car to a left foot accelerator. | https://static.givealittle.co.nz/assets/hero/fd316603-730e-4d7a-8d2a-b3630141de75-320:False | https://givealittle.co.nz/cause/please-help-mum-drive-her-car | 3330.0 | 3500.0 | closed | Read more | https://givealittle.co.nz/cause/please-help-mum-drive-her-car | 24 Sep 2025 | 24 Oct 2025 | 0 | Taihape, Manawatu-Wanganui | 67.0 | "3 and a half years ago, my mum had an emergency right, below the knee amputation to save her li... | All funds raised will be spent on having my car modified to a left foot accelerator. | Created by, and paying to a verified bank account of,\r\nSally Margaret Chambers | Page Moderated | NaN | NaN | NaN | Title: Please Help Mum Drive Her Car\nPitch: Please help mum to modify her car to a left foot ac... | Below-knee amputation | T87.8 | Chapter S00-T88 - Injury, poisoning and certain other consequences of external causes | Sally Margaret Chambers | Female | 65 | 65+ | Pākehā | 85.0 | 60.0 | 85.0 | The text appears to be a genuine appeal for help with a car modification. The narrative is clear... | 0 | 90.0 | 70.0 | medical expenses, travel expenses | Manawatū-Whanganui | 90.0 | 80.0 | desperate | portrait | 1 | neutral | medium | 70 | 80 | 60 | 75 | NaN | NaN | The funds will be used to modify the car with a left foot accelerator so that the mother, who ha... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 95.142857 |
| 92 | 92 | A mother's love, a daughters fight - Help us heal a Liver and Life | A mother's love, a daughters fight - Help us heal a Liver and Life | https://static.givealittle.co.nz/assets/hero/acf99948-53f5-4846-a2d7-b362011ba176-320:False | https://givealittle.co.nz/cause/a-mothers-love-a-daughters-fight-help-us-heal | 1550.0 | 1500.0 | closed | Read more | https://givealittle.co.nz/cause/a-mothers-love-a-daughters-fight-help-us-heal | 24 Sep 2025 | 8 Oct 2025 | 0 | Bay of Plenty | 36.0 | After a suicide attempt, Kayla age 13 is in hospital with severe liver failure and needs urgent ... | Ambulance fees, Loss of income, travel and accommodation for family, prescription medications, r... | Created by\r\nHayley George \n\n\n\n\r\n Paying to a verified bank account of\r\nKrystle G... | Page Moderated | 3.0 | Update 01/10/2025\n1 October 2025\nKayla continues to fight bravely on her recovery journey. Her... | NaN | Title: A mother's love, a daughters fight - Help us heal a Liver and Life\nPitch: A mother's lov... | Liver failure | K76.8 | Chapter K00-K95 - Diseases of the digestive system | Kayla | Female | 13 | 0-14 | Pacific Islander | 90.0 | 65.0 | 85.0 | The image shows a young girl in a hospital bed, consistent with the narrative of a 13-year-old p... | 0 | 95.0 | 70.0 | medical expenses, travel expenses, lost wages | Bay of Plenty | 90.0 | 85.0 | hopeful | portrait | 1 | neutral | medium | 70 | 60 | 50 | 60 | NaN | NaN | Kayla, a 13-year-old girl, is in hospital with severe liver failure following a suicide attempt.... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 103.333333 |
| 93 | 93 | Please make Linetas Journey with Cancer less stressful by donating to her good cause to prolong ... | Requesting any donations to help with Lineta Uelese's Cancer Treatments. 6x Chemotherapy needed.... | https://static.givealittle.co.nz/assets/hero/a34be6ac-331d-4fc1-ad60-b36200c72db3-320:False | https://givealittle.co.nz/cause/please-make-linetas-journey-with-cancer-less | 1510.0 | 70000.0 | closed | Read more | https://givealittle.co.nz/cause/please-make-linetas-journey-with-cancer-less | 24 Sep 2025 | 24 Oct 2025 | 0 | Manurewa, Auckland | 14.0 | Please help suppport my sister-in-law Lineta Uelese. She is\na 39-year-old mother from Samoa.\nS... | Funds will go towards 6 Chemotherapy treatments plus other costs for her medical bills. | Created by, and paying to a verified bank account of,\r\nFiti Tapelu on behalf of Lineta Ue... | Page Moderated | NaN | NaN | NaN | Title: Please make Linetas Journey with Cancer less stressful by donating to her good cause to p... | Ovarian Cancer | C56.9 | Chapter C00-D49 - Neoplasms | Lineta Uelese | Female | 39 | 15-64 | Samoan | 95.0 | 65.0 | 90.0 | Ethnicity inferred from text mentioning 'mother from Samoa'. Image shows a person with short hai... | 1 | 95.0 | 70.0 | medical expenses, experimental therapies, travel expenses, lost wages | Auckland | 85.0 | 75.0 | desperate | selfie | 1 | smiling | high | 75 | 50 | 40 | 60 | Ovary | 3 | Lineta Uelese, a 39-year-old mother of six from Samoa, is undergoing treatment for Stage 3 Ovari... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.157143 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 11725 | 11733 | New Zealand MindBody Trust web-based 'whole person' resources for healthworkers and their patien... | The NZ MindBody Trust web-based project to support and educate clinicians and patients in a 'who... | https://static.givealittle.co.nz/assets/hero/158d0e80-2c47-449a-a05e-a603014e786e-320:False | https://givealittle.co.nz/cause/mindbodyresourcesforpatientsandhealthworkers | 6700.0 | 9000.0 | closed | Read more | https://givealittle.co.nz/cause/mindbodyresourcesforpatientsandhealthworkers | 9 May 2016 | 15 Jul 2016 | 0 | Auckland | 36.0 | The NZ MindBody Trust (2004) is concerned with 'whole person' and person-centred approaches to h... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n New Zealand... | Page Moderated | 2.0 | The Give-A-Little Campaign Result\n12 July 2016\nThe MindBody Trustees want to thank you all for... | NaN | Title: New Zealand MindBody Trust web-based 'whole person' resources for healthworkers and their... | Chronic disorders | Z99.8 | Chapter Z00-Z99 - Factors influencing health status and contact with health services | Brian Broom | Male | 55 | 15-64 | Caucasian | 50.0 | 65.0 | 85.0 | The image is a logo, not a person. The person is not visible, so all person-related attributes a... | 0 | 70.0 | 50.0 | medical expenses | Auckland | 85.0 | 75.0 | NaN | symbolic | 0 | not_detectable | high | 50 | 50 | 50 | 50 | NaN | NaN | To fund the development of a free, accessible website providing 'whole person' health resources ... | NaN | grateful | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 74.444444 |
| 11727 | 11735 | Diane's cyst removal to continue my dream job | Large Radial Ganglion Cyst Removal | https://static.givealittle.co.nz/assets/hero/06af4a19-e239-44bb-a5a8-a60400ab4982-320:False | https://givealittle.co.nz/cause/wristoperation | 730.0 | 5000.0 | closed | Read more | https://givealittle.co.nz/cause/wristoperation | 9 May 2016 | 3 Jun 2016 | 0 | Northland | 11.0 | I am in need of a large ganglion cyst removed from my left wrist. It has entwined itself with 2 ... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n Diane Crocker | Page Moderated | NaN | NaN | NaN | Title: Diane's cyst removal to continue my dream job\nPitch: Large Radial Ganglion Cyst Removal... | Radial Ganglion Cyst | M14.0 | Chapter M00-M99 - Diseases of the musculoskeletal system and connective tissue | Diane | Female | 55 | 15-64 | Caucasian | 75.0 | 50.0 | 90.0 | Image suggests a middle-aged woman, likely Caucasian. Text is factual and straightforward. Campa... | 0 | 85.0 | 60.0 | medical expenses | Northland | 85.0 | 75.0 | desperate | portrait | 1 | neutral | medium | 60 | 0 | 0 | 0 | NaN | NaN | Diane needs a large radial ganglion cyst removed from her left wrist, which is causing pain, los... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14.600000 |
| 11735 | 11743 | A thank you to Waikato NICU for helping so many little babies and their families | Raising funds for Waikato NICU, and a portable cardiac monitor to support the miracle babies. | https://static.givealittle.co.nz/assets/hero/75b48fa1-0752-44b4-8484-a5fb015ecab2-320:False | https://givealittle.co.nz/cause/charlieandoliver | 8912.7 | 35000.0 | closed | Read more | https://givealittle.co.nz/cause/charlieandoliver | 3 May 2016 | 30 Jun 2017 | 0 | Waikato | 100.0 | This give-a-little page aims to raise funds to allow the NICU at Waikato Hospital to purchase a ... | NaN | Created by_x000D_\n _x000D_\n Rebecca Clews_x000D_\n \n\n\n\n\n\n\n_x000D_\n ... | Page Moderated | 4.0 | Quiz Night!\n7 November 2016\nGot no plans on the 17th of November? Come on down to the Speights... | NaN | Title: A thank you to Waikato NICU for helping so many little babies and their families\nPitch: ... | Prematurity | P07.9 | Chapter P00-P96 - Certain conditions originating in the perinatal period | Rebecca Clews | Female | 31 | 15-64 | Caucasian | 85.0 | 65.0 | 95.0 | The person in the image is a woman holding a premature baby. The text states that the twins were... | 0 | 80.0 | 60.0 | medical expenses, travel expenses | Waikato | 85.0 | 75.0 | grateful | portrait | 1 | neutral | medium | 50 | 70 | 80 | 30 | NaN | NaN | The campaign is raising funds for a portable cardiac monitor for the NICU at Waikato Hospital. T... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 25.464857 |
| 11736 | 11744 | Help restore David’s smile after being assaulted | Talented professional musician and entertainer David Shanhun needs your help to restore his teet... | https://static.givealittle.co.nz/assets/hero/ee0d17f1-b6a5-4d41-85e9-a5fa0166b741-320:False | https://givealittle.co.nz/cause/help-restore-davids-teeth-and-smile | 8048.0 | 54500.0 | closed | Read more | https://givealittle.co.nz/cause/help-restore-davids-teeth-and-smile | 3 May 2016 | 31 Jul 2016 | 0 | Auckland | 115.0 | David was assaulted on the 16th of April 2016. He had 2 teeth knocked out completely, and 10 tee... | NaN | Created by_x000D_\n _x000D_\n Fiona Shanhun_x000D_\n \n\n\n\n\n\n\n_x000D_\n ... | Page Moderated | 2.0 | Health/Teeth/Face Update\n24 May 2016\n***Message from David:***\nThank you all so much for your... | NaN | Title: Help restore David’s smile after being assaulted\nPitch: Talented professional musician a... | dental trauma | S00.8 | Chapter S00-T88 - Injury, poisoning and certain other consequences of external causes | David Shanhun | Male | 32 | 15-64 | Caucasian | 75.0 | 85.0 | 90.0 | Image suggests a Caucasian male. Age estimated based on appearance. Campaign narrative is clear ... | 1 | 95.0 | 80.0 | medical expenses, lost wages | Auckland | 90.0 | 85.0 | hopeful | portrait | 1 | smiling | high | 60 | 40 | 30 | 50 | NaN | NaN | David needs $26,500 for immediate dental treatment including root canals and implants, and over ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14.766972 |
| 11745 | 11753 | TRUS Biopsy Machine for Taranaki | The men of Taranaki need this TRUS Biopsy Machine to diagnose Prostate Cancer. Reduce waiting t... | https://static.givealittle.co.nz/assets/hero/c9169845-63b5-4c1e-bb50-a5fc00e5abd0-320:False | https://givealittle.co.nz/cause/prostatebiopsymachinenewplymouth | 1910.0 | 20000.0 | closed | Read more | https://givealittle.co.nz/cause/prostatebiopsymachinenewplymouth | 29 Apr 2016 | 31 Aug 2016 | 0 | Taranaki | 15.0 | Currently the TRUS (Trans Rectal Ultrasound) machine for diagnosing prostate cancer is only avai... | NaN | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n Rotary Club... | Page Moderated | NaN | NaN | NaN | Title: TRUS Biopsy Machine for Taranaki\nPitch: The men of Taranaki need this TRUS Biopsy Machin... | Prostate Cancer | C61 | Chapter C00-D49 | Men of Taranaki | Male | indeterminate/unknown | indeterminate/unknown | unknown | 70.0 | 60.0 | 85.0 | The image is symbolic, not a portrait of a specific individual. The person's ethnicity is not me... | 0 | 75.0 | 50.0 | medical expenses | Taranaki | 85.0 | 75.0 | hopeful | symbolic | 0 | not_detectable | high | 50 | 0 | 0 | 0 | prostate | indeterminate/unknown | To purchase a TRUS Biopsy Machine for early diagnosis of prostate cancer, reducing waiting times... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 9.550000 |
4756 rows × 65 columns
time: 7.43 s (started: 2025-11-10 10:48:24 +13:00)
In [3]:
df.gender.value_counts().head(20)
Out[3]:
gender Female 2430 Male 1779 Other/unknown 539 unknown 5 indeterminate/unknown 1 Unknown 1 Male, Female 1 Name: count, dtype: int64
time: 8.74 ms (started: 2025-11-10 10:48:31 +13:00)
In [4]:
print("\n".join(df[df.gender == "Male and Female"].uri.to_list()))
time: 3.74 ms (started: 2025-11-10 10:48:31 +13:00)
In [5]:
df.gender = df.gender.str.lower().where(df.gender.str.lower().isin(['male', 'female']), 'other')
df.gender.value_counts()
Out[5]:
gender female 2430 male 1779 other 547 Name: count, dtype: int64
time: 19.3 ms (started: 2025-11-10 10:48:31 +13:00)
In [6]:
df.ethnicity.value_counts().head(20)
Out[6]:
ethnicity Caucasian 2088 Pacific Islander 483 unknown 444 Pākehā 399 Māori 387 South Asian 220 Asian 146 Pacific 94 Filipino 82 White 70 African 57 Unknown 55 Other/unknown 35 Hispanic 26 Samoan 24 Black 14 Fijian 14 Middle Eastern 11 Indian 10 Latino 10 Name: count, dtype: int64
time: 10.6 ms (started: 2025-11-10 10:48:31 +13:00)
In [7]:
df.ethnicity = df.ethnicity.str.extract(r"(.+?)(?= \(|$)")
df.ethnicity.value_counts().head(20)
Out[7]:
ethnicity Caucasian 2088 Pacific Islander 483 unknown 444 Pākehā 399 Māori 387 South Asian 220 Asian 146 Pacific 94 Filipino 82 White 70 African 57 Unknown 55 Other/unknown 35 Hispanic 26 Samoan 24 Black 14 Fijian 14 Middle Eastern 11 Indian 10 Latino 10 Name: count, dtype: int64
time: 32.1 ms (started: 2025-11-10 10:48:31 +13:00)
In [8]:
df.attractiveness = df.attractiveness.apply(parse)
df[df.attractiveness >= 90]
Out[8]:
| Index | title | pitch | hero | uri | amountRaised | goal | timeLeft | action | actionUri | start | end | n_questions | location | n_donors | description | use_of_funds | whos_involved | moderated | n_updates | updates | _21 | text | condition | ICD10 | ICD | name | gender | age | age_group | ethnicity | urgency | sentiment | truth | notes | smiling | deservingness | attractiveness | use | region | narrative_clarity | narrative_quality | emotional_tone | image_type | face_visible | facial_expression | image_quality | progression | treatment | treatment_effectiveness | treatment_side_effects | site | stage | reason | narrative_clarity | emotional_tone | facial_expression | progression | treatment | treatment_effectiveness | treatment_side_effects | site | stage | reason | Target_completion | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 220 | 220 | Life Saving Surgery For Esteban Our Mini Dachshund (IVDD) | Fund raising to get Esteban the mini dachshund life saving surgery. | https://static.givealittle.co.nz/assets/hero/0f9946bb-6956-497d-a8e9-b34401554923-320:False | https://givealittle.co.nz/cause/life-saving-surgery-for-esteban-our-mini-dachshund | 5485.00 | 5000.0 | closed | Read more | https://givealittle.co.nz/cause/life-saving-surgery-for-esteban-our-mini-dachshund | 25 Aug 2025 | 8 Sep 2025 | 0 | Dunedin, Otago | 105.0 | Please Help Us Save Esteban’s Life\nEsteban has been diagnosed with Intervertebral Disc Disease,... | Surgery and aftercare costs for Esteban. | Created by, and paying to a verified bank account of,\r\nSam Heath | Page Moderated | NaN | NaN | NaN | Title: Life Saving Surgery For Esteban Our Mini Dachshund (IVDD)\nPitch: Fund raising to get Est... | Intervertebral Disc Disease | M51.2 | Chapter M00-M99 - Diseases of the musculoskeletal system and connective tissue | Esteban | other | 5 | 0-14 | unknown | 95.0 | 65.0 | 90.0 | The image shows a dachshund, not a human, so all human attributes are inferred from the text. Th... | 0 | 85.0 | 90.0 | medical expenses | Otago | 90.0 | 85.0 | desperate | portrait | 1 | neutral | high | 70 | 0 | 0 | 0 | NaN | NaN | Donated funds will be used to cover the cost of life-saving surgery, aftercare, and rehabilitati... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 109.700000 |
| 441 | 441 | Tragic family circumstances | Tragic family circumstances | https://static.givealittle.co.nz/assets/hero/47874955-526a-40bd-9682-b31300cb2790-320:False | https://givealittle.co.nz/cause/tragic-family-circumstances | 2190.00 | 5000.0 | closed | Read more | https://givealittle.co.nz/cause/tragic-family-circumstances | 7 Jul 2025 | 28 Jul 2025 | 0 | Marlborough | 39.0 | Five days ago Valley was 5 months old, today she is on life support in Canada waiting for her gr... | Return flights to Canada/NZ, food | Created by\r\nRa Davis \n\n\n\n\r\n Paying to a verified bank account of\r\nDebbie Huia | Page Moderated | 1.0 | Tragic family circumstances\n15 July 2025\nAs many of you are aware Debz moko passed away in her... | NaN | Title: Tragic family circumstances\nPitch: Tragic family circumstances\nDescription: Five days a... | Neonatal death | P96.9 | Chapter P00-P96 - Certain conditions originating in the perinatal period | Valley | female | 5 | 0-14 | Māori | 95.0 | 10.0 | 85.0 | The text mentions a tragic event involving a 5-month-old baby. The image shows a smiling baby, w... | 1 | 90.0 | 95.0 | travel expenses, medical expenses | Marlborough | 85.0 | 70.0 | desperate | portrait | 1 | smiling | high | 100 | 0 | 0 | 0 | NaN | NaN | Funds are needed for Deb to travel to Canada to retrieve her deceased granddaughter's body and t... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 43.800000 |
| 1085 | 1085 | A Helmet for Amelia’s Bright Future | Helping baby Amelia get the orthotic helmet she needs — because every little head deserves a hea... | https://static.givealittle.co.nz/assets/hero/1767aff8-a54d-40d8-9566-b2c7017108a3-320:False | https://givealittle.co.nz/cause/a-helmet-for-amelias-bright-future | 2501.00 | 4200.0 | closed | Read more | https://givealittle.co.nz/cause/a-helmet-for-amelias-bright-future | 22 Apr 2025 | 13 May 2025 | 0 | Hamilton, Waikato | 75.0 | My bright baby girl, Amelia, has been diagnosed with cranial deformities. The recommended treatm... | Orthotic cranial helmet and therapy for Amelia at Orthotic House, NZ (https://www.orthotichouse.... | Created by, and paying to a verified bank account of,\r\n \r\n Dil Askarova\r\n ... | Page Moderated | NaN | NaN | NaN | Title: A Helmet for Amelia’s Bright Future\nPitch: Helping baby Amelia get the orthotic helmet s... | Cranial deformities | Q75.0 | Chapter Q00-Q99 - Congenital malformations, deformations and chromosomal abnormalities | Amelia | female | 6 | 0-14 | Caucasian | 85.0 | 80.0 | 90.0 | The text is consistent and clear, with no obvious signs of fabrication. The image shows a baby g... | 1 | 95.0 | 90.0 | medical expenses | Waikato | 95.0 | 85.0 | hopeful | portrait | 1 | smiling | high | 30 | 0 | 0 | 0 | NaN | NaN | Donated funds will be used to cover the cost of a custom cranial remodeling helmet and follow-up... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 59.547619 |
| 2284 | 2284 | Remi Rocks | Help us get Remi to Brisbane for intensive physical therapy | https://static.givealittle.co.nz/assets/hero/df7bb718-0d4a-422b-af53-b179014ca04a-320:False | https://givealittle.co.nz/cause/remi-rocks | 24675.00 | 30000.0 | closed | Read more | https://givealittle.co.nz/cause/remi-rocks | 24 May 2024 | 30 Jun 2024 | 0 | Mount Maunganui, Tauranga | 251.0 | Remi's story is still unwritten. After almost 2 years of tests and investigations we still don't... | Any money raised will go towards the cost of the intensive therapy as well as flights and accomm... | Created by, and paying to a verified bank account of,_x000D_\n _x000D_\n Mandy Hende... | Page Moderated | 1.0 | Let’s go again!\n30 May 2024\nBecause you guys are legends we are raising the goal. Anything ove... | NaN | Title: Remi Rocks\nPitch: Help us get Remi to Brisbane for intensive physical therapy\nDescripti... | neurodevelopmental disorder | R68.8 | Chapter R00-R99 - Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere c... | Remi | female | 3 | 0-14 | Caucasian | 85.0 | 70.0 | 80.0 | The campaign is for a child with unexplained neurodevelopmental challenges. The text is emotiona... | 1 | 95.0 | 90.0 | medical expenses, travel expenses | Bay of Plenty | 90.0 | 85.0 | hopeful | portrait | 1 | smiling | high | 50 | 60 | 40 | 20 | NaN | NaN | Donated funds will be used to cover the cost of intensive paediatric rehabilitation therapy in B... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 82.250000 |
| 3346 | 3348 | Please help our family get Alayna to Aussie for Intensive Therapy. | We are asking for help for our youngest daughter Alayna to get the intensive therapies she needs... | https://static.givealittle.co.nz/assets/hero/3cf9aae0-dbb5-43fe-93d9-aff501102de8-320:False | https://givealittle.co.nz/cause/please-help-our-family-get-to-aussie | 14960.00 | 14000.0 | closed | Read more | https://givealittle.co.nz/cause/please-help-our-family-get-to-aussie | 1 May 2023 | 1 Jun 2023 | 1 | Auckland | 126.0 | Our littlest Lever has a rare genetic condition called CACNA1A which comes with life altering ch... | All funds raised will be put towards flights and accommodation for our family, 2 weeks of intens... | Created by, and paying to a verified bank account of,_x000D_\nNicole Lever | Page Moderated | 1.0 | We made it!\n12 June 2023\nWe have made it to the Gold Coast and have completed our first day of... | on 7 May 2023\ndonna asks\nHave you given yourself a buffer for extra costs?\nNicole\nI thought ... | Title: Please help our family get Alayna to Aussie for Intensive Therapy.\nPitch: We are asking ... | CACNA1A | G11.8 | Chapter G00-G99 - Diseases of the nervous system | Alayna | female | 2 | 0-14 | Pākehā | 85.0 | 70.0 | 80.0 | The text is clear and well-written. The campaign is for a young child with a rare genetic condit... | 1 | 95.0 | 90.0 | medical expenses, travel expenses | Auckland | 90.0 | 85.0 | hopeful | portrait | 1 | smiling | high | 40 | 30 | 40 | 20 | NaN | NaN | To fund a 2-week intensive therapy program at The Centre of Movement in Australia for Alayna, wh... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 106.857143 |
| 3697 | 3699 | Help Hudson get his hearing and speech sorted. | Help Hudson get the surgery he needs for his speech and overall health after battling the public... | https://static.givealittle.co.nz/assets/hero/beb66e05-44a7-4e0b-b710-af8e01417ec9-320:False | https://givealittle.co.nz/cause/help-hudson-get-his-hearing-sorted | 121.00 | 9000.0 | closed | Read more | https://givealittle.co.nz/cause/help-hudson-get-his-hearing-sorted | 18 Jan 2023 | 18 Feb 2023 | 0 | Farm Cove, Auckland | 3.0 | Since Hudson was 1 1/2 as parents we have known there was something wrong with his speech. We ha... | The money will be used to get Hudson’s surgery done privately but if not enough will be used to ... | Created by, and paying to a verified bank account of,_x000D_\nChloe Milner | Page Moderated | NaN | NaN | NaN | Title: Help Hudson get his hearing and speech sorted.\nPitch: Help Hudson get the surgery he nee... | Sleep apnoea, hearing impairment, speech delay | H61.2, R06.0, R48.8, R49.8, R49.9, R65.9, R67.8, R67.9, R93.8, R93.9, R94.8, R94.9, R95.8, R95.9... | Chapter H60-H95 - Diseases of the ear and mastoid process | Hudson | male | 3 | 0-14 | Pacific Islander | 90.0 | 30.0 | 85.0 | The image shows a young boy with a big smile. The narrative is clear and well-structured. The pe... | 1 | 95.0 | 90.0 | medical expenses, experimental therapies, travel expenses, lost wages | Auckland | 90.0 | 85.0 | desperate | portrait | 1 | smiling | high | 60 | 40 | 30 | 50 | NaN | NaN | Hudson needs urgent surgery to address his sleep apnoea and hearing issues, which are affecting ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.344444 |
| 5141 | 5145 | Baby Atlas | Helping Baby Atlas | https://static.givealittle.co.nz/assets/hero/09faf6e8-55ca-4028-b0b9-adf500c40bdd-320:False | https://givealittle.co.nz/cause/baby-atlas | 5744.77 | 5000.0 | closed | Read more | https://givealittle.co.nz/cause/baby-atlas | 5 Dec 2021 | 5 Jun 2022 | 0 | Auckland | 68.0 | I'm setting up this page to hopefully raise funds on behalf of my bestfriend Hayley and her fian... | Oxygen/breathing monitor, everyday household bills. | Created by_x000D_\nAmberose Carson \n\n\n\n_x000D_\n Paying to a verified bank account of_... | Page Moderated | NaN | NaN | NaN | Title: Baby Atlas\nPitch: Helping Baby Atlas\nDescription: I'm setting up this page to hopefully... | Neurofibromatosis type 1 | Q85.0 | Chapter Q00-Q99 - Congenital malformations, deformations and chromosomal abnormalities | Atlas | male | 2 | 0-14 | Caucasian | 90.0 | 30.0 | 95.0 | The text is clear and the narrative is consistent. The image shows a baby who appears healthy, w... | 1 | 95.0 | 90.0 | medical expenses, lost wages | Auckland | 85.0 | 70.0 | desperate | portrait | 1 | smiling | high | 60 | 40 | 30 | 50 | NaN | NaN | Funds are needed for an oxygen/breathing monitor for Atlas when he returns home and to help with... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 114.895400 |
| 8099 | 8104 | Help baby Miné to hear | Help baby Miné to receive cochlear implants | https://static.givealittle.co.nz/assets/hero/aadffa91-73dc-48ed-bb37-aa5101575a16-320:False | https://givealittle.co.nz/cause/help-baby-mine-to-hear | 435.00 | 55000.0 | closed | Read more | https://givealittle.co.nz/cause/help-baby-mine-to-hear | 18 May 2019 | 1 Nov 2019 | 0 | Bay of Plenty | 8.0 | We, New Zealand citizens for more than 15 years, have a granddaughter, Miné, who was born in Jan... | Funding of cochlear implant operation, devices and after-care for baby Miné. Surplus will be use... | Created by, and paying to a verified bank account of,_x000D_\nHannes & Charille Schoeman on... | Page Moderated | 1.0 | 2 weeks to go!!\n25 September 2019\nOnly two weeks to go before beautiful little Miné gets her b... | NaN | Title: Help baby Miné to hear\nPitch: Help baby Miné to receive cochlear implants \nDescription:... | congenital deafness | H90.3 | Chapter H60-H95 - Diseases of the ear and mastoid process | Miné | female | 1 | 0-14 | White | 95.0 | 70.0 | 90.0 | The image shows a baby girl with light skin and blue eyes, suggesting a White ethnicity. The nar... | 1 | 95.0 | 90.0 | medical expenses, experimental therapies | Bay of Plenty | 90.0 | 85.0 | desperate | portrait | 1 | smiling | high | 50 | 0 | 0 | 0 | NaN | NaN | Donated funds will be used to cover the cost of the second cochlear implant and associated medic... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.790909 |
time: 78.5 ms (started: 2025-11-10 10:48:31 +13:00)
In [9]:
df.ethnicity = df.ethnicity.replace({
"European/Caucasian": "Caucasian",
"European/White": "Caucasian",
"European/White": "Caucasian",
"European/New Zealand": "Caucasian",
"European/New Zealander": "Caucasian",
"European/New Zealand European": "Caucasian",
"Māori/Pacific Islander": "Māori",
"Asian/Pacific Islander": "Pacific Islander",
"Maori": "Māori"
})
top_20 = df.ethnicity.value_counts().head(20)
display(top_20)
df.ethnicity = df.ethnicity.where(df.ethnicity.isin(top_20.index), "other")
ethnicity Caucasian 2088 Pacific Islander 483 unknown 444 Pākehā 399 Māori 390 South Asian 220 Asian 146 Pacific 94 Filipino 82 White 70 African 57 Unknown 55 Other/unknown 35 Hispanic 26 Samoan 24 Black 14 Fijian 14 Middle Eastern 11 Latino 10 Indian 10 Name: count, dtype: int64
time: 16.1 ms (started: 2025-11-10 10:48:32 +13:00)
In [10]:
df.condition.str.lower().value_counts().head(20)
Out[10]:
condition cancer 274 breast cancer 216 gender dysphoria 122 bowel cancer 85 stroke 79 lung cancer 72 multiple sclerosis 72 melanoma 55 obesity 46 endometriosis 44 infertility 43 cerebral palsy 39 ovarian cancer 38 kidney failure 37 type 1 diabetes 36 heart attack 33 traumatic brain injury 32 mental health 31 mental health issues 29 spinal cord injury 27 Name: count, dtype: int64
time: 13.3 ms (started: 2025-11-10 10:48:32 +13:00)
In [11]:
df.sentiment.value_counts()
Out[11]:
sentiment 65.0 1008 60.0 817 70.0 651 75.0 567 30.0 548 85.0 230 40.0 212 45.0 147 80.0 143 35.0 124 20.0 86 50.0 54 25.0 48 10.0 36 55.0 33 90.0 21 15.0 16 95.0 7 0.0 7 88.0 1 Name: count, dtype: int64
time: 9.38 ms (started: 2025-11-10 10:48:32 +13:00)
In [12]:
df.use.str.lower().value_counts().head(20)
Out[12]:
use medical expenses 1538 medical expenses, travel expenses 890 medical expenses, travel expenses, lost wages 764 medical expenses, lost wages 364 medical expenses, experimental therapies 320 travel expenses 158 medical expenses, experimental therapies, travel expenses, lost wages 157 travel expenses, medical expenses 93 medical expenses, experimental therapies, travel expenses 59 travel expenses, lost wages 37 medical expenses, experimental therapies, lost wages 27 medical expenses, lost wages, travel expenses 19 medical expenses, funeral expenses 18 lost wages 12 lost wages, medical expenses 11 experimental therapies, medical expenses 7 funeral expenses 7 funeral costs 7 travel expenses, accommodation 6 travel expenses, medical expenses, lost wages 5 Name: count, dtype: int64
time: 11.3 ms (started: 2025-11-10 10:48:32 +13:00)
In [13]:
df.age_group = df.age_group.str.extract(r"(.+?)(?= \(|$)")
df.age_group = df.age_group.str.lower().where(df.age_group.str.lower().isin(['0-14', '15-64', '65+']), 'indeterminate/unknown')
df.age_group.value_counts()
Out[13]:
age_group 15-64 2888 65+ 790 0-14 668 indeterminate/unknown 410 Name: count, dtype: int64
time: 28.9 ms (started: 2025-11-10 10:48:32 +13:00)
In [14]:
df.gender.value_counts()
Out[14]:
gender female 2430 male 1779 other 547 Name: count, dtype: int64
time: 4.1 ms (started: 2025-11-10 10:48:32 +13:00)
In [15]:
df["Charity"] = df.whos_involved.str.contains(r"(Charity)", regex=False)
time: 4.79 ms (started: 2025-11-10 10:48:32 +13:00)
In [16]:
df.to_excel("LLM_results_cleaned.xlsx", index=False)
time: 6.21 s (started: 2025-11-10 10:48:32 +13:00)
In [17]:
df["met_target"] = (df.amountRaised >= df.goal).astype(int)
time: 1.78 ms (started: 2025-11-10 10:48:38 +13:00)
In [18]:
df["parsed_age"] = df.age.apply(parse)
df.parsed_age.value_counts().head(20)
Out[18]:
parsed_age 35.0 605 45.0 369 65.0 315 55.0 242 30.0 181 25.0 166 0.0 156 32.0 115 28.0 109 38.0 86 60.0 83 52.0 80 40.0 77 42.0 73 50.0 70 62.0 64 58.0 58 70.0 54 6.0 53 5.0 51 Name: count, dtype: int64
time: 6.84 ms (started: 2025-11-10 10:48:38 +13:00)
In [19]:
df.met_target.value_counts()
Out[19]:
met_target 0 4086 1 670 Name: count, dtype: int64
time: 3.37 ms (started: 2025-11-10 10:48:38 +13:00)
In [20]:
df.deservingness = df.deservingness.apply(parse).astype(float)
df.deservingness.describe()
Out[20]:
count 4752.000000 mean 89.255892 std 8.391354 min 0.000000 25% 85.000000 50% 90.000000 75% 95.000000 max 100.000000 Name: deservingness, dtype: float64
time: 11 ms (started: 2025-11-10 10:48:38 +13:00)
In [21]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway, chi2_contingency, pearsonr
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
def analyze_feature_influence(df, target_col='met_target'):
results = {}
target = df[target_col]
is_target_numeric = np.issubdtype(target.dtype, np.number)
for col in df.columns:
if col == target_col:
continue
feature = df[col]
if feature.isnull().any() or target.isnull().any():
continue # skip missing for simplicity
if is_target_numeric:
# Target is numeric
if np.issubdtype(feature.dtype, np.number):
# Pearson correlation
corr, p_value = pearsonr(feature, target)
results[col] = {
'test': 'pearsonr',
'stat': corr,
'p_value': p_value
}
else:
# ANOVA (feature is categorical)
groups = [target[feature == val] for val in feature.unique()]
stat, p_value = f_oneway(*groups)
results[col] = {
'test': 'anova',
'stat': stat,
'p_value': p_value
}
else:
# Target is categorical
if not np.issubdtype(feature.dtype, np.number):
# Chi-squared test
contingency = pd.crosstab(df[col], target)
stat, p, _, _ = chi2_contingency(contingency)
results[col] = {
'test': 'chi2',
'stat': stat,
'p_value': p
}
return pd.DataFrame(results).T.sort_values('p_value')
# Usage:
# df = pd.read_csv('your_data.csv')
df.met_target = df.met_target.astype(bool)
analyze_feature_influence(df, target_col='met_target')
Out[21]:
| test | stat | p_value | |
|---|---|---|---|
| age_group | chi2 | 41.515522 | 0.0 |
| ethnicity | chi2 | 54.962474 | 0.000042 |
| end | chi2 | 2309.985027 | 0.169615 |
| gender | chi2 | 2.730386 | 0.255331 |
| start | chi2 | 2540.523648 | 0.325432 |
| whos_involved | chi2 | 4689.48702 | 0.417515 |
| pitch | chi2 | 4747.737518 | 0.490157 |
| hero | chi2 | 4756.0 | 0.493182 |
| uri | chi2 | 4756.0 | 0.493182 |
| actionUri | chi2 | 4756.0 | 0.493182 |
| notes | chi2 | 4756.0 | 0.493182 |
| text | chi2 | 4756.0 | 0.493182 |
| title | chi2 | 4728.458393 | 0.495385 |
| Charity | chi2 | 0.446239 | 0.504126 |
| action | chi2 | 0.0 | 1.0 |
| timeLeft | chi2 | 0.0 | 1.0 |
| moderated | chi2 | 0.0 | 1.0 |
time: 975 ms (started: 2025-11-10 10:48:38 +13:00)
In [22]:
df.smiling = df.smiling.astype(bool)
time: 860 μs (started: 2025-11-10 10:48:39 +13:00)
In [23]:
df["have_age"] = ~df.parsed_age.isna()
df[["met_target", "have_age"]].value_counts()
Out[23]:
met_target have_age False True 3830 True True 625 False False 256 True False 45 Name: count, dtype: int64
time: 6.7 ms (started: 2025-11-10 10:48:39 +13:00)
In [24]:
df["log_amountRaised"] = np.log1p(df.amountRaised)
df["log_amountRaised"].describe()
Out[24]:
count 4756.000000 mean 7.432131 std 1.893607 min 0.000000 25% 6.216606 50% 7.483525 75% 8.781019 max 13.388103 Name: log_amountRaised, dtype: float64
time: 6.09 ms (started: 2025-11-10 10:48:39 +13:00)
In [25]:
df.start = pd.to_datetime(df.start, format="mixed")
df.end = pd.to_datetime(df.end, format="mixed")
df["days"] = (df.end - df.start).dt.days
df.days.describe()
Out[25]:
count 4756.000000 mean 130.953532 std 140.808665 min 0.000000 25% 32.000000 50% 90.000000 75% 182.000000 max 1755.000000 Name: days, dtype: float64
time: 219 ms (started: 2025-11-10 10:48:39 +13:00)
In [26]:
formula = "log_amountRaised ~ days + sentiment + smiling + gender + truth + urgency + parsed_age"
logit = smf.glm(formula=formula, data=df, family=sm.families.Gamma(link=sm.families.links.Log())).fit()
display(logit.summary())
display(f"AIC (Logit): {logit.aic:.2f}")
probit = smf.glm(formula=formula, data=df, family=sm.families.Gamma(link=sm.families.links.Log())).fit()
display(probit.summary())
display(f"AIC (Probit): {probit.aic:.2f}")
/home/ubuntu/givealittle/.venv/lib/python3.12/site-packages/statsmodels/genmod/families/family.py:812: RuntimeWarning: divide by zero encountered in log /home/ubuntu/givealittle/.venv/lib/python3.12/site-packages/statsmodels/genmod/generalized_linear_model.py:1891: RuntimeWarning: invalid value encountered in scalar subtract
| Dep. Variable: | log_amountRaised | No. Observations: | 4455 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 4446 |
| Model Family: | Gamma | Df Model: | 8 |
| Link Function: | Log | Scale: | 0.056312 |
| Method: | IRLS | Log-Likelihood: | inf |
| Date: | Mon, 10 Nov 2025 | Deviance: | 430.25 |
| Time: | 10:48:39 | Pearson chi2: | 250. |
| No. Iterations: | 25 | Pseudo R-squ. (CS): | nan |
| Covariance Type: | nonrobust |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1.2417 | 0.056 | 22.145 | 0.000 | 1.132 | 1.352 |
| smiling[T.True] | 0.0826 | 0.007 | 11.099 | 0.000 | 0.068 | 0.097 |
| gender[T.male] | 0.0305 | 0.008 | 4.037 | 0.000 | 0.016 | 0.045 |
| gender[T.other] | -0.0377 | 0.015 | -2.432 | 0.015 | -0.068 | -0.007 |
| days | 0.0001 | 2.57e-05 | 5.821 | 0.000 | 9.92e-05 | 0.000 |
| sentiment | 0.0021 | 0.000 | 9.875 | 0.000 | 0.002 | 0.003 |
| truth | 0.0020 | 0.001 | 2.961 | 0.003 | 0.001 | 0.003 |
| urgency | 0.0049 | 0.000 | 17.627 | 0.000 | 0.004 | 0.005 |
| parsed_age | -0.0002 | 0.000 | -1.242 | 0.214 | -0.001 | 0.000 |
'AIC (Logit): -inf'
/home/ubuntu/givealittle/.venv/lib/python3.12/site-packages/statsmodels/genmod/families/family.py:812: RuntimeWarning: divide by zero encountered in log /home/ubuntu/givealittle/.venv/lib/python3.12/site-packages/statsmodels/genmod/generalized_linear_model.py:1891: RuntimeWarning: invalid value encountered in scalar subtract
| Dep. Variable: | log_amountRaised | No. Observations: | 4455 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 4446 |
| Model Family: | Gamma | Df Model: | 8 |
| Link Function: | Log | Scale: | 0.056312 |
| Method: | IRLS | Log-Likelihood: | inf |
| Date: | Mon, 10 Nov 2025 | Deviance: | 430.25 |
| Time: | 10:48:40 | Pearson chi2: | 250. |
| No. Iterations: | 25 | Pseudo R-squ. (CS): | nan |
| Covariance Type: | nonrobust |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1.2417 | 0.056 | 22.145 | 0.000 | 1.132 | 1.352 |
| smiling[T.True] | 0.0826 | 0.007 | 11.099 | 0.000 | 0.068 | 0.097 |
| gender[T.male] | 0.0305 | 0.008 | 4.037 | 0.000 | 0.016 | 0.045 |
| gender[T.other] | -0.0377 | 0.015 | -2.432 | 0.015 | -0.068 | -0.007 |
| days | 0.0001 | 2.57e-05 | 5.821 | 0.000 | 9.92e-05 | 0.000 |
| sentiment | 0.0021 | 0.000 | 9.875 | 0.000 | 0.002 | 0.003 |
| truth | 0.0020 | 0.001 | 2.961 | 0.003 | 0.001 | 0.003 |
| urgency | 0.0049 | 0.000 | 17.627 | 0.000 | 0.004 | 0.005 |
| parsed_age | -0.0002 | 0.000 | -1.242 | 0.214 | -0.001 | 0.000 |
'AIC (Probit): -inf'
time: 176 ms (started: 2025-11-10 10:48:39 +13:00)
In [27]:
df.met_target = df.met_target.astype(int)
smf.ols(formula="Target_completion ~ days + sentiment + smiling + gender + truth + urgency + parsed_age", data=df).fit().summary()
Out[27]:
| Dep. Variable: | Target_completion | R-squared: | 0.002 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | -0.000 |
| Method: | Least Squares | F-statistic: | 0.8567 |
| Date: | Mon, 10 Nov 2025 | Prob (F-statistic): | 0.553 |
| Time: | 10:48:40 | Log-Likelihood: | -35922. |
| No. Observations: | 4455 | AIC: | 7.186e+04 |
| Df Residuals: | 4446 | BIC: | 7.192e+04 |
| Df Model: | 8 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | -143.5134 | 181.748 | -0.790 | 0.430 | -499.831 | 212.804 |
| smiling[T.True] | 9.0121 | 24.118 | 0.374 | 0.709 | -38.271 | 56.295 |
| gender[T.male] | 38.0294 | 24.477 | 1.554 | 0.120 | -9.958 | 86.017 |
| gender[T.other] | 35.0388 | 50.187 | 0.698 | 0.485 | -63.352 | 133.430 |
| days | -0.1019 | 0.083 | -1.223 | 0.221 | -0.265 | 0.061 |
| sentiment | 0.1799 | 0.696 | 0.258 | 0.796 | -1.185 | 1.545 |
| truth | 2.2916 | 2.160 | 1.061 | 0.289 | -1.942 | 6.526 |
| urgency | 0.1624 | 0.901 | 0.180 | 0.857 | -1.603 | 1.928 |
| parsed_age | -0.7003 | 0.623 | -1.125 | 0.261 | -1.921 | 0.521 |
| Omnibus: | 13712.438 | Durbin-Watson: | 2.004 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 1129656429.335 |
| Skew: | 46.308 | Prob(JB): | 0.00 |
| Kurtosis: | 2468.180 | Cond. No. | 3.47e+03 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.47e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
time: 49.2 ms (started: 2025-11-10 10:48:40 +13:00)
In [28]:
import seaborn as sns
sns.regplot(x="amountRaised", y="smiling", data=df, logistic=True)
Out[28]:
<Axes: xlabel='amountRaised', ylabel='smiling'>
time: 7.83 s (started: 2025-11-10 10:48:40 +13:00)
In [29]:
smf.ols(formula=formula, data=df).fit().summary()
Out[29]:
| Dep. Variable: | log_amountRaised | R-squared: | 0.171 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.170 |
| Method: | Least Squares | F-statistic: | 114.9 |
| Date: | Mon, 10 Nov 2025 | Prob (F-statistic): | 2.95e-175 |
| Time: | 10:48:47 | Log-Likelihood: | -8747.6 |
| No. Observations: | 4455 | AIC: | 1.751e+04 |
| Df Residuals: | 4446 | BIC: | 1.757e+04 |
| Df Model: | 8 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1.0209 | 0.408 | 2.504 | 0.012 | 0.222 | 1.820 |
| smiling[T.True] | 0.6074 | 0.054 | 11.227 | 0.000 | 0.501 | 0.714 |
| gender[T.male] | 0.2096 | 0.055 | 3.816 | 0.000 | 0.102 | 0.317 |
| gender[T.other] | -0.2730 | 0.113 | -2.425 | 0.015 | -0.494 | -0.052 |
| days | 0.0012 | 0.000 | 6.179 | 0.000 | 0.001 | 0.002 |
| sentiment | 0.0170 | 0.002 | 10.887 | 0.000 | 0.014 | 0.020 |
| truth | 0.0207 | 0.005 | 4.268 | 0.000 | 0.011 | 0.030 |
| urgency | 0.0384 | 0.002 | 18.980 | 0.000 | 0.034 | 0.042 |
| parsed_age | -0.0014 | 0.001 | -1.016 | 0.309 | -0.004 | 0.001 |
| Omnibus: | 83.437 | Durbin-Watson: | 1.996 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 99.082 |
| Skew: | -0.275 | Prob(JB): | 3.05e-22 |
| Kurtosis: | 3.480 | Cond. No. | 3.47e+03 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.47e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
time: 90.4 ms (started: 2025-11-10 10:48:47 +13:00)
In [30]:
model = RandomForestRegressor()
cols = "sentiment + smiling + gender + truth + urgency + ethnicity".split(" + ")
X = pd.get_dummies(df[cols])
y = df["met_target"]
model.fit(X, y)
Out[30]:
RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
| n_estimators | 100 | |
| criterion | 'squared_error' | |
| max_depth | None | |
| min_samples_split | 2 | |
| min_samples_leaf | 1 | |
| min_weight_fraction_leaf | 0.0 | |
| max_features | 1.0 | |
| max_leaf_nodes | None | |
| min_impurity_decrease | 0.0 | |
| bootstrap | True | |
| oob_score | False | |
| n_jobs | None | |
| random_state | None | |
| verbose | 0 | |
| warm_start | False | |
| ccp_alpha | 0.0 | |
| max_samples | None | |
| monotonic_cst | None |
time: 797 ms (started: 2025-11-10 10:48:48 +13:00)
In [31]:
list(zip(cols, model.feature_importances_))
Out[31]:
[('sentiment', np.float64(0.21714220780964652)),
('smiling', np.float64(0.09765563879988882)),
('gender', np.float64(0.12161975328506257)),
('truth', np.float64(0.1969747348218822)),
('urgency', np.float64(0.04706132967039141)),
('ethnicity', np.float64(0.04718095039393153))]
time: 10.5 ms (started: 2025-11-10 10:48:48 +13:00)
In [32]:
df.region.value_counts().head(20)
Out[32]:
region Auckland 1437 Canterbury 587 Wellington 512 Waikato 413 Bay of Plenty 311 Nationwide 204 Otago 180 Northland 167 Hawke's Bay 137 indeterminate/unknown 122 Manuwatū-Whanganui 116 Taranaki 105 Manawatū-Whanganui 96 Nelson 90 Southland 75 International 54 Marlborough 42 Gisborne 36 West Coast 25 Tasman 18 Name: count, dtype: int64
time: 4.66 ms (started: 2025-11-10 10:48:48 +13:00)