%reload_ext autotime
import pandas as pd
import requests
from pprint import pprint
import json
import torch
from PIL import Image
from tqdm.auto import tqdm

# Notebook-wide pandas configuration.
# Route DataFrame/Series .plot() calls through the "plotly" plotting backend.
pd.options.plotting.backend = "plotly"
# Do not truncate the column list when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
# Cap the displayed width of each cell's text at 100 characters.
pd.set_option("display.max_colwidth", 100)

time: 4.83 s (started: 2026-02-25 10:33:58 +13:00)

/mnt/streetview/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

# Read the survey/panorama table, then keep a single row per panorama id so
# that each image is only processed once downstream.
df = pd.read_csv("results.csv")
df = df.drop_duplicates(subset="panoid")
df

time: 45 ms (started: 2026-02-25 10:34:03 +13:00)

from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor

# Single source of truth for the checkpoint, so the model weights and the
# processor (tokenizer + image preprocessing) always come from the same repo.
MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

# Load the vision-language model in bfloat16 with FlashAttention 2 and let
# `device_map="auto"` decide where the weights are placed.
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
    MODEL_ID,
    dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)

processor = AutoProcessor.from_pretrained(MODEL_ID)

Fetching 13 files: 100%|██████████| 13/13 [00:00<00:00, 40782.31it/s]
Loading weights: 100%|██████████| 882/882 [01:00<00:00, 14.56it/s, Materializing param=model.visual.pos_embed.weight]

time: 1min 16s (started: 2026-02-25 10:34:03 +13:00)

# Instruction text used as the "text" part of the user message in the
# inference loops below. It asks the model for one JSON object with the keys
# green, environment, active_transport, quality, water, obscured, people,
# cars, bikes, dwellings, shops and offices.
# The leading newline and the four-space indentation are inside the
# triple-quoted string and are therefore part of the prompt that is sent.
# NOTE(review): `water` uses 0 to mean "no water present", which cannot be
# told apart from water at a distance of 0 m - consider asking for null.
prompt = """
    This image is a panorama from Google Street View.
    From the image, extract the following information, in JSON format:

    green: The percentage of the image that is green space (e.g., parks, gardens, trees, grass). A number between 0 and 100.
    environment: The general classification of the environment based on the visible surroundings. Choose the closest matching category from the following: "low density residential", "medium density residential", "indoor", "park", "commercial", "shops", "cafes", "supermarket" or suggest a custom classification as a string.
    active_transport: Indicate if an active transport corridor is visible (e.g., bike lane, walking path). Return true or false.
    quality: A subjective assessment of the area's upkeep, where 0 represents "run down" and 100 represents "well maintained." A number between 0 and 100.
    water: If streams, ponds, rivers, or the ocean are visible, estimate the distance to the nearest body of water in meters. If no water is present, return 0.
    obscured: The proportion of the view obscured by buildings (i.e., how much of the total line of sight is blocked by buildings in close proximity). A number between 0 and 100.
    people: The total number of people visible in the image. A whole number.
    cars: The total number of cars visible in the image. A whole number.
    bikes: The total number of bikes visible in the image. A whole number.
    dwellings: The total number of dwellings (houses, flats, or apartments) visible in the image. A whole number.
    shops: The total number of shops visible in the image. A whole number.
    offices: The total number of offices visible in the image. A whole number.

    Do not include comments in your JSON response. Only respond with the JSON object. Make sure the JSON is valid.
"""
# Smoke test: run the model on a handful of randomly sampled panoramas and
# print the raw and parsed answer for each one, so the output can be checked
# by eye before launching the full run.
n_samples = min(10, len(df))  # df.sample raises if asked for more rows than exist
for row in tqdm(df.sample(n_samples).itertuples(index=False), total=n_samples):
    panoid = row.panoid
    # Decode the pixels while the file is open so the handle is released
    # straight away; convert() returns an independent in-memory RGB copy.
    with Image.open(f"panoramas/{panoid}.jpg") as img:
        image = img.convert("RGB")
    display(image)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                # The image object must be attached to the message. With a bare
                # {"type": "image"} entry no pixel data reaches the model: the
                # recorded runs showed 420 input tokens for every panorama and
                # near-identical answers regardless of the picture.
                {"type": "image", "image": image},
            ],
        }
    ]
    # Preparation for inference: render the chat template, tokenize the text
    # and preprocess the attached image in one call.
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)
    # Sanity check: this count should vary with the image once pixels are
    # actually included.
    print(len(inputs.input_ids[0]), "tokens in input")

    generated_ids = model.generate(**inputs, max_new_tokens=5000)
    # generate() returns prompt + completion; drop the prompt tokens so only
    # the newly generated answer is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    print(len(generated_ids_trimmed[0]), "tokens in output")
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    # Strip Markdown code fences the model may wrap around the JSON.
    output_text = output_text.replace("```json", "").replace("```", "").strip()
    print("Output:")
    print(output_text)
    result = json.loads(output_text)
    pprint(result)
    print("\n")

0it [00:00, ?it/s]

420 tokens in input

1it [00:11, 11.07s/it]

84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 30,
 'offices': 0,
 'people': 2,
 'quality': 65,
 'shops': 2,
 'water': 0}

420 tokens in input

2it [00:20, 10.06s/it]

84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 35, "people": 3, "cars": 2, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 2,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 35,
 'offices': 0,
 'people': 3,
 'quality': 65,
 'shops': 1,
 'water': 0}

420 tokens in input

3it [00:29,  9.81s/it]

84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 35,
 'offices': 0,
 'people': 2,
 'quality': 70,
 'shops': 1,
 'water': 0}

# Full run: query the model once per panorama, merge the parsed JSON answer
# into that panorama's metadata row, and write everything to LLM_results.csv.
results = []
failed_panoids = []  # panoramas whose answer was not a usable JSON object
for row in tqdm(df.itertuples(index=False), total=len(df)):
    panoid = row.panoid
    # Decode the pixels while the file is open so the handle is released
    # straight away; convert() returns an independent in-memory RGB copy.
    with Image.open(f"panoramas/{panoid}.jpg") as img:
        image = img.convert("RGB")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                # The image object must be attached to the message. With a bare
                # {"type": "image"} entry no pixel data reaches the model, and
                # the answers come out near-identical for every panorama.
                {"type": "image", "image": image},
            ],
        }
    ]
    # Preparation for inference: render the chat template, tokenize the text
    # and preprocess the attached image in one call.
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    generated_ids = model.generate(**inputs, max_new_tokens=5000)
    # generate() returns prompt + completion; drop the prompt tokens so only
    # the newly generated answer is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    # Strip Markdown code fences the model may wrap around the JSON.
    output_text = output_text.replace("```json", "").replace("```", "").strip()

    record = row._asdict()
    # One malformed answer must not abort a run that takes over an hour and
    # throw away every result collected so far: report it and keep the row
    # (its LLM columns end up empty in the final table).
    try:
        result = json.loads(output_text)
    except json.JSONDecodeError as err:
        result = None
        tqdm.write(f"{panoid}: model output is not valid JSON ({err}): {output_text!r}")
    if isinstance(result, dict):
        record.update(result)
    else:
        failed_panoids.append(panoid)
    results.append(record)

if failed_panoids:
    print(f"{len(failed_panoids)} panorama(s) without a usable answer: {failed_panoids}")

results = pd.DataFrame(results)
results.to_csv("LLM_results.csv", index=False)
results

100%|██████████| 595/595 [1:36:47<00:00,  9.76s/it]

time: 1h 36min 47s (started: 2026-02-25 10:36:57 +13:00)

	Index	pid	n	time	anxiousness	latitude	longitude	geometry	panoid	panolat	panolon	panodate	panothirdparty	source	uploader
0	0	P20001	1	2023-04-25T02:51:42Z	0	-36.924795	174.738044	POINT (174.7380435 -36.92479483)	IvrcS0W1RlFAlnci-p39XA	-36.924667	174.737908	2012-04	False	launch	NaN
10	10	P20001	11	2023-04-24T00:42:25Z	0	-36.924837	174.737948	POINT (174.7379477 -36.92483659)	QEpZV7bnO2mBfp0weMUKEg	-36.924733	174.737819	2012-04	False	launch	NaN
13	13	P20006	1	2023-06-03T02:45:55Z	3	-36.892203	174.740125	POINT (174.7401253 -36.89220256)	dAVT1x86uv1m9orlHxG1Tg	-36.892635	174.740018	2024-05	False	launch	NaN
14	14	P20009	1	2023-05-16T21:36:38Z	6	-36.811089	174.632053	POINT (174.6320532 -36.81108876)	8mhuMKDyT-0Y34V6UeabWw	-36.810854	174.630303	2023-01	False	launch	NaN
15	15	P20009	2	2023-05-17T04:54:48Z	3	-36.923191	174.748620	POINT (174.7486203 -36.92319093)	fg5HSwvFk71VknyBYn1_AA	-36.923213	174.748832	2025-08	False	launch	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1289	1421	P20693	2	2024-05-02T03:43:23Z	3	-36.897778	174.721580	POINT (174.7215796 -36.89777786)	Uzuqd6oSo-EjCVuRP2Os0Q	-36.897745	174.721875	2022-06	False	launch	NaN
1292	1425	P20693	6	2024-05-05T03:00:22Z	2	-36.969426	174.790602	POINT (174.7906024 -36.96942642)	5d1FcEQVhi9WsKP3r6iajg	-36.969179	174.790872	2012-04	False	launch	NaN
1293	1426	P20721	1	2024-05-05T02:00:52Z	1	-36.893455	174.728262	POINT (174.728262 -36.89345532)	ftrRbHWdN74ZKSFMEkPscg	-36.893414	174.728087	2024-10	False	launch	NaN
1295	1428	P20721	3	2024-05-05T23:06:27Z	2	-36.845252	174.759951	POINT (174.7599508 -36.8452515)	CIHM0ogKEICAgICO_7-B_AE	-36.845292	174.759939	2022-06-24	True	photos:street_view_publish_api	Mint Design
1296	1429	P20721	4	2024-05-06T07:04:57Z	0	-36.845165	174.759885	POINT (174.7598849 -36.84516487)	CIHM0ogKEICAgICO_-_r1AE	-36.845177	174.759792	2022-06-24	True	photos:street_view_publish_api	Mint Design

	Index	pid	n	time	anxiousness	latitude	longitude	geometry	panoid	panolat	panolon	panodate	panothirdparty	source	uploader	green	environment	active_transport	quality	water	obscured	people	cars	bikes	dwellings	shops	offices
0	0	P20001	1	2023-04-25T02:51:42Z	0	-36.924795	174.738044	POINT (174.7380435 -36.92479483)	IvrcS0W1RlFAlnci-p39XA	-36.924667	174.737908	2012-04	False	launch	NaN	45	medium density residential	True	70	0	30	2	3	1	8	1	0
1	10	P20001	11	2023-04-24T00:42:25Z	0	-36.924837	174.737948	POINT (174.7379477 -36.92483659)	QEpZV7bnO2mBfp0weMUKEg	-36.924733	174.737819	2012-04	False	launch	NaN	45	medium density residential	True	65	0	35	2	3	1	8	1	0
2	13	P20006	1	2023-06-03T02:45:55Z	3	-36.892203	174.740125	POINT (174.7401253 -36.89220256)	dAVT1x86uv1m9orlHxG1Tg	-36.892635	174.740018	2024-05	False	launch	NaN	45	medium density residential	True	65	0	30	2	3	1	8	1	0
3	14	P20009	1	2023-05-16T21:36:38Z	6	-36.811089	174.632053	POINT (174.6320532 -36.81108876)	8mhuMKDyT-0Y34V6UeabWw	-36.810854	174.630303	2023-01	False	launch	NaN	45	medium density residential	True	70	0	30	2	3	1	6	1	0
4	15	P20009	2	2023-05-17T04:54:48Z	3	-36.923191	174.748620	POINT (174.7486203 -36.92319093)	fg5HSwvFk71VknyBYn1_AA	-36.923213	174.748832	2025-08	False	launch	NaN	45	medium density residential	True	70	0	35	2	3	1	6	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
590	1421	P20693	2	2024-05-02T03:43:23Z	3	-36.897778	174.721580	POINT (174.7215796 -36.89777786)	Uzuqd6oSo-EjCVuRP2Os0Q	-36.897745	174.721875	2022-06	False	launch	NaN	65	medium density residential	True	75	0	40	3	2	1	8	1	0
591	1425	P20693	6	2024-05-05T03:00:22Z	2	-36.969426	174.790602	POINT (174.7906024 -36.96942642)	5d1FcEQVhi9WsKP3r6iajg	-36.969179	174.790872	2012-04	False	launch	NaN	45	medium density residential	True	75	0	30	3	2	1	6	0	0
592	1426	P20721	1	2024-05-05T02:00:52Z	1	-36.893455	174.728262	POINT (174.728262 -36.89345532)	ftrRbHWdN74ZKSFMEkPscg	-36.893414	174.728087	2024-10	False	launch	NaN	45	medium density residential	True	70	0	30	2	3	1	6	1	0
593	1428	P20721	3	2024-05-05T23:06:27Z	2	-36.845252	174.759951	POINT (174.7599508 -36.8452515)	CIHM0ogKEICAgICO_7-B_AE	-36.845292	174.759939	2022-06-24	True	photos:street_view_publish_api	Mint Design	45	medium density residential	True	65	0	30	3	2	1	8	1	0
594	1429	P20721	4	2024-05-06T07:04:57Z	0	-36.845165	174.759885	POINT (174.7598849 -36.84516487)	CIHM0ogKEICAgICO_-_r1AE	-36.845177	174.759792	2022-06-24	True	photos:street_view_publish_api	Mint Design	45	medium density residential	True	65	0	30	3	2	1	6	1	0