In [1]:
%reload_ext autotime
import pandas as pd
import requests
from pprint import pprint
import json
import torch
from PIL import Image
from tqdm.auto import tqdm
pd.options.plotting.backend = "plotly"
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
time: 4.83 s (started: 2026-02-25 10:33:58 +13:00)
/mnt/streetview/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
In [2]:
df = pd.read_csv("results.csv").drop_duplicates(subset="panoid")
df
Out[2]:
| Index | pid | n | time | anxiousness | latitude | longitude | geometry | panoid | panolat | panolon | panodate | panothirdparty | source | uploader | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | P20001 | 1 | 2023-04-25T02:51:42Z | 0 | -36.924795 | 174.738044 | POINT (174.7380435 -36.92479483) | IvrcS0W1RlFAlnci-p39XA | -36.924667 | 174.737908 | 2012-04 | False | launch | NaN |
| 10 | 10 | P20001 | 11 | 2023-04-24T00:42:25Z | 0 | -36.924837 | 174.737948 | POINT (174.7379477 -36.92483659) | QEpZV7bnO2mBfp0weMUKEg | -36.924733 | 174.737819 | 2012-04 | False | launch | NaN |
| 13 | 13 | P20006 | 1 | 2023-06-03T02:45:55Z | 3 | -36.892203 | 174.740125 | POINT (174.7401253 -36.89220256) | dAVT1x86uv1m9orlHxG1Tg | -36.892635 | 174.740018 | 2024-05 | False | launch | NaN |
| 14 | 14 | P20009 | 1 | 2023-05-16T21:36:38Z | 6 | -36.811089 | 174.632053 | POINT (174.6320532 -36.81108876) | 8mhuMKDyT-0Y34V6UeabWw | -36.810854 | 174.630303 | 2023-01 | False | launch | NaN |
| 15 | 15 | P20009 | 2 | 2023-05-17T04:54:48Z | 3 | -36.923191 | 174.748620 | POINT (174.7486203 -36.92319093) | fg5HSwvFk71VknyBYn1_AA | -36.923213 | 174.748832 | 2025-08 | False | launch | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1289 | 1421 | P20693 | 2 | 2024-05-02T03:43:23Z | 3 | -36.897778 | 174.721580 | POINT (174.7215796 -36.89777786) | Uzuqd6oSo-EjCVuRP2Os0Q | -36.897745 | 174.721875 | 2022-06 | False | launch | NaN |
| 1292 | 1425 | P20693 | 6 | 2024-05-05T03:00:22Z | 2 | -36.969426 | 174.790602 | POINT (174.7906024 -36.96942642) | 5d1FcEQVhi9WsKP3r6iajg | -36.969179 | 174.790872 | 2012-04 | False | launch | NaN |
| 1293 | 1426 | P20721 | 1 | 2024-05-05T02:00:52Z | 1 | -36.893455 | 174.728262 | POINT (174.728262 -36.89345532) | ftrRbHWdN74ZKSFMEkPscg | -36.893414 | 174.728087 | 2024-10 | False | launch | NaN |
| 1295 | 1428 | P20721 | 3 | 2024-05-05T23:06:27Z | 2 | -36.845252 | 174.759951 | POINT (174.7599508 -36.8452515) | CIHM0ogKEICAgICO_7-B_AE | -36.845292 | 174.759939 | 2022-06-24 | True | photos:street_view_publish_api | Mint Design |
| 1296 | 1429 | P20721 | 4 | 2024-05-06T07:04:57Z | 0 | -36.845165 | 174.759885 | POINT (174.7598849 -36.84516487) | CIHM0ogKEICAgICO_-_r1AE | -36.845177 | 174.759792 | 2022-06-24 | True | photos:street_view_publish_api | Mint Design |
595 rows × 15 columns
time: 45 ms (started: 2026-02-25 10:34:03 +13:00)
In [3]:
from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
"Qwen/Qwen3-VL-30B-A3B-Instruct",
dtype=torch.bfloat16,
attn_implementation="flash_attention_2",
device_map="auto",
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-30B-A3B-Instruct")
Fetching 13 files: 100%|██████████| 13/13 [00:00<00:00, 40782.31it/s] Loading weights: 100%|██████████| 882/882 [01:00<00:00, 14.56it/s, Materializing param=model.visual.pos_embed.weight]
time: 1min 16s (started: 2026-02-25 10:34:03 +13:00)
In [4]:
prompt = """
This image is a panorama from Google Street View.
From the image, extract the following information, in JSON format:
green: The percentage of the image that is green space (e.g., parks, gardens, trees, grass). A number between 0 and 100.
environment: The general classification of the environment based on the visible surroundings. Choose the closest matching category from the following: "low density residential", "medium density residential", "indoor", "park", "commercial", "shops", "cafes", "supermarket" or suggest a custom classification as a string.
active_transport: Indicate if an active transport corridor is visible (e.g., bike lane, walking path). Return true or false.
quality: A subjective assessment of the area's upkeep, where 0 represents "run down" and 100 represents "well maintained." A number between 0 and 100.
water: If streams, ponds, rivers, or the ocean are visible, estimate the distance to the nearest body of water in meters. If no water is present, return 0.
obscured: The proportion of the view obscured by buildings (i.e., how much of the total line of sight is blocked by buildings in close proximity). A number between 0 and 100.
people: The total number of people visible in the image. A whole number.
cars: The total number of cars visible in the image. A whole number.
bikes: The total number of bikes visible in the image. A whole number.
dwellings: The total number of dwellings (houses, flats, or apartments) visible in the image. A whole number.
shops: The total number of shops visible in the image. A whole number.
offices: The total number of offices visible in the image. A whole number.
Do not include comments in your JSON response. Only respond with the JSON object. Make sure the JSON is valid.
"""
for row in tqdm(df.sample(10).itertuples(index=False)):
panoid = row.panoid
image = Image.open(f"panoramas/{panoid}.jpg")
display(image)
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "image"},
]
}
]
# Preparation for inference
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
inputs = inputs.to(model.device)
print(len(inputs.input_ids[0]), "tokens in input")
generated_ids = model.generate(**inputs, max_new_tokens=5000)
generated_ids_trimmed = [
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
print(len(generated_ids_trimmed[0]), "tokens in output")
output_text = processor.batch_decode(
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
output_text = output_text.replace("```json", "").replace("```", "").strip()
print("Output:")
print(output_text)
result = json.loads(output_text)
pprint(result)
print("\n")
0it [00:00, ?it/s]
420 tokens in input
1it [00:11, 11.07s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 3,
'dwellings': 8,
'environment': 'medium density residential',
'green': 45,
'obscured': 30,
'offices': 0,
'people': 2,
'quality': 65,
'shops': 2,
'water': 0}
420 tokens in input
2it [00:20, 10.06s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 35, "people": 3, "cars": 2, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 2,
'dwellings': 8,
'environment': 'medium density residential',
'green': 45,
'obscured': 35,
'offices': 0,
'people': 3,
'quality': 65,
'shops': 1,
'water': 0}
420 tokens in input
3it [00:29, 9.81s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 3,
'dwellings': 8,
'environment': 'medium density residential',
'green': 45,
'obscured': 35,
'offices': 0,
'people': 2,
'quality': 70,
'shops': 1,
'water': 0}
420 tokens in input
4it [00:39, 9.68s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 1}
{'active_transport': True,
'bikes': 1,
'cars': 3,
'dwellings': 8,
'environment': 'medium density residential',
'green': 45,
'obscured': 30,
'offices': 1,
'people': 2,
'quality': 70,
'shops': 2,
'water': 0}
420 tokens in input
5it [00:49, 9.66s/it]
84 tokens in output
Output:
{"green": 65, "environment": "low density residential", "active_transport": true, "quality": 75, "water": 0, "obscured": 40, "people": 2, "cars": 1, "bikes": 1, "dwellings": 4, "shops": 0, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 1,
'dwellings': 4,
'environment': 'low density residential',
'green': 65,
'obscured': 40,
'offices': 0,
'people': 2,
'quality': 75,
'shops': 0,
'water': 0}
420 tokens in input
6it [00:58, 9.66s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 3, "cars": 2, "bikes": 1, "dwellings": 6, "shops": 1, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 2,
'dwellings': 6,
'environment': 'medium density residential',
'green': 45,
'obscured': 35,
'offices': 0,
'people': 3,
'quality': 70,
'shops': 1,
'water': 0}
420 tokens in input
7it [01:08, 9.67s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 75, "water": 0, "obscured": 30, "people": 3, "cars": 2, "bikes": 1, "dwellings": 6, "shops": 1, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 2,
'dwellings': 6,
'environment': 'medium density residential',
'green': 45,
'obscured': 30,
'offices': 0,
'people': 3,
'quality': 75,
'shops': 1,
'water': 0}
420 tokens in input
8it [01:18, 9.67s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 3,
'dwellings': 8,
'environment': 'medium density residential',
'green': 45,
'obscured': 30,
'offices': 0,
'people': 2,
'quality': 65,
'shops': 2,
'water': 0}
420 tokens in input
9it [01:27, 9.67s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 3,
'dwellings': 8,
'environment': 'medium density residential',
'green': 45,
'obscured': 35,
'offices': 0,
'people': 2,
'quality': 70,
'shops': 1,
'water': 0}
420 tokens in input
10it [01:37, 9.74s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 3, "cars": 2, "bikes": 1, "dwellings": 6, "shops": 1, "offices": 0}
{'active_transport': True,
'bikes': 1,
'cars': 2,
'dwellings': 6,
'environment': 'medium density residential',
'green': 45,
'obscured': 30,
'offices': 0,
'people': 3,
'quality': 65,
'shops': 1,
'water': 0}
time: 1min 37s (started: 2026-02-25 10:35:19 +13:00)
In [5]:
results = []
for row in tqdm(df.itertuples(index=False), total=len(df)):
panoid = row.panoid
image = Image.open(f"panoramas/{panoid}.jpg")
#display(image)
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "image"},
]
}
]
# Preparation for inference
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
inputs = inputs.to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=5000)
generated_ids_trimmed = [
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
output_text = output_text.replace("```json", "").replace("```", "").strip()
result = json.loads(output_text)
row = row._asdict()
row.update(result)
results.append(row)
results = pd.DataFrame(results)
results.to_csv("LLM_results.csv", index=False)
results
100%|██████████| 595/595 [1:36:47<00:00, 9.76s/it]
Out[5]:
| Index | pid | n | time | anxiousness | latitude | longitude | geometry | panoid | panolat | panolon | panodate | panothirdparty | source | uploader | green | environment | active_transport | quality | water | obscured | people | cars | bikes | dwellings | shops | offices | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | P20001 | 1 | 2023-04-25T02:51:42Z | 0 | -36.924795 | 174.738044 | POINT (174.7380435 -36.92479483) | IvrcS0W1RlFAlnci-p39XA | -36.924667 | 174.737908 | 2012-04 | False | launch | NaN | 45 | medium density residential | True | 70 | 0 | 30 | 2 | 3 | 1 | 8 | 1 | 0 |
| 1 | 10 | P20001 | 11 | 2023-04-24T00:42:25Z | 0 | -36.924837 | 174.737948 | POINT (174.7379477 -36.92483659) | QEpZV7bnO2mBfp0weMUKEg | -36.924733 | 174.737819 | 2012-04 | False | launch | NaN | 45 | medium density residential | True | 65 | 0 | 35 | 2 | 3 | 1 | 8 | 1 | 0 |
| 2 | 13 | P20006 | 1 | 2023-06-03T02:45:55Z | 3 | -36.892203 | 174.740125 | POINT (174.7401253 -36.89220256) | dAVT1x86uv1m9orlHxG1Tg | -36.892635 | 174.740018 | 2024-05 | False | launch | NaN | 45 | medium density residential | True | 65 | 0 | 30 | 2 | 3 | 1 | 8 | 1 | 0 |
| 3 | 14 | P20009 | 1 | 2023-05-16T21:36:38Z | 6 | -36.811089 | 174.632053 | POINT (174.6320532 -36.81108876) | 8mhuMKDyT-0Y34V6UeabWw | -36.810854 | 174.630303 | 2023-01 | False | launch | NaN | 45 | medium density residential | True | 70 | 0 | 30 | 2 | 3 | 1 | 6 | 1 | 0 |
| 4 | 15 | P20009 | 2 | 2023-05-17T04:54:48Z | 3 | -36.923191 | 174.748620 | POINT (174.7486203 -36.92319093) | fg5HSwvFk71VknyBYn1_AA | -36.923213 | 174.748832 | 2025-08 | False | launch | NaN | 45 | medium density residential | True | 70 | 0 | 35 | 2 | 3 | 1 | 6 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 590 | 1421 | P20693 | 2 | 2024-05-02T03:43:23Z | 3 | -36.897778 | 174.721580 | POINT (174.7215796 -36.89777786) | Uzuqd6oSo-EjCVuRP2Os0Q | -36.897745 | 174.721875 | 2022-06 | False | launch | NaN | 65 | medium density residential | True | 75 | 0 | 40 | 3 | 2 | 1 | 8 | 1 | 0 |
| 591 | 1425 | P20693 | 6 | 2024-05-05T03:00:22Z | 2 | -36.969426 | 174.790602 | POINT (174.7906024 -36.96942642) | 5d1FcEQVhi9WsKP3r6iajg | -36.969179 | 174.790872 | 2012-04 | False | launch | NaN | 45 | medium density residential | True | 75 | 0 | 30 | 3 | 2 | 1 | 6 | 0 | 0 |
| 592 | 1426 | P20721 | 1 | 2024-05-05T02:00:52Z | 1 | -36.893455 | 174.728262 | POINT (174.728262 -36.89345532) | ftrRbHWdN74ZKSFMEkPscg | -36.893414 | 174.728087 | 2024-10 | False | launch | NaN | 45 | medium density residential | True | 70 | 0 | 30 | 2 | 3 | 1 | 6 | 1 | 0 |
| 593 | 1428 | P20721 | 3 | 2024-05-05T23:06:27Z | 2 | -36.845252 | 174.759951 | POINT (174.7599508 -36.8452515) | CIHM0ogKEICAgICO_7-B_AE | -36.845292 | 174.759939 | 2022-06-24 | True | photos:street_view_publish_api | Mint Design | 45 | medium density residential | True | 65 | 0 | 30 | 3 | 2 | 1 | 8 | 1 | 0 |
| 594 | 1429 | P20721 | 4 | 2024-05-06T07:04:57Z | 0 | -36.845165 | 174.759885 | POINT (174.7598849 -36.84516487) | CIHM0ogKEICAgICO_-_r1AE | -36.845177 | 174.759792 | 2022-06-24 | True | photos:street_view_publish_api | Mint Design | 45 | medium density residential | True | 65 | 0 | 30 | 3 | 2 | 1 | 6 | 1 | 0 |
595 rows × 27 columns
time: 1h 36min 47s (started: 2026-02-25 10:36:57 +13:00)