In [1]:
%reload_ext autotime
import pandas as pd
import requests
from pprint import pprint
import json
import torch
from PIL import Image
from tqdm.auto import tqdm

pd.options.plotting.backend = "plotly"
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
time: 4.83 s (started: 2026-02-25 10:33:58 +13:00)
/mnt/streetview/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
In [2]:
df = pd.read_csv("results.csv").drop_duplicates(subset="panoid")
df
Out[2]:
Index pid n time anxiousness latitude longitude geometry panoid panolat panolon panodate panothirdparty source uploader
0 0 P20001 1 2023-04-25T02:51:42Z 0 -36.924795 174.738044 POINT (174.7380435 -36.92479483) IvrcS0W1RlFAlnci-p39XA -36.924667 174.737908 2012-04 False launch NaN
10 10 P20001 11 2023-04-24T00:42:25Z 0 -36.924837 174.737948 POINT (174.7379477 -36.92483659) QEpZV7bnO2mBfp0weMUKEg -36.924733 174.737819 2012-04 False launch NaN
13 13 P20006 1 2023-06-03T02:45:55Z 3 -36.892203 174.740125 POINT (174.7401253 -36.89220256) dAVT1x86uv1m9orlHxG1Tg -36.892635 174.740018 2024-05 False launch NaN
14 14 P20009 1 2023-05-16T21:36:38Z 6 -36.811089 174.632053 POINT (174.6320532 -36.81108876) 8mhuMKDyT-0Y34V6UeabWw -36.810854 174.630303 2023-01 False launch NaN
15 15 P20009 2 2023-05-17T04:54:48Z 3 -36.923191 174.748620 POINT (174.7486203 -36.92319093) fg5HSwvFk71VknyBYn1_AA -36.923213 174.748832 2025-08 False launch NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1289 1421 P20693 2 2024-05-02T03:43:23Z 3 -36.897778 174.721580 POINT (174.7215796 -36.89777786) Uzuqd6oSo-EjCVuRP2Os0Q -36.897745 174.721875 2022-06 False launch NaN
1292 1425 P20693 6 2024-05-05T03:00:22Z 2 -36.969426 174.790602 POINT (174.7906024 -36.96942642) 5d1FcEQVhi9WsKP3r6iajg -36.969179 174.790872 2012-04 False launch NaN
1293 1426 P20721 1 2024-05-05T02:00:52Z 1 -36.893455 174.728262 POINT (174.728262 -36.89345532) ftrRbHWdN74ZKSFMEkPscg -36.893414 174.728087 2024-10 False launch NaN
1295 1428 P20721 3 2024-05-05T23:06:27Z 2 -36.845252 174.759951 POINT (174.7599508 -36.8452515) CIHM0ogKEICAgICO_7-B_AE -36.845292 174.759939 2022-06-24 True photos:street_view_publish_api Mint Design
1296 1429 P20721 4 2024-05-06T07:04:57Z 0 -36.845165 174.759885 POINT (174.7598849 -36.84516487) CIHM0ogKEICAgICO_-_r1AE -36.845177 174.759792 2022-06-24 True photos:street_view_publish_api Mint Design

595 rows × 15 columns

time: 45 ms (started: 2026-02-25 10:34:03 +13:00)
In [3]:
from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor

model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
     "Qwen/Qwen3-VL-30B-A3B-Instruct",
     dtype=torch.bfloat16,
     attn_implementation="flash_attention_2",
     device_map="auto",
)

processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-30B-A3B-Instruct")
Fetching 13 files: 100%|██████████| 13/13 [00:00<00:00, 40782.31it/s]
Loading weights: 100%|██████████| 882/882 [01:00<00:00, 14.56it/s, Materializing param=model.visual.pos_embed.weight]                                 
time: 1min 16s (started: 2026-02-25 10:34:03 +13:00)
In [4]:
prompt = """
    This image is a panorama from Google Street View.
    From the image, extract the following information, in JSON format:

    green: The percentage of the image that is green space (e.g., parks, gardens, trees, grass). A number between 0 and 100.
    environment: The general classification of the environment based on the visible surroundings. Choose the closest matching category from the following: "low density residential", "medium density residential", "indoor", "park", "commercial", "shops", "cafes", "supermarket" or suggest a custom classification as a string.
    active_transport: Indicate if an active transport corridor is visible (e.g., bike lane, walking path). Return true or false.
    quality: A subjective assessment of the area's upkeep, where 0 represents "run down" and 100 represents "well maintained." A number between 0 and 100.
    water: If streams, ponds, rivers, or the ocean are visible, estimate the distance to the nearest body of water in meters. If no water is present, return 0.
    obscured: The proportion of the view obscured by buildings (i.e., how much of the total line of sight is blocked by buildings in close proximity). A number between 0 and 100.
    people: The total number of people visible in the image. A whole number.
    cars: The total number of cars visible in the image. A whole number.
    bikes: The total number of bikes visible in the image. A whole number.
    dwellings: The total number of dwellings (houses, flats, or apartments) visible in the image. A whole number.
    shops: The total number of shops visible in the image. A whole number.
    offices: The total number of offices visible in the image. A whole number.

    Do not include comments in your JSON response. Only respond with the JSON object. Make sure the JSON is valid.
"""
for row in tqdm(df.sample(10).itertuples(index=False)):
    panoid = row.panoid
    image = Image.open(f"panoramas/{panoid}.jpg")
    display(image)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image"},
            ]
        }
    ]
        # Preparation for inference
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt"
    )
    inputs = inputs.to(model.device)
    print(len(inputs.input_ids[0]), "tokens in input")

    generated_ids = model.generate(**inputs, max_new_tokens=5000)
    generated_ids_trimmed = [
        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    print(len(generated_ids_trimmed[0]), "tokens in output")
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    output_text = output_text.replace("```json", "").replace("```", "").strip()
    print("Output:")
    print(output_text)
    result = json.loads(output_text)
    pprint(result)
    print("\n")
0it [00:00, ?it/s]
No description has been provided for this image
420 tokens in input
1it [00:11, 11.07s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 30,
 'offices': 0,
 'people': 2,
 'quality': 65,
 'shops': 2,
 'water': 0}


No description has been provided for this image
420 tokens in input
2it [00:20, 10.06s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 35, "people": 3, "cars": 2, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 2,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 35,
 'offices': 0,
 'people': 3,
 'quality': 65,
 'shops': 1,
 'water': 0}


No description has been provided for this image
420 tokens in input
3it [00:29,  9.81s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 35,
 'offices': 0,
 'people': 2,
 'quality': 70,
 'shops': 1,
 'water': 0}


No description has been provided for this image
420 tokens in input
4it [00:39,  9.68s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 1}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 30,
 'offices': 1,
 'people': 2,
 'quality': 70,
 'shops': 2,
 'water': 0}


No description has been provided for this image
420 tokens in input
5it [00:49,  9.66s/it]
84 tokens in output
Output:
{"green": 65, "environment": "low density residential", "active_transport": true, "quality": 75, "water": 0, "obscured": 40, "people": 2, "cars": 1, "bikes": 1, "dwellings": 4, "shops": 0, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 1,
 'dwellings': 4,
 'environment': 'low density residential',
 'green': 65,
 'obscured': 40,
 'offices': 0,
 'people': 2,
 'quality': 75,
 'shops': 0,
 'water': 0}


No description has been provided for this image
420 tokens in input
6it [00:58,  9.66s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 3, "cars": 2, "bikes": 1, "dwellings": 6, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 2,
 'dwellings': 6,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 35,
 'offices': 0,
 'people': 3,
 'quality': 70,
 'shops': 1,
 'water': 0}


No description has been provided for this image
420 tokens in input
7it [01:08,  9.67s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 75, "water": 0, "obscured": 30, "people": 3, "cars": 2, "bikes": 1, "dwellings": 6, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 2,
 'dwellings': 6,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 30,
 'offices': 0,
 'people': 3,
 'quality': 75,
 'shops': 1,
 'water': 0}


No description has been provided for this image
420 tokens in input
8it [01:18,  9.67s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 2, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 30,
 'offices': 0,
 'people': 2,
 'quality': 65,
 'shops': 2,
 'water': 0}


No description has been provided for this image
420 tokens in input
9it [01:27,  9.67s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 70, "water": 0, "obscured": 35, "people": 2, "cars": 3, "bikes": 1, "dwellings": 8, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 3,
 'dwellings': 8,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 35,
 'offices': 0,
 'people': 2,
 'quality': 70,
 'shops': 1,
 'water': 0}


No description has been provided for this image
420 tokens in input
10it [01:37,  9.74s/it]
84 tokens in output
Output:
{"green": 45, "environment": "medium density residential", "active_transport": true, "quality": 65, "water": 0, "obscured": 30, "people": 3, "cars": 2, "bikes": 1, "dwellings": 6, "shops": 1, "offices": 0}
{'active_transport': True,
 'bikes': 1,
 'cars': 2,
 'dwellings': 6,
 'environment': 'medium density residential',
 'green': 45,
 'obscured': 30,
 'offices': 0,
 'people': 3,
 'quality': 65,
 'shops': 1,
 'water': 0}


time: 1min 37s (started: 2026-02-25 10:35:19 +13:00)

In [5]:
results = []
for row in tqdm(df.itertuples(index=False), total=len(df)):
    panoid = row.panoid
    image = Image.open(f"panoramas/{panoid}.jpg")
    #display(image)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image"},
            ]
        }
    ]
        # Preparation for inference
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt"
    )
    inputs = inputs.to(model.device)

    generated_ids = model.generate(**inputs, max_new_tokens=5000)
    generated_ids_trimmed = [
        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    output_text = output_text.replace("```json", "").replace("```", "").strip()
    result = json.loads(output_text)
    row = row._asdict()
    row.update(result)
    results.append(row)

results = pd.DataFrame(results)
results.to_csv("LLM_results.csv", index=False)
results
100%|██████████| 595/595 [1:36:47<00:00,  9.76s/it]
Out[5]:
Index pid n time anxiousness latitude longitude geometry panoid panolat panolon panodate panothirdparty source uploader green environment active_transport quality water obscured people cars bikes dwellings shops offices
0 0 P20001 1 2023-04-25T02:51:42Z 0 -36.924795 174.738044 POINT (174.7380435 -36.92479483) IvrcS0W1RlFAlnci-p39XA -36.924667 174.737908 2012-04 False launch NaN 45 medium density residential True 70 0 30 2 3 1 8 1 0
1 10 P20001 11 2023-04-24T00:42:25Z 0 -36.924837 174.737948 POINT (174.7379477 -36.92483659) QEpZV7bnO2mBfp0weMUKEg -36.924733 174.737819 2012-04 False launch NaN 45 medium density residential True 65 0 35 2 3 1 8 1 0
2 13 P20006 1 2023-06-03T02:45:55Z 3 -36.892203 174.740125 POINT (174.7401253 -36.89220256) dAVT1x86uv1m9orlHxG1Tg -36.892635 174.740018 2024-05 False launch NaN 45 medium density residential True 65 0 30 2 3 1 8 1 0
3 14 P20009 1 2023-05-16T21:36:38Z 6 -36.811089 174.632053 POINT (174.6320532 -36.81108876) 8mhuMKDyT-0Y34V6UeabWw -36.810854 174.630303 2023-01 False launch NaN 45 medium density residential True 70 0 30 2 3 1 6 1 0
4 15 P20009 2 2023-05-17T04:54:48Z 3 -36.923191 174.748620 POINT (174.7486203 -36.92319093) fg5HSwvFk71VknyBYn1_AA -36.923213 174.748832 2025-08 False launch NaN 45 medium density residential True 70 0 35 2 3 1 6 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
590 1421 P20693 2 2024-05-02T03:43:23Z 3 -36.897778 174.721580 POINT (174.7215796 -36.89777786) Uzuqd6oSo-EjCVuRP2Os0Q -36.897745 174.721875 2022-06 False launch NaN 65 medium density residential True 75 0 40 3 2 1 8 1 0
591 1425 P20693 6 2024-05-05T03:00:22Z 2 -36.969426 174.790602 POINT (174.7906024 -36.96942642) 5d1FcEQVhi9WsKP3r6iajg -36.969179 174.790872 2012-04 False launch NaN 45 medium density residential True 75 0 30 3 2 1 6 0 0
592 1426 P20721 1 2024-05-05T02:00:52Z 1 -36.893455 174.728262 POINT (174.728262 -36.89345532) ftrRbHWdN74ZKSFMEkPscg -36.893414 174.728087 2024-10 False launch NaN 45 medium density residential True 70 0 30 2 3 1 6 1 0
593 1428 P20721 3 2024-05-05T23:06:27Z 2 -36.845252 174.759951 POINT (174.7599508 -36.8452515) CIHM0ogKEICAgICO_7-B_AE -36.845292 174.759939 2022-06-24 True photos:street_view_publish_api Mint Design 45 medium density residential True 65 0 30 3 2 1 8 1 0
594 1429 P20721 4 2024-05-06T07:04:57Z 0 -36.845165 174.759885 POINT (174.7598849 -36.84516487) CIHM0ogKEICAgICO_-_r1AE -36.845177 174.759792 2022-06-24 True photos:street_view_publish_api Mint Design 45 medium density residential True 65 0 30 3 2 1 6 1 0

595 rows × 27 columns

time: 1h 36min 47s (started: 2026-02-25 10:36:57 +13:00)