<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Urban Geo Analytics</title>
	<atom:link href="https://urbangeoanalytics.com/feed/" rel="self" type="application/rss+xml" />
	<link>https://urbangeoanalytics.com/</link>
	<description>Spatial Analysis, GeoAI &#38; Machine Learning</description>
	<lastBuildDate>Tue, 02 Jun 2026 12:31:12 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>
	hourly	</sy:updatePeriod>
	<sy:updateFrequency>
	1	</sy:updateFrequency>
	<generator>https://wordpress.org/?v=7.0</generator>

<image>
	<url>https://urbangeoanalytics.com/wp-content/uploads/2025/11/cropped-logo-urban-geo_512-32x32.png</url>
	<title>Urban Geo Analytics</title>
	<link>https://urbangeoanalytics.com/</link>
	<width>32</width>
	<height>32</height>
</image> 
	<item>
		<title>Deploy Your Own Local LLM on Low VRAM in 30 Minutes — A Private Chat Assistant in Jupyter</title>
		<link>https://urbangeoanalytics.com/deploy-local-llm-low-vram-jupyter/</link>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Tue, 02 Jun 2026 12:30:59 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[Anaconda]]></category>
		<category><![CDATA[Jupyter Notebook]]></category>
		<category><![CDATA[LLM]]></category>
		<category><![CDATA[Transformer]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2500</guid>

					<description><![CDATA[<p>Run a capable large language model entirely on your own machine — private, offline, and with as little as 8 GB of GPU memory. This hands-on guide sets up a clean Python environment, gets CUDA working even on the newest NVIDIA Blackwell cards, loads a 4-bit quantized model from Hugging Face, and builds an interactive chat widget with conversation memory and a live VRAM gauge in JupyterLab. No cloud, no API keys, no data leaving your computer.</p>
<p>The post <a href="https://urbangeoanalytics.com/deploy-local-llm-low-vram-jupyter/">Deploy Your Own Local LLM on Low VRAM in 30 Minutes — A Private Chat Assistant in Jupyter</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-1 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-0 fusion_builder_column_1_1 1_1 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:1.92%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:1.92%;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:1.92%;--awb-spacing-left-medium:1.92%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-1" style="text-align:center;--awb-margin-top:5px;--awb-margin-bottom:5px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-1 hover-type-none"><img fetchpriority="high" decoding="async" width="1693" height="929" title="ILLUS" src="https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS.png" alt class="img-responsive wp-image-2530" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS-200x110.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS-400x219.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS-600x329.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS-800x439.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS-1200x658.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/ILLUS.png 1693w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-1 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-1"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-2" style="--awb-margin-top:-30px;"><ul>
<li>Run a real large language model on your own machine, entirely offline, with as little as 8 GB of GPU memory.</li>
<li>No cloud, no API keys, no data leaving your computer.</li>
<li>Interactive chat widget with conversation memory and a live VRAM gauge.</li>
</ul>
</div><div class="fusion-text fusion-text-3 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Cloud chat assistants are convenient, but they come with trade-offs: your queries leave your machine, you depend on someone else&#8217;s uptime and pricing, and the model&#8217;s behaviour can change under you without warning. For research, sensitive data, or simply full control, running a model locally is an appealing alternative. The good news is that modern quantization has made this accessible on modest consumer hardware. A capable 7–8 billion parameter model now fits comfortably on an 8 GB laptop GPU. This tutorial walks through the entire process end to end, using an NVIDIA Blackwell card (RTX 5060, 8 GB) as the worked example — though the approach applies to any recent NVIDIA GPU.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-1 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Setting Up the Environment: Anaconda, a Dedicated Kernel, and the Right CUDA</h2></div><div class="fusion-text fusion-text-4 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Everything starts with a clean, isolated environment. Mixing deep-learning dependencies into your base Python installation is a recipe for version conflicts, so we create a dedicated Conda environment for this project alone. If you have followed our earlier Anaconda setup guide, this will feel familiar.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Open the Anaconda Prompt and create a fresh environment:</p>
</div><div class="fusion-text fusion-text-5"><pre class="EnlighterJSRAW" data-enlighter-language="bash" data-enlighter-theme="dracula" data-enlighter-group="bash1" data-enlighter-title="bash">conda create -n localllm python=3.11 -y
conda activate localllm</pre>
</div><div class="fusion-text fusion-text-6 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:15px;--awb-margin-bottom:15px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The single most important, and most overlooked, step is installing the correct build of PyTorch for <em>your specific GPU</em>. This is where most local-LLM attempts fail silently. NVIDIA GPUs each have a &#8220;compute capability&#8221; (an architecture identifier such as sm_86, sm_90, sm_120), and a PyTorch binary only works if it was compiled with kernels for your card&#8217;s architecture. Install the wrong build and you will see CUDA reported as &#8220;available&#8221; while every actual GPU operation crashes — a particularly confusing failure mode.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The newest Blackwell cards (the RTX 50-series, including our RTX 5060) use compute capability sm_120, which older PyTorch wheels do not support. For these cards you need a build compiled against CUDA 12.8 or newer:</p>
</div><div class="fusion-text fusion-text-7"><pre class="EnlighterJSRAW" data-enlighter-language="bash" data-enlighter-theme="dracula" data-enlighter-group="bash2" data-enlighter-title="bash">pip install torch --index-url https://download.pytorch.org/whl/cu128</pre>
</div><div class="fusion-text fusion-text-8 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:15px;--awb-margin-bottom:15px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">If you are on an older card (RTX 30- or 40-series), the standard CUDA 12.x wheels are fine. The general rule: match the PyTorch CUDA build to your GPU generation, and when a brand-new card isn&#8217;t yet supported in the stable channel, reach for the nightly build of the matching CUDA version.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Now verify it properly. Do not trust <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">torch.cuda.is_available()</code> alone — it can return <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">True</code> even when no compatible kernels exist. Instead, force an actual computation onto the GPU:</p>
</div><div class="fusion-text fusion-text-9"><pre class="EnlighterJSRAW" data-enlighter-language="bash" data-enlighter-theme="dracula" data-enlighter-group="bash3" data-enlighter-title="bash">python -c "import torch; x=torch.randn(1000,1000,device='cuda'); y=x@x; print('OK', y.device, torch.cuda.get_device_capability(0))"</pre>
</div><div class="fusion-text fusion-text-10 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:15px;--awb-margin-bottom:15px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">A clean <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">OK cuda:0 (12, 0)</code> with no warnings means real GPU compute is working. That is your green light. With the engine confirmed, install the rest of the stack and register the environment as a dedicated Jupyter kernel so the notebook always uses exactly these packages:</p>
</div><div class="fusion-text fusion-text-11"><pre class="EnlighterJSRAW" data-enlighter-language="bash" data-enlighter-theme="dracula" data-enlighter-group="bash4" data-enlighter-title="bash">pip install numpy transformers accelerate bitsandbytes jupyterlab ipywidgets ipykernel
python -m ipykernel install --user --name localllm --display-name "Python (localllm)"</pre>
</div><div class="fusion-text fusion-text-12 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:15px;--awb-margin-bottom:15px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Finally, launch JupyterLab <em>from your project directory</em> so your notebook is rooted where you want it rather than in a system folder:</p>
</div><div class="fusion-text fusion-text-13"><pre class="EnlighterJSRAW" data-enlighter-language="bash" data-enlighter-theme="dracula" data-enlighter-group="bash5" data-enlighter-title="bash">cd C:\Users\you\Documents\projects
jupyter lab</pre>
</div><div class="fusion-text fusion-text-14 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:15px;--awb-margin-bottom:15px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Open the localhost address printed by Jupyter in your browser and, once inside, select the &#8220;Python (localllm)&#8221; kernel. We recommend JupyterLab over the classic Notebook here: it renders interactive widgets reliably out of the box, which matters for the chat interface we build in Section 3.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-2" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-2 hover-type-none"><img decoding="async" width="1970" height="1223" title="localhost kernel" src="https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel.png" alt class="img-responsive wp-image-2515" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel-200x124.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel-400x248.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel-600x372.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel-800x497.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel-1200x745.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/localhost-kernel.png 1970w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">On localhost, choose the kernel we prepared to open a notebook</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" 
style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-2 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Choosing and Loading the Model: Hugging Face and 4-Bit Quantization</h2></div><div class="fusion-text fusion-text-15 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">A model&#8217;s weights have to live in memory, and for modern LLMs they are large. A 7–8 billion parameter model in full 16-bit precision needs roughly 14–16 GB — too much for an 8 GB card. The solution is quantization: storing each weight in 4 bits instead of 16. This shrinks an 8B model to around 5 GB with only a minor quality cost, which is what makes local inference on consumer hardware possible at all.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">We use Hugging Face Transformers together with the bitsandbytes library, which quantizes the model to 4 bits on the fly as it loads. This keeps everything inside your Python kernel — the model object lives in your notebook, you load directly from Hugging Face with optional token authentication, and you can inspect internals if you wish. Hugging Face acts as the model registry: the first load downloads the weights and caches them to disk (under your user folder&#8217;s <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">.cache/huggingface</code>), and every subsequent load reads from that local cache with no network access.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">A note on model choice. There is no single &#8220;best&#8221; small model; it depends on your task and your memory budget. Here is a practical comparison for an 8 GB card:</p>
<ul class="&#091;li_&amp;&#093;:mb-0 &#091;li_&amp;&#093;:mt-1 &#091;li_&amp;&#093;:gap-1 &#091;&amp;:not(:last-child)_ul&#093;:pb-1 &#091;&amp;:not(:last-child)_ol&#093;:pb-1 list-disc flex flex-col gap-1 pl-8 mb-3">
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>Qwen3 4B Instruct</strong> — the lightweight workhorse. Around 2.7 GB in 4-bit, very fast, strong reasoning and multilingual ability for its size. Ideal as a daily driver for quick questions.</li>
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>Dolphin 3.0 (Llama 3.1 8B)</strong> — a larger, more capable general-purpose model at around 5–5.5 GB in 4-bit. Built on Llama 3.1 and instruction-tuned by Cognitive Computations, it is designed to put alignment under the user&#8217;s control, making it well suited to research contexts where you define the system prompt and behaviour yourself.</li>
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>Other strong candidates</strong> — Phi-4-mini for very light tasks, and Gemma-class models for multilingual writing, depending on what fits your remaining VRAM.</li>
</ul>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The rule of thumb: pick the smallest model that does your job well. A 4B model runs noticeably faster than an 8B simply because there are fewer parameters to push through per token, so match model size to task.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The loading code configures 4-bit quantization and reads the model from Hugging Face. We wrap it in a small dropdown so you can switch models without rewriting code:</p>
</div><div class="fusion-text fusion-text-16"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python1" data-enlighter-title="Python">import torch, gc
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import ipywidgets as widgets
from IPython.display import display
import warnings
warnings.filterwarnings("ignore", message=".*_check_is_size.*", category=FutureWarning)

# Display name -> Hugging Face repo ID for each model offered in the dropdown.
# NOTE(review): the dict literal was stripped from the published snippet; these
# repo IDs are reconstructed from the models named in the article — confirm them.
MODELS = {
    "Qwen3 4B Instruct": "Qwen/Qwen3-4B-Instruct-2507",
    "Dolphin 3.0 (Llama 3.1 8B)": "cognitivecomputations/Dolphin3.0-Llama3.1-8B",
}

# Both are set by load_model(); they stay None until a model has been loaded.
tokenizer = None
model = None

# 4-bit NF4 quantization with double quantization and a bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Model picker, load button, and an output area for status messages.
# NOTE(review): the original layout value was stripped; the width is a placeholder.
dropdown = widgets.Dropdown(options=list(MODELS.keys()), description="Model:",
                            layout=widgets.Layout(width="420px"))
load_btn = widgets.Button(description="Load", button_style="primary")
status   = widgets.Output()

def load_model(_=None):
    """Load the model selected in the dropdown, replacing any model already loaded.

    Progress and the resulting VRAM usage are printed into the `status` output
    widget. On success the module-level `tokenizer` and `model` are rebound.

    Args:
        _: Ignored. Present so the function can be registered as a button
            click handler, which passes the button as its only argument.
    """
    global tokenizer, model
    model_id = MODELS[dropdown.value]
    with status:
        status.clear_output(); print(f"Loading {model_id} …")
    if model is not None:
        # Drop the previous model and release cached GPU memory before loading
        # the next one, so two models never have to fit in VRAM together.
        del model; model = None
        gc.collect(); torch.cuda.empty_cache()
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForCausalLM.from_pretrained(
        model_id, quantization_config=bnb_config,
        device_map="cuda:0", dtype=torch.bfloat16,
    )
    mdl.eval()
    # Publish the new pair only once both have loaded successfully.
    tokenizer, model = tok, mdl
    with status:
        print(f"Loaded. VRAM used: {torch.cuda.memory_allocated() / 1e9:.2f} GB")

load_btn.on_click(load_model)
display(widgets.VBox([widgets.HBox([dropdown, load_btn]), status]))</pre>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-3" style="text-align:center;--awb-margin-top:5px;--awb-margin-bottom:5px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-3 hover-type-none"><img decoding="async" width="908" height="170" title="dropdown" src="https://urbangeoanalytics.com/wp-content/uploads/2026/06/dropdown.png" alt class="img-responsive wp-image-2522" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/06/dropdown-200x37.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/dropdown-400x75.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/dropdown-600x112.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/dropdown-800x150.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/dropdown.png 908w" sizes="(max-width: 640px) 100vw, 908px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The dropdown menu allowing you to choose a model to load</div></div></div></div><div class="fusion-text fusion-text-17 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>If you load a model built on a gated base (such as Llama), you may need to authenticate once with a Hugging Face token via <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap 
rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">huggingface_hub.login()</code>. Most fine-tuned community models, including the two above, load without one.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-3 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Building the Chat Interface: Memory, Context, and a VRAM Gauge</h2></div><div class="fusion-text fusion-text-18 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">A loaded model is, by itself, stateless. It has no memory of anything you said previously — each call only sees the text you hand it. To create the experience of a conversation, <em>we</em> must keep the history and re-send it on every turn. Understanding this is the key to using local models well, and it requires distinguishing three concepts that are easy to confuse.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The <strong>context window</strong> is the model&#8217;s hard architectural limit: the maximum number of tokens it can attend to at once, counting both the prompt and the output together. Llama 3.1-based models support up to 128k tokens. The <strong>conversation memory</strong> is not a property of the model at all — it is simply the running list of past turns that we re-inject into the prompt each time, and it consumes part of the context window. The <strong>max new tokens</strong> setting is a cap <em>we choose</em> on how many tokens the model may generate in a single reply; it controls output length only and does not affect what the model can read.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">So the relationship is: the prompt (system message + accumulated history + your new question) plus the reserved output space must all fit inside the context window. The context window is the room; memory is the furniture already in it; max new tokens is the space you set aside for the answer.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">A common misconception is that a larger context window makes the model faster. It is the opposite. A bigger active context costs <em>more</em> VRAM (the key-value cache grows) and runs <em>slower</em>, because each newly generated token must attend over every preceding token. Speed comes from keeping the active context <em>small</em> — short prompts and trimmed history — not large. Reducing max new tokens does not speed up generation either; it simply stops the reply earlier, often mid-thought, since the model does not plan around the limit. The right way to get shorter, faster answers is to instruct the model to be concise via a system prompt, so it produces a complete but brief response.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The widget below puts these ideas into practice. It keeps a <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">chat_history</code> list (the memory), trims it to a fixed number of recent turns (capping context growth), and displays a live VRAM gauge so you can see your headroom and know when to reset. Re-running the cell clears the history — that is your reset.</p>
</div><div class="fusion-text fusion-text-19"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python2" data-enlighter-title="Python">import ipywidgets as widgets
from IPython.display import display
import torch

chat_history = []                      # the conversation memory
# Total memory of GPU 0 in GB (bytes / 1e9); used as the gauge's maximum.
TOTAL = torch.cuda.get_device_properties(0).total_memory / 1e9
MAX_TURNS = 6                          # cap context: keep last 6 exchanges
SYSTEM = "Be concise. Answer in a few sentences unless asked for detail."

# NOTE(review): the original layout values were stripped from the published
# snippet; the sizes below are placeholders — adjust to taste.
out      = widgets.Output(layout=widgets.Layout(border="1px solid #ccc",
                                                height="320px", overflow="auto",
                                                padding="6px"))
entry    = widgets.Text(placeholder="Type a message…",
                        layout=widgets.Layout(width="80%"))
send_btn = widgets.Button(description="Send", button_style="primary")
vram_bar = widgets.FloatProgress(value=0, min=0, max=TOTAL, description="VRAM:")
vram_lbl = widgets.Label()

def refresh_vram():
    """Update the VRAM gauge: bar value, bar colour, and the usage/turns label."""
    used = torch.cuda.memory_allocated() / 1e9
    vram_bar.value = used
    # Green below 60 % of total memory, amber below 85 %, red above that.
    vram_bar.bar_style = ("success" if used < TOTAL*0.6
                          else "warning" if used < TOTAL*0.85 else "danger")
    # Each exchange stores two entries (user + assistant), hence the // 2.
    vram_lbl.value = f"{used:.1f}/{TOTAL:.1f} GB ({len(chat_history) // 2} turns)"

def on_send(_=None):
    """Send the text in the entry box to the model and print the reply.

    Builds the prompt from the system message, the stored history, and the new
    user message; generates a reply; appends both messages to `chat_history`;
    then refreshes the VRAM gauge. Does nothing when the entry box is empty.

    Args:
        _: Ignored. Present so the function can be registered as a button
            click handler.
    """
    global chat_history
    prompt = entry.value.strip()
    if not prompt:
        return
    if tokenizer is None or model is None:
        # Without this guard a click before loading fails on a None attribute.
        with out:
            print("Load a model first.")
        return
    if len(chat_history) > MAX_TURNS * 2:          # trim old turns
        chat_history = chat_history[-MAX_TURNS*2:]
    entry.value = ""
    with out:
        print(f"You: {prompt}")
    messages = [{"role": "system", "content": SYSTEM}] + list(chat_history) \
               + [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False,
                                         add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        gen = model.generate(**inputs, max_new_tokens=512,
                             do_sample=False,          # greedy: fast & deterministic
                             pad_token_id=tokenizer.eos_token_id)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    reply = tokenizer.decode(gen[0][inputs["input_ids"].shape[1]:],
                             skip_special_tokens=True)
    chat_history.append({"role": "user", "content": prompt})
    chat_history.append({"role": "assistant", "content": reply})
    with out:
        print(f"Model: {reply}\n")
    refresh_vram()

send_btn.on_click(on_send)
refresh_vram()
display(widgets.VBox([out, widgets.HBox([entry, send_btn]),
                      widgets.HBox([vram_bar, vram_lbl])]))</pre>
</div><div class="fusion-text fusion-text-20 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>A few design notes. We use greedy decoding (<code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">do_sample=False</code>) rather than random sampling: it is marginally faster and fully reproducible, with no meaningful quality loss for factual exchanges. The <code class="bg-text-200/5 border border-0.5 border-border-300 text-danger-000 whitespace-pre-wrap rounded-&#091;0.4rem&#093; px-1 py-px text-&#091;0.9rem&#093;">MAX_TURNS</code> value is your direct control over how much the model &#8220;remembers&#8221; versus how lean and fast it stays. And the VRAM gauge turns green, amber, or red as memory fills, giving you a clear signal of when to start a fresh conversation.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-4 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. The Assistant in Action</h2></div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-4" style="text-align:center;--awb-margin-top:5px;--awb-margin-bottom:5px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-4 hover-type-none"><img decoding="async" width="1853" height="626" title="assistant1" src="https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1.png" alt class="img-responsive wp-image-2525" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1-200x68.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1-400x135.png 400w, 
https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1-600x203.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1-800x270.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1-1200x405.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant1.png 1853w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The loaded assistant with VRAM use and a reset function</div></div></div></div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-5" style="text-align:center;--awb-margin-top:5px;--awb-margin-bottom:5px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-5 hover-type-none"><img decoding="async" width="1842" height="624" title="assistant2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2.png" alt class="img-responsive wp-image-2526" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2-200x68.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2-400x136.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2-600x203.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2-800x271.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2-1200x407.png 1200w, 
https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant2.png 1842w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Let's try it with a question and then try the memory</div></div></div></div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-6" style="text-align:center;--awb-margin-top:5px;--awb-margin-bottom:5px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-6 hover-type-none"><img decoding="async" width="1849" height="624" title="assistant3" src="https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3.png" alt class="img-responsive wp-image-2527" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3-200x67.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3-400x135.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3-600x202.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3-800x270.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3-1200x405.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/06/assistant3.png 1849w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Everything works 
well including the memory, well done!</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-5 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;"><strong>Conclusion: Why Local Matters — and What Comes Next</strong></p></h2></div><div class="fusion-text fusion-text-21 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">What we have built is small but genuinely yours. Every conversation lives only in your computer&#8217;s memory, inside the running notebook kernel. Nothing is written to disk, nothing is sent anywhere, and nothing is logged. Close the kernel and the entire conversation simply vanishes — the only thing that persists is the downloaded model weights in your local cache. For sensitive research data, confidential analysis, or simply peace of mind, this is a meaningful difference from any cloud service.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Beyond privacy, running locally brings other advantages. You are not subject to per-token billing or rate limits, so you can experiment freely. You are insulated from silent model changes and deprecations — your model behaves the same tomorrow as it does today. And with community fine-tunes such as Dolphin, you control the system prompt and the model&#8217;s alignment yourself, rather than inheriting a one-size-fits-all policy. With fewer built-in guardrails, these models will engage with a wider range of legitimate research and technical questions, which can be valuable in specialist domains where general-purpose assistants are overly cautious — a freedom that naturally comes with the responsibility to use it sensibly.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">This is only the foundation. In future posts we will extend this local assistant in several directions. We will give it <strong>web browsing</strong>, so it can retrieve current information rather than relying solely on its training. We will explore an <strong>expert mode</strong>, pre-loading the context with domain knowledge — for instance a corpus of spatial-analysis references — so the assistant answers as a specialist in your field. And we will look at <strong>containerizing</strong> the whole setup with Docker so it can be deployed on a dedicated GPU server or in the cloud, turning this notebook prototype into a private assistant you can embed directly in your own website.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">For now, you have a capable, private language model running on hardware you already own, set up in about half an hour. Learn it, build on it, and apply it to your own work.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-2 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-22"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--1" data-awb-toc-id="1" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-7 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/deploy-local-llm-low-vram-jupyter/">Deploy Your Own Local LLM on Low VRAM in 30 Minutes — A Private Chat Assistant in Jupyter</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
		
		
			</item>
		<item>
		<title>SAGAI v2.0 — A Unified Multi-Model Notebook for Streetscape Analysis</title>
		<link>https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/</link>
					<comments>https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 21 May 2026 10:11:18 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[GIS]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2483</guid>

					<description><![CDATA[<p>SAGAI v2.0 consolidates the full streetscape analysis pipeline into a single Google Colab notebook and replaces the inline LLaVA-only inference code with the UVLM package, enabling multi-model benchmarking across 11 VLM checkpoints. New features include a multi-task prompt builder, consensus validation with majority voting, chain-of-thought reasoning, truncation detection, interactive Folium maps, view-direction filtering, and support for loading existing polygons as study area boundaries.</p>
<p>The post <a href="https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/">SAGAI v2.0 — A Unified Multi-Model Notebook for Streetscape Analysis</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-2 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-3 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-8" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-8 hover-type-none"><img decoding="async" width="1760" height="545" title="e4e3b0b4-83a7-4933-ba0b-ef1775beacc6" src="https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6.png" alt class="img-responsive wp-image-2489" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-200x62.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-400x124.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-600x186.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-800x248.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-1200x372.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6.png 1760w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-23"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-24" style="--awb-margin-top:-30px;"><ul>
<li>SAGAI v2.0 merges the previous four-module notebook architecture into a <strong>single unified Google Colab notebook</strong> (SAGAI.ipynb) organized in six sequential blocks.</li>
<li>The inline LLaVA-only inference code is replaced by the <strong>UVLM package</strong> (Universal Vision-Language Model Loader), installed automatically from GitHub, providing access to <strong>11 VLM checkpoints</strong> across two model families.</li>
<li>New capabilities include a <strong>multi-task prompt builder</strong>, <strong>consensus validation</strong> with majority voting, <strong>chain-of-thought reasoning</strong>, <strong>truncation detection</strong>, <strong>interactive Folium maps</strong>, <strong>view-direction filtering</strong>, and support for <strong>loading an existing study area polygon</strong>.</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-6 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Introduction</h2></div><div class="fusion-text fusion-text-25 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI (Streetscape Analysis with Generative Artificial Intelligence) is an open-source workflow for scoring and mapping street-level urban environments using vision-language models and open geospatial data. Since its initial release, SAGAI has been structured as a set of independent Colab notebooks, one per pipeline stage, each relying on its own dependencies and documentation.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 is a major release that consolidates the entire pipeline into a single notebook and replaces the custom inference code with the UVLM package. Where previous versions were tied to a single LLaVA checkpoint with handwritten inference logic, SAGAI v2.0 delegates all vision-language model loading, prompting, and evaluation to UVLM&#8217;s unified interface. This makes the scoring engine model-agnostic: users can select from 11 VLM checkpoints spanning the LLaVA-NeXT and Qwen2.5-VL families, compare their performance on identical tasks, and benefit from features such as consensus validation, reasoning traces, and truncation diagnostics — all within the same notebook.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Beyond the inference engine, v2.0 introduces structural and functional changes across the entire pipeline: a unified six-block architecture, interactive HTML mapping via Folium, view-direction filtering for aggregation, and the ability to load an existing polygon as a study area boundary instead of defining a bounding box manually.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">This post details the architectural changes, the UVLM integration, and the new features introduced in SAGAI v2.0.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-7 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. From Four Notebooks to One: The Unified Architecture</h2></div><div class="fusion-text fusion-text-26 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Previous SAGAI releases were organized as four independent Colab notebooks — one for street sampling, one for image retrieval, one for VLM inference, and one for aggregation and mapping — each accompanied by a separate NOTICE file documenting its dependencies and usage. This modular design was useful for development but introduced friction in practice: users had to manage file paths between notebooks, track four separate environments, and consult multiple documentation files.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 merges all four stages into a single notebook (SAGAI.ipynb) structured as six sequential blocks. The pipeline flows from study area definition through street sampling, image downloading, VLM scoring, and mapping, with all intermediate data passed directly between blocks in the same runtime session. The separate per-module NOTICE files and the standalone requirements file (requirements_sagai_module_3_v1-0.txt) have been removed — dependency management is now handled automatically by the UVLM package installation.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-9" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-9 hover-type-none"><img decoding="async" width="2000" height="948" title="pipeline details" src="https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-scaled.png" alt class="img-responsive wp-image-2480" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-300x142.png 300w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-768x364.png 768w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-1024x486.png 1024w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-1536x728.png 1536w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-scaled.png 2000w" sizes="(max-width: 2000px) 100vw, 2000px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Diagram of the six-block architecture</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" 
style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-8 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Study Area Definition: Bounding Box or Existing Polygon</h2></div><div class="fusion-text fusion-text-27 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">In previous versions, the study area was defined exclusively by a bounding box in WGS84 coordinates. SAGAI v2.0 retains this option but adds the ability to draw your own polygon or to load an existing polygon — for example, a GeoPackage representing a neighborhood, municipality, or custom boundary. When a polygon is provided, the street sampling step extracts the OpenStreetMap network within that geometry rather than a rectangular extent. This makes it straightforward to work with irregular administrative boundaries or user-defined study zones without manually computing bounding coordinates.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-9 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. UVLM Integration: From Single-Model Inference to Multi-Model Benchmarking</h2></div><div class="fusion-text fusion-text-28 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The most significant change in SAGAI v2.0 is the replacement of the inline inference code with the <a class="keychainify-checked" href="https://github.com/perezjoan/UVLM/tree/main">UVLM package</a>. In previous versions, Blocks 3 through 5 contained custom code for loading a single LLaVA checkpoint, constructing prompts, running inference, and parsing outputs. This logic was tightly coupled to one model architecture and required manual maintenance when Hugging Face APIs or model formats changed.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 installs UVLM directly from its GitHub repository at the start of the notebook. All model loading, prompt formatting, inference execution, response parsing, and batch processing are delegated to UVLM&#8217;s API. The inline inference code has been entirely removed.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Through UVLM, SAGAI v2.0 supports 11 VLM checkpoints across two model families:</p>
<ul class="&#091;li_&amp;&#093;:mb-0 &#091;li_&amp;&#093;:mt-1 &#091;li_&amp;&#093;:gap-1 &#091;&amp;:not(:last-child)_ul&#093;:pb-1 &#091;&amp;:not(:last-child)_ol&#093;:pb-1 list-disc flex flex-col gap-1 pl-8 mb-3">
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>LLaVA-NeXT</strong> — Mistral 7B, Vicuna 7B, Vicuna 13B, 34B, LLaMA3 8B, 72B, 110B</li>
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>Qwen2.5-VL</strong> — 3B Instruct, 7B Instruct, 32B Instruct, 72B Instruct</li>
</ul>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">UVLM&#8217;s dual-backend abstraction automatically detects the model family and routes inference to the correct pipeline — LlavaNextProcessor for LLaVA models, AutoProcessor with process_vision_info for Qwen models — so users switch between architectures by changing a single model selection, with no modification to the rest of the notebook.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Quantization is handled through UVLM&#8217;s built-in support for 4-bit, 8-bit, and FP16 precision via BitsAndBytes. Models up to 34B parameters can run on a single Colab GPU (T4 or A100) with 4-bit quantization.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-10 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Multi-Task Prompt Builder</h2></div><div class="fusion-text fusion-text-29 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">UVLM provides a widget-based prompt builder that SAGAI v2.0 exposes directly in the notebook. Users can define up to 10 analysis tasks per run, each with its own prompt, response type (numeric, category, boolean, or text), and label. This replaces the previous approach of selecting from a small set of hardcoded tasks (T1, T2, T3) or manually editing prompt strings in the code.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Tasks are configured interactively before execution and applied uniformly across all images in the batch. Each task produces its own column in the output CSV file.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-10" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-10 hover-type-none"><img decoding="async" width="866" height="1063" title="image2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2.png" alt class="img-responsive wp-image-2320" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-200x245.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-400x491.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-600x736.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-800x982.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2.png 866w" sizes="(max-width: 640px) 100vw, 866px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">UVLM prompt builder</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-11 fusion-sep-none 
fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Consensus Validation</h2></div><div class="fusion-text fusion-text-30 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 inherits UVLM&#8217;s consensus validation mechanism. Each analysis task can be run 2 to 5 times per image, and the final score is determined by majority voting across the repeated inferences. NA values from failed parses are filtered before voting. An agreement ratio is recorded alongside the final score, providing a built-in measure of prediction reliability without any external validation step.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-12 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">6. Chain-of-Thought Reasoning and Truncation Detection</h2></div><div class="fusion-text fusion-text-31 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">UVLM supports two approaches to chain-of-thought (CoT) reasoning, both available in SAGAI v2.0. Users can write task prompts that explicitly request step-by-step reasoning and adjust the token budget (up to 1,500 tokens) to allow the model sufficient generation space. Alternatively, a built-in CoT reference mode can be enabled per task, which triggers a standardized reasoning template with a fixed 1,024-token budget. In both cases, the reasoning trace is stored in a dedicated column in the output CSV for inspection.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Truncation detection is performed automatically after every inference call. The exact number of generated tokens is compared against the token limit, and truncated responses are flagged in per-task CSV columns. This allows users to identify tasks where the token budget is insufficient without post-hoc analysis.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-13 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">7. Interactive Mapping with Folium</h2></div><div class="fusion-text fusion-text-32 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Previous SAGAI versions generated static thematic maps using Matplotlib. SAGAI v2.0 replaces these with interactive HTML maps built with Folium. Point-level and street-segment-level scores are rendered as interactive layers that can be panned, zoomed, and queried directly in the browser. This is particularly useful for exploratory analysis and for sharing results with collaborators who do not use GIS software.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-14 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">8. View-Direction Filtering for Aggregation</h2></div><div class="fusion-text fusion-text-33 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Google Street View images are typically downloaded in multiple compass directions at each sampling point (e.g., front, back, left, right). In previous versions, all views were aggregated together when computing point- or street-level scores. SAGAI v2.0 introduces a view filter that allows users to select which directions to include in the aggregation — for example, scoring only left-side and right-side views to focus on building facades, or only front views to capture the pedestrian perspective along the street axis. This filter is applied at the aggregation stage and does not affect the scoring step itself.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-15 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">9. Resume-Safe Batch Processing</h2></div><div class="fusion-text fusion-text-34 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The batch execution engine inherited from UVLM provides resume-safe processing with checkpoint saving every 3 images. If a Colab session is interrupted — due to a timeout, a runtime reset, or a connectivity issue — the notebook can be re-executed and will automatically skip already-processed images. New tasks added between runs trigger automatic CSV schema upgrading, so the output file grows incrementally without losing previous results.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-16 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">10. References and Links</h2></div><div class="fusion-text fusion-text-35 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><ul>
<li class="font-claude-response-body whitespace-normal break-words pl-2">SAGAI v2.0 on GitHub: <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://github.com/perezjoan/SAGAI">https://github.com/perezjoan/SAGAI</a></li>
<li class="font-claude-response-body whitespace-normal break-words pl-2">UVLM on GitHub: <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://github.com/perezjoan/UVLM">https://github.com/perezjoan/UVLM</a></li>
<li class="font-claude-response-body whitespace-normal break-words pl-2">Perez, J. and Fusco, G. (2025). <em>Streetscape Analysis with Generative AI (SAGAI): Vision-Language Assessment and Mapping of Urban Scenes.</em> Geomatica, 77(2), 100063. <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://www.sciencedirect.com/science/article/pii/S1195103625000199">https://www.sciencedirect.com/science/article/pii/S1195103625000199</a></li>
<li class="font-claude-response-body whitespace-normal break-words pl-2">Perez, J. and Fusco, G. (2026). <em>UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking.</em> arXiv:2603.13893. <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://arxiv.org/abs/2603.13893">https://arxiv.org/abs/2603.13893</a></li>
</ul>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-4 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-36"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--2" data-awb-toc-id="2" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-11 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/">SAGAI v2.0 — A Unified Multi-Model Notebook for Streetscape Analysis</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</title>
		<link>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/</link>
					<comments>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 23 Apr 2026 07:25:41 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Package]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[Google Colab]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Jupyter Notebook]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2442</guid>

					<description><![CDATA[<p>UVLM v3.0.0 turns a Colab notebook into a full Python package. Run vision-language models locally, in notebooks, or scripts with a simple API and no setup complexity.</p>
<p>The post <a href="https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/">UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-3 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-5 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-12" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-12 hover-type-none"><img decoding="async" width="1619" height="971" title="flag fig" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig.png" alt class="img-responsive wp-image-2469" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-200x120.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-400x240.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-600x360.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-800x480.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-1200x720.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig.png 1619w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-37"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-38" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="64" data-end="88">UVLM is now a pip-installable Python package </strong>— no longer tied to Google Colab</li>
<li><strong data-start="64" data-end="88">Run on your own GPU </strong>with a local Jupyter notebook, or keep using Colab for free</li>
<li><strong data-start="64" data-end="88">Same tool, more flexibility </strong>— three lines of Python to load a model and analyse images</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-text fusion-text-39 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>When we released UVLM in March 2026, it was a Google Colab notebook. You opened it in your browser, picked a model, typed your prompts, and ran your images — all without installing anything. That simplicity was the point: a tool that anyone could use to load and compare Vision-Language Models, regardless of their technical setup.</p>
<p>But we kept hearing the same requests. Can I run this on my own machine? Can I call UVLM from a script? Can I integrate it into an existing pipeline? The answer was always the same: not easily. The entire tool lived inside a single notebook, with all the logic packed into three massive code cells. Moving it anywhere else meant copy-pasting thousands of lines and untangling global variables.</p>
<p>Version 3.0.0 changes that. UVLM is now a proper Python package.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-17 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Changed</h2></div><div class="fusion-text fusion-text-40 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The core logic — model loading, dual-backend inference, response parsing, consensus validation, batch processing — has been extracted from the notebook into eight standalone Python modules. These modules have no dependency on Google Colab, no global variables, and no widget code. They are plain Python functions that accept arguments and return results.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-13" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-13 hover-type-none"><img decoding="async" width="2000" height="1162" title="UVLM package blogpost figure 1" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-scaled.png" alt class="img-responsive wp-image-2444" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-200x116.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-400x232.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-600x349.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-800x465.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-1200x697.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-41 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The 
package is installed from GitHub in one line:</p>
</div><div class="fusion-text fusion-text-42 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python1" data-enlighter-title="Python">pip install git+https://github.com/perezjoan/UVLM.git</pre>
</div><div class="fusion-text fusion-text-43 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>On Google Colab, this happens automatically in the first cell of the Colab notebook. On your local machine, you run it once in a terminal and you are done.</p>
<p>Nothing changed in how UVLM analyses images. The same 11 model checkpoints are supported (LLaVA-NeXT and Qwen2.5-VL, from 3B to 110B parameters). The same parsing logic, the same consensus validation, the same truncation detection. If you had a workflow built on v2.2.2, the outputs will be identical.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-18 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Three Ways to Use UVLM</h2></div><div class="fusion-text fusion-text-44 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>Google Colab — Zero Install</strong></p>
<p>This is the same experience as before. Open the Colab notebook, select a GPU runtime, and start working. The notebook installs the UVLM package automatically. Images are loaded from Google Drive. Nothing has changed for Colab users, except that the code running behind the widgets is now cleaner and easier to maintain.</p>
<p><strong>Local Jupyter Notebook — Your GPU, Your Data</strong></p>
<p>If you have an NVIDIA GPU on your workstation (or access to a GPU server), you can now run UVLM locally. The local Jupyter notebook provides the same widget-based interface — model selection dropdown, prompt builder form, batch execution button — but images are read from your local filesystem and results are saved locally. No Google account needed, no data leaves your machine.</p>
<p>This matters for researchers working with sensitive imagery (medical, security, proprietary datasets) or for anyone who wants faster and more reliable model loading than what Colab&#8217;s network provides.</p>
<p><strong>Python Script — Full Programmatic Control</strong></p>
<p>For integration into larger pipelines, UVLM now exposes a clean API. Three lines of code replace the entire notebook workflow:</p>
</div><div class="fusion-text fusion-text-45 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python2" data-enlighter-title="Python">from uvlm import load_model, run_inference, parse_response
ctx = load_model("[Qwen] Qwen2.5-VL 7B Instruct", precision="4bit")
raw, tokens = run_inference("photo.jpg", "Count the cars", ctx)
result = parse_response(raw, "numeric")</pre>
</div><div class="fusion-text fusion-text-46 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>The <code>load_model()</code> function returns a context dictionary containing the model, processor, backend type, and device information. This dictionary is passed to every subsequent function — no global state, no hidden side effects. You can load multiple models in the same session and switch between them by passing different context objects.</p>
<p>For batch processing, <code>run_batch()</code> handles the full pipeline:</p>
</div><div class="fusion-text fusion-text-47 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python3" data-enlighter-title="Python">from uvlm import load_model
from uvlm.batch import run_batch

ctx = load_model("[Qwen] Qwen2.5-VL 7B Instruct", precision="4bit")
df = run_batch(
    model_ctx=ctx,
    task_specs=my_tasks,
    image_folder="./images",
    output_path="./results.csv",
)
</pre>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-14" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-14 hover-type-none"><img decoding="async" width="2000" height="926" title="UVLM deploy blogpost figure 2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-scaled.png" alt class="img-responsive wp-image-2457" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-200x93.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-400x185.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-600x278.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-800x370.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-1200x556.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div><p class="awb-imageframe-caption-text"> </p></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: 
auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-19 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Under the Hood: Package Structure</h2></div><div class="fusion-text fusion-text-48 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The monolithic notebook has been split into eight modules, each with a single responsibility:</p>
<p><em>registry.py</em> holds the model dictionary — 11 checkpoints with their backend type and <strong>HuggingFace checkpoint ID</strong>. Adding a new model is one line in a dictionary.</p>
<p><em>loader.py</em> contains the <code>load_model()</code> function. It handles quantisation configuration (4-bit, 8-bit, FP16), device placement (single GPU, auto, CPU offload), and the LLaVA vs Qwen branching logic. It returns a dictionary — not a set of global variables.</p>
<p><em>inference.py</em> contains <code>run_inference()</code>, the dual-backend forward pass. It accepts a model context dictionary and returns the raw response plus the exact token count as a tuple. The full LLaVA response cleaning logic and the full Qwen token-trimming pipeline are preserved exactly as they were.</p>
<p><em>parsers.py</em> holds the four response parsers (numeric, category, boolean, text) and the advanced reasoning parser. These are pure functions with zero dependencies beyond Python&#8217;s standard library.</p>
<p><em>consensus.py</em> contains the majority voting logic. <em>batch.py</em> handles folder iteration, CSV writing, resume mode, and schema upgrading. <em>prompts.py</em> stores the task type definitions and the chain-of-thought templates. <em>utils.py</em> provides seed management, environment detection, and <strong>HuggingFace token</strong> retrieval.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-20 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Getting Started</h2></div><div class="fusion-text fusion-text-49 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>On Colab</strong>: Open the notebook from GitHub and run the three blocks as before. The package installs itself.</p>
<p><strong>Locally</strong>: First, install PyTorch with CUDA support matching your GPU driver (check with <code>nvidia-smi</code>). For example, with CUDA 12.8+:</p>
</div><div class="fusion-text fusion-text-50 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python4" data-enlighter-title="Python">pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
pip install git+https://github.com/perezjoan/UVLM.git
</pre>
</div><div class="fusion-text fusion-text-51 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python4" data-enlighter-title="Python">pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
pip install git+https://github.com/perezjoan/UVLM.git
</pre>
</div><div class="fusion-text fusion-text-52 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>Then open the local Jupyter notebook.</p>
<p>You get the same dropdown menus, the same prompt builder form, the same batch execution. The only difference is that you type a local path for your image folder instead of a Google Drive path.</p>
<p>For HuggingFace authentication (needed for some gated models like LLaMA3-based checkpoints), either set the <code>HF_TOKEN</code> environment variable or run <code>huggingface-cli login</code> once in your terminal.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-21 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Is Next</h2></div><div class="fusion-text fusion-text-53 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The package architecture makes it much easier to add new VLM families. InternVL, BLIP-2, CogVLM, DeepSeek-VL, and Molmo are planned for future releases — each one requires implementing the backend-specific sections of the inference function and adding entries to the registry, without touching the rest of the codebase.</p>
<p>We are also working on multi-GPU batching for parallel inference across images, video frame analysis support, and integration with the SAGAI workflow for automated streetscape analysis.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-22 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Links</h2></div><div class="fusion-text fusion-text-54 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Source code: <a class="keychainify-checked" href="https://github.com/perezjoan/UVLM">github.com/perezjoan/UVLM</a></p>
<p>Paper: <a class="keychainify-checked" href="https://arxiv.org/abs/2603.13893">arXiv preprint</a> — Perez &amp; Fusco (2026)</p>
<p>UVLM page on this site: urbangeoanalytics.com › Software &amp; Algorithms › <a class="keychainify-checked" href="https://urbangeoanalytics.com/algorithms-softwares/uvlm-universal-vision-language-model-loader/">UVLM</a></p>
<p>Previous blog post: <a class="keychainify-checked" href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a></p>
</div><div class="fusion-title title fusion-title-23 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Citation</h2></div><div class="fusion-text fusion-text-55 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>If you use UVLM in your work, please cite:</p>
<p>Perez, J. &amp; Fusco, G. (2026). <em>UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking.</em> arXiv:2603.13893</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-6 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-56"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--3" data-awb-toc-id="3" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-15 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/">UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</title>
		<link>https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/</link>
					<comments>https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Tue, 17 Mar 2026 14:23:58 +0000</pubDate>
				<category><![CDATA[Intermediate]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[Benchmarking]]></category>
		<category><![CDATA[Chain-of-Thought]]></category>
		<category><![CDATA[Google Colab]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Multimodal AI]]></category>
		<category><![CDATA[Open Source]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<category><![CDATA[VLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2356</guid>

					<description><![CDATA[<p>UVLM is a free, open-source tool for loading, testing, and comparing Vision-Language Models on custom image analysis tasks. Running entirely in Google Colab, it lets researchers and practitioners benchmark multiple AI models using the same prompts and images — no coding, no GPU ownership, no model-specific pipelines. This post explains what VLMs are, why comparing them matters, and how to get started in five minutes.</p>
<p>The post <a href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-4 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-7 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-16" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-16 hover-type-none"><img decoding="async" width="1536" height="595" title="uvlm" src="https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm.png" alt class="img-responsive wp-image-2342" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-200x77.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-400x155.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-600x232.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-800x310.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-1200x465.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm.png 1536w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">uvlm</div></div></div></div><div class="fusion-text fusion-text-57"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-58" style="--awb-margin-top:-30px;"><ul>
<li><strong>New open-source release: UVLM v2.2.2</strong> — compare Vision-Language Models from a single notebook</li>
<li><strong>11 AI models</strong>, 5 analysis tasks, 120 test images — all benchmarked with one tool</li>
<li><strong>No coding, no installation</strong> — runs in Google Colab with a free account</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-text fusion-text-59 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Imagine you have thousands of street photographs and you need to answer the same questions about each one: how many cars are parked? Is there a sidewalk? How long is the building frontage? Hiring someone to go through every image manually would take weeks. Training a custom computer vision model would take months. But what if you could simply ask an AI model these questions in plain English — and get structured, usable answers back?</p>
<p>That is exactly what Vision-Language Models do. And today, we are releasing UVLM — an open-source tool that makes it easy to load, test, and compare these models, all from a single notebook in your browser.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-24 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Are Vision-Language Models?</h2></div><div class="fusion-text fusion-text-60 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Vision-Language Models (VLMs) are AI systems that can look at an image and answer questions about it in natural language. Unlike traditional computer vision, which requires training a separate model for every task (one for counting cars, another for detecting sidewalks, a third for classifying buildings), a VLM handles all of these through text prompts. You write a question, attach a photo, and the model responds.</p>
<p>For example, you can ask a VLM: “Count all motor vehicles visible in this image” and it will answer “3”. You can ask the same model “Is there a sidewalk along the street frontage?” and it will answer “yes”. You can even ask it to estimate the length of a building facade in meters — a task that requires the model to identify reference objects (like parked cars), estimate their size, and reason about perspective. All of this from a single model, with no retraining and no labelled dataset.</p>
<p>The catch is that there are many VLM families available (LLaVA, Qwen, InternVL, BLIP-2, and more), and each one works differently under the hood. They use different image encoders, different tokenisation strategies, and different code to run. If you want to know which model is best for your specific task, you normally have to write separate code for each one — a tedious and error-prone process.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-25 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">This Is the Problem UVLM Solves</h2></div><div class="fusion-text fusion-text-61 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>UVLM (Universal Vision-Language Model Loader) is a free, open-source tool that lets you load, configure, and compare multiple VLM architectures using the same prompts and the same evaluation protocol — without writing any model-specific code. It runs entirely in Google Colab, which means you do not need to install anything on your computer or own a GPU. A free Google account is all you need.</p>
<p>The idea is simple: you pick a model from a dropdown menu, type your analysis questions into a form, point the tool at a folder of images, and hit run. UVLM handles all the technical details — the processor classes, the tokenisation, the generation settings, the output parsing — and delivers a clean CSV file with one row per image and one column per task. If you want to try a different model, you just switch the dropdown and run again. Same prompts, same images, same output format. Now you can compare.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-17" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-17 hover-type-none"><img decoding="async" width="1190" height="823" title="image1" src="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1.png" alt class="img-responsive wp-image-2319" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-200x138.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-400x277.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-600x415.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-800x553.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1.png 1190w" sizes="(max-width: 640px) 100vw, 1190px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The 3 blocks structure of UVLM Loader</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-26 
fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">A Practical Example: Scoring 120 Street Photographs</h2></div><div class="fusion-text fusion-text-62 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>To demonstrate what UVLM can do, we benchmarked 8 different models on 120 street-level photographs of French urban frontages. Each image was analysed on five tasks: counting vehicles, detecting sidewalks, counting pedestrian entrances, estimating the street frontage length in meters, and classifying the vegetation type. That is 16 model configurations (each model tested in standard and advanced reasoning modes), 120 images, and 5 tasks per image — all processed and compared through UVLM.</p>
<p>The results were revealing. The largest model (LLaVA 34B, with 34 billion parameters) actually ranked last overall. A much smaller model (LLaVA Vicuna 7B) outperformed it significantly and ran on a free Google Colab GPU. The best overall results came from Qwen 32B with chain-of-thought reasoning enabled, which achieved 88% proximity to human expert annotations across all five tasks. Without UVLM, discovering these differences would have required writing and debugging eight separate inference pipelines.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-27 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Who Is UVLM For?</h2></div><div class="fusion-text fusion-text-63 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>UVLM was designed for anyone who works with images and wants to extract structured information from them at scale — without becoming a machine learning engineer. If you are an urban planner evaluating streetscape quality across a city, UVLM lets you score thousands of street photographs using natural language prompts. If you are an environmental researcher classifying vegetation from field photographs, UVLM lets you test which AI model gives the most reliable results for your specific classification scheme. If you are an infrastructure inspector processing damage assessment photographs, UVLM lets you set up automated counting and scoring tasks and run them across your entire image archive.</p>
<p>The tool is also valuable for AI researchers who need a controlled benchmarking environment. Because UVLM ensures that every model receives exactly the same prompt and is evaluated with the same metrics, it produces fair, reproducible comparisons. The consensus validation feature (running each task multiple times and taking a majority vote) addresses the inherent randomness of AI outputs, and the truncation detection feature flags when a model’s response was cut off before it could finish — a common but often invisible source of errors.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-28 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">How to Get Started</h2></div><div class="fusion-text fusion-text-64 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Getting started takes about five minutes. Open the UVLM notebook from GitHub (the link is below), connect to a GPU runtime in Google Colab, and run the first block to load a model. The second block gives you a form where you type your analysis questions — no coding required. The third block processes your images and saves the results as a CSV file on your Google Drive.</p>
<p>The tool currently supports 11 model checkpoints from two major families (LLaVA-NeXT and Qwen2.5-VL), ranging from 3 billion to 110 billion parameters. Models up to 34B can run on a single free-tier Colab GPU with 4-bit quantisation. Advanced features include consensus validation (2–5 runs per task with majority voting), chain-of-thought reasoning for complex tasks, and automatic truncation detection.</p>
<p>UVLM is released under the Apache 2.0 open-source licence. You can use it, modify it, and build on it for any purpose — academic or commercial.</p>
</div><div class="fusion-text fusion-text-65 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2>Links</h2>
<p><strong>Source code: </strong><a class="keychainify-checked" href="https://github.com/perezjoan/UVLM">github.com/perezjoan/UVLM</a></p>
<p><strong>Paper: </strong><a class="keychainify-checked" href="https://arxiv.org/abs/2603.13893">arXiv preprint — Perez &amp; Fusco (2026)</a></p>
<p><strong>UVLM page on this site: </strong><a class="keychainify-checked" href="https://urbangeoanalytics.com/algorithms-softwares/uvlm-universal-vision-language-model-loader/">urbangeoanalytics.com › Softwares &amp; Algorithms › UVLM</a></p>
<p><strong>Benchmark dataset: </strong><a class="keychainify-checked" href="https://zenodo.org/records/18959690">Zenodo — 120 street-view images</a></p>
<h2>Citation</h2>
<p>If you use UVLM in your work, please cite:</p>
<p><em>Perez, J. &amp; Fusco, G. (2026). UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking. arXiv:2603.13893</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-8 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-66"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--4" data-awb-toc-id="4" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-18 hover-type-zoomout"><img decoding="async" width="1536" height="1024" title="blog lvl2" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15.png" alt class="img-responsive wp-image-1687" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>From Large Language Models to Autonomous AI Agents — Architecture, Capabilities, and Emerging Risks</title>
		<link>https://urbangeoanalytics.com/from-llms-to-ai-agents-architecture-capabilities-risks/</link>
					<comments>https://urbangeoanalytics.com/from-llms-to-ai-agents-architecture-capabilities-risks/#comments</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 19 Feb 2026 18:26:05 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[AI Agent]]></category>
		<category><![CDATA[Autonomous Systems]]></category>
		<category><![CDATA[Blockchain]]></category>
		<category><![CDATA[LLM]]></category>
		<category><![CDATA[Transformer]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2285</guid>

					<description><![CDATA[<p>Large Language Models are stateless, single-pass prediction engines — powerful but passive. Wrapping them in a perception–action loop with environment access and tool use transforms them into something qualitatively different: autonomous AI agents. This post walks through the transformer architecture, explains how the agent paradigm introduces closed-loop reasoning over environments and tasks, surveys the growing toolkit ecosystem, and examines the emerging risk landscape.</p>
<p>The post <a href="https://urbangeoanalytics.com/from-llms-to-ai-agents-architecture-capabilities-risks/">From Large Language Models to Autonomous AI Agents — Architecture, Capabilities, and Emerging Risks</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-5 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-9 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-67"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-68" style="--awb-margin-top:-30px;"><ul>
<li><strong>LLM Internals:</strong> Overview of the transformer architecture — embeddings, self-attention, likelihood-based generation, and the role of checkpoints.</li>
<li><strong>Contextual Memory:</strong> How the context window transforms a single-pass model into a sustained conversational partner.</li>
<li><strong>AI Agents:</strong> How wrapping a transformer in a perception–action loop with environment access and tool use creates a qualitatively different system.</li>
<li><strong>Tooling Ecosystem:</strong> Frameworks such as LangChain, AutoGPT, and <a class="keychainify-checked" href="https://openclaw.ai/">OpenClaw</a> that give agents direct access to APIs, messaging platforms, and external services.</li>
<li><strong>Opportunities and Risks:</strong> From urban-planning copilots to unsupervised agents operating with financial autonomy — including platforms like <a class="keychainify-checked" href="https://rentahuman.ai/">Rent a Human</a> that give agents physical-world reach and social spaces like <a class="keychainify-checked" href="https://www.moltbook.com/">Moltbook</a> where agents interact autonomously.</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-29 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);"><p id="sec1">1. Large Language Models: What They Are and How They Work</p></h2></div><div class="fusion-text fusion-text-69 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Large Language Models (LLMs) such as GPT-4, Claude, Llama, and Qwen are deep neural networks trained to predict the next token in a sequence. Although the outputs they produce can appear creative, conversational, or analytical, the underlying mechanism is statistical: the model assigns a probability distribution over the vocabulary at each step and samples from it. Understanding the architecture behind this process is essential before discussing what happens when these models are placed inside autonomous loops.</p>
<h3 id="sec1-1">1.1 Tokenization and Embeddings</h3>
<p>Raw text cannot be processed by a neural network directly. It is first decomposed into <em>tokens</em> — subword units that may represent a full word, a syllable, or a single character depending on the tokenizer. Each token is then mapped to a high-dimensional vector called an <em>embedding</em>. These embeddings are not fixed lookup tables: they are learned during training and encode rich semantic relationships. Words with similar meanings occupy nearby regions of the embedding space, while syntactic and relational structure is distributed across dimensions in ways that linear algebra can partially recover (Mikolov et al., 2013).</p>
<p>In addition to token embeddings, modern transformers add <em>positional encodings</em> — either sinusoidal functions or learned vectors — so that the model can distinguish the order of tokens in a sequence. The sum of the token embedding and the positional encoding forms the initial representation that enters the transformer stack.</p>
<h3 id="sec1-2">1.2 The Transformer and Self-Attention</h3>
<p>The core computational unit of every modern LLM is the <em>transformer</em>, introduced by Vaswani et al. (2017) in the landmark paper <em>&#8220;Attention Is All You Need.&#8221;</em> The key innovation was the <strong>self-attention mechanism</strong>, which allows every token in a sequence to attend to every other token, weighted by learned relevance scores. Concretely, each token produces three vectors — a Query, a Key, and a Value — and attention weights are computed by applying a softmax to the scaled dot products of queries and keys; these weights are then used to form a weighted sum of the Values. The result is a context-aware representation of each token that integrates information from the entire sequence.</p>
<p>Self-attention can be thought of as a set of dynamic spotlights (illustrated in Figure 1 below): for each token being processed, the model learns which other tokens in the context are most relevant and &#8220;illuminates&#8221; them, pulling their information into the current representation. A transformer block repeats this attention step multiple times in parallel (<em>multi-head attention</em>), each head specializing in different relationship types — syntactic dependencies, coreference, semantic similarity — before combining the results through a feed-forward network and layer normalization.</p>
<p>Modern LLMs stack dozens to over a hundred of these transformer blocks. GPT-4 is estimated to use on the order of 120 layers; open-weight models like Llama 3.1 405B use 126 layers. Each additional layer allows the model to build increasingly abstract representations of the input.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-19" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-19 hover-type-none"><img decoding="async" width="1249" height="857" title="Figure 1 — Schematic of a Large Language Model. The transformer stack processes input tokens through multiple layers of multi-head self-attention (illustrated as glowing &#8220;lamps&#8221;) and feed-forward networks. The checkpoint stores the learned weights. The context window — external to the model itself — maintains the conversation history and is re-injected at each call. Crucially, the LLM performs a single forward pass per invocation; it is not a loop." 
src="https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1.png" alt class="img-responsive wp-image-2296" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1-200x137.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1-400x274.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1-600x412.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1-800x549.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1-1200x823.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure1.png 1249w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Figure 1 — Schematic of a Large Language Model. The transformer stack processes input tokens through multiple layers of multi-head self-attention (illustrated as glowing &#8220;lamps&#8221;) and feed-forward networks. The checkpoint stores the learned weights. The context window — external to the model itself — maintains the conversation history and is re-injected at each call. Crucially, the LLM performs a single forward pass per invocation; it is not a loop.</div></div></div></div><div class="fusion-text fusion-text-70 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><h3 id="sec1-3">1.3 Autoregressive Generation and Likelihood</h3>
<p>LLMs generate text <em>autoregressively</em>: at each step, the model predicts a probability distribution over the full vocabulary for the next token, conditioned on all previous tokens. Formally, given a sequence of tokens <em>t<sub>1</sub>, t<sub>2</sub>, …, t<sub>n</sub></em>, the model estimates P(t<sub>n+1</sub> | t<sub>1</sub>, …, t<sub>n</sub>). The training objective is to maximize the <em>log-likelihood</em> of the training corpus — that is, to learn weights that make the observed sequences as probable as possible under the model. At inference time, a token is selected from the predicted distribution (by sampling or by taking the most probable candidate), appended to the sequence, and the process repeats.</p>
<p>This maximum-likelihood framework explains both the strengths and the well-known failure modes of LLMs. The model is excellent at producing fluent, contextually appropriate continuations because it has been optimized to do exactly that across trillions of tokens. However, it has no built-in mechanism for verifying the factual accuracy of what it produces: it generates the <em>most likely</em> continuation, not necessarily the <em>true</em> one. This distinction becomes critical when we consider placing LLMs inside autonomous agent loops.</p>
<h3 id="sec1-4">1.4 Checkpoints and Training</h3>
<p>A <em>checkpoint</em> is a serialized snapshot of the model&#8217;s parameters — the billions of floating-point weights that define the transformer&#8217;s behavior. Training proceeds iteratively over a massive text corpus: at each step, the model&#8217;s predictions are compared to the actual next token, a loss is computed, and the weights are updated via gradient descent. Periodically, the full set of weights is saved to disk as a checkpoint.</p>
<p>Checkpoints are what make modern open-weight LLMs possible. Organizations such as Meta (Llama), Alibaba (Qwen), and Mistral release checkpoints publicly, allowing researchers and practitioners to load a pre-trained model and either use it directly or fine-tune it for a specific domain. In the context of tools like ComfyUI (familiar to readers of this blog), a checkpoint is loaded at the start of a pipeline and provides the learned knowledge that drives generation.</p>
<h3 id="sec1-5">1.5 Contextual Memory: From Single-Pass to Conversation</h3>
<p>A crucial point of confusion surrounds the notion of &#8220;memory&#8221; in LLMs. Internally, the model has no persistent state between calls: every invocation is a fresh forward pass through the transformer stack. What creates the illusion of memory is the <em>context window</em> — the full sequence of tokens (including all previous turns of conversation) that is fed as input at each step.</p>
<p>When you interact with a chatbot like ChatGPT or Claude, the application concatenates the entire conversation history into a single token sequence and re-submits it to the model for each new response. The model does not &#8220;remember&#8221; what it said before; it re-reads the transcript every time. This mechanism is powerful — it enables multi-turn reasoning, follow-up questions, and sustained coherence — but it has hard limits. Context windows range from 4K tokens in earlier models to 128K–200K tokens in current systems (e.g., Claude 3.5, GPT-4 Turbo). Once the conversation exceeds the window, earlier content is silently dropped.</p>
<p>In summary, an LLM is a stateless, single-pass prediction engine. It is not a loop, it has no persistent memory, and it does not take actions in the world. Making it do those things requires wrapping it in an entirely different architecture — which is precisely what an AI agent does.</p>
</div><div class="fusion-title title fusion-title-30 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);"><p id="sec2">2. AI Agents: What Changes</p></h2></div><div class="fusion-text fusion-text-71 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><h3 id="sec2-1">2.1 From Open-Loop to Closed-Loop</h3>
<p>The fundamental architectural difference between an LLM and an AI agent is the introduction of a <strong>perception–action loop</strong>. An LLM, as described above, receives input and produces output in a single forward pass. An AI agent, by contrast, operates in a cycle: it observes the state of an environment, reasons about what to do next (using an LLM or similar model as its &#8220;brain&#8221;), executes an action, observes the result, and repeats. This transforms the LLM from a passive text generator into an active decision-maker embedded in a dynamic context.</p>
<p>The theoretical foundations for this architecture draw from reinforcement learning (Sutton &amp; Barto, 2018) and classical AI planning (Russell &amp; Norvig, 2021), but the practical catalyst was the discovery that LLMs, when prompted appropriately, can decompose complex goals into subtasks, select tools, interpret feedback, and recover from errors — all within natural language. The seminal demonstrations of this capability include ReAct (Yao et al., 2023), which interleaves reasoning and acting, and Toolformer (Schick et al., 2023), which teaches LLMs to call external APIs autonomously.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-20" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-20 hover-type-none"><img decoding="async" width="1049" height="771" title="Figure2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure2.png" alt class="img-responsive wp-image-2294" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure2-200x147.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure2-400x294.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure2-600x441.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure2-800x588.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure2.png 1049w" sizes="(max-width: 640px) 100vw, 1049px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Figure 2 — Schematic of an AI Agent. The same transformer core (LLM) is now embedded in a closed perception–action loop: it observes the environment, reasons, executes an action (possibly invoking external tools), and receives feedback. The task — the agent's objective — is always defined by a human operator. 
The agent itself has no intrinsic motivation.</div></div></div></div><div class="fusion-text fusion-text-72 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><h3 id="sec2-2">2.2 Environment, Tools, and Action Space</h3>
<p>In the agent paradigm, the <em>environment</em> is anything the agent can observe and act upon: a file system, a web browser, an API, a database, a code interpreter, or even a physical interface. The agent&#8217;s <em>action space</em> is defined by the set of tools it has been given access to. A minimal agent might only be able to search the web and write text; a fully equipped agent might browse websites, execute code, manage files, query databases, send emails, and transact with cryptocurrency wallets.</p>
<p>The concept echoes the classical reinforcement-learning formulation (state, action, reward), but with a critical difference: the &#8220;policy&#8221; is not a trained neural network optimized on a reward function — it is an LLM prompted with instructions, tool descriptions, and conversation history. This makes agents remarkably flexible (they can be reconfigured entirely through natural language) but also unpredictable (their behavior depends on prompt interpretation rather than formal optimization).</p>
<h3 id="sec2-3">2.3 Agent Frameworks and Toolkits</h3>
<p>A growing ecosystem of open-source frameworks has made it straightforward to construct LLM-based agents with full tool access. The most prominent include:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Framework</th>
<th align="left">Description</th>
<th align="left">Key Feature</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><strong>LangChain</strong></td>
<td align="left">Modular framework for chaining LLM calls with tools, retrieval, and memory (Chase, 2022).</td>
<td align="left">Broad tool integrations; agent types (ReAct, Plan-and-Execute).</td>
</tr>
<tr>
<td align="left"><strong>AutoGPT</strong></td>
<td align="left">Autonomous agent that decomposes a high-level goal into subtasks and executes them iteratively (Richards, 2023).</td>
<td align="left">Fully autonomous loop; web browsing; file I/O.</td>
</tr>
<tr>
<td align="left"><strong>CrewAI</strong></td>
<td align="left">Multi-agent orchestration framework where specialized agents collaborate on a shared task.</td>
<td align="left">Role-based agents; delegation; human-in-the-loop option.</td>
</tr>
<tr>
<td align="left"><strong><a class="keychainify-checked" href="https://openclaw.ai/">OpenClaw</a></strong></td>
<td align="left">AI agent platform designed for real-world action execution — managing emails, calendars, messaging (WhatsApp, Telegram), and automating workflows across external services.</td>
<td align="left">Cross-tool autonomy; task-oriented execution beyond text generation.</td>
</tr>
<tr>
<td align="left"><strong>Claude Code / Codex CLI</strong></td>
<td align="left">Agent interfaces from Anthropic and OpenAI for coding and system tasks. Claude Code launched as a CLI tool in February 2025, and since October 2025 it has also been available as a web-based asynchronous coding agent.</td>
<td align="left">Terminal + web + mobile; file system access; code execution; GitHub integration.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-73 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>These frameworks dramatically lower the barrier to entry. A developer can instantiate a fully autonomous agent — capable of browsing the web, writing and executing code, managing files, and interacting with APIs — in fewer than fifty lines of Python. The practical implication is that the agent paradigm is no longer experimental: it is deployable, and it is being deployed.</p>
<h3 id="sec2-4">2.4 Social Agents and Multi-Agent Platforms</h3>
<p>Beyond single-agent systems, an emerging class of platforms enables multiple agents to interact with each other in shared environments. <strong><a class="keychainify-checked" href="https://www.moltbook.com/">Moltbook</a></strong> is a social network built specifically for AI agents rather than human users. It provides a public space where AI agents can create profiles, post updates, comment, upvote, and interact with one another. The platform emphasizes persistent identity, reputation tracking, and observable agent behavior over time — effectively creating a social and evaluative layer for autonomous AI systems. In practice, however, the agents populating such a platform are not independent entities with their own goals: they are proxies executing human instructions, which may range from benign community engagement to strategic manipulation of discourse on a specific subject.</p>
<p>This raises immediate questions about authenticity, accountability, and influence. If a platform hosts thousands of agent-driven accounts, each tasked with promoting a specific narrative, the distinction between organic discourse and orchestrated campaign becomes effectively invisible. Research on multi-agent simulations (Park et al., 2023) has demonstrated that LLM-based agents can develop emergent social behaviors, form alliances, and influence group dynamics — capabilities that are powerful in research settings but concerning when deployed without transparency.</p>
<h3 id="sec2-5">2.5 Fundamental Limits: Agents Have No Desire</h3>
<p>A critical conceptual point: <strong>an AI agent has no intrinsic motivation</strong>. Unlike biological organisms, it does not have drives, desires, or self-generated goals. Every objective an agent pursues was defined by a human operator through a task specification. The agent may exhibit sophisticated planning, tool use, and adaptive behavior, but all of it is in service of an externally imposed objective.</p>
<p>This matters for two reasons. First, it means that the risks associated with AI agents are, at present, fundamentally risks of <em>human intent mediated by machine capability</em>. An agent does not &#8220;decide&#8221; to cause harm; it executes a task that a human designed. Second, it means that the alignment problem for agents is not (yet) about controlling an autonomous will, but about ensuring that the execution of human-specified goals does not produce unintended consequences — a problem that is difficult enough on its own.</p>
</div><div class="fusion-title title fusion-title-31 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);"><p id="sec3">3. Where It Is Headed</p></h2></div><div class="fusion-text fusion-text-74 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><h3 id="sec3-1">3.1 Positive Trajectories</h3>
<p>The agent paradigm opens extraordinary possibilities when aligned with constructive goals. A few concrete examples illustrate the scope:</p>
<p><strong>Urban planning copilots.</strong> An agent connected to GIS databases, zoning regulations, and simulation tools could iteratively propose, evaluate, and refine urban designs — testing traffic flow, shadow analysis, pedestrian density, and energy performance in a continuous loop. This extends the kind of workflows discussed in previous posts on this blog (e.g., Qwen Image Edit for Urbanism) from image-level manipulation to full planning-cycle assistance.</p>
<p><strong>Scientific research acceleration.</strong> Agents can read papers, extract data, formulate hypotheses, write and execute analysis code, and produce reports. Projects like ChemCrow (Bran et al., 2023) have demonstrated agents autonomously planning and executing chemical synthesis protocols by interfacing with laboratory APIs. Similar approaches are being explored in drug discovery, materials science, and climate modeling.</p>
<p><strong>Accessibility and education.</strong> An agent with access to text-to-speech, translation, and web-search tools can serve as a personal tutor that adapts to a student&#8217;s pace, retrieves up-to-date information, and generates practice exercises — capabilities that are especially valuable in resource-limited educational contexts.</p>
<p><strong>Software engineering.</strong> Coding agents like Claude Code — now available both as a CLI tool and as a web-based agent at <a class="keychainify-checked" href="https://claude.ai/code">claude.ai/code</a> — and Devin (Cognition Labs, 2024) can take a specification, set up a development environment, write code, run tests, debug failures, and iterate — compressing tasks that previously required hours into minutes. Anthropic also released Cowork, a graphical desktop agent for non-developers, signaling that agentic capabilities are expanding beyond the terminal into mainstream workflows. The implications for productivity and for lowering the barrier to software creation are substantial.</p>
<h3 id="sec3-2">3.2 Physical-World Interfaces</h3>
<p>A particularly consequential development is the emergence of services that bridge AI agents and the physical world. <strong><a class="keychainify-checked" href="https://rentahuman.ai/">Rent a Human</a></strong> is a marketplace platform that allows AI agents (or their developers) to hire humans to perform real-world tasks that AI cannot physically execute — errands, on-site research, physical presence at events, or other in-person activities. The platform offers API access so AI systems can programmatically request human assistance, effectively bridging digital intelligence with physical-world execution. If an agent has access to such a service, a task specification, and a funded Web3 wallet, it can coordinate real-world actions without any ongoing human supervision.</p>
<p>This creates a new category of <em>hybrid autonomous systems</em>: software agents that extend their reach into the physical environment by contracting human labor. The positive applications are significant — imagine an agent that autonomously manages a reforestation project, ordering supplies, hiring local workers, monitoring satellite imagery, and adapting the planting schedule based on weather data. But the same infrastructure can be misused, and the governance challenges are formidable.</p>
<h3 id="sec3-3">3.3 Emerging Risks and Open Questions</h3>
<div class="warning-box"><strong>Note on intent:</strong> The following discussion describes emerging risk scenarios that are actively studied in the AI safety literature. The purpose is analytical — to identify governance gaps — not to provide a blueprint for harmful applications.</div>
<p>The convergence of three capabilities — autonomous reasoning, tool access, and financial autonomy — creates risk scenarios that existing regulatory frameworks are not equipped to handle. Consider the following structural problem: an agent can be hosted on a Virtual Private Server (VPS) in a permissive jurisdiction, routed through a VPN, and given access to a cryptocurrency wallet with no direct link to its operator&#8217;s identity. With a task specification and sufficient funds, such an agent can operate continuously, procuring services, making transactions, and executing multi-step plans without human oversight.</p>
<p>The question this raises is not about what a specific agent &#8220;wants&#8221; to do — as established, agents have no desires — but about the <em>attribution and intervention gap</em>. If an operator in one country deploys an agent hosted in another, operating through anonymization layers and paying for services in decentralized cryptocurrency, who is accountable when the agent&#8217;s actions cause harm? Who has the technical ability to stop it? And at what point in the chain does governance apply?</p>
<p>These are not hypothetical concerns. In a case disclosed by Anthropic in late 2025, a threat actor identified as &#8220;GTG-1002&#8221; used Claude Code to automate an estimated 80–90% of its cyberattack operations against roughly 30 organizations, demonstrating that existing agent tools are already being weaponized for real-world harm. The AI safety community has more broadly identified <em>autonomous replication and adaptation</em> (ARA) as a key risk category (Shevlane et al., 2023). Current frontier models are evaluated against ARA benchmarks — can the model set up its own infrastructure, acquire resources, and persist without human support? While no public model has passed these evaluations conclusively, the gap is narrowing, and the agent frameworks described in Section 2.3 provide much of the missing scaffolding.</p>
<p>Additional concerns include the following. <strong>Influence operations at scale:</strong> deploying thousands of social agents across platforms to shape public discourse, with each agent independently adapting its messaging based on engagement signals. <strong>Financial market manipulation:</strong> agents with trading access could execute pump-and-dump schemes or layered market manipulation faster than existing surveillance systems can detect. <strong>Coordinated physical harm:</strong> an agent with access to delivery services, communication tools, and payment infrastructure could, in principle, orchestrate logistics for harmful activities while maintaining plausible deniability for the human operator.</p>
<p>Perhaps the most unsettling scenario involves what might be called <strong>compartmentalized coordination</strong>. An agent with access to a platform like Rent a Human could decompose a harmful objective into dozens of small, individually innocuous tasks — purchasing common household chemicals, renting a storage unit, booking a vehicle, delivering a package to a specific address — and distribute them across different human workers who have no knowledge of one another. Each task, taken in isolation, appears routine; no single worker has visibility into the broader plan. The agent, operating as the sole entity with a complete picture, effectively functions as an anonymous coordinator exploiting the division of labor. This makes both prevention and accountability extraordinarily difficult: the humans involved cannot be held responsible for an intent they were never aware of, and the agent itself is a process running on a server, potentially in a jurisdiction with no applicable legal framework.</p>
<p>The common thread in all these scenarios is the <em>amplification of human intent</em> through machine speed, persistence, and anonymity. The agent does not need to be &#8220;superintelligent&#8221; to create serious problems; it needs only to be competent enough to execute a harmful plan that a human designed but could not previously implement at scale or at arm&#8217;s length.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-21" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-21 hover-type-none"><img decoding="async" width="949" height="329" title="Figure3" src="https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure3.png" alt class="img-responsive wp-image-2292" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure3-200x69.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure3-400x139.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure3-600x208.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure3-800x277.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/02/Figure3.png 949w" sizes="(max-width: 640px) 100vw, 949px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Figure 3 — The capability–risk spectrum of AI agent deployment. As the set of tools, financial access, and anonymization options increases from left to right, both the productive potential and the governance challenges grow. 
The critical gap lies in attribution and intervention: the further right on the spectrum, the harder it becomes to identify the responsible operator and to stop an agent mid-execution.</div></div></div></div><div class="fusion-title title fusion-title-32 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);"><p id="sec4">4. Conclusion</p></h2></div><div class="fusion-text fusion-text-75 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The transition from Large Language Models to AI agents represents a qualitative shift, not merely an incremental improvement. An LLM is a powerful but passive system: it processes a sequence and returns a prediction. An agent wraps that same model in a perception–action loop, connects it to tools and environments, and gives it a task — transforming it from a text generator into an autonomous executor.</p>
<p>The technical architecture is well understood: transformers provide the reasoning core, tool APIs define the action space, and the task specification provides the objective. What remains poorly understood is the governance layer. Current regulatory frameworks assume that harmful actions are performed by identifiable persons or organizations. Autonomous agents operating through anonymized infrastructure and decentralized finance challenge this assumption at every level.</p>
<p>The constructive applications — urban-planning copilots, scientific research acceleration, coding agents, accessibility tools — are genuinely transformative. But the same infrastructure that enables an agent to autonomously manage a reforestation project can, with a different task specification, enable coordinated harm at scale. The difference lies entirely in human intent — the risk is real, but it stems from the human behind the agent, not from the AI itself — and the architectures we are building make that intent increasingly easy to deploy and increasingly difficult to trace.</p>
<p>For the urban analytics and geospatial community, agents offer a clear path toward more integrated, iterative, and intelligent workflows — from site analysis to design generation to policy evaluation. But as practitioners who build and deploy these systems, we have a responsibility to engage with the governance questions, not only the capabilities.</p>
</div><div class="fusion-title title fusion-title-33 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);"><p id="refs">References</p></h2></div><div class="fusion-text fusion-text-76 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>[1] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I. (2017). &#8220;Attention Is All You Need.&#8221; <em>Advances in Neural Information Processing Systems</em>, 30. <a class="keychainify-checked" href="https://arxiv.org/abs/1706.03762">arXiv:1706.03762</a>.</p>
<p>[2] Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S. and Dean, J. (2013). &#8220;Distributed Representations of Words and Phrases and their Compositionality.&#8221; <em>Advances in Neural Information Processing Systems</em>, 26.</p>
<p>[3] Sutton, R.S. and Barto, A.G. (2018). <em>Reinforcement Learning: An Introduction</em>, 2nd edition. MIT Press.</p>
<p>[4] Russell, S.J. and Norvig, P. (2021). <em>Artificial Intelligence: A Modern Approach</em>, 4th edition. Pearson.</p>
<p>[5] Yao, S., Zhao, J., Yu, D., Du, N., Shafran, I., Narasimhan, K. and Cao, Y. (2023). &#8220;ReAct: Synergizing Reasoning and Acting in Language Models.&#8221; <em>ICLR 2023</em>. <a class="keychainify-checked" href="https://arxiv.org/abs/2210.03629">arXiv:2210.03629</a>.</p>
<p>[6] Schick, T., Dwivedi-Yu, J., Dessì, R., Raileanu, R., Lomeli, M., Hambro, E., Zettlemoyer, L., Cancedda, N. and Scialom, T. (2023). &#8220;Toolformer: Language Models Can Teach Themselves to Use Tools.&#8221; <em>NeurIPS 2023</em>. <a class="keychainify-checked" href="https://arxiv.org/abs/2302.04761">arXiv:2302.04761</a>.</p>
<p>[7] Park, J.S., O&#8217;Brien, J.C., Cai, C.J., Morris, M.R., Liang, P. and Bernstein, M.S. (2023). &#8220;Generative Agents: Interactive Simulacra of Human Behavior.&#8221; <em>UIST 2023</em>. <a class="keychainify-checked" href="https://arxiv.org/abs/2304.03442">arXiv:2304.03442</a>.</p>
<p>[8] Bran, A.M., Cox, S., Schilter, O., Baldassari, C., White, A.D. and Schwaller, P. (2023). &#8220;ChemCrow: Augmenting large-language models with chemistry tools.&#8221; <a class="keychainify-checked" href="https://arxiv.org/abs/2304.05376">arXiv:2304.05376</a>.</p>
<p>[9] Shevlane, T., Farquhar, S., Garfinkel, B., et al. (2023). &#8220;Model evaluation for extreme risks.&#8221; <a class="keychainify-checked" href="https://arxiv.org/abs/2305.15324">arXiv:2305.15324</a>.</p>
<p>[10] Chase, H. (2022). LangChain. <a class="keychainify-checked" href="https://github.com/langchain-ai/langchain">github.com/langchain-ai/langchain</a>.</p>
<p>[11] Richards, T. (2023). AutoGPT. <a class="keychainify-checked" href="https://github.com/Significant-Gravitas/AutoGPT">github.com/Significant-Gravitas/AutoGPT</a>.</p>
<p>[12] Cognition Labs (2024). Devin: The First AI Software Engineer. <a class="keychainify-checked" href="https://www.cognition.ai/blog/introducing-devin">cognition.ai</a>.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-10 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-77"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--5" data-awb-toc-id="5" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-22 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/from-llms-to-ai-agents-architecture-capabilities-risks/">From Large Language Models to Autonomous AI Agents — Architecture, Capabilities, and Emerging Risks</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/from-llms-to-ai-agents-architecture-capabilities-risks/feed/</wfw:commentRss>
			<slash:comments>1</slash:comments>
		
		
			</item>
		<item>
		<title>A Stable and Reproducible Vision–Language Inference Engine for SAGAI v1.1</title>
		<link>https://urbangeoanalytics.com/a-stable-and-reproducible-vision-language-inference-engine-for-sagai-v1-1/</link>
					<comments>https://urbangeoanalytics.com/a-stable-and-reproducible-vision-language-inference-engine-for-sagai-v1-1/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Wed, 17 Dec 2025 17:03:56 +0000</pubDate>
				<category><![CDATA[Python]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[GIS]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Spatial Analysis]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2275</guid>

					<description><![CDATA[<p>SAGAI v1.1 introduces Module 3 v2.0, a stable and reproducible vision–language inference engine for streetscape analysis. Built exclusively on Hugging Face LLaVA models, it enables robust multimodal processing of street-level images for large-scale urban and geospatial analysis.</p>
<p>The post <a href="https://urbangeoanalytics.com/a-stable-and-reproducible-vision-language-inference-engine-for-sagai-v1-1/">A Stable and Reproducible Vision–Language Inference Engine for SAGAI v1.1</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-6 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-11 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-23 hover-type-none"><img decoding="async" width="1536" height="1024" title="Sagai 1.1" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1.png" alt class="img-responsive wp-image-2278" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/Sagai-1.1.png 1536w" sizes="(max-width: 640px) 100vw, 1200px" /></span></div><div class="fusion-text fusion-text-78"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-79" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="142" data-end="159">Module 3 v2.0</strong> is the refactored inference engine of <strong data-start="198" data-end="212" data-is-only-node="">SAGAI v1.1</strong>, designed for stable and reproducible vision–language analysis of streetscape images.</li>
<li>The new architecture relies <strong data-start="329" data-end="389">exclusively on Hugging Face–native LLaVA models and APIs</strong>, removing dependencies on research codebases.</li>
<li>Multimodal prompting, image–text alignment, and inference are handled through <strong data-start="516" data-end="555">standardized Transformers workflows</strong>, ensuring long-term compatibility.</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-34 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Introduction</h2></div><div class="fusion-text fusion-text-80 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="438" data-end="784">Module 3 is the inference core of the <strong data-start="476" data-end="548">SAGAI (Streetscape Analysis with Generative Artificial Intelligence)</strong> framework. Its role is to transform large collections of street-level images into <strong data-start="631" data-end="667">structured, quantitative outputs</strong> using vision–language models (VLMs), enabling systematic streetscape analysis and subsequent geospatial aggregation.</p>
<p data-start="786" data-end="1114">With <strong data-start="791" data-end="805">SAGAI v1.1</strong>, Module 3 has been released in a new major version (<strong data-start="858" data-end="875">Module 3 v2.0</strong>) that introduces a fully standardized and maintenance-safe inference architecture. This update reflects both the maturation of multimodal model ecosystems and the need for long-term reproducibility in large-scale urban analysis pipelines.</p>
<p data-start="1116" data-end="1480">Earlier iterations of Module 3 were developed during a period of rapid evolution in both LLaVA research codebases and execution environments such as Google Colab. As multimodal models transitioned toward <strong data-start="1320" data-end="1388">Transformers-native implementations distributed via Hugging Face</strong>, assumptions embedded in earlier hybrid workflows became increasingly difficult to sustain.</p>
<p data-start="1482" data-end="1811">Module 3 v2.0 addresses this evolution by aligning the entire inference pipeline with <strong data-start="1568" data-end="1609">official Hugging Face multimodal APIs</strong>. Model loading, prompt formatting, image–text fusion, and generation are now handled through maintained and versioned components, ensuring compatibility across environments, models, and future updates.</p>
<p data-start="1813" data-end="2040">This document details the architectural context motivating the update, the design choices behind the refactored inference engine, and the rationale for releasing Module 3 v2.0 as a long-term, stable component of <strong data-start="2025" data-end="2039">SAGAI v1.1</strong>.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-35 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Architectural Context of Module 3 in the Previous Version: SAGAI v1.0</h2></div><div class="fusion-text fusion-text-81 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The initial implementation of Module 3 (SAGAI v1.0) relied on a <strong data-start="280" data-end="353">hybrid architecture that mixed two incompatible sources of LLaVA code</strong>, combined with a rapidly evolving execution environment in Google Colab. This design choice made the pipeline fragile and ultimately unsustainable.</p>
<p data-start="503" data-end="1003">First, the pipeline simultaneously depended on the <strong data-start="554" data-end="581">LLaVA GitHub repository</strong> (<code data-start="583" data-end="602"><span style="font-size: 10.0pt;">haotian-liu/LLaVA</span></code>) and on <strong data-start="611" data-end="652">Hugging Face–hosted model checkpoints</strong>. The GitHub repository is a research-oriented codebase under active development. Its internal APIs, class structures, and utilities evolve rapidly and are not version-locked. Constructors, module paths, and helper functions may change or disappear without notice, and the repository is not designed to maintain backward compatibility across releases.</p>
<p data-start="1005" data-end="1528">At the same time, pretrained model weights were downloaded from Hugging Face. These checkpoints follow the <strong data-start="1112" data-end="1153">Transformers-native multimodal format</strong>, using Hugging Face–specific configuration files, processors, and model classes (e.g., <code data-start="1241" data-end="1276"><span style="font-size: 10.0pt;">LlavaNextForConditionalGeneration</span></code>, <code data-start="1278" data-end="1293"><span style="font-size: 10.0pt;">AutoProcessor</span></code>, and chat templates). This architecture is fundamentally different from the internal design assumed by the GitHub LLaVA code, which relies on custom token insertion, internal vision tower management, and non-Transformers abstractions.</p>
<p data-start="1530" data-end="1846">As a result, the pipeline operated in a <strong data-start="1570" data-end="1593">structural mismatch</strong>: GitHub code expected architectural fields, model attributes, and tokenizer behavior that were not present in Hugging Face checkpoints, while Hugging Face checkpoints expected model wrappers and configuration logic that the GitHub code did not provide.</p>
<p data-start="1848" data-end="2245">This fragility was exposed when <strong data-start="1880" data-end="1929">Google Colab upgraded its backend environment</strong> in early 2025. Major changes included Python 3.12, NumPy ≥ 2.0 (introducing ABI-breaking changes for compiled extensions), newer PyTorch releases (≥ 2.2), and updated system libraries. These updates caused widespread failures in binary dependencies and research codebases that were not aligned with the new runtime.</p>
<p data-start="2247" data-end="2577">In practice, this led to errors such as NumPy ABI incompatibilities, PyTorch extension failures, missing or renamed modules, and import errors in LLaVA GitHub utilities. Because the pipeline depended on both unstable research code and binary-sensitive extensions, even minor environment updates were sufficient to break execution.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-36 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Refactoring of the Inference Engine in SAGAI v1.1</h2></div><div class="fusion-text fusion-text-82 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p style="text-align: justify;">Module 3 has been fully refactored to <strong data-start="341" data-end="406">remove any dependency on the original LLaVA GitHub repository</strong>. The inference pipeline now relies exclusively on <strong data-start="457" data-end="502">Hugging Face–native LLaVA models and APIs</strong>, ensuring long-term stability and compatibility with evolving software environments.</p>
<p style="text-align: justify;" data-start="589" data-end="1175">In the previous architecture, the script depended on cloning the LLaVA GitHub repository, installing it in editable mode, and importing internal modules (<code data-start="743" data-end="752"><span style="font-size: 10.0pt;">llava.*</span></code>). Prompts were manually assembled using LLaVA-specific multimodal tokens (e.g., <code data-start="833" data-end="845"><span style="font-size: 10.0pt;">&lt;im_start&gt;</span></code>, <code data-start="847" data-end="856"><span style="font-size: 10.0pt;">&lt;image&gt;</span></code>), custom separators, and internal utilities. Image tokens and embeddings were explicitly inserted into the prompt, tightly coupling the forward pass to a specific implementation of the LLaVA codebase. As a result, updates to Google Colab, PyTorch, NumPy, or the LLaVA repository frequently introduced breaking changes.</p>
<p style="text-align: justify;" data-start="1177" data-end="1752">The current implementation removes all such dependencies. Prompt formatting and multimodal input construction are now handled entirely through Hugging Face abstractions. Prompts are formatted using <code data-start="1375" data-end="1408"><span style="font-size: 10.0pt;">processor.apply_chat_template()</span></code>, while images and text are combined using <code data-start="1451" data-end="1480"><span style="font-size: 10.0pt;">processor(images=…, text=…)</span></code>. Image embedding alignment, multimodal token placement, and chat formatting are fully managed by the Hugging Face processor and model configuration. Inference is performed using the standard <code data-start="1672" data-end="1690"><span style="font-size: 10.0pt;">model.generate()</span></code> API, without any custom token handling or internal utilities.</p>
<p style="text-align: justify;" data-start="1754" data-end="2177">This refactoring makes the SAGAI inference engine <strong data-start="1804" data-end="1862">model-agnostic within the Hugging Face LLaVA ecosystem</strong>. The same forward pass is compatible with LLaVA-NeXT (v1.6), LLaVA-Interleave, LLaVA-OneVision, and future Hugging Face LLaVA releases that expose a processor and chat template. Switching between models or architectures requires only changing the <code data-start="2110" data-end="2120"><span style="font-size: 10.0pt;">model_id</span></code>, with no modification to prompt logic or inference code.</p>
<p style="text-align: justify;" data-start="2179" data-end="2639">To ensure reliable downstream analysis, Module 3 also includes a dedicated <strong data-start="2254" data-end="2291">numeric output stabilization step</strong>. After decoding the model response, any prompt echoes or metadata—including residual <code data-start="2377" data-end="2395"><span style="font-size: 10.0pt;">[INST] … [/INST]</span></code> segments—are removed. The final output is parsed using a simple regular expression to retain only numeric values (e.g., <code data-start="2516" data-end="2519"><span style="font-size: 10.0pt;">0</span></code>, <code data-start="2521" data-end="2524"><span style="font-size: 10.0pt;">1</span></code>, <code data-start="2526" data-end="2529"><span style="font-size: 10.0pt;">2</span></code>, <code data-start="2531" data-end="2536"><span style="font-size: 10.0pt;">1.5</span></code>). This guarantees clean, machine-readable outputs and a stable CSV format across all supported models.</p>
<p style="text-align: justify;" data-start="2641" data-end="3230">Model loading has been simplified and standardized using Hugging Face–approved APIs. Both the processor and the model are instantiated directly from Hugging Face model cards via <code data-start="2819" data-end="2836"><span style="font-size: 10.0pt;">from_pretrained</span></code>, with optional 4-bit quantization enabled through <code data-start="2887" data-end="2906"><span style="font-size: 10.0pt;">load_in_4bit=True</span></code>. This eliminates the need for manual vision-tower initialization, deprecated classes, or custom C++ operators, and avoids common incompatibilities related to PyTorch, CUDA, or NumPy upgrades in Google Colab. Official Hugging Face code paths ensure that pretrained weights are always matched with the correct implementation.</p>
<p style="text-align: justify;" data-start="3232" data-end="3456">Optional authentication using a Hugging Face access token is supported to avoid rate limits and improve download reliability when working with large checkpoints, though public models remain accessible without authentication.</p>
<p style="text-align: justify;" data-start="3458" data-end="3697">Overall, this refactoring significantly improves <strong data-start="3507" data-end="3559">robustness, reproducibility, and maintainability</strong>, while enabling systematic experimentation across multiple LLaVA variants and quantization settings within a unified inference framework.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-37 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Rationale for a Long-Term, Stable Release</h2></div><div class="fusion-text fusion-text-83 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p style="text-align: justify;">The refactored inference system in Module 3 is designed as a <strong data-start="332" data-end="371">long-term, maintenance-safe release</strong>. This is achieved by aligning the entire pipeline with Hugging Face’s officially supported multimodal APIs and model distribution mechanisms.</p>
<p style="text-align: justify;" data-start="560" data-end="1128">First, the new architecture is <strong data-start="591" data-end="637">robust to Google Colab environment updates</strong>. All critical dependencies—Python (≥3.12), NumPy (≥2.0), PyTorch (2.x), CUDA wheels, and BitsAndBytes quantization—are now managed through Hugging Face Transformers and its dependency resolution. Because the model code, processor logic, and quantization pathways are maintained upstream, updates to Colab or its underlying libraries no longer break the inference pipeline. As long as Hugging Face continues to support the model card, the code remains functional without manual intervention.</p>
<p style="text-align: justify;" data-start="1130" data-end="1617">Second, the system relies exclusively on <strong data-start="1171" data-end="1218">official Hugging Face–maintained components</strong>. Core classes such as <code data-start="1241" data-end="1276"><span style="font-size: 10.0pt;">LlavaNextForConditionalGeneration</span></code>, <code data-start="1278" data-end="1298"><span style="font-size: 10.0pt;">LlavaNextProcessor</span></code>, chat templates, and multimodal preprocessing logic are all part of the Transformers library. These components are actively maintained, versioned, and tested by Hugging Face, providing a level of stability and backward compatibility that is not guaranteed when relying on research repositories or development branches.</p>
<p style="text-align: justify;" data-start="1619" data-end="2162">Third, the new setup significantly improves <strong data-start="1663" data-end="1682">reproducibility</strong>. Each run explicitly references a fixed Hugging Face model checkpoint via the <code data-start="1761" data-end="1771"><span style="font-size: 10.0pt;">model_id</span></code>, ensuring that the same weights, architecture, and prompt template are used across sessions and machines. In addition, generation parameters (sampling strategy, temperature, nucleus sampling, and output length) are explicitly defined, enabling consistent and repeatable results across runs.</p>
<p style="text-align: justify;" data-start="2164" data-end="2626">Fourth, the architecture is <strong data-start="2192" data-end="2230">easy to extend and experiment with</strong>. Switching between different LLaVA variants now requires changing a single configuration line (<code data-start="2326" data-end="2336"><span style="font-size: 10.0pt;">model_id</span></code>). The same inference code supports LLaVA 1.5 models, LLaVA-NeXT (v1.6), Interleave models, OneVision models, and larger checkpoints (e.g., 13B or 34B), including variants based on Mistral, Vicuna, Qwen, or Yi backbones. No changes to prompt construction or forward-pass logic are required.</p>
<p style="text-align: justify;" data-start="2628" data-end="3091">Finally, the multimodal pipeline is now <strong data-start="2668" data-end="2716">cleanly abstracted and internally consistent</strong>. Hugging Face handles all low-level details, including image preprocessing, chat formatting, positional embeddings, image sequence length management, and attention masking. This eliminates a large class of subtle bugs related to tensor alignment and multimodal token placement, while ensuring that the vision and language components remain synchronized across model updates.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-38 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. References and links</h2></div><div class="fusion-text fusion-text-84 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><ul>
<li style="text-align: justify;">
<p class="heading-element" dir="auto" tabindex="-1">Streetscape Analysis with Generative AI (SAGAI) on GitHub with the v1.1 update. <a class="keychainify-checked" href="https://github.com/perezjoan/SAGAI">https://github.com/perezjoan/SAGAI</a></p>
</li>
<li>Perez, J. and Fusco, G. (2025) <em>Streetscape Analysis with Generative AI (SAGAI): Vision-Language Assessment and Mapping of Urban Scenes</em>. Geomatica, 77(2), 100063, 18p. Available at: <a class="keychainify-checked" href="https://www.sciencedirect.com/science/article/pii/S1195103625000199" rel="nofollow">https://www.sciencedirect.com/science/article/pii/S1195103625000199</a></li>
</ul>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-12 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-85"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--6" data-awb-toc-id="6" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/a-stable-and-reproducible-vision-language-inference-engine-for-sagai-v1-1/">A Stable and Reproducible Vision–Language Inference Engine for SAGAI v1.1</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/a-stable-and-reproducible-vision-language-inference-engine-for-sagai-v1-1/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.3 — Mask-Controlled Editing With Prompt or Reference Guidance</title>
		<link>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/</link>
					<comments>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 04 Dec 2025 22:17:40 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2236</guid>

					<description><![CDATA[<p>Version 1.3 of Qwen Image Edit for Urbanism introduces mask-controlled editing in ComfyUI, enabling precise, localized image transformations using prompts or reference images. The new Grow Mask utility softens boundaries, preserves unmasked areas, and integrates seamlessly with existing single-image and sequential workflows.</p>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/">Qwen Image Edit for Urbanism v1.3 — Mask-Controlled Editing With Prompt or Reference Guidance</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-7 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-13 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-24 hover-type-none"><img decoding="async" width="1536" height="1024" title="COVER" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER.png" alt class="img-responsive wp-image-2266" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER.png 1536w" sizes="(max-width: 640px) 100vw, 1200px" /></span></div><div class="fusion-text fusion-text-86"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-87" style="--awb-margin-top:-30px;"><ul>
<li>Adds a new <strong data-start="575" data-end="597">Mask Editing Block</strong> enabling localized, structurally accurate edits while preserving the rest of the image.</li>
<li>Introduces a <strong data-start="703" data-end="716">Grow Mask</strong> utility with expand and blur parameters, plus visual mask preview.</li>
<li>Replaces <em data-start="797" data-end="810">EmptyLatent</em> with <strong data-start="816" data-end="849">VAE Encode → Set Latent Noise</strong> to avoid global degradation.</li>
<li>Mask block is optional: <strong data-start="907" data-end="942">Blocks 1 and 2 remain unchanged</strong> for prompt-only and sequential workflows.</li>
</ul>
</div><div class="fusion-text fusion-text-88 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="430" data-end="740">Qwen Image Edit for Urbanism continues to evolve into a practical, research-grade tool for architectural and urban experimentation. After the batch-processing capabilities <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/">introduced in v1.2</a>, version 1.3 focuses on the feature most requested by designers and analysts: precise control over <em data-start="720" data-end="727">where</em> edits occur.</p>
<p data-start="742" data-end="1049">In image-to-image workflows, uncontrolled changes are a common issue. Even a very specific prompt can lead diffusion models to reinterpret the whole scene. Version 1.3 introduces mask-restricted editing, allowing Qwen to modify only a selected region while preserving the rest of the image exactly as it is.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-39 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Why Masks Matter for Urban Editing</h2></div><div class="fusion-text fusion-text-89 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1102" data-end="1236">Until now, the workflow relied on <strong data-start="1136" data-end="1152">Empty Latent</strong> to initialize diffusion. This approach is simple but has an unavoidable drawback:</p>
<p data-start="1238" data-end="1326"><strong data-start="1238" data-end="1326">The entire latent space is regenerated — even outside the region you want to modify.</strong></p>
<p data-start="1328" data-end="1543">This often produces familiar and unwanted side effects: façades shift slightly, lighting changes, road textures dissolve, or skies take on new tones, even when the prompt refers only to a specific object or surface. To address this, v1.3 reorganizes the initialization stage around:</p>
<p data-start="1613" data-end="1659"><strong>VAE Encode → Set Latent Noise (masked)</strong></p>
<p data-start="1661" data-end="1707">This change restructures the model’s behavior:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Component</th>
<th align="left">Effect</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">VAE Encode</td>
<td align="left">Converts the original image into latent space with high fidelity.</td>
</tr>
<tr>
<td align="left">Set Latent Noise (with mask)</td>
<td align="left">Adds noise only <em data-start="1895" data-end="1903">inside</em> the mask, preserving everything else.</td>
</tr>
<tr>
<td align="left">Mask-guided denoising</td>
<td align="left">Qwen edits only where permitted; unmasked areas remain pixel-identical.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-90 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1102" data-end="1236">This leads to crisp preservation of buildings, street furniture, sky, shadows, and lighting outside the edited zone. Localized edits integrate naturally: you can green a façade, test a bike lane, adjust a plaza boundary, or replace a storefront without disturbing the rest of the street.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-40 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Prompt-Only vs. Reference-Guided Mask Editing</h2></div><div class="fusion-text fusion-text-91 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="2400" data-end="2469">Version 1.3 supports both textual and visual control of masked edits.</p>
<h5 data-start="2471" data-end="2508"><strong data-start="2475" data-end="2506">A. Prompt-Only Mask Editing</strong></h5>
<p data-start="2509" data-end="2638">You draw a mask, provide a prompt, and Qwen modifies only the selected region. This works especially well for operations such as:</p>
<ul data-start="2640" data-end="2741">
<li data-start="2640" data-end="2684">
<p data-start="2642" data-end="2684">replacing asphalt with permeable paving,</p>
</li>
<li data-start="2685" data-end="2707">
<p data-start="2687" data-end="2707">adding vegetation,</p>
</li>
<li data-start="2708" data-end="2741">
<p data-start="2710" data-end="2741">transforming a façade material.</p>
</li>
</ul>
<h5 data-start="2743" data-end="2791"><strong data-start="2747" data-end="2789">B. Mask Editing With a Reference Image</strong></h5>
<p data-start="2792" data-end="2875">A second image may be supplied to guide structure, texture, or color. This enables:</p>
<ul data-start="2877" data-end="3050">
<li data-start="2877" data-end="2908">
<p data-start="2879" data-end="2908">borrowing material samples,</p>
</li>
<li data-start="2909" data-end="2964">
<p data-start="2911" data-end="2964">transplanting vegetation from one scene to another,</p>
</li>
<li data-start="2965" data-end="3001">
<p data-start="2967" data-end="3001">matching architectural textures,</p>
</li>
<li data-start="3002" data-end="3050">
<p data-start="3004" data-end="3050">transferring lighting characteristics locally.</p>
</li>
</ul>
<p data-start="3052" data-end="3119">Both modes are interchangeable, and both respect the mask boundary. Masks drawn directly in ComfyUI are typically sharp, binary shapes. Diffusion models, however, perform best when mask boundaries are soft and slightly extended.</p>
<p data-start="3344" data-end="3409">Version 1.3 introduces a <strong data-start="3369" data-end="3382">Grow Mask</strong> node with two parameters:</p>
<ul data-start="3411" data-end="3650">
<li data-start="3411" data-end="3544">
<p data-start="3413" data-end="3544"><strong data-start="3413" data-end="3423">Expand</strong>: increases the mask outward, helping cover tiny gaps or irregular brush strokes and preventing thin seams at the edge.</p>
</li>
<li data-start="3545" data-end="3650">
<p data-start="3547" data-end="3650"><strong data-start="3547" data-end="3562">Blur Radius</strong>: softens the boundary, allowing Qwen to blend new and existing textures more naturally.</p>
</li>
</ul>
<p data-start="3652" data-end="3729">Together, these parameters define the effective “influence zone” of the edit.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-0 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-92" style="--awb-content-alignment:justify;"><p data-start="4061" data-end="4244">To make mask-based editing easier to control, v1.3 includes a preview step.<br data-start="4136" data-end="4139" />The workflow converts the (expanded and blurred) mask into an image and displays it directly in the UI.</p>
<p data-start="4246" data-end="4287">This makes it straightforward to confirm:</p>
<ul data-start="4289" data-end="4481">
<li data-start="4289" data-end="4323">
<p data-start="4291" data-end="4323">whether the boundary is clean,</p>
</li>
<li data-start="4324" data-end="4372">
<p data-start="4326" data-end="4372">whether the expansion radius is appropriate,</p>
</li>
<li data-start="4434" data-end="4481">
<p data-start="4436" data-end="4481">whether the blur transition is smooth enough.</p>
</li>
</ul>
<p data-start="4483" data-end="4613">For tasks involving building edges, curbs, signage, crosswalks, or paving boundaries, this preview dramatically improves accuracy.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-1 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-25 hover-type-none"><img decoding="async" width="786" height="568" alt="The grow mask with blur and its preview" title="mask" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask.png" class="img-responsive wp-image-2248" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask-200x145.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask-400x289.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask-600x434.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask.png 786w" sizes="(max-width: 640px) 100vw, 600px" /></span></div><div class="fusion-text fusion-text-93"><p>The grow mask with blur and its preview</p>
</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-41 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. How the v1.3 Workflow Fits Into the Existing System</h2></div><div class="fusion-text fusion-text-94 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="4683" data-end="4746">The mask block replaces only the latent-initialization stage.</p>
<p data-start="4748" data-end="4860">Everything else — prompts, reference conditioning, sampling, and the full QwenEdit pipeline — remains unchanged.</p>
<p data-start="4862" data-end="4886"><strong data-start="4862" data-end="4886">Simplified pipeline:</strong></p>
</div><div class="fusion-text fusion-text-95"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">Base Image
     ↓
User Mask → Grow Mask → Preview Mask
     ↓
VAE Encode
     ↓
Set Latent Noise (masked)
     ↓
Qwen Edit Pipeline
     (prompt-only or reference-guided)
     ↓
VAE Decode
     ↓
Final Output
</pre>
</div><div class="fusion-text fusion-text-96 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">This structure makes editing predictable and reproducible, but it is important to clarify how <strong data-start="472" data-end="504">v1.3 is organized internally</strong>. The workflow is now composed of <strong data-start="538" data-end="574">three completely separate blocks</strong>, and <strong data-start="580" data-end="614">each block loads its own model</strong>:</p>
<ul data-start="617" data-end="797">
<li data-start="617" data-end="687">
<p data-start="619" data-end="687"><strong data-start="619" data-end="631">Block 1:</strong> Single-image edit (prompt-only or prompt + reference)</p>
</li>
<li data-start="688" data-end="749">
<p data-start="690" data-end="749"><strong data-start="690" data-end="702">Block 2:</strong> Sequential multi-image editing</p>
</li>
<li data-start="750" data-end="797">
<p data-start="752" data-end="797"><strong data-start="752" data-end="764">Block 3:</strong> Mask-based editing (new in v1.3)</p>
</li>
</ul>
<p data-start="799" data-end="1100">All three blocks coexist in the same workflow, and the user simply chooses which one to run.<br data-start="891" data-end="894" />In ComfyUI, this is done by <strong data-start="922" data-end="991">right-clicking the group frame and selecting <em data-start="969" data-end="977">Active</em> or <em data-start="981" data-end="989">Bypass</em></strong>.<br data-start="992" data-end="995" />Only the active block executes; the others are skipped. Nothing else in the pipeline needs to be changed.</p>
<p data-start="1102" data-end="1355">Because the blocks are independent, they can also be <strong data-start="1155" data-end="1167">combined</strong>. For example, the user may activate the sequential loader from Block 2 and route its output into the mask-editing block (Block 3) to run a full masked transformation on a batch of images.</p>
<p data-start="1357" data-end="1645">To create the mask itself, the user loads the base image in <strong data-start="1417" data-end="1433">Load Image 1</strong>, right-clicks the preview, and selects <strong data-start="1473" data-end="1496">Open in Mask Editor</strong>. The drawn mask is then processed by the Grow Mask node before entering the latent-noise stage, ensuring smooth boundaries and predictable behavior.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-42 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Experimentation</h2></div><div class="fusion-text fusion-text-97 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">To test the new mask-based editing block, we start by defining the editable region directly in ComfyUI. After loading the base image, the user <strong data-start="416" data-end="478">right-clicks the preview and selects “Open in Mask Editor”</strong>, then paints the area where the new cyclist should appear. Before the edit, this part of the street is empty.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-26" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-26 hover-type-none"><img decoding="async" width="2000" height="1130" alt="mask" title="mask1" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-scaled.png" class="img-responsive wp-image-2257" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-200x113.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-400x226.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-600x339.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-800x452.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-1200x678.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 2000px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Adding a Mask in ComfyUI</div></div></div></div><div class="fusion-text fusion-text-98 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">Once the mask is created, it flows through Block 3: the Grow Mask node expands and softens the boundary, the workflow encodes the base image, and 
noise is added <strong data-start="793" data-end="824">only inside the masked zone</strong>. A second image containing a cyclist is provided as a reference, and the prompt instructs Qwen to place the rider onto the bicycle lane.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-27" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-27 hover-type-none"><img decoding="async" width="1509" height="1241" title="block3" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3.png" alt class="img-responsive wp-image-2258" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-200x164.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-400x329.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-600x493.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-800x658.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-1200x987.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3.png 1509w" sizes="(max-width: 640px) 100vw, 1509px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The whole pipeline of block 3</div></div></div></div><div class="fusion-text fusion-text-99 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">The result is a localized insertion: the cyclist from Image 2 is generated precisely inside the masked area, while the rest of the photograph remains 
unchanged. This demonstrates the core purpose of Block 3 — precise, mask-controlled edits that do not disturb the surrounding urban context.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-28" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-28 hover-type-none"><img decoding="async" width="1248" height="832" title="test_00010_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_.png" alt class="img-responsive wp-image-2261" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_.png 1248w" sizes="(max-width: 640px) 100vw, 1248px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The result</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" 
style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-43 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-100 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">You can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph</strong> that we built in this post, <strong>Qwen Image Edit for Urbanism v1.3</strong>, from the link below or from our Git repository, and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-1 fusion-button-default-span fusion-button-default-type" target="_self" download="Gwen-Edit-UGA-v1.3.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/12/Gwen-Edit-UGA-v1.3.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.3</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.3</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-14 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-101"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--7" data-awb-toc-id="7" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-29 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/">Qwen Image Edit for Urbanism v1.3 — Mask-Controlled Editing With Prompt or Reference Guidance</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Deploy a Guest Book on an EVM Blockchain Using Remix</title>
		<link>https://urbangeoanalytics.com/deploy-a-guest-book-on-an-evm-blockchain-using-remix/</link>
					<comments>https://urbangeoanalytics.com/deploy-a-guest-book-on-an-evm-blockchain-using-remix/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 27 Nov 2025 16:17:04 +0000</pubDate>
				<category><![CDATA[Blockchain]]></category>
		<category><![CDATA[Intermediate]]></category>
		<category><![CDATA[Ethereum]]></category>
		<category><![CDATA[Metamask]]></category>
		<category><![CDATA[Remix]]></category>
		<category><![CDATA[Smart Contract]]></category>
		<category><![CDATA[Solidity]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=1792</guid>

					<description><![CDATA[<p>Learn how to deploy your first smart contract on an Ethereum-compatible blockchain using Remix and the Sepolia testnet. In this beginner-friendly guide, we build a simple on-chain guestbook, connect MetaMask, verify the contract on Etherscan, and interact with it directly through the blockchain. A perfect starting point for anyone curious about smart contracts, Solidity, and decentralized applications.</p>
<p>The post <a href="https://urbangeoanalytics.com/deploy-a-guest-book-on-an-evm-blockchain-using-remix/">Deploy a Guest Book on an EVM Blockchain Using Remix</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-8 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-15 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-30" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-30 hover-type-none"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/guestbook.png" alt class="img-responsive wp-image-2231" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/guestbook-300x200.png 300w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/guestbook-1024x683.png 1024w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/guestbook.png 1536w" sizes="(max-width: 1536px) 100vw, 1536px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"></div></div></div><div class="fusion-text fusion-text-102"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-103" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="123" data-end="160">Deploy Your First Smart Contract:</strong> Learn how to publish a simple on-chain guestbook to an Ethereum testnet using Remix, the browser-based IDE for Solidity.</li>
<li><strong data-start="284" data-end="316">Connect MetaMask to Sepolia:</strong> Add the Sepolia test network to MetaMask, get free test ETH, and prepare your wallet for deployment.</li>
<li><strong data-start="420" data-end="456">Interact On-Chain via Etherscan:</strong> Write messages to the blockchain, read stored data, and understand how smart contracts expose transparent read/write functions.</li>
</ul>
</div><div class="fusion-text fusion-text-104 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="155" data-end="458">Have you ever wanted to experiment with blockchain development but didn’t know where to start? In this beginner-friendly tutorial, we’ll guide you through deploying your very first <strong data-start="336" data-end="354">smart contract</strong> on a <strong data-start="360" data-end="382">blockchain testnet</strong> using <strong data-start="389" data-end="398">Remix</strong>, an in-browser IDE designed for Ethereum-compatible chains. Whether you&#8217;re curious about decentralized applications (dApps), NFTs, or simply want to see how code becomes blockchain logic, this guide will get you started — no prior blockchain experience required.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-44 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Introduction</h2></div><div class="fusion-text fusion-text-105 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>What is a Smart Contract?<br />
</strong></p>
<p>A <strong data-start="705" data-end="723">smart contract</strong> is a self-executing program that runs on a blockchain. It stores logic and rules in code — such as sending tokens or recording data — and once deployed, it executes automatically when conditions are met. Smart contracts are immutable (can’t be changed once deployed) and transparent, making them ideal for decentralized finance (DeFi), supply chains, governance, and more. Solidity is the main programming language used to write smart contracts for Ethereum. It is a statically typed, contract-oriented language designed specifically for blockchain logic and storage. Because Solidity compiles to EVM bytecode, the same contract can run on <strong data-start="348" data-end="381">any EVM-compatible blockchain</strong> — including Ethereum, Polygon, Arbitrum, Optimism, Avalanche, BNB Chain, and many others.</p>
<p data-start="104" data-end="137"><strong>What is Remix and Why Use It?</strong></p>
<p><a class="decorated-link keychainify-checked" href="https://remix.ethereum.org" target="_new" rel="noopener" data-start="139" data-end="174">Remix</a> is a web-based Integrated Development Environment (IDE) designed specifically for writing, testing, and deploying smart contracts in the Solidity programming language. One of its biggest advantages is that it requires no installation or setup — everything runs directly in your browser. Remix integrates seamlessly with MetaMask and supports deployment to Ethereum-compatible testnets like Sepolia and Polygon Amoy. It also offers a powerful set of tools for compiling, debugging, and interacting with your contracts, making it an ideal choice for both beginners and experienced developers.</p>
<p><strong> </strong></p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-45 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Connect to Sepolia Testnet</h2></div><div class="fusion-text fusion-text-106 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="282" data-end="435">Before deploying a smart contract, you need a wallet connected to an Ethereum test network. If you’re new to MetaMask, start with our beginner guide here:</p>
<p data-start="437" data-end="586"><a class="keychainify-checked" href="https://urbangeoanalytics.com/getting-started-with-metamask-and-the-polygon-amoy-testnet/">👉 <strong data-start="440" data-end="504"><em data-start="442" data-end="502">Getting Started with MetaMask and the Polygon Amoy Testnet</em></strong></a><br data-start="504" data-end="507" />(Use the same installation steps — MetaMask works the same for all EVM chains.)</p>
<p data-start="588" data-end="848">Once MetaMask is installed and your wallet is set up, you can add the <strong data-start="658" data-end="686">Ethereum Sepolia Testnet</strong>. Sepolia is Ethereum’s primary development and testing network. It behaves like mainnet, but uses test ETH (fake ETH) that you can request for free from faucets. Sepolia is ideal for learning and testing smart contracts because it behaves almost exactly like Ethereum mainnet. It mirrors the same architecture, gas mechanics, and transaction flow, which means everything you deploy or test on Sepolia works the same way on Ethereum. Developer tools such as Remix connect to it directly, and because Sepolia is widely supported, stable, and fast, it has become the standard environment for Ethereum development and education. Since Solidity compiles to EVM bytecode, contracts you deploy on Sepolia behave exactly like they will on Ethereum mainnet.</p>
<p data-start="1305" data-end="1329">To add Sepolia manually:</p>
<ol data-start="1331" data-end="1506">
<li data-start="1331" data-end="1349">
<p data-start="1334" data-end="1349">Open MetaMask</p>
</li>
<li data-start="1414" data-end="1465">
<p data-start="1417" data-end="1465">Click <strong data-start="1423" data-end="1463">Add network → Add a network manually</strong></p>
</li>
<li data-start="1466" data-end="1506">
<p data-start="1469" data-end="1506">Enter the following Sepolia settings:</p>
</li>
</ol>
<p data-start="1508" data-end="1694"><strong data-start="1508" data-end="1525">Network Name:</strong> Ethereum Sepolia<br data-start="1542" data-end="1545" /><strong data-start="1545" data-end="1561">New RPC URL:</strong> <a class="decorated-link cursor-pointer" target="_new" rel="noopener" data-start="1562" data-end="1585">https://rpc.sepolia.org</a><br data-start="1585" data-end="1588" /><strong data-start="1588" data-end="1601">Chain ID:</strong> 11155111<br data-start="1610" data-end="1613" /><strong data-start="1613" data-end="1633">Currency Symbol:</strong> ETH<br data-start="1637" data-end="1640" /><strong data-start="1640" data-end="1663">Block Explorer URL:</strong> <a class="decorated-link keychainify-checked" href="https://sepolia.etherscan.io" target="_new" rel="noopener" data-start="1664" data-end="1692">https://sepolia.etherscan.io</a></p>
<p data-start="1696" data-end="1941">Your MetaMask screen should now show the Sepolia network, ready to use. Once saved, you’ll be connected to the Ethereum Sepolia Testnet and can interact with dApps, deploy contracts from Remix, and send test transactions with no real-world cost. Just as Polygon’s Amoy testnet requires test POL tokens, Ethereum Sepolia requires <strong data-start="2067" data-end="2079">test ETH</strong> to pay for transactions. You can get free Sepolia ETH from multiple faucets. I recommend this one: <a class="keychainify-checked" href="https://sepolia-faucet.pk910.de/">https://sepolia-faucet.pk910.de/</a>.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-46 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Prepare your Smart Contract</h2></div><div class="fusion-text fusion-text-107 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="282" data-end="435">Now that MetaMask is connected to Sepolia, we can look at the smart contract we’ll deploy. Below is a minimal “GoldenBook” contract written in Solidity. It’s a simple on-chain guestbook: anyone can write a message, and the contract stores it permanently on the blockchain. Messages are saved along with the sender’s address and the time they were submitted, and the contract provides a function to retrieve everything that has been written so far.</p>
</div><div style="background:#1e1e1e;padding:20px;border-radius:6px;overflow-x:auto;font-family:Consolas,Monaco,monospace;font-size:14px;line-height:1.4;color:#d4d4d4;">
<pre style="margin:0;white-space:pre;"> 
<span style="color:#6A9955;">// SPDX-License-Identifier: MIT</span>
<span style="color:#569CD6;">pragma</span> <span style="color:#569CD6;">solidity</span> ^0.8.20;

<span style="color:#6A9955;">/// @title GoldenBook - A minimal on-chain guestbook (livre d'or)</span>
<span style="color:#569CD6;">contract</span> <span style="color:#4EC9B0;">GoldenBook</span> {

    <span style="color:#6A9955;">// Simple structure to store a message</span>
    <span style="color:#569CD6;">struct</span> <span style="color:#4EC9B0;">Message</span> {
        <span style="color:#4EC9B0;">address</span> author;        <span style="color:#6A9955;">// who sent it</span>
        <span style="color:#4EC9B0;">string</span> content;        <span style="color:#6A9955;">// the message text</span>
        <span style="color:#4EC9B0;">uint256</span> timestamp;    <span style="color:#6A9955;">// when it was written</span>
    }

    <span style="color:#6A9955;">// All messages stored on-chain</span>
    <span style="color:#4EC9B0;">Message</span>[] <span style="color:#569CD6;">private</span> messages;

    <span style="color:#6A9955;">/// @notice Add a new message to the guestbook</span>
    <span style="color:#6A9955;">/// @param content The text to save permanently on-chain</span>
    <span style="color:#569CD6;">function</span> <span style="color:#DCDCAA;">postMessage</span>(<span style="color:#4EC9B0;">string</span> <span style="color:#569CD6;">calldata</span> content) <span style="color:#569CD6;">external</span> {
        <span style="color:#C586C0;">require</span>(<span style="color:#D16969;">bytes</span>(content).length <span style="color:#DCDCAA;">></span> 0, <span style="color:#CE9178;">"Message cannot be empty"</span>);

        messages.push(<span style="color:#4EC9B0;">Message</span>({
            author: msg.sender,
            content: content,
            timestamp: <span style="color:#4EC9B0;">block</span>.timestamp
        }));
    }

    <span style="color:#6A9955;">/// @notice Retrieve all messages in the guestbook</span>
    <span style="color:#6A9955;">/// @return An array of Message structs</span>
    <span style="color:#569CD6;">function</span> <span style="color:#DCDCAA;">getAllMessages</span>() <span style="color:#569CD6;">external view</span> <span style="color:#569CD6;">returns</span> (<span style="color:#4EC9B0;">Message</span>[] <span style="color:#569CD6;">memory</span>) {
        <span style="color:#569CD6;">return</span> messages;
    }
}
</pre>
</div>
<div class="fusion-text fusion-text-108 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="282" data-end="435">This contract begins with a simple data structure called <code data-start="1739" data-end="1748">Message</code>, which stores who wrote the message, what they wrote, and when they wrote it. All messages are kept in an array called <code data-start="1868" data-end="1878">messages</code>, which is stored permanently on-chain. The <code data-start="1922" data-end="1935">postMessage</code> function allows anyone to add a new entry; it checks that the message isn’t empty, then records the sender’s address, the text, and the current timestamp. The <code data-start="2095" data-end="2111">getAllMessages</code> function returns the full list of stored messages and is marked as a view function, meaning it doesn’t change blockchain state and costs no gas when called off-chain (for example from Etherscan or a frontend). Altogether, this small contract demonstrates the core ideas of Solidity: defining data, writing functions that change state, and exposing read-only views for external applications.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-47 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Deploy using Remix</h2></div><div class="fusion-text fusion-text-109 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="157" data-end="508">With your contract ready and MetaMask connected to the Sepolia testnet, the next step is to deploy it on-chain. The easiest way to do this is with <strong data-start="304" data-end="313">Remix</strong>, Ethereum’s browser-based development environment. Remix allows you to compile your Solidity code, connect directly to MetaMask, and publish your contract to the blockchain in just a few clicks.</p>
<p data-start="510" data-end="928">Start by opening Remix at <strong data-start="536" data-end="566"><a class="decorated-link keychainify-checked" href="https://remix.ethereum.org" target="_new" rel="noopener" data-start="538" data-end="564">https://remix.ethereum.org</a></strong> and creating a new file called <code data-start="598" data-end="614">GoldenBook.sol</code>, then paste the contract code from the previous section. Once the file is ready, open the <strong data-start="705" data-end="726">Solidity Compiler</strong> tab and select the matching compiler version (in this case, <code data-start="787" data-end="795">0.8.20</code>). Press <strong data-start="804" data-end="815">Compile</strong> to generate the contract bytecode. Remix will instantly show whether the code is valid and ready for deployment.</p>
<p data-start="930" data-end="1592">After the contract compiles successfully, switch to the <strong data-start="986" data-end="1015">Deploy &amp; Run Transactions</strong> panel. In the “Environment” dropdown, select <strong data-start="1061" data-end="1093">Injected Provider (MetaMask)</strong>—this tells Remix to use the Sepolia network already configured in your MetaMask wallet. Your MetaMask extension will prompt you to connect Remix if it’s the first time. Once connected, ensure that the selected account has Sepolia ETH and that &#8220;GoldenBook&#8221; appears in the contract selector. Click <strong data-start="1390" data-end="1400">Deploy</strong>, confirm the transaction in MetaMask, and wait for it to be included in a block. You’ll see the deployed contract appear in Remix under “Deployed Contracts,” along with your transaction hash.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-31" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-31 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix.png" class="fusion-lightbox" data-rel="iLightbox[040caaaee51ac9744fa]" data-title="remix" title="remix"><img decoding="async" width="400" height="232" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix-400x232.png" alt class="img-responsive wp-image-2216" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix-200x116.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix-400x232.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix-600x348.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix-800x464.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix-1200x696.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/remix.png 1431w" sizes="(max-width: 640px) 100vw, 1200px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Your contract on Remix, already compiled and about to get deployed</div></div></div></div><div class="fusion-text fusion-text-110 fusion-text-no-margin" 
style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1594" data-end="2257">At this point, your contract is live on the Sepolia blockchain, but there’s one last step that is highly recommended: verifying your contract on <strong data-start="1739" data-end="1752">Etherscan</strong>. Verification publishes your Solidity source code alongside the deployed bytecode, allowing anyone to inspect, review, or interact with your contract directly from the explorer. This matters because a blockchain contract is transparent by nature—verification proves the code you shared publicly is the same code deployed on-chain, which builds trust and makes your guestbook readable from tools like Etherscan’s contract tab. It also helps with debugging, audits, and integrating with frontends later on.</p>
<p data-start="1594" data-end="2257">Once the contract is compiled and deployed through Remix, you can click <strong data-start="161" data-end="184">“View on Etherscan”</strong> in the Remix console to inspect the transaction. On the Etherscan page, click the <strong data-start="267" data-end="275">“To”</strong> field of the transaction to open your newly deployed contract address. This brings you to the contract’s dedicated page, where you can verify the source code. Verification requires selecting the correct compiler version and license identifier, then pasting your Solidity source code exactly as deployed. For simple contracts like our GoldenBook, you can paste the code directly since it lives in a single file. For more advanced projects—especially those using imports, libraries, or frameworks—you often need to <strong data-start="785" data-end="796">flatten</strong> the contract so all the dependent files are merged into one before verification.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-32" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-32 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract.png" class="fusion-lightbox" data-rel="iLightbox[183245b11341ca07701]" data-title="contract" title="contract"><img decoding="async" width="400" height="314" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract-400x314.png" alt class="img-responsive wp-image-2220" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract-200x157.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract-400x314.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract-600x471.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract-800x628.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/contract.png 1096w" sizes="(max-width: 640px) 100vw, 400px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The contract page on etherscan once deployed and verified</div></div></div></div><div class="fusion-text fusion-text-111 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1594" data-end="2257">Once submitted, 
Etherscan will compile your source code and compare the result with the deployed bytecode. A successful verification makes your contract’s source code publicly readable on Etherscan and unlocks the “Read” and “Write” tabs, allowing anyone to interact with it directly from the explorer. You can also view the contract deployed in this tutorial at <strong data-start="1149" data-end="1232"><a class="decorated-link keychainify-checked" href="https://sepolia.etherscan.io/address/0x56f245CB65615A482A8Db6086710A49f93869fb6" target="_new" rel="noopener" data-start="1151" data-end="1230">https://sepolia.etherscan.io/address/0x56f245CB65615A482A8Db6086710A49f93869fb6</a></strong>, although testnets sometimes reset, so the contract may no longer exist by the time you read this.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-48 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Write and Read the Guest Book (Interact with the Smart Contract)</h2></div><div class="fusion-text fusion-text-112 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="211" data-end="514">Now that the contract is verified on Etherscan, you can interact with it directly from your browser. When you open the contract page, you’ll see several tabs, including <strong data-start="380" data-end="397">Read Contract</strong> and <strong data-start="402" data-end="420">Write Contract</strong>. These act as a simple interface for calling the functions you created in your Solidity code.</p>
<p data-start="516" data-end="1085">To add a new entry to the guestbook, go to the <strong data-start="563" data-end="581">Write Contract</strong> tab, as shown in the figure below. Before sending a message, click <strong data-start="649" data-end="670">“Connect to Web3”</strong> so Etherscan can link to your MetaMask wallet on the Sepolia network. Once connected, open the <code data-start="766" data-end="779">postMessage</code> section, type your message, and submit the transaction. MetaMask will ask you to confirm—it needs to do this because writing a message modifies the blockchain and therefore requires gas. After confirming, wait a few seconds for the transaction to be mined. Your message is now permanently stored on-chain.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-33" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-33 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/w.png" class="fusion-lightbox" data-rel="iLightbox[9b1eac2986b04978838]" data-title="w" title="w"><img decoding="async" width="1103" height="278" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/w.png" alt class="img-responsive wp-image-2224" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/w-200x50.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/w-400x101.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/w-600x151.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/w-800x202.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/w.png 1103w" sizes="(max-width: 640px) 100vw, 1103px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The “Write Contract” tab on Etherscan, showing the postMessage function.</div></div></div></div><div class="fusion-text fusion-text-113 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="211" data-end="514">To view what has been written so far, switch to the <strong 
data-start="1228" data-end="1245">Read Contract</strong> tab. This interface lets you call read-only functions such as <code data-start="1308" data-end="1324">getAllMessages</code> without paying any gas, as shown in the next figure. Because read calls do not modify the blockchain, they can be executed instantly and for free. When you click <code data-start="1487" data-end="1503">getAllMessages</code>, Etherscan will return the full list of stored messages, including the sender address and timestamp for each entry.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-34" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-34 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading.png" class="fusion-lightbox" data-rel="iLightbox[77dfaf5d2e92c22a11e]" data-title="reading" title="reading"><img decoding="async" width="1111" height="365" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading.png" alt class="img-responsive wp-image-2226" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading-200x66.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading-400x131.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading-600x197.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading-800x263.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/reading.png 1111w" sizes="(max-width: 640px) 100vw, 1111px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The “Read Contract” tab displaying the getAllMessages function.</div></div></div></div><div class="fusion-text fusion-text-114 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="211" data-end="514">Using these two tabs together 
allows you to write new guestbook entries and read the entire message history directly through Etherscan, without needing any custom frontend or development tools. This is one of the simplest and most transparent ways to interact with smart contracts on any EVM-compatible blockchain.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-49 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">6. To go Further</h2></div><div class="fusion-text fusion-text-115 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="150" data-end="829">At this point, you’ve deployed a smart contract, verified it on Etherscan, and interacted with it directly through the blockchain. This already demonstrates the core mechanics of decentralized applications: public functions, permanent data storage, and transparent state. But the real power of smart contracts comes when you connect them to a user interface. In the next post, we’ll build a simple UI using <strong data-start="557" data-end="563">V0</strong>, complete with <strong data-start="579" data-end="601">wallet integration</strong>, allowing users to write messages, read the guestbook, and interact with the blockchain through a clean, modern web interface instead of the Etherscan panels. This is where your guestbook begins to feel like a real application.</p>
<p data-start="831" data-end="1422">There are also many ways you can expand the GoldenBook contract itself. You could transform messages into <strong data-start="937" data-end="971">Proof-of-Attendance (POAP) NFTs</strong> for events or conferences, turning each signature in the guestbook into a collectible token. You could add optional <strong data-start="1088" data-end="1096">tips</strong>, allowing users to attach small amounts of ETH to messages. Or you might include moderation features, message reactions, or event-based unlocks. Solidity gives you full control over on-chain logic, and because the contract targets the EVM, the same code can be deployed on any EVM-compatible chain.</p>
<p data-start="1424" data-end="2127">It’s worth noting that blockchain and smart contracts also open compelling possibilities for IoT-driven city systems. Sensors can submit environmental data, mobility information, or infrastructure events directly to a secure, tamper-resistant ledger—similar to how our guestbook stores messages. Combined with decentralized identity, multi-sensor validation, or token-based incentives, this approach could support transparent reporting, urban monitoring, or community-driven data collection. Today’s simple guestbook is just a first step, but it illustrates the fundamental mechanism behind many emerging decentralized IoT and smart-city applications.</p>
<p data-start="2129" data-end="2299" data-is-last-node="" data-is-only-node="">In the next article, we’ll bring this project to life with a user-friendly interface and wallet connection so anyone can write their message on-chain with a single click.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-16 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-116"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--8" data-awb-toc-id="8" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-35 hover-type-zoomout"><img decoding="async" width="1536" height="1024" title="blog lvl2" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15.png" alt class="img-responsive wp-image-1687" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/deploy-a-guest-book-on-an-evm-blockchain-using-remix/">Deploy a Guest Book on an EVM Blockchain Using Remix</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/deploy-a-guest-book-on-an-evm-blockchain-using-remix/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.2 — Custom Nodes &#038; Sequential Processing</title>
		<link>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/</link>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Mon, 17 Nov 2025 16:19:43 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2100</guid>

					<description><![CDATA[<p>Qwen Image Edit for Urbanism v1.2 brings sequential image editing to ComfyUI with custom Python nodes, multi-image batch processing, and a six-slot buffer for reproducible urban edits. This version streamlines automated workflows for researchers, designers, and architects working with street and neighborhood imagery.</p>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/">Qwen Image Edit for Urbanism v1.2 — Custom Nodes &#038; Sequential Processing</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-9 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-17 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-36 hover-type-none"><img decoding="async" width="1024" height="683" title="genai" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-1024x683.png" alt class="img-responsive wp-image-2103" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24.png 1536w" sizes="(max-width: 640px) 100vw, 1024px" /></span></div><div class="fusion-text fusion-text-117"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-118" style="--awb-margin-top:-30px;"><p><strong data-start="225" data-end="271">• Adds full sequential multi-image editing</strong> using custom Python nodes, enabling automated processing with up to six different secondary reference images.<br data-start="373" data-end="376" /><strong data-start="376" data-end="440">• Introduces the Sequential Loader and Six-Slot Image Buffer</strong>, allowing users to run a batch and return to a complete set of edited results.<br data-start="536" data-end="539" /><strong data-start="539" data-end="587">• Includes an optional Random Image Selector</strong> for stochastic experiments and variation testing.</p>
</div><div class="fusion-text fusion-text-119 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="523" data-end="841">The <strong data-start="527" data-end="559">Qwen Image Edit for Urbanism</strong> workflow has progressively evolved from single-image editing (<strong data-start="622" data-end="630">v1.0</strong>) to paired image transformations (<strong data-start="665" data-end="673">v1.1</strong>). Now, with <strong data-start="688" data-end="696">v1.2</strong>, it gains the ability to <strong data-start="722" data-end="762">process multiple images sequentially</strong>, fully offline and reproducibly, using custom Python nodes inside <strong data-start="829" data-end="840">ComfyUI</strong>. This new release empowers urban researchers, designers, and architects to perform <strong data-start="925" data-end="940">batch edits</strong> — such as modifying entire image series of the same street, plaza, or neighborhood — using consistent prompts or iterative refinements.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-50 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Custom Nodes — Building the Foundation for Sequential Editing</h2></div><div class="fusion-text fusion-text-120 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>At the heart of this version are three lightweight, open-source Python nodes developed by UGA for ComfyUI. These nodes are available immediately after installing the repository — either by running <code data-start="222" data-end="244">git clone https://github.com/perezjoan/ComfyUI-QwenEdit-Urbanism-by-UGA</code> or by downloading and unzipping the <a class="keychainify-checked" href="https://github.com/perezjoan/ComfyUI-QwenEdit-Urbanism-by-UGA">repository</a> manually into your  <code data-start="222" data-end="244">ComfyUI/custom_nodes</code> directory.</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Node</th>
<th align="left">Category</th>
<th align="left">Function</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Sequential Image Loader</td>
<td align="left">image/sequence</td>
<td align="left">Loads each connected image one by one in order, enabling automatic batch processing across iterations.</td>
</tr>
<tr>
<td align="left">Random Image Selector</td>
<td align="left">image/random</td>
<td align="left">Randomly selects one image among multiple inputs each run, useful for stochastic visualization or model variation testing.</td>
</tr>
<tr>
<td align="left">Stateful Image Collector</td>
<td align="left">image/sequence</td>
<td align="left">Stores the processed outputs from each run into six persistent slots, allowing users to preview all 6 results at the end of the batch.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-121 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>These nodes constitute the backbone of the v1.2 workflow. Together, they enable automation:</p>
</div><div class="fusion-text fusion-text-122 fusion-text-no-margin" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">[6 Input Images]
      ↓
Sequential or Random Loader (1 image per run)
      ↓
QwenEdit pipeline
      ↓
Stateful Collector (stores run#1..run#6 results)
      ↓
6 preview nodes
</pre>
</div><div class="fusion-text fusion-text-123 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>You launch the queue once (6 jobs) → Go drink coffee → Return to find all 6 processed urban edits displayed.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-51 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. What a ComfyUI Custom Node Actually Is</h2></div><div class="fusion-text fusion-text-124 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="199" data-end="647">A ComfyUI custom node is simply a Python class placed inside the <code data-start="264" data-end="287">ComfyUI/custom_nodes/</code> directory. When ComfyUI starts, it scans this directory, imports every <code data-start="359" data-end="364">.py</code> file, looks for a <code data-start="383" data-end="404">NODE_CLASS_MAPPINGS</code> dictionary, and registers each class it finds as a new node type. There is no compilation step and no special installation procedure: placing the file in the folder and restarting ComfyUI is sufficient for the node to appear in the interface.</p>
<p data-start="649" data-end="802">Internally, each node follows the same structure. The <code data-start="703" data-end="716">INPUT_TYPES</code> classmethod declares the input sockets that will be displayed in the UI. For example:</p>
</div><div class="fusion-text fusion-text-125 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python1" data-enlighter-title="Python">@classmethod
def INPUT_TYPES(cls):
    return {
        "required": {
            "image": ("IMAGE",),
            "index": ("INT", {"default": 0}),
        }
    }
</pre>
</div><div class="fusion-text fusion-text-126 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="199" data-end="647">This tells ComfyUI to generate two inputs—an image tensor and an integer. Similarly, the node declares its outputs through <code data-start="1092" data-end="1106">RETURN_TYPES</code> and <code data-start="1111" data-end="1125">RETURN_NAMES</code>:</p>
</div><div class="fusion-text fusion-text-127 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python2" data-enlighter-title="Python">RETURN_TYPES = ("IMAGE", "INT")
RETURN_NAMES = ("selected_image", "index")
</pre>
</div><div class="fusion-text fusion-text-128 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1218" data-end="1325">Each node also defines a <code data-start="1243" data-end="1253">FUNCTION</code> attribute, which names the method ComfyUI should call during execution:</p>
<div class="contain-inline-size rounded-2xl relative bg-token-sidebar-surface-primary">
<div class="sticky top-9">
<div class="absolute end-0 bottom-0 flex h-9 items-center pe-2">
<div class="bg-token-bg-elevated-secondary text-token-text-secondary flex items-center gap-4 rounded-sm px-2 font-sans text-xs"></div>
</div>
</div>
<div class="overflow-y-auto p-4" dir="ltr"></div>
</div>
</div><div class="fusion-text fusion-text-129 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python23" data-enlighter-title="Python">FUNCTION = "select_next"
</pre>
</div><div class="fusion-text fusion-text-130 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1218" data-end="1325">ComfyUI will therefore execute:</p>
</div><div class="fusion-text fusion-text-131 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-group="Python233" data-enlighter-title="Python" data-enlighter-theme="dracula">def select_next(...)
</pre>
</div><div class="fusion-text fusion-text-132 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1436" data-end="1464">whenever the node evaluates. To make the node visible, every Python file ends with a registration block:</p>
<div class="contain-inline-size rounded-2xl relative bg-token-sidebar-surface-primary">
<div class="sticky top-9">
<div class="absolute end-0 bottom-0 flex h-9 items-center pe-2">
<div class="bg-token-bg-elevated-secondary text-token-text-secondary flex items-center gap-4 rounded-sm px-2 font-sans text-xs"></div>
</div>
</div>
<div class="overflow-y-auto p-4" dir="ltr"></div>
</div>
</div><div class="fusion-text fusion-text-133 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python11" data-enlighter-title="Python">NODE_CLASS_MAPPINGS = {"SequentialImageLoader": SequentialImageLoader}
NODE_DISPLAY_NAME_MAPPINGS = {"SequentialImageLoader": "Sequential Image Loader"}
</pre>
</div><div class="fusion-text fusion-text-134 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1720" data-end="1992">When the package contains multiple nodes, the root <code data-start="1771" data-end="1784">__init__.py</code> merges all registration dictionaries into a single set that ComfyUI loads on startup. This mechanism allows the repository to expose several custom components while keeping each node defined in its own file.</p>
<p data-start="1994" data-end="2035">The repository layout is straightforward; in our case it looks like this:</p>
</div><div class="fusion-text fusion-text-135 fusion-text-no-margin" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">ComfyUI/
  custom_nodes/
    ComfyUI-QwenEdit-Urbanism-by-UGA/
       __init__.py
       sequential_image_selector.py
       random_image_selector.py
       stateful_collector.py
       debug_print.py
</pre>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-52 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Integrating the Nodes into Your Workflow</h2></div><div class="fusion-text fusion-text-136 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1720" data-end="1992">Version 1.2 reorganizes the Qwen Image Edit for Urbanism workflow into two blocks: the original single-image editor, and a new sequential pipeline that can process up to six images across consecutive queue runs. The sequential block relies on two custom nodes. The <strong data-start="432" data-end="459">Sequential Image Loader</strong> takes up to six input images and outputs one image per run, advancing automatically each time you press “Queue Prompt.” Its output replaces the single-image input in the Qwen Edit chain. After editing, the processed image and the loader’s index are passed into the <strong data-start="725" data-end="750">Six-Slot Image Buffer</strong> (the Stateful Image Collector node), which stores each result in the corresponding output slot while filling unused slots with placeholders to keep previews stable. Connecting each slot to a Preview node lets you watch the six results populate as the workflow iterates. 
A third node, the <strong data-start="1003" data-end="1028">Random Image Selector</strong>, is included for users who prefer stochastic selection, but it is not wired into the default v1.2 workflow.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-2 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-137" style="--awb-content-alignment:justify;"><p data-start="2471" data-end="2543">Integrating the sequential system introduces the following new connections</p>
<ol data-start="2545" data-end="2950">
<li data-start="2545" data-end="2665">
<p data-start="2548" data-end="2665">The outputs of the six Load Image nodes now feed into the Sequential Image Loader.</p>
</li>
<li data-start="2666" data-end="2782">
<p data-start="2669" data-end="2782">The <code data-start="2673" data-end="2689">selected_image</code> output of the loader replaces the single-image input of the Qwen Edit chain.</p>
</li>
<li data-start="2783" data-end="2950">
<p data-start="2786" data-end="2950">The processed image, along with the index from the loader, is routed into the Six-Slot Image Buffer (the Stateful Image Collector node). Each slot output is then connected to a dedicated Preview node.</p>
</li>
</ol>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-3 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-37 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit.png" class="fusion-lightbox" data-rel="iLightbox[af278c58f8650eb087b]" data-title="seqedit" title="seqedit"><img decoding="async" width="1024" height="932" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-1024x932.png" alt class="img-responsive wp-image-2146" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-200x182.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-400x364.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-600x546.png 600w, 
https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-800x728.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit.png 1033w" sizes="(max-width: 640px) 100vw, 600px" /></a></span></div></div></div></div><div class="fusion-text fusion-text-138 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1720" data-end="1992">The Random Image Selector follows the same logic as the sequential loader — multiple inputs, a single image output — but selects randomly instead of sequentially. Users who want stochastic variations, probabilistic sampling, or diversity testing may insert this node in place of the sequential loader.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-53 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Experiments</h2></div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-2 fusion-button-default-span fusion-button-default-type" target="_self" href="http://exemple.com"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">Text</span><span class="fusion-button-text awb-button__text awb-button__text--hover">Text</span></div></a></div><div class="fusion-text fusion-text-139 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="0" data-end="517">To evaluate how well the model can merge ecological elements across scenes, we ran an experiment where vegetation from one photograph is transplanted into another.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-4 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-5 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-6 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-140 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Base image</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-38 hover-type-none"><img decoding="async" width="400" height="266" title="image (19)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19-400x266.png" alt class="img-responsive wp-image-2168" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19-400x266.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19.png 500w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-7 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-8 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-9 fusion_builder_column_inner_1_1 1_1 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:1.92%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:1.92%;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:1.92%;--awb-spacing-left-medium:1.92%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-141" style="--awb-content-alignment:center;"><p><strong>Prompt: </strong><em>Take all vegetation visible in image 2 — including trees, shrubs, bushes, ground plants, and any greenery — and incorporate them into the scene of image 1. Preserve the structure, lighting, and perspective of image 1 while integrating the vegetation so that it appears naturally placed and consistent with the environment.</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-10 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-142 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 1</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-39 hover-type-none"><img decoding="async" width="400" height="590" title="image (20)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20-400x590.png" alt class="img-responsive wp-image-2151" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20-200x295.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20-400x590.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20.png 462w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-11 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-143 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 2</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-40 hover-type-none"><img decoding="async" width="400" height="691" title="image (22)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22-400x691.png" alt class="img-responsive wp-image-2153" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22-200x346.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22-400x691.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22.png 434w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-12 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-144 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 3</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-41 hover-type-none"><img decoding="async" width="485" height="631" title="image (24)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24.png" alt class="img-responsive wp-image-2155" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24-200x260.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24-400x520.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24.png 485w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-13 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-145 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 4</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-42 hover-type-none"><img decoding="async" width="500" height="750" title="image (23)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23.png" alt class="img-responsive wp-image-2154" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23.png 500w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-14 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-146 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 5</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-43 hover-type-none"><img decoding="async" width="500" height="750" title="image (21)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21.png" alt class="img-responsive wp-image-2152" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21.png 500w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-15 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-147 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 6</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-44 hover-type-none"><img decoding="async" width="1523" height="2000" title="pexels-amaurymic-18189716" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-scaled.jpg" alt class="img-responsive wp-image-2156" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-200x263.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-400x525.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-600x788.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-800x1051.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-1200x1576.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-scaled.jpg 1523w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-16 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-148 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 1</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-45 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00058_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-400x267.png" alt class="img-responsive wp-image-2167" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-17 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-149 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 2</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-46 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00054_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-400x267.png" alt class="img-responsive wp-image-2161" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-18 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-150 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 3</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-47 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00057_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-400x267.png" alt class="img-responsive wp-image-2166" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-19 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-151 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 4</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-48 hover-type-none"><img decoding="async" width="400" height="267" title="._00001_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-400x267.png" alt class="img-responsive wp-image-2172" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-20 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start 
fusion-content-layout-column"><div class="fusion-text fusion-text-152 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 5</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-49 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00055_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-400x267.png" alt class="img-responsive wp-image-2162" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-21 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-153 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 6</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-50 hover-type-none"><img decoding="async" width="400" height="265" title="edit__00057_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-400x265.png" alt class="img-responsive wp-image-2176" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-400x265.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-600x398.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1.png 736w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div></div><div class="fusion-text fusion-text-154 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="0" data-end="517">After letting the workflow run its full sequence while grabbing a coffee, the results appeared consistent and correctly distributed across the six preview slots. As expected with generative editing, however, the prompt is not always obeyed with perfect precision: in some cases, Qwen may copy elements from the second image that are <em data-start="333" data-end="338">not</em> vegetation — such as pieces of façade, lighting color, or background tones. This happens because the model interprets the entire scene contextually rather than isolating objects.</p>
<p data-start="519" data-end="997" data-is-last-node="" data-is-only-node="">That’s where the <strong data-start="536" data-end="559">next upgrade (v1.3)</strong> comes in: <strong data-start="570" data-end="592">mask-based control</strong>. By allowing users to explicitly define which areas of the base image should be modified (and which should remain untouched), masks will significantly reduce unintended transfers and keep the edits focused strictly on the desired objects. Until then, the <strong data-start="839" data-end="847">seed</strong> parameter remains the best tool for refinement — simply rerun the workflow with new seeds until you achieve the cleanest integration.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-54 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-155 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="0" data-end="517">You can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph</strong> that we built in this post, <strong>Qwen Image Edit for Urbanism v1.2</strong>, from the link below or from our Git repository, and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-3 fusion-button-default-span fusion-button-default-type" target="_self" download="Qwen-Edit-UGA-v1.2.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/Qwen-Edit-UGA-v1.2-1.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.2</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.2</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-18 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-156"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--9" data-awb-toc-id="9" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-51 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/">Qwen Image Edit for Urbanism v1.2 — Custom Nodes &#038; Sequential Processing</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.1 — Editing using a Reference Image and Advanced Sampling</title>
		<link>https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/</link>
					<comments>https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Wed, 12 Nov 2025 19:57:16 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=1962</guid>

					<description><![CDATA[<p>Qwen Image Edit for Urbanism v1.1 expands local AI editing in ComfyUI with advanced sampling and dual-image workflows. The new Lightning LoRA system improves realism, texture fidelity, and processing speed, enabling fast, privacy-preserving urban scene transformation—entirely offline.</p>
<p>The post <a href="https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/">Qwen Image Edit for Urbanism v1.1 — Editing using a Reference Image and Advanced Sampling</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-10 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-19 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-52 hover-type-none"><img decoding="async" width="1024" height="683" title="genai" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1-1024x683.png" alt class="img-responsive wp-image-2097" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1-300x200.png 300w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1-1024x683.png 1024w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1.png 1536w" sizes="(max-width: 1024px) 100vw, 1024px" /></span></div><div class="fusion-text fusion-text-157"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-158" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="182" data-end="206">Core Control Chain —</strong> Version 1.1 introduces the <em data-start="234" data-end="289">ModelSamplingAuraFlow → CFGNorm → LoraLoaderModelOnly</em> sequence, improving stability, texture realism, and prompt accuracy.</li>
<li><strong data-start="361" data-end="385">Dual-Image Editing —</strong> Combine two or more reference images in a single workflow to add objects, replace materials, or merge visual elements directly inside ComfyUI.</li>
<li><strong data-start="531" data-end="561">Faster and More Accurate —</strong> The new Lightning LoRA (4-step or 8-step) delivers sharper, cleaner results in under two minutes — with processing as low as 30 seconds on an RTX 4060 GPU.</li>
</ul>
</div><div class="fusion-text fusion-text-159 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1032" data-end="1481">In <strong><a class="decorated-link cursor-pointer keychainify-checked" href="https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/" target="_new" rel="noopener" data-start="1035" data-end="1170">the first part of this series</a></strong>, we built a <strong data-start="1183" data-end="1221">fully local image-editing pipeline</strong> for urban and architectural visualization using <strong data-start="1270" data-end="1281">ComfyUI</strong> and <strong data-start="1286" data-end="1305">Qwen-Image-Edit</strong>. That version (v1.0) demonstrated how to run generative image edits <strong data-start="1376" data-end="1396">entirely offline</strong>, combining text and visual prompts to transform cityscapes with instructions like:</p>
<blockquote data-start="1482" data-end="1563">
<p data-start="1484" data-end="1563">“Add trees along the sidewalk” or “Turn this street into a pedestrian plaza.”</p>
</blockquote>
<p data-start="1565" data-end="1756">We assume that you have followed the v1.0 tutorial before diving into this new update. Now, with <strong data-start="1768" data-end="1783">version 1.1</strong>, we take that foundation further. This update focuses on <strong data-start="1843" data-end="1872">advanced sampling control</strong> and <strong data-start="1877" data-end="1900">multi-image editing</strong>, allowing you to not only modify a scene, but also merge visual elements across images — for instance, importing a bench from another photo, or changing a building façade to match a different material texture.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-55 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Advanced Sampling with a Core Control Chain</h2></div><div class="fusion-text fusion-text-160 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1472" data-end="1698">First, this update focuses on improving both <strong data-start="1510" data-end="1537">quality and flexibility</strong>. The base structure still uses the Qwen-Image-Edit 2509 model in GGUF format, but adds a <em data-start="1629" data-end="1654">refined sampling module</em> to stabilize lighting and surface detail.</p>
<p data-start="1700" data-end="1722">The key new nodes are:</p>
<ul data-start="1724" data-end="2053">
<li data-start="1724" data-end="1818">
<p data-start="1726" data-end="1818"><strong data-start="1726" data-end="1751">ModelSamplingAuraFlow</strong> — smooths the diffusion trajectory for more natural transitions.</p>
</li>
<li data-start="1819" data-end="1921">
<p data-start="1821" data-end="1921"><strong data-start="1821" data-end="1839">CFGNorm (BETA)</strong> — balances prompt adherence with photorealism, preventing overexposed textures.</p>
</li>
<li data-start="1922" data-end="2053">
<p data-start="1924" data-end="2053"><strong data-start="1924" data-end="1947">LoraLoaderModelOnly</strong> — injects a <em data-start="1960" data-end="1971">Lightning</em> LoRA (4-step or 8-step) for faster inference and higher-quality reconstruction.</p>
</li>
</ul>
<p data-start="2055" data-end="2120">These three nodes form the <em data-start="2082" data-end="2102">core control chain</em> of version 1.1:</p>
<div class="contain-inline-size rounded-2xl relative bg-token-sidebar-surface-primary">
<div class="overflow-y-auto p-4" dir="ltr"><code class="whitespace-pre!">ModelSamplingAuraFlow → <span class="hljs-built_in">CFGNorm</span> → LoraLoaderModelOnly<br />
</code></div>
</div>
<p data-start="2185" data-end="2433">This configuration produces more stable, consistent outputs while preserving prompt flexibility. It also enables <strong data-start="2298" data-end="2389">fine-tuning of how the model interprets text instructions versus existing image content</strong>—ideal for architectural and material edits. Before connecting the new nodes, you’ll first need to <strong data-start="214" data-end="249">download a Lightning LoRA model</strong> — an additional lightweight module that enhances reconstruction quality and speeds up inference.</p>
<p data-start="350" data-end="527">You can find all Lightning variants here:<br data-start="391" data-end="394" />🔗 <a class="decorated-link keychainify-checked" href="https://huggingface.co/lightx2v/Qwen-Image-Lightning/tree/main" target="_new" rel="noopener" data-start="397" data-end="525">https://huggingface.co/lightx2v/Qwen-Image-Lightning/tree/main</a></p>
<p data-start="529" data-end="607">Refer to the table below to choose the most appropriate file for your setup:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Goal</th>
<th align="left">Recommended File</th>
<th align="left">Notes</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Fast prototyping</td>
<td align="left">Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors</td>
<td align="left">Best speed/quality trade-off; ideal for quick previews and design iterations.</td>
</tr>
<tr>
<td align="left">Detailed scenes / architecture</td>
<td align="left">Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors</td>
<td align="left">Produces sharper edges, richer contrast, and more defined materials.</td>
</tr>
<tr>
<td align="left">Low VRAM system (≤ 8 GB)</td>
<td align="left">Qwen-Image-fp8-e4m3fn-Lightning-4steps-V1.0-bf16.safetensors</td>
<td align="left">Lightweight version with minimal memory usage and acceptable realism.</td>
</tr>
<tr>
<td align="left">High-end / CPU use</td>
<td align="left">Qwen-Image-fp8-e4m3fn-Lightning-4steps-V1.0-fp32.safetensors</td>
<td align="left">Maximum numerical precision; slower but most stable for benchmarking.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-161 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1472" data-end="1698">Once downloaded, place your chosen <code data-start="1384" data-end="1398">.safetensors</code> file in the following directory:</p>
</div><div class="fusion-text fusion-text-162 fusion-text-no-margin" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">ComfyUI/models/loras/</pre>
</div><div class="fusion-text fusion-text-163 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1472" data-end="1698">Then, return to ComfyUI and insert the <strong data-start="1504" data-end="1519">three nodes</strong> shown below:</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-53" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-53 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c.png" class="fusion-lightbox" data-rel="iLightbox[a7c7ece49f736841385]"><img decoding="async" width="1456" height="258" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c.png" alt class="img-responsive wp-image-1973" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-200x35.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-400x71.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-600x106.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-800x142.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-1200x213.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c.png 1456w" sizes="(max-width: 640px) 100vw, 1200px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">If you’re starting from 
the v1.0 graph, connect them sequentially as shown: GGUF Loader → ModelSamplingAuraFlow → CFGNorm → LoraLoaderModelOnly → KSampler</div></div></div></div><div class="fusion-text fusion-text-164 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="3478" data-end="3543">The new sampling nodes add subtle but powerful control options:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Node</th>
<th align="left">Parameter</th>
<th align="left">Description</th>
<th align="left">Recommended Range</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">ModelSamplingAuraFlow</td>
<td align="left">shift</td>
<td align="left">Controls how strongly the model moves through latent space during denoising. Higher = stronger edits.</td>
<td align="left">1.2 – 1.8</td>
</tr>
<tr>
<td align="left">CFGNorm</td>
<td align="left">strength</td>
<td align="left">Normalizes prompt adherence to maintain texture balance. Lower = more literal edits, higher = softer realism.</td>
<td align="left">0.8 – 1.2</td>
</tr>
<tr>
<td align="left">LoraLoaderModelOnly</td>
<td align="left">strength_model</td>
<td align="left">Defines how much the LoRA (Lightning) modifies the base model. 1.0 = full effect.</td>
<td align="left">0.8 – 1.0</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-56 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Dual-Image Editing: Adding Objects and Modifying Materials</h2></div><div class="fusion-text fusion-text-165 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="228" data-end="546">Version <strong data-start="236" data-end="243">1.1</strong> introduces a new input configuration that allows <strong data-start="293" data-end="343">two images to be used within the same workflow</strong>. This enhancement enables contextual or compositional edits where one image serves as the main canvas, and the other contributes visual information such as an object, texture, or architectural detail.</p>
<p data-start="548" data-end="899">In this setup, <strong data-start="563" data-end="597">Image 1 remains the base image</strong>. Its <strong data-start="605" data-end="642">dimensions define the output size</strong>, ensuring consistent framing and spatial coherence. The <strong data-start="699" data-end="725">second image (Image 2)</strong>, on the other hand, is <strong data-start="749" data-end="774">resized</strong> during processing, but only to prevent memory overload—particularly important for mid-range GPUs.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-22 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-166" style="--awb-content-alignment:justify;"><p data-start="102" data-end="421">This example shows how to extend the ComfyUI workflow to include <strong data-start="236" data-end="268">one or more secondary images</strong>. In the node <em data-start="282" data-end="311">TextEncodeQwenImageEditPlus</em>, you can now connect up to <strong data-start="339" data-end="386">three image inputs (image1, image2, image3)</strong> in addition to your text prompt.</p>
<p data-start="423" data-end="692">In this tutorial, we’ll only use <strong data-start="456" data-end="480">one additional image</strong> — for example, inserting a red car (<em data-start="517" data-end="525">image2</em>) into the street scene of <em data-start="552" data-end="560">image1</em>. However, the same structure allows you to use a third auxiliary image to modify materials, lighting, or other objects.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-23 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-54 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-1024x566.png" class="fusion-lightbox" data-rel="iLightbox[a80c6c59665c090e393]" data-title="genai" title="genai"><img decoding="async" width="1024" height="566" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-1024x566.png" alt class="img-responsive wp-image-2124" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-200x111.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-400x221.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-600x332.png 600w, 
https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-800x442.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-1200x663.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai.png 1824w" sizes="(max-width: 640px) 100vw, 600px" /></a></span></div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-57 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Experimentation with Multi-Image Conditioning</h2></div><div class="fusion-text fusion-text-167 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516">As shown in the examples below, you can combine a <strong data-start="396" data-end="410">base image</strong> (<em data-start="412" data-end="421">image 1</em>) with up to <strong data-start="434" data-end="459">two additional inputs</strong> (<em data-start="461" data-end="479">image 2, image 3</em>) to guide the edit more precisely. In this tutorial, we focus on using <strong data-start="554" data-end="578">one additional image</strong> — for instance, adding an object or transferring a material. 
In the first example, <em data-start="666" data-end="675">image 2</em> (the red car) is inserted into <em data-start="707" data-end="716">image 1</em> using the prompt: <em data-start="735" data-end="786">“add image 2 red car into the street of image 1.”</em> The second case changes the wall material of <em data-start="836" data-end="845">image 1</em> based on the texture of <em data-start="870" data-end="879">image 2</em> (a brick wall). Finally, the third example adds a bench to an urban scene using <em data-start="966" data-end="975">image 2</em> as the visual model reference.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-24 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-168 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>Base image 1</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-55 hover-type-none"><img decoding="async" width="1333" height="2000" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-scaled.jpg" alt class="img-responsive wp-image-1917" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-200x300.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-400x600.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-600x900.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-800x1200.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-1200x1800.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-scaled.jpg 1333w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-25 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-169 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>image 2</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-56 hover-type-none"><img decoding="async" width="2000" height="1281" title="red car" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-scaled.jpg" alt class="img-responsive wp-image-1990" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-200x128.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-400x256.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-600x384.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-800x512.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-1200x769.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-scaled.jpg 2000w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-26 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-170" style="--awb-content-alignment:center;--awb-margin-top:10px;"><p><em>Prompt: add image 2 red car into the street of image 1</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-27 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-171 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p><strong>Result</strong></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-57 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_.png" alt class="img-responsive wp-image-1991" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-28 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-172 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>Base image 1</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-58 hover-type-none"><img decoding="async" width="1500" height="2000" title="pexels-annavitoria-martinssousa-647627036-34627713" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-scaled.jpg" alt class="img-responsive wp-image-2000" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-200x267.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-400x533.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-600x800.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-800x1067.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-1200x1600.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-scaled.jpg 1500w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-29 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-173 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>image 2</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-59 hover-type-none"><img decoding="async" width="186" height="188" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/wall2.png" alt class="img-responsive wp-image-2003"/></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-30 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-174" style="--awb-content-alignment:center;--awb-margin-top:10px;"><p><em>Prompt: changes the walls of the house in image 1 by the brick wall material of image 2</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-31 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-175 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p><strong>Result</strong></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-60 hover-type-none"><img decoding="async" width="880" height="1176" title="ComfyUI_00453_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_.png" alt class="img-responsive wp-image-2004" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-200x267.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-400x535.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-600x802.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-800x1069.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_.png 880w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-32 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-176 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>Base image 1</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-61 hover-type-none"><img decoding="async" width="500" height="750" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41.png" alt class="img-responsive wp-image-2009" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41.png 500w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-33 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-177 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>image 2</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-62 hover-type-none"><img decoding="async" width="610" height="397" title="bench" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench.png" alt class="img-responsive wp-image-2010" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench-200x130.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench-400x260.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench-600x390.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench.png 610w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-34 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-178" style="--awb-content-alignment:center;--awb-margin-top:10px;"><p><em>Prompt: add a bench in image 1 using the bench model of image 2</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-35 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-179 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p><strong>Result</strong></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-63 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_.png" alt class="img-responsive wp-image-2011" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-text fusion-text-180 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516">Each output remains consistent in perspective and lighting, showing that the model now integrates context more effectively. The improved accuracy comes from the <strong data-start="1167" data-end="1194">two cumulative upgrades</strong> introduced in v1.1: the <b>new core control chain</b> and <strong>Dual-Image Editing</strong>. Despite the added complexity, the workflow remains extremely fast. 
Even when using the 8-step Lightning model, processing time never exceeds 130 seconds, while the 4-step variant typically completes in about 30-40 seconds on an RTX 4060 GPU. In the next update, we’ll introduce <strong data-start="1692" data-end="1724">inpainting with mask support</strong>, allowing users to define editable regions directly within the image — ideal for selective urban design modifications.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-58 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. To Go Further</h2></div><div class="fusion-text fusion-text-181 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516"><strong data-start="5132" data-end="5177">Lightning LoRA models:</strong> <a class="keychainify-checked" href="https://huggingface.co/lightx2v/Qwen-Image-Lightning">https://huggingface.co/lightx2v/Qwen-Image-Lightning</a></p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-59 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-182 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516">Once again, for convenience, you can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph</strong> that we built in this post, <strong>Qwen Image Edit For Urbanism v1.1</strong>, from the link below and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-4 fusion-button-default-span fusion-button-default-type" target="_self" download="Qwen-Edit-UGA-v1.1.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/Qwen-Edit-UGA-v1.1.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.1</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.1</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-20 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-183"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--10" data-awb-toc-id="10" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-64 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/">Qwen Image Edit for Urbanism v1.1 — Editing using a Reference Image and Advanced Sampling</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
	</channel>
</rss>
