<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Qwen Archives - Urban Geo Analytics</title>
	<atom:link href="https://urbangeoanalytics.com/tag/qwen/feed/" rel="self" type="application/rss+xml" />
	<link>https://urbangeoanalytics.com/tag/qwen/</link>
	<description>Spatial Analysis, GeoAI &#38; Machine Learning</description>
	<lastBuildDate>Thu, 21 May 2026 10:27:47 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>https://wordpress.org/?v=7.0</generator>

<image>
	<url>https://urbangeoanalytics.com/wp-content/uploads/2025/11/cropped-logo-urban-geo_512-32x32.png</url>
	<title>Qwen Archives - Urban Geo Analytics</title>
	<link>https://urbangeoanalytics.com/tag/qwen/</link>
	<width>32</width>
	<height>32</height>
</image> 
	<item>
		<title>SAGAI v2.0 — A Unified Multi-Model Notebook for Streetscape Analysis</title>
		<link>https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/</link>
					<comments>https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 21 May 2026 10:11:18 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[GIS]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2483</guid>

					<description><![CDATA[<p>SAGAI v2.0 consolidates the full streetscape analysis pipeline into a single Google Colab notebook and replaces the inline LLaVA-only inference code with the UVLM package, enabling multi-model benchmarking across 11 VLM checkpoints. New features include a multi-task prompt builder, consensus validation with majority voting, chain-of-thought reasoning, truncation detection, interactive Folium maps, view-direction filtering, and support for loading existing polygons as study area boundaries.</p>
<p>The post <a href="https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/">SAGAI v2.0 — A Unified Multi-Model Notebook for Streetscape Analysis</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-1 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-0 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-1" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-1 hover-type-none"><img fetchpriority="high" decoding="async" width="1760" height="545" title="e4e3b0b4-83a7-4933-ba0b-ef1775beacc6" src="https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6.png" alt class="img-responsive wp-image-2489" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-200x62.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-400x124.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-600x186.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-800x248.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6-1200x372.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/e4e3b0b4-83a7-4933-ba0b-ef1775beacc6.png 1760w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-1"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-2" style="--awb-margin-top:-30px;"><ul>
<li>SAGAI v2.0 merges the previous four-module notebook architecture into a <strong>single unified Google Colab notebook</strong> (SAGAI.ipynb) organized in six sequential blocks.</li>
<li>The inline LLaVA-only inference code is replaced by the <strong>UVLM package</strong> (Universal Vision-Language Model Loader), installed automatically from GitHub, providing access to <strong>11 VLM checkpoints</strong> across two model families.</li>
<li>New capabilities include a <strong>multi-task prompt builder</strong>, <strong>consensus validation</strong> with majority voting, <strong>chain-of-thought reasoning</strong>, <strong>truncation detection</strong>, <strong>interactive Folium maps</strong>, <strong>view-direction filtering</strong>, and support for <strong>loading an existing study area polygon</strong>.</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-1 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Introduction</h2></div><div class="fusion-text fusion-text-3 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI (Streetscape Analysis with Generative Artificial Intelligence) is an open-source workflow for scoring and mapping street-level urban environments using vision-language models and open geospatial data. Since its initial release, SAGAI has been structured as a set of independent Colab notebooks, one per pipeline stage, each relying on its own dependencies and documentation.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 is a major release that consolidates the entire pipeline into a single notebook and replaces the custom inference code with the UVLM package. Where previous versions were tied to a single LLaVA checkpoint with handwritten inference logic, SAGAI v2.0 delegates all vision-language model loading, prompting, and evaluation to UVLM&#8217;s unified interface. This makes the scoring engine model-agnostic: users can select from 11 VLM checkpoints spanning the LLaVA-NeXT and Qwen2.5-VL families, compare their performance on identical tasks, and benefit from features such as consensus validation, reasoning traces, and truncation diagnostics — all within the same notebook.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Beyond the inference engine, v2.0 introduces structural and functional changes across the entire pipeline: a unified six-block architecture, interactive HTML mapping via Folium, view-direction filtering for aggregation, and the ability to load an existing polygon as a study area boundary instead of defining a bounding box manually.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">This post details the architectural changes, the UVLM integration, and the new features introduced in SAGAI v2.0.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-2 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. From Four Notebooks to One: The Unified Architecture</h2></div><div class="fusion-text fusion-text-4 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Previous SAGAI releases were organized as four independent Colab notebooks — one for street sampling, one for image retrieval, one for VLM inference, and one for aggregation and mapping — each accompanied by a separate NOTICE file documenting its dependencies and usage. This modular design was useful for development but introduced friction in practice: users had to manage file paths between notebooks, track four separate environments, and consult multiple documentation files.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 merges all four stages into a single notebook (SAGAI.ipynb) structured as six sequential blocks. The pipeline flows from study area definition through street sampling, image downloading, VLM scoring, and mapping, with all intermediate data passed directly between blocks in the same runtime session. The separate per-module NOTICE files and the standalone requirements file (requirements_sagai_module_3_v1-0.txt) have been removed — dependency management is now handled automatically by the UVLM package installation.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-2" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-2 hover-type-none"><img decoding="async" width="2000" height="948" title="pipeline details" src="https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-scaled.png" alt class="img-responsive wp-image-2480" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-300x142.png 300w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-768x364.png 768w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-1024x486.png 1024w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-1536x728.png 1536w, https://urbangeoanalytics.com/wp-content/uploads/2026/05/pipeline-details-scaled.png 2000w" sizes="(max-width: 2000px) 100vw, 2000px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Diagram of the six-block architecture</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" 
style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-3 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Study Area Definition: Bounding Box or Existing Polygon</h2></div><div class="fusion-text fusion-text-5 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">In previous versions, the study area was defined exclusively by a bounding box in WGS84 coordinates. SAGAI v2.0 retains this option but adds the ability to draw your own polygon or to load an existing polygon; for example, a GeoPackage representing a neighborhood, municipality, or custom boundary. When a polygon is provided, the street sampling step extracts the OpenStreetMap network within that geometry rather than a rectangular extent. This makes it straightforward to work with irregular administrative boundaries or user-defined study zones without manually computing bounding coordinates.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-4 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. UVLM Integration: From Single-Model Inference to Multi-Model Benchmarking</h2></div><div class="fusion-text fusion-text-6 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The most significant change in SAGAI v2.0 is the replacement of the inline inference code with the <a class="keychainify-checked" href="https://github.com/perezjoan/UVLM/tree/main">UVLM package</a>. In previous versions, Blocks 3 through 5 contained custom code for loading a single LLaVA checkpoint, constructing prompts, running inference, and parsing outputs. This logic was tightly coupled to one model architecture and required manual maintenance when Hugging Face APIs or model formats changed.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 installs UVLM directly from its GitHub repository at the start of the notebook. All model loading, prompt formatting, inference execution, response parsing, and batch processing are delegated to UVLM&#8217;s API. The inline inference code has been entirely removed.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Through UVLM, SAGAI v2.0 supports 11 VLM checkpoints across two model families:</p>
<ul class="&#091;li_&amp;&#093;:mb-0 &#091;li_&amp;&#093;:mt-1 &#091;li_&amp;&#093;:gap-1 &#091;&amp;:not(:last-child)_ul&#093;:pb-1 &#091;&amp;:not(:last-child)_ol&#093;:pb-1 list-disc flex flex-col gap-1 pl-8 mb-3">
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>LLaVA-NeXT</strong> — Mistral 7B, Vicuna 7B, Vicuna 13B, 34B, LLaMA3 8B, 72B, 110B</li>
<li class="font-claude-response-body whitespace-normal break-words pl-2"><strong>Qwen2.5-VL</strong> — 3B Instruct, 7B Instruct, 32B Instruct, 72B Instruct</li>
</ul>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">UVLM&#8217;s dual-backend abstraction automatically detects the model family and routes inference to the correct pipeline — LlavaNextProcessor for LLaVA models, AutoProcessor with process_vision_info for Qwen models — so users switch between architectures by changing a single model selection, with no modification to the rest of the notebook.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Quantization is handled through UVLM&#8217;s built-in support for 4-bit, 8-bit, and FP16 precision via BitsAndBytes. Models up to 34B parameters can run on a single Colab GPU (T4 or A100) with 4-bit quantization.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-5 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Multi-Task Prompt Builder</h2></div><div class="fusion-text fusion-text-7 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">UVLM provides a widget-based prompt builder that SAGAI v2.0 exposes directly in the notebook. Users can define up to 10 analysis tasks per run, each with its own prompt, response type (numeric, category, boolean, or text), and label. This replaces the previous approach of selecting from a small set of hardcoded tasks (T1, T2, T3) or manually editing prompt strings in the code.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Tasks are configured interactively before execution and applied uniformly across all images in the batch. Each task produces its own column in the output CSV file.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-3" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-3 hover-type-none"><img decoding="async" width="866" height="1063" title="image2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2.png" alt class="img-responsive wp-image-2320" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-200x245.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-400x491.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-600x736.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2-800x982.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image2.png 866w" sizes="(max-width: 640px) 100vw, 866px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">UVLM prompt builder</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-6 fusion-sep-none 
fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Consensus Validation</h2></div><div class="fusion-text fusion-text-8 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">SAGAI v2.0 inherits UVLM&#8217;s consensus validation mechanism. Each analysis task can be run 2 to 5 times per image, and the final score is determined by majority voting across the repeated inferences. NA values from failed parses are filtered before voting. An agreement ratio is recorded alongside the final score, providing a built-in measure of prediction reliability without any external validation step.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-7 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">6. Chain-of-Thought Reasoning and Truncation Detection</h2></div><div class="fusion-text fusion-text-9 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">UVLM supports two approaches to chain-of-thought (CoT) reasoning, both available in SAGAI v2.0. Users can write task prompts that explicitly request step-by-step reasoning and adjust the token budget (up to 1,500 tokens) to allow the model sufficient generation space. Alternatively, a built-in CoT reference mode can be enabled per task, which triggers a standardized reasoning template with a fixed 1,024-token budget. In both cases, the reasoning trace is stored in a dedicated column in the output CSV for inspection.</p>
<p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Truncation detection is performed automatically after every inference call. The exact number of generated tokens is compared against the token limit, and truncated responses are flagged in per-task CSV columns. This allows users to identify tasks where the token budget is insufficient without post-hoc analysis.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-8 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">7. Interactive Mapping with Folium</h2></div><div class="fusion-text fusion-text-10 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Previous SAGAI versions generated static thematic maps using Matplotlib. SAGAI v2.0 replaces these with interactive HTML maps built with Folium. Point-level and street-segment-level scores are rendered as interactive layers that can be panned, zoomed, and queried directly in the browser. This is particularly useful for exploratory analysis and for sharing results with collaborators who do not use GIS software.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-9 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">8. View-Direction Filtering for Aggregation</h2></div><div class="fusion-text fusion-text-11 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">Google Street View images are typically downloaded in multiple compass directions at each sampling point (e.g., front, back, left, right). In previous versions, all views were aggregated together when computing point- or street-level scores. SAGAI v2.0 introduces a view filter that allows users to select which directions to include in the aggregation — for example, scoring only left-side and right-side views to focus on building facades, or only front views to capture the pedestrian perspective along the street axis. This filter is applied at the aggregation stage and does not affect the scoring step itself.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-10 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">9. Resume-Safe Batch Processing</h2></div><div class="fusion-text fusion-text-12 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p class="font-claude-response-body break-words whitespace-normal leading-&#091;1.7&#093;">The batch execution engine inherited from UVLM provides resume-safe processing with checkpoint saving every 3 images. If a Colab session is interrupted — due to a timeout, a runtime reset, or a connectivity issue — the notebook can be re-executed and will automatically skip already-processed images. New tasks added between runs trigger automatic CSV schema upgrading, so the output file grows incrementally without losing previous results.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-11 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">10. References and Links</h2></div><div class="fusion-text fusion-text-13 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><ul>
<li class="font-claude-response-body whitespace-normal break-words pl-2">SAGAI v2.0 on GitHub: <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://github.com/perezjoan/SAGAI">https://github.com/perezjoan/SAGAI</a></li>
<li class="font-claude-response-body whitespace-normal break-words pl-2">UVLM on GitHub: <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://github.com/perezjoan/UVLM">https://github.com/perezjoan/UVLM</a></li>
<li class="font-claude-response-body whitespace-normal break-words pl-2">Perez, J. and Fusco, G. (2025). <em>Streetscape Analysis with Generative AI (SAGAI): Vision-Language Assessment and Mapping of Urban Scenes.</em> Geomatica, 77(2), 100063. <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://www.sciencedirect.com/science/article/pii/S1195103625000199">https://www.sciencedirect.com/science/article/pii/S1195103625000199</a></li>
<li class="font-claude-response-body whitespace-normal break-words pl-2">Perez, J. and Fusco, G. (2026). <em>UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking.</em> arXiv:2603.13893. <a class="underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current keychainify-checked" href="https://arxiv.org/abs/2603.13893">https://arxiv.org/abs/2603.13893</a></li>
</ul>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-1 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-14"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--1" data-awb-toc-id="1" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-4 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/">SAGAI v2.0 — A Unified Multi-Model Notebook for Streetscape Analysis</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/sagai-v2-multi-model-streetscape-analysis-uvlm/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</title>
		<link>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/</link>
					<comments>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 23 Apr 2026 07:25:41 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Package]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[Google Colab]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Jupyter Notebook]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2442</guid>

					<description><![CDATA[<p>UVLM v3.0.0 turns a Colab notebook into a full Python package. Run vision-language models locally, in notebooks, or scripts with a simple API and no setup complexity.</p>
<p>The post <a href="https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/">UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-2 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-2 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-5" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-5 hover-type-none"><img decoding="async" width="1619" height="971" title="flag fig" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig.png" alt class="img-responsive wp-image-2469" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-200x120.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-400x240.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-600x360.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-800x480.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-1200x720.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig.png 1619w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-15"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-16" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="64" data-end="88">UVLM is now a pip-installable Python package </strong>— no longer tied to Google Colab</li>
<li><strong data-start="64" data-end="88">Run on your own GPU </strong>with a local Jupyter notebook, or keep using Colab for free</li>
<li><strong data-start="64" data-end="88">Same tool, more flexibility </strong>— three lines of Python to load a model and analyse images</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-text fusion-text-17 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>When we released UVLM in March 2026, it was a Google Colab notebook. You opened it in your browser, picked a model, typed your prompts, and ran your images — all without installing anything. That simplicity was the point: a tool that anyone could use to load and compare Vision-Language Models, regardless of their technical setup.</p>
<p>But we kept hearing the same requests. Can I run this on my own machine? Can I call UVLM from a script? Can I integrate it into an existing pipeline? The answer was always the same: not easily. The entire tool lived inside a single notebook, with all the logic packed into three massive code cells. Moving it anywhere else meant copy-pasting thousands of lines and untangling global variables.</p>
<p>Version 3.0.0 changes that. UVLM is now a proper Python package.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-12 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Changed</h2></div><div class="fusion-text fusion-text-18 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The core logic — model loading, dual-backend inference, response parsing, consensus validation, batch processing — has been extracted from the notebook into eight standalone Python modules. These modules have no dependency on Google Colab, no global variables, and no widget code. They are plain Python functions that accept arguments and return results.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-6" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-6 hover-type-none"><img decoding="async" width="2000" height="1162" title="UVLM package blogpost figure 1" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-scaled.png" alt class="img-responsive wp-image-2444" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-200x116.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-400x232.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-600x349.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-800x465.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-1200x697.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-19 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The package 
is installed from GitHub in one line:</p>
</div><div class="fusion-text fusion-text-20 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python1" data-enlighter-title="Python">pip install git+https://github.com/perezjoan/UVLM.git</pre>
</div><div class="fusion-text fusion-text-21 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>On Google Colab, this happens automatically in the first cell of the Colab notebook. On your local machine, you run it once in a terminal and you are done.</p>
<p>Nothing changed in how UVLM analyses images. The same 11 model checkpoints are supported (LLaVA-NeXT and Qwen2.5-VL, from 3B to 110B parameters). The same parsing logic, the same consensus validation, the same truncation detection. If you had a workflow built on v2.2.2, the outputs will be identical.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-13 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Three Ways to Use UVLM</h2></div><div class="fusion-text fusion-text-22 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>Google Colab — Zero Install</strong></p>
<p>This is the same experience as before. Open the Colab notebook, select a GPU runtime, and start working. The notebook installs the UVLM package automatically. Images are loaded from Google Drive. Nothing has changed for Colab users, except that the code running behind the widgets is now cleaner and easier to maintain.</p>
<p><strong>Local Jupyter Notebook — Your GPU, Your Data</strong></p>
<p>If you have an NVIDIA GPU on your workstation (or access to a GPU server), you can now run UVLM locally. The local Jupyter notebook provides the same widget-based interface — model selection dropdown, prompt builder form, batch execution button — but images are read from your local filesystem and results are saved locally. No Google account needed, no data leaves your machine.</p>
<p>This matters for researchers working with sensitive imagery (medical, security, proprietary datasets) or for anyone who wants faster and more reliable model loading than what Colab&#8217;s network provides.</p>
<p><strong>Python Script — Full Programmatic Control</strong></p>
<p>For integration into larger pipelines, UVLM now exposes a clean API. After a single import, three lines of code replace the entire notebook workflow:</p>
</div><div class="fusion-text fusion-text-23 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python2" data-enlighter-title="Python">from uvlm import load_model, run_inference, parse_response
ctx = load_model("[Qwen] Qwen2.5-VL 7B Instruct", precision="4bit")
raw, tokens = run_inference("photo.jpg", "Count the cars", ctx)
result = parse_response(raw, "numeric")</pre>
</div><div class="fusion-text fusion-text-24 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>The <code>load_model()</code> function returns a context dictionary containing the model, processor, backend type, and device information. This dictionary is passed to every subsequent function — no global state, no hidden side effects. You can load multiple models in the same session and switch between them by passing different context objects.</p>
<p>For batch processing, <code>run_batch()</code> handles the full pipeline:</p>
</div><div class="fusion-text fusion-text-25 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python3" data-enlighter-title="Python">from uvlm import load_model
from uvlm.batch import run_batch

ctx = load_model("[Qwen] Qwen2.5-VL 7B Instruct", precision="4bit")
df = run_batch(
    model_ctx=ctx,
    task_specs=my_tasks,
    image_folder="./images",
    output_path="./results.csv",
)
</pre>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-7" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-7 hover-type-none"><img decoding="async" width="2000" height="926" title="UVLM deploy blogpost figure 2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-scaled.png" alt class="img-responsive wp-image-2457" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-200x93.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-400x185.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-600x278.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-800x370.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-1200x556.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div><p class="awb-imageframe-caption-text"> </p></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: 
auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-14 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Under the Hood: Package Structure</h2></div><div class="fusion-text fusion-text-26 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The monolithic notebook has been split into eight modules, each with a single responsibility:</p>
<p><em>registry.py</em> holds the model dictionary — 11 checkpoints with their backend type and <strong>HuggingFace checkpoint ID</strong>. Adding a new model is one line in a dictionary.</p>
<p><em>loader.py</em> contains the <code>load_model()</code> function. It handles quantisation configuration (4-bit, 8-bit, FP16), device placement (single GPU, auto, CPU offload), and the LLaVA vs Qwen branching logic. It returns a dictionary — not a set of global variables.</p>
<p><em>inference.py</em> contains <code>run_inference()</code>, the dual-backend forward pass. It accepts a model context dictionary and returns the raw response plus the exact token count as a tuple. The full LLaVA response cleaning logic and the full Qwen token-trimming pipeline are preserved exactly as they were.</p>
<p><em>parsers.py</em> holds the four response parsers (numeric, category, boolean, text) and the advanced reasoning parser. These are pure functions with zero dependencies beyond Python&#8217;s standard library.</p>
<p><em>consensus.py</em> contains the majority voting logic. <em>batch.py</em> handles folder iteration, CSV writing, resume mode, and schema upgrading. <em>prompts.py</em> stores the task type definitions and the chain-of-thought templates. <em>utils.py</em> provides seed management, environment detection, and <strong>HuggingFace token</strong> retrieval.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-15 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Getting Started</h2></div><div class="fusion-text fusion-text-27 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>On Colab</strong>: Open the notebook from GitHub and run the three blocks as before. The package installs itself.</p>
<p><strong>Locally</strong>: First, install PyTorch with CUDA support matching your GPU driver (check with <code>nvidia-smi</code>). For example, with CUDA 12.8+:</p>
</div><div class="fusion-text fusion-text-28 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python4" data-enlighter-title="Python">pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
pip install git+https://github.com/perezjoan/UVLM.git
</pre>
</div><div class="fusion-text fusion-text-30 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>Then open the local Jupyter notebook.</p>
<p>You get the same dropdown menus, the same prompt builder form, the same batch execution. The only difference is that you type a local path for your image folder instead of a Google Drive path.</p>
<p>For HuggingFace authentication (needed for some gated models like LLaMA3-based checkpoints), either set the <code>HF_TOKEN</code> environment variable or run <code>huggingface-cli login</code> once in your terminal.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-16 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Is Next</h2></div><div class="fusion-text fusion-text-31 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The package architecture makes it much easier to add new VLM families. InternVL, BLIP-2, CogVLM, DeepSeek-VL, and Molmo are planned for future releases — each one requires implementing the backend-specific sections of the inference function and adding entries to the registry, without touching the rest of the codebase.</p>
<p>We are also working on multi-GPU batching for parallel inference across images, video frame analysis support, and integration with the SAGAI workflow for automated streetscape analysis.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-17 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Links</h2></div><div class="fusion-text fusion-text-32 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Source code: <a class="keychainify-checked" href="https://github.com/perezjoan/UVLM">github.com/perezjoan/UVLM</a></p>
<p>Paper: <a class="keychainify-checked" href="https://arxiv.org/abs/2603.13893">arXiv preprint</a> — Perez &amp; Fusco (2026)</p>
<p>UVLM page on this site: urbangeoanalytics.com › Software &amp; Algorithms › <a class="keychainify-checked" href="https://urbangeoanalytics.com/algorithms-softwares/uvlm-universal-vision-language-model-loader/">UVLM</a></p>
<p>Previous blog post: <a class="keychainify-checked" href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a></p>
</div><div class="fusion-title title fusion-title-18 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Citation</h2></div><div class="fusion-text fusion-text-33 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>If you use UVLM in your work, please cite:</p>
<p>Perez, J. &amp; Fusco, G. (2026). <em>UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking.</em> arXiv:2603.13893</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-3 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-34"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--2" data-awb-toc-id="2" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-8 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/">UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</title>
		<link>https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/</link>
					<comments>https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Tue, 17 Mar 2026 14:23:58 +0000</pubDate>
				<category><![CDATA[Intermediate]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[Benchmarking]]></category>
		<category><![CDATA[Chain-of-Thought]]></category>
		<category><![CDATA[Google Colab]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Multimodal AI]]></category>
		<category><![CDATA[Open Source]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<category><![CDATA[VLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2356</guid>

					<description><![CDATA[<p>UVLM is a free, open-source tool for loading, testing, and comparing Vision-Language Models on custom image analysis tasks. Running entirely in Google Colab, it lets researchers and practitioners benchmark multiple AI models using the same prompts and images — no coding, no GPU ownership, no model-specific pipelines. This post explains what VLMs are, why comparing them matters, and how to get started in five minutes.</p>
<p>The post <a href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-3 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-4 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-9" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-9 hover-type-none"><img decoding="async" width="1536" height="595" title="uvlm" src="https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm.png" alt class="img-responsive wp-image-2342" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-200x77.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-400x155.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-600x232.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-800x310.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm-1200x465.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/uvlm.png 1536w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">uvlm</div></div></div></div><div class="fusion-text fusion-text-35"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-36" style="--awb-margin-top:-30px;"><ul>
<li><strong>New open-source release: UVLM v2.2.2</strong> — compare Vision-Language Models from a single notebook</li>
<li><strong>11 supported AI models</strong> (8 of them benchmarked), 5 analysis tasks, 120 test images — all handled with one tool</li>
<li><strong>No coding, no installation</strong> — runs in Google Colab with a free account</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-text fusion-text-37 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Imagine you have thousands of street photographs and you need to answer the same questions about each one: how many cars are parked? Is there a sidewalk? How long is the building frontage? Hiring someone to go through every image manually would take weeks. Training a custom computer vision model would take months. But what if you could simply ask an AI model these questions in plain English — and get structured, usable answers back?</p>
<p>That is exactly what Vision-Language Models do. And today, we are releasing UVLM — an open-source tool that makes it easy to load, test, and compare these models, all from a single notebook in your browser.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-19 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Are Vision-Language Models?</h2></div><div class="fusion-text fusion-text-38 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Vision-Language Models (VLMs) are AI systems that can look at an image and answer questions about it in natural language. Unlike traditional computer vision, which requires training a separate model for every task (one for counting cars, another for detecting sidewalks, a third for classifying buildings), a VLM handles all of these through text prompts. You write a question, attach a photo, and the model responds.</p>
<p>For example, you can ask a VLM: “Count all motor vehicles visible in this image” and it will answer “3”. You can ask the same model “Is there a sidewalk along the street frontage?” and it will answer “yes”. You can even ask it to estimate the length of a building facade in meters — a task that requires the model to identify reference objects (like parked cars), estimate their size, and reason about perspective. All of this from a single model, with no retraining and no labelled dataset.</p>
<p>The catch is that there are many VLM families available (LLaVA, Qwen, InternVL, BLIP-2, and more), and each one works differently under the hood. They use different image encoders, different tokenisation strategies, and different code to run. If you want to know which model is best for your specific task, you normally have to write separate code for each one — a tedious and error-prone process.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-20 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">This Is the Problem UVLM Solves</h2></div><div class="fusion-text fusion-text-39 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>UVLM (Universal Vision-Language Model Loader) is a free, open-source tool that lets you load, configure, and compare multiple VLM architectures using the same prompts and the same evaluation protocol — without writing any model-specific code. It runs entirely in Google Colab, which means you do not need to install anything on your computer or own a GPU. A free Google account is all you need.</p>
<p>The idea is simple: you pick a model from a dropdown menu, type your analysis questions into a form, point the tool at a folder of images, and hit run. UVLM handles all the technical details — the processor classes, the tokenisation, the generation settings, the output parsing — and delivers a clean CSV file with one row per image and one column per task. If you want to try a different model, you just switch the dropdown and run again. Same prompts, same images, same output format. Now you can compare.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-10" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-10 hover-type-none"><img decoding="async" width="1190" height="823" title="image1" src="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1.png" alt class="img-responsive wp-image-2319" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-200x138.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-400x277.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-600x415.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1-800x553.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/03/image1.png 1190w" sizes="(max-width: 640px) 100vw, 1190px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The three-block structure of the UVLM Loader</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-21 
fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">A Practical Example: Scoring 120 Street Photographs</h2></div><div class="fusion-text fusion-text-40 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>To demonstrate what UVLM can do, we benchmarked 8 different models on 120 street-level photographs of French urban frontages. Each image was analysed on five tasks: counting vehicles, detecting sidewalks, counting pedestrian entrances, estimating the street frontage length in meters, and classifying the vegetation type. That is 16 model configurations (each model tested in standard and advanced reasoning modes), 120 images, and 5 tasks per image — all processed and compared through UVLM.</p>
<p>The results were revealing. The largest model (LLaVA 34B, with 34 billion parameters) actually ranked last overall. A much smaller model (LLaVA Vicuna 7B) outperformed it significantly and ran on a free Google Colab GPU. The best overall results came from Qwen 32B with chain-of-thought reasoning enabled, which achieved 88% proximity to human expert annotations across all five tasks. Without UVLM, discovering these differences would have required writing and debugging eight separate inference pipelines.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-22 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Who Is UVLM For?</h2></div><div class="fusion-text fusion-text-41 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>UVLM was designed for anyone who works with images and wants to extract structured information from them at scale — without becoming a machine learning engineer. If you are an urban planner evaluating streetscape quality across a city, UVLM lets you score thousands of street photographs using natural language prompts. If you are an environmental researcher classifying vegetation from field photographs, UVLM lets you test which AI model gives the most reliable results for your specific classification scheme. If you are an infrastructure inspector processing damage assessment photographs, UVLM lets you set up automated counting and scoring tasks and run them across your entire image archive.</p>
<p>The tool is also valuable for AI researchers who need a controlled benchmarking environment. Because UVLM ensures that every model receives exactly the same prompt and is evaluated with the same metrics, it produces fair, reproducible comparisons. The consensus validation feature (running each task multiple times and taking a majority vote) addresses the inherent randomness of AI outputs, and the truncation detection feature flags when a model’s response was cut off before it could finish — a common but often invisible source of errors.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-23 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">How to Get Started</h2></div><div class="fusion-text fusion-text-42 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Getting started takes about five minutes. Open the UVLM notebook from GitHub (the link is below), connect to a GPU runtime in Google Colab, and run the first block to load a model. The second block gives you a form where you type your analysis questions — no coding required. The third block processes your images and saves the results as a CSV file on your Google Drive.</p>
<p>The tool currently supports 11 model checkpoints from two major families (LLaVA-NeXT and Qwen2.5-VL), ranging from 3 billion to 110 billion parameters. Models up to 34B can run on a single free-tier Colab GPU with 4-bit quantisation. Advanced features include consensus validation (2–5 runs per task with majority voting), chain-of-thought reasoning for complex tasks, and automatic truncation detection.</p>
<p>UVLM is released under the Apache 2.0 open-source licence. You can use it, modify it, and build on it for any purpose — academic or commercial.</p>
</div><div class="fusion-text fusion-text-43 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2>Links</h2>
<p><strong>Source code: </strong><a class="keychainify-checked" href="https://github.com/perezjoan/UVLM">github.com/perezjoan/UVLM</a></p>
<p><strong>Paper: </strong><a class="keychainify-checked" href="https://arxiv.org/abs/2603.13893">arXiv preprint — Perez &amp; Fusco (2026)</a></p>
<p><strong>UVLM page on this site: </strong><a class="keychainify-checked" href="https://urbangeoanalytics.com/algorithms-softwares/uvlm-universal-vision-language-model-loader/">urbangeoanalytics.com › Softwares &amp; Algorithms › UVLM</a></p>
<p><strong>Benchmark dataset: </strong><a class="keychainify-checked" href="https://zenodo.org/records/18959690">Zenodo — 120 street-view images</a></p>
<h2>Citation</h2>
<p>If you use UVLM in your work, please cite:</p>
<p><em>Perez, J. &amp; Fusco, G. (2026). UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking. arXiv:2603.13893</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-5 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-44"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--3" data-awb-toc-id="3" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-11 hover-type-zoomout"><img decoding="async" width="1536" height="1024" title="blog lvl2" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15.png" alt class="img-responsive wp-image-1687" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-7-nov.-2025-09_10_15.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.3 — Mask-Controlled Editing With Prompt or Reference Guidance</title>
		<link>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/</link>
					<comments>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 04 Dec 2025 22:17:40 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2236</guid>

					<description><![CDATA[<p>Version 1.3 of Qwen Image Edit for Urbanism introduces mask-controlled editing in ComfyUI, enabling precise, localized image transformations using prompts or reference images. The new Grow Mask utility softens boundaries, preserves unmasked areas, and integrates seamlessly with existing single-image and sequential workflows.</p>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/">Qwen Image Edit for Urbanism v1.3 — Mask-Controlled Editing With Prompt or Reference Guidance</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-4 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-6 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-12 hover-type-none"><img decoding="async" width="1536" height="1024" title="COVER" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER.png" alt class="img-responsive wp-image-2266" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/COVER.png 1536w" sizes="(max-width: 640px) 100vw, 1200px" /></span></div><div class="fusion-text fusion-text-45"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-46" style="--awb-margin-top:-30px;"><ul>
<li>Adds a new <strong data-start="575" data-end="597">Mask Editing Block</strong> enabling localized, structurally accurate edits while preserving the rest of the image.</li>
<li>Introduces a <strong data-start="703" data-end="716">Grow Mask</strong> utility with expand and blur parameters, plus visual mask preview.</li>
<li>Replaces <em data-start="797" data-end="810">Empty Latent</em> with <strong data-start="816" data-end="849">VAE Encode → Set Latent Noise</strong> to avoid global degradation.</li>
<li>Mask block is optional: <strong data-start="907" data-end="942">Blocks 1 and 2 remain unchanged</strong> for prompt-only and sequential workflows.</li>
</ul>
</div><div class="fusion-text fusion-text-47 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="430" data-end="740">Qwen Image Edit for Urbanism continues to evolve into a practical, research-grade tool for architectural and urban experimentation. After the batch-processing capabilities <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/">introduced in v1.2</a>, version 1.3 focuses on the feature most requested by designers and analysts: precise control over <em data-start="720" data-end="727">where</em> edits occur.</p>
<p data-start="742" data-end="1049">In image-to-image workflows, uncontrolled changes are a common issue. Even a very specific prompt can lead diffusion models to reinterpret the whole scene. Version 1.3 introduces mask-restricted editing, allowing Qwen to modify only a selected region while preserving the rest of the image exactly as it is.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-24 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Why Masks Matter for Urban Editing</h2></div><div class="fusion-text fusion-text-48 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1102" data-end="1236">Until now, the workflow relied on <strong data-start="1136" data-end="1152">Empty Latent</strong> to initialize diffusion. This approach is simple but has an unavoidable drawback:</p>
<p data-start="1238" data-end="1326"><strong data-start="1238" data-end="1326">The entire latent space is regenerated — even outside the region you want to modify.</strong></p>
<p data-start="1328" data-end="1543">This often produces familiar and unwanted side effects: façades shift slightly, lighting changes, road textures dissolve, or skies take on new tones, even when the prompt refers only to a specific object or surface. To address this, v1.3 reorganizes the initialization stage around:</p>
<p data-start="1613" data-end="1659"><strong>VAE Encode → Set Latent Noise (masked)</strong></p>
<p data-start="1661" data-end="1707">This change restructures the model’s behavior:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Component</th>
<th align="left">Effect</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">VAE Encode</td>
<td align="left">Converts the original image into latent space with high fidelity.</td>
</tr>
<tr>
<td align="left">Set Latent Noise (with mask)</td>
<td align="left">Adds noise only <em data-start="1895" data-end="1903">inside</em> the mask, preserving everything else.</td>
</tr>
<tr>
<td align="left">Mask-guided denoising</td>
<td align="left">Qwen edits only where permitted; unmasked areas remain pixel-identical.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-49 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1102" data-end="1236">This leads to crisp preservation of buildings, street furniture, sky, shadows, and lighting outside the edited zone. Localized edits integrate naturally: you can green a façade, test a bike lane, adjust a plaza boundary, or replace a storefront without disturbing the rest of the street.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-25 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Prompt-Only vs. Reference-Guided Mask Editing</h2></div><div class="fusion-text fusion-text-50 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="2400" data-end="2469">Version 1.3 supports both textual and visual control of masked edits.</p>
<h5 data-start="2471" data-end="2508"><strong data-start="2475" data-end="2506">A. Prompt-Only Mask Editing</strong></h5>
<p data-start="2509" data-end="2638">You draw a mask, provide a prompt, and Qwen modifies only the selected region. This works especially well for operations such as:</p>
<ul data-start="2640" data-end="2741">
<li data-start="2640" data-end="2684">
<p data-start="2642" data-end="2684">replacing asphalt with permeable paving,</p>
</li>
<li data-start="2685" data-end="2707">
<p data-start="2687" data-end="2707">adding vegetation,</p>
</li>
<li data-start="2708" data-end="2741">
<p data-start="2710" data-end="2741">transforming a façade material.</p>
</li>
</ul>
<h5 data-start="2743" data-end="2791"><strong data-start="2747" data-end="2789">B. Mask Editing With a Reference Image</strong></h5>
<p data-start="2792" data-end="2875">A second image may be supplied to guide structure, texture, or color. This enables:</p>
<ul data-start="2877" data-end="3050">
<li data-start="2877" data-end="2908">
<p data-start="2879" data-end="2908">borrowing material samples,</p>
</li>
<li data-start="2909" data-end="2964">
<p data-start="2911" data-end="2964">transplanting vegetation from one scene to another,</p>
</li>
<li data-start="2965" data-end="3001">
<p data-start="2967" data-end="3001">matching architectural textures,</p>
</li>
<li data-start="3002" data-end="3050">
<p data-start="3004" data-end="3050">transferring lighting characteristics locally.</p>
</li>
</ul>
<p data-start="3052" data-end="3119">Both modes are interchangeable, and both respect the mask boundary. Masks drawn directly in ComfyUI are typically sharp, binary shapes. Diffusion models, however, perform best when mask boundaries are soft and slightly extended.</p>
<p data-start="3344" data-end="3409">Version 1.3 introduces a <strong data-start="3369" data-end="3382">Grow Mask</strong> node with two parameters:</p>
<ul data-start="3411" data-end="3650">
<li data-start="3411" data-end="3544">
<p data-start="3413" data-end="3544"><strong data-start="3413" data-end="3423">Expand</strong>: increases the mask outward, helping cover tiny gaps or irregular brush strokes and preventing thin seams at the edge.</p>
</li>
<li data-start="3545" data-end="3650">
<p data-start="3547" data-end="3650"><strong data-start="3547" data-end="3562">Blur Radius</strong>: softens the boundary, allowing Qwen to blend new and existing textures more naturally.</p>
</li>
</ul>
<p data-start="3652" data-end="3729">Together, these parameters define the effective “influence zone” of the edit.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-0 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-51" style="--awb-content-alignment:justify;"><p data-start="4061" data-end="4244">To make mask-based editing easier to control, v1.3 includes a preview step.<br data-start="4136" data-end="4139" />The workflow converts the (expanded and blurred) mask into an image and displays it directly in the UI.</p>
<p data-start="4246" data-end="4287">This makes it straightforward to confirm:</p>
<ul data-start="4289" data-end="4481">
<li data-start="4289" data-end="4323">
<p data-start="4291" data-end="4323">whether the boundary is clean,</p>
</li>
<li data-start="4324" data-end="4372">
<p data-start="4326" data-end="4372">whether the expansion radius is appropriate,</p>
</li>
<li data-start="4434" data-end="4481">
<p data-start="4436" data-end="4481">whether the blur transition is smooth enough.</p>
</li>
</ul>
<p data-start="4483" data-end="4613">For tasks involving building edges, curbs, signage, crosswalks, or paving boundaries, this preview dramatically improves accuracy.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-1 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-13 hover-type-none"><img decoding="async" width="786" height="568" alt="The grow mask with blur and its preview" title="mask" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask.png" class="img-responsive wp-image-2248" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask-200x145.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask-400x289.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask-600x434.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask.png 786w" sizes="(max-width: 640px) 100vw, 600px" /></span></div><div class="fusion-text fusion-text-52"><p>The grow mask with blur and its preview</p>
</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-26 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. How the v1.3 Workflow Fits Into the Existing System</h2></div><div class="fusion-text fusion-text-53 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="4683" data-end="4746">The mask block replaces only the latent-initialization stage.</p>
<p data-start="4748" data-end="4860">Everything else — prompts, reference conditioning, sampling, and the full QwenEdit pipeline — remains unchanged.</p>
<p data-start="4862" data-end="4886"><strong data-start="4862" data-end="4886">Simplified pipeline:</strong></p>
</div><div class="fusion-text fusion-text-54"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">Base Image
     ↓
User Mask → Grow Mask → Preview Mask
     ↓
VAE Encode
     ↓
Set Latent Noise (masked)
     ↓
Qwen Edit Pipeline
     (prompt-only or reference-guided)
     ↓
VAE Decode
     ↓
Final Output
</pre>
</div><div class="fusion-text fusion-text-55 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">This structure makes editing predictable and reproducible, but it is important to clarify how <strong data-start="472" data-end="504">v1.3 is organized internally</strong>. The workflow is now composed of <strong data-start="538" data-end="574">three completely separate blocks</strong>, and <strong data-start="580" data-end="614">each block loads its own model</strong>:</p>
<ul data-start="617" data-end="797">
<li data-start="617" data-end="687">
<p data-start="619" data-end="687"><strong data-start="619" data-end="631">Block 1:</strong> Single-image edit (prompt-only or prompt + reference)</p>
</li>
<li data-start="688" data-end="749">
<p data-start="690" data-end="749"><strong data-start="690" data-end="702">Block 2:</strong> Sequential multi-image editing</p>
</li>
<li data-start="750" data-end="797">
<p data-start="752" data-end="797"><strong data-start="752" data-end="764">Block 3:</strong> Mask-based editing (new in v1.3)</p>
</li>
</ul>
<p data-start="799" data-end="1100">All three blocks coexist in the same workflow, and the user simply chooses which one to run.<br data-start="891" data-end="894" />In ComfyUI, this is done by <strong data-start="922" data-end="991">right-clicking the group frame and selecting <em data-start="969" data-end="977">Active</em> or <em data-start="981" data-end="989">Bypass</em></strong>.<br data-start="992" data-end="995" />Only the active block executes; the others are skipped. Nothing else in the pipeline needs to be changed.</p>
<p data-start="1102" data-end="1355">Because the blocks are independent, they can also be <strong data-start="1155" data-end="1167">combined</strong>. For example, the user may activate the sequential loader from Block 2 and route its output into the mask-editing block (Block 3) to run a full masked transformation on a batch of images.</p>
<p data-start="1357" data-end="1645">To create the mask itself, the user loads the base image in <strong data-start="1417" data-end="1433">Load Image 1</strong>, right-clicks the preview, and selects <strong data-start="1473" data-end="1496">Open in Mask Editor</strong>. The drawn mask is then processed by the Grow Mask node before entering the latent-noise stage, ensuring smooth boundaries and predictable behavior.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-27 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Experimentation</h2></div><div class="fusion-text fusion-text-56 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">To test the new mask-based editing block, we start by defining the editable region directly in ComfyUI. After loading the base image, the user <strong data-start="416" data-end="478">right-clicks the preview and selects “Open in Mask Editor”</strong>, then paints the area where the new cyclist should appear. Before the edit, this part of the street is empty.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-14" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-14 hover-type-none"><img decoding="async" width="2000" height="1130" alt="mask" title="mask1" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-scaled.png" class="img-responsive wp-image-2257" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-200x113.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-400x226.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-600x339.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-800x452.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-1200x678.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/mask1-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 2000px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Adding a Mask in ComfyUI</div></div></div></div><div class="fusion-text fusion-text-57 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">Once the mask is created, it flows through Block 3: the Grow Mask node expands and softens the boundary, the workflow encodes the base image, and 
noise is added <strong data-start="793" data-end="824">only inside the masked zone</strong>. A second image containing a cyclist is provided as a reference, and the prompt instructs Qwen to place the rider onto the bicycle lane.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-15" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-15 hover-type-none"><img decoding="async" width="1509" height="1241" title="block3" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3.png" alt class="img-responsive wp-image-2258" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-200x164.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-400x329.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-600x493.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-800x658.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3-1200x987.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/block3.png 1509w" sizes="(max-width: 640px) 100vw, 1509px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The whole pipeline of block 3</div></div></div></div><div class="fusion-text fusion-text-58 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">The result is a localized insertion: the cyclist from Image 2 is generated precisely inside the masked area, while the rest of the photograph remains 
unchanged. This demonstrates the core purpose of Block 3 — precise, mask-controlled edits that do not disturb the surrounding urban context.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-16" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-16 hover-type-none"><img decoding="async" width="1248" height="832" title="test_00010_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_.png" alt class="img-responsive wp-image-2261" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/12/test_00010_.png 1248w" sizes="(max-width: 640px) 100vw, 1248px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">The result</div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" 
style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-28 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-59 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="378" data-end="615">You can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph</strong> built in this post, <strong>Qwen Image Edit for Urbanism v1.3</strong>, from the link below or from our Git repository, and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-1 fusion-button-default-span fusion-button-default-type" target="_self" download="Gwen-Edit-UGA-v1.3.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/12/Gwen-Edit-UGA-v1.3.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.3</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.3</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-7 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-60"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--4" data-awb-toc-id="4" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-17 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/">Qwen Image Edit for Urbanism v1.3 — Mask-Controlled Editing With Prompt or Reference Guidance</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-3-editing-with-a-mask/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.2 — Custom Nodes &#038; Sequential Processing</title>
		<link>https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/</link>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Mon, 17 Nov 2025 16:19:43 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2100</guid>

					<description><![CDATA[<p>ComfyUI Sequential Image Editing for Urbanism arrives in Qwen v1.2 with custom Python nodes, multi-image batch processing, and a six-slot buffer for reproducible urban edits. This version streamlines automated workflows for researchers, designers, and architects working with street and neighborhood imagery.</p>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/">Qwen Image Edit for Urbanism v1.2 — Custom Nodes &#038; Sequential Processing</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-5 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-8 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-18 hover-type-none"><img decoding="async" width="1024" height="683" title="genai" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-1024x683.png" alt class="img-responsive wp-image-2103" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-18_36_24.png 1536w" sizes="(max-width: 640px) 100vw, 1024px" /></span></div><div class="fusion-text fusion-text-61"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-62" style="--awb-margin-top:-30px;"><p><strong data-start="225" data-end="271">• Adds full sequential multi-image editing</strong> using custom Python nodes, enabling automated processing with up to six different secondary reference images.<br data-start="373" data-end="376" /><strong data-start="376" data-end="440">• Introduces the Sequential Loader and Six-Slot Image Buffer</strong>, allowing users to run a batch and return to a complete set of edited results.<br data-start="536" data-end="539" /><strong data-start="539" data-end="587">• Includes an optional Random Image Selector</strong> for stochastic experiments and variation testing.</p>
</div><div class="fusion-text fusion-text-63 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="523" data-end="841">The <strong data-start="527" data-end="559">Qwen Image Edit for Urbanism</strong> workflow has progressively evolved from single-image editing (<strong data-start="622" data-end="630">v1.0</strong>) to paired image transformations (<strong data-start="665" data-end="673">v1.1</strong>). Now, with <strong data-start="688" data-end="696">v1.2</strong>, it gains the ability to <strong data-start="722" data-end="762">process multiple images sequentially</strong>, fully offline and reproducibly, using custom Python nodes inside <strong data-start="829" data-end="840">ComfyUI</strong>. This new release empowers urban researchers, designers, and architects to perform <strong data-start="925" data-end="940">batch edits</strong> — such as modifying entire image series of the same street, plaza, or neighborhood — using consistent prompts or iterative refinements.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-29 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Custom Nodes — Building the Foundation for Sequential Editing</h2></div><div class="fusion-text fusion-text-64 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>At the heart of this version are three lightweight, open-source Python nodes developed by UGA for ComfyUI. These nodes are available immediately after installing the repository — either by running <code data-start="222" data-end="244">git clone https://github.com/perezjoan/ComfyUI-QwenEdit-Urbanism-by-UGA</code> or by downloading and unzipping the <a class="keychainify-checked" href="https://github.com/perezjoan/ComfyUI-QwenEdit-Urbanism-by-UGA">repository</a> manually into your  <code data-start="222" data-end="244">ComfyUI/custom_nodes</code> directory.</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Node</th>
<th align="left">Category</th>
<th align="left">Function</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Sequential Image Loader</td>
<td align="left">image/sequence</td>
<td align="left">Loads each connected image one by one in order, enabling automatic batch processing across iterations.</td>
</tr>
<tr>
<td align="left">Random Image Selector</td>
<td align="left">image/random</td>
<td align="left">Randomly selects one image among multiple inputs each run, useful for stochastic visualization or model variation testing.</td>
</tr>
<tr>
<td align="left">Stateful Image Collector</td>
<td align="left">image/sequence</td>
<td align="left">Stores the processed output of each run in one of six persistent slots, allowing users to preview all six results at the end of the batch. Also referred to in this article as the Six-Slot Image Buffer.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-65 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>These nodes constitute the backbone of the v1.2 workflow. Together, they enable automation:</p>
</div><div class="fusion-text fusion-text-66 fusion-text-no-margin" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">[6 Input Images]
      ↓
Sequential or Random Loader (1 image per run)
      ↓
QwenEdit pipeline
      ↓
Stateful Collector (stores run#1..run#6 results)
      ↓
6 preview nodes
</pre>
</div><div class="fusion-text fusion-text-67 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>You launch the queue once (6 jobs) → Go drink coffee → Return to find all 6 processed urban edits displayed.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-30 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. What a ComfyUI Custom Node Actually Is</h2></div><div class="fusion-text fusion-text-68 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="199" data-end="647">A ComfyUI custom node is simply a Python class placed inside the <code data-start="264" data-end="287">ComfyUI/custom_nodes/</code> directory. When ComfyUI starts, it scans this directory, imports every <code data-start="359" data-end="364">.py</code> file, looks for a <code data-start="383" data-end="404">NODE_CLASS_MAPPINGS</code> dictionary, and registers each class it finds as a new node type. There is no compilation step and no special installation procedure: placing the file in the folder and restarting ComfyUI is sufficient for the node to appear in the interface.</p>
<p data-start="649" data-end="802">Internally, each node follows the same structure. The <code data-start="703" data-end="716">INPUT_TYPES</code> classmethod declares the input sockets that will be displayed in the UI. For example:</p>
</div><div class="fusion-text fusion-text-69 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python1" data-enlighter-title="Python">@classmethod
def INPUT_TYPES(cls):
    return {
        "required": {
            "image": ("IMAGE",),
            "index": ("INT", {"default": 0}),
        }
    }
</pre>
</div><div class="fusion-text fusion-text-70 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="199" data-end="647">This tells ComfyUI to generate two inputs—an image tensor and an integer. Similarly, the node declares its outputs through <code data-start="1092" data-end="1106">RETURN_TYPES</code> and <code data-start="1111" data-end="1125">RETURN_NAMES</code>:</p>
</div><div class="fusion-text fusion-text-71 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python2" data-enlighter-title="Python">RETURN_TYPES = ("IMAGE", "INT")
RETURN_NAMES = ("selected_image", "index")
</pre>
</div><div class="fusion-text fusion-text-72 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1218" data-end="1325">Each node also defines a <code data-start="1243" data-end="1253">FUNCTION</code> attribute, which names the method ComfyUI should call during execution:</p>
<div class="contain-inline-size rounded-2xl relative bg-token-sidebar-surface-primary">
<div class="sticky top-9">
<div class="absolute end-0 bottom-0 flex h-9 items-center pe-2">
<div class="bg-token-bg-elevated-secondary text-token-text-secondary flex items-center gap-4 rounded-sm px-2 font-sans text-xs"></div>
</div>
</div>
<div class="overflow-y-auto p-4" dir="ltr"></div>
</div>
</div><div class="fusion-text fusion-text-73 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python23" data-enlighter-title="Python">FUNCTION = "select_next"
</pre>
</div><div class="fusion-text fusion-text-74 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1218" data-end="1325">ComfyUI will therefore execute:</p>
</div><div class="fusion-text fusion-text-75 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-group="Python233" data-enlighter-title="Python" data-enlighter-theme="dracula">def select_next(...)
</pre>
</div><div class="fusion-text fusion-text-76 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1436" data-end="1464">whenever the node evaluates. To make the node visible, every Python file ends with a registration block:</p>
<div class="contain-inline-size rounded-2xl relative bg-token-sidebar-surface-primary">
<div class="sticky top-9">
<div class="absolute end-0 bottom-0 flex h-9 items-center pe-2">
<div class="bg-token-bg-elevated-secondary text-token-text-secondary flex items-center gap-4 rounded-sm px-2 font-sans text-xs"></div>
</div>
</div>
<div class="overflow-y-auto p-4" dir="ltr"></div>
</div>
</div><div class="fusion-text fusion-text-77 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-theme="dracula" data-enlighter-group="Python11" data-enlighter-title="Python">NODE_CLASS_MAPPINGS = {"SequentialImageSelector": SequentialImageSelector}
NODE_DISPLAY_NAME_MAPPINGS = {"SequentialImageSelector": "Sequential Image Loader"}
</pre>
</div><div class="fusion-text fusion-text-78 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1720" data-end="1992">When the package contains multiple nodes, the root <code data-start="1771" data-end="1784">__init__.py</code> merges all registration dictionaries into a single set that ComfyUI loads on startup. This mechanism allows the repository to expose several custom components while keeping each node defined in its own file.</p>
<p data-start="1994" data-end="2035">The repository layout is straightforward; in our case it is:</p>
</div><div class="fusion-text fusion-text-79 fusion-text-no-margin" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">ComfyUI/
  custom_nodes/
    ComfyUI-QwenEdit-Urbanism-by-UGA/
       __init__.py
       sequential_image_selector.py
       random_image_selector.py
       stateful_collector.py
       debug_print.py
</pre>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-31 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Integrating the Nodes into Your Workflow</h2></div><div class="fusion-text fusion-text-80 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1720" data-end="1992">Version 1.2 reorganizes the Qwen Image Edit for Urbanism workflow into two blocks: the original single-image editor, and a new sequential pipeline that can process up to six images across consecutive queue runs. The sequential block relies on two custom nodes. The <strong data-start="432" data-end="459">Sequential Image Loader</strong> takes up to six input images and outputs one image per run, advancing automatically each time you press “Queue Prompt.” Its output replaces the single-image input in the Qwen Edit chain. After editing, the processed image and the loader’s index are passed into the <strong data-start="725" data-end="750">Six-Slot Image Buffer</strong>, which stores each result in the corresponding output slot while filling unused slots with placeholders to keep previews stable. Connecting each slot to a Preview node lets you watch the six results populate as the workflow iterates.
A third node, the <strong data-start="1003" data-end="1028">Random Image Selector</strong>, is included for users who prefer stochastic selection, but it is not wired into the default v1.2 workflow.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-2 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-81" style="--awb-content-alignment:justify;"><p data-start="2471" data-end="2543">Integrating the sequential system introduces the following new connections</p>
<ol data-start="2545" data-end="2950">
<li data-start="2545" data-end="2665">
<p data-start="2548" data-end="2665">The outputs of the six Load Image nodes now feed into the Sequential Image Loader.</p>
</li>
<li data-start="2666" data-end="2782">
<p data-start="2669" data-end="2782">The <code data-start="2673" data-end="2689">selected_image</code> output of the loader replaces the single-image input of the Qwen Edit chain.</p>
</li>
<li data-start="2783" data-end="2950">
<p data-start="2786" data-end="2950">The processed image, along with the index from the loader, is routed into the Six-Slot Image Buffer. Each slot output is then connected to a dedicated Preview node.</p>
</li>
</ol>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-3 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-19 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit.png" class="fusion-lightbox" data-rel="iLightbox[af278c58f8650eb087b]" data-title="seqedit" title="seqedit"><img decoding="async" width="1024" height="932" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-1024x932.png" alt class="img-responsive wp-image-2146" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-200x182.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-400x364.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-600x546.png 600w, 
https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit-800x728.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/seqedit.png 1033w" sizes="(max-width: 640px) 100vw, 600px" /></a></span></div></div></div></div><div class="fusion-text fusion-text-82 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="1720" data-end="1992">The Random Image Selector follows the same logic as the sequential loader — multiple inputs, a single image output — but selects randomly instead of sequentially. Users who want stochastic variations, probabilistic sampling, or diversity testing may insert this node in place of the sequential loader.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-32 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. Experiments</h2></div><div class="fusion-text fusion-text-83 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="0" data-end="517">To evaluate how well the model can merge ecological elements across scenes, we ran an experiment where vegetation from one photograph is transplanted into another.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-4 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-5 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-6 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-84 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Base image</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-20 hover-type-none"><img decoding="async" width="400" height="266" title="image (19)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19-400x266.png" alt class="img-responsive wp-image-2168" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19-400x266.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-19.png 500w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-7 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-8 fusion_builder_column_inner_1_5 1_5 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:20%;--awb-margin-top-large:0px;--awb-spacing-right-large:9.6%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:9.6%;--awb-width-medium:20%;--awb-order-medium:0;--awb-spacing-right-medium:9.6%;--awb-spacing-left-medium:9.6%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-9 fusion_builder_column_inner_1_1 1_1 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:1.92%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:1.92%;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:1.92%;--awb-spacing-left-medium:1.92%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-85" style="--awb-content-alignment:center;"><p><strong>Prompt: </strong><em>Take all vegetation visible in image 2 — including trees, shrubs, bushes, ground plants, and any greenery — and incorporate them into the scene of image 1. Preserve the structure, lighting, and perspective of image 1 while integrating the vegetation so that it appears naturally placed and consistent with the environment.</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-10 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-86 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 1</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-21 hover-type-none"><img decoding="async" width="400" height="590" title="image (20)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20-400x590.png" alt class="img-responsive wp-image-2151" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20-200x295.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20-400x590.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-20.png 462w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-11 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-87 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 2</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-22 hover-type-none"><img decoding="async" width="400" height="691" title="image (22)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22-400x691.png" alt class="img-responsive wp-image-2153" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22-200x346.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22-400x691.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-22.png 434w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-12 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-88 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 3</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-23 hover-type-none"><img decoding="async" width="485" height="631" title="image (24)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24.png" alt class="img-responsive wp-image-2155" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24-200x260.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24-400x520.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-24.png 485w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-13 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-89 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 4</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-24 hover-type-none"><img decoding="async" width="500" height="750" title="image (23)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23.png" alt class="img-responsive wp-image-2154" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-23.png 500w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-14 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-90 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 5</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-25 hover-type-none"><img decoding="async" width="500" height="750" title="image (21)" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21.png" alt class="img-responsive wp-image-2152" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-21.png 500w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-15 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-91 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Reference image 6</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-26 hover-type-none"><img decoding="async" width="1523" height="2000" title="pexels-amaurymic-18189716" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-scaled.jpg" alt class="img-responsive wp-image-2156" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-200x263.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-400x525.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-600x788.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-800x1051.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-1200x1576.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-amaurymic-18189716-scaled.jpg 1523w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-16 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-92 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 1</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-27 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00058_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-400x267.png" alt class="img-responsive wp-image-2167" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00058_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-17 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-93 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 2</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-28 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00054_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-400x267.png" alt class="img-responsive wp-image-2161" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00054_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-18 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-94 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 3</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-29 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00057_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-400x267.png" alt class="img-responsive wp-image-2166" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-19 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-95 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 4</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-30 hover-type-none"><img decoding="async" width="400" height="267" title="._00001_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-400x267.png" alt class="img-responsive wp-image-2172" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/00001_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-20 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start 
fusion-content-layout-column"><div class="fusion-text fusion-text-96 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 5</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-31 hover-type-none"><img decoding="async" width="400" height="267" title="edit__00055_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-400x267.png" alt class="img-responsive wp-image-2162" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00055_.png 1248w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-21 fusion_builder_column_inner_1_6 1_6 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:16.666666666667%;--awb-margin-top-large:0px;--awb-spacing-right-large:11.52%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:11.52%;--awb-width-medium:16.666666666667%;--awb-order-medium:0;--awb-spacing-right-medium:11.52%;--awb-spacing-left-medium:11.52%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow 
fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-97 fusion-text-no-margin" style="--awb-content-alignment:center;--awb-margin-bottom:5px;"><p><em>Result 6</em></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-32 hover-type-none"><img decoding="async" width="400" height="265" title="edit__00057_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-400x265.png" alt class="img-responsive wp-image-2176" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-400x265.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1-600x398.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/edit__00057_-1.png 736w" sizes="(max-width: 640px) 100vw, 200px" /></span></div></div></div></div><div class="fusion-text fusion-text-98 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="0" data-end="517">After letting the workflow run its full sequence while grabbing a coffee, the results appeared consistent and correctly distributed across the six preview slots. As expected with generative editing, however, the prompt is not always obeyed with perfect precision: in some cases, Qwen may copy elements from the second image that are <em data-start="333" data-end="338">not</em> vegetation — such as pieces of façade, lighting color, or background tones. This happens because the model interprets the entire scene contextually rather than isolating objects.</p>
<p data-start="519" data-end="997" data-is-last-node="" data-is-only-node="">That’s where the <strong data-start="536" data-end="559">next upgrade (v1.3)</strong> comes in: <strong data-start="570" data-end="592">mask-based control</strong>. By allowing users to explicitly define which areas of the base image should be modified (and which should remain untouched), masks will significantly reduce unintended transfers and keep the edits focused strictly on the desired objects. Until then, the <strong data-start="839" data-end="847">seed</strong> parameter remains the best tool for refinement — simply rerun the workflow with new seeds until you achieve the cleanest integration.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-33 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-99 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:5px;"><p data-start="0" data-end="517">You can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph</strong> that we built in this post, <strong>Qwen Image Edit for Urbanism v1.2</strong>, from the link below or from our Git repository, and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-3 fusion-button-default-span fusion-button-default-type" target="_self" download="Qwen-Edit-UGA-v1.2.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/Qwen-Edit-UGA-v1.2-1.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.2</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.2</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-9 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-100"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--5" data-awb-toc-id="5" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-33 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/qwen-image-edit-for-urbanism-v1-2-custom-nodes-sequential-processing/">Qwen Image Edit for Urbanism v1.2 — Custom Nodes &#038; Sequential Processing</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.1 — Editing using a Reference Image and Advanced Sampling</title>
		<link>https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/</link>
					<comments>https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Wed, 12 Nov 2025 19:57:16 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=1962</guid>

					<description><![CDATA[<p>Qwen Image Edit for Urbanism v1.1 expands local AI editing in ComfyUI with advanced sampling and dual-image workflows. The new Lightning LoRA system improves realism, texture fidelity, and processing speed, enabling fast, privacy-preserving urban scene transformation—entirely offline.</p>
<p>The post <a href="https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/">Qwen Image Edit for Urbanism v1.1 — Editing using a Reference Image and Advanced Sampling</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-6 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-10 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-34 hover-type-none"><img decoding="async" width="1024" height="683" title="genai" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1-1024x683.png" alt class="img-responsive wp-image-2097" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1-300x200.png 300w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1-1024x683.png 1024w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c24db858-f2f8-4f90-b630-8c0c4386248c-1.png 1536w" sizes="(max-width: 1024px) 100vw, 1024px" /></span></div><div class="fusion-text fusion-text-101"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-102" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="182" data-end="206">Core Control Chain —</strong> Version 1.1 introduces the <em data-start="234" data-end="289">ModelSamplingAuraFlow → CFGNorm → LoraLoaderModelOnly</em> sequence, improving stability, texture realism, and prompt accuracy.</li>
<li><strong data-start="361" data-end="385">Dual-Image Editing —</strong> Combine two or more reference images in a single workflow to add objects, replace materials, or merge visual elements directly inside ComfyUI.</li>
<li><strong data-start="531" data-end="561">Faster and More Accurate —</strong> The new Lightning LoRA (4-step or 8-step) delivers sharper, cleaner results in under two minutes — with processing as low as 30 seconds on an RTX 4060 GPU.</li>
</ul>
</div><div class="fusion-text fusion-text-103 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1032" data-end="1481">In <strong><a class="decorated-link cursor-pointer keychainify-checked" href="https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/" target="_new" rel="noopener" data-start="1035" data-end="1170">the first part of this series</a></strong>, we built a <strong data-start="1183" data-end="1221">fully local image-editing pipeline</strong> for urban and architectural visualization using <strong data-start="1270" data-end="1281">ComfyUI</strong> and <strong data-start="1286" data-end="1305">Qwen-Image-Edit</strong>. That version (v1.0) demonstrated how to run generative image edits <strong data-start="1376" data-end="1396">entirely offline</strong>, combining text and visual prompts to transform cityscapes with instructions like:</p>
<blockquote data-start="1482" data-end="1563">
<p data-start="1484" data-end="1563">“Add trees along the sidewalk” or “Turn this street into a pedestrian plaza.”</p>
</blockquote>
<p data-start="1565" data-end="1756">We assume that you have followed that tutorial before diving into this new update. Now, with <strong data-start="1768" data-end="1783">version 1.1</strong>, we take that foundation further. This update focuses on <strong data-start="1843" data-end="1872">advanced sampling control</strong> and <strong data-start="1877" data-end="1900">multi-image editing</strong>, allowing you not only to modify a scene but also to merge visual elements across images — for instance, importing a bench from another photo, or changing a building façade to match a different material texture.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-34 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Advanced Sampling with a Core Control Chain</h2></div><div class="fusion-text fusion-text-104 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1472" data-end="1698">First, this update focuses on improving both <strong data-start="1510" data-end="1537">quality and flexibility</strong>. The base structure still uses the Qwen-Image-Edit 2509 model in GGUF format, but adds a <em data-start="1629" data-end="1654">refined sampling module</em> to stabilize lighting and surface detail.</p>
<p data-start="1700" data-end="1722">The key new nodes are:</p>
<ul data-start="1724" data-end="2053">
<li data-start="1724" data-end="1818">
<p data-start="1726" data-end="1818"><strong data-start="1726" data-end="1751">ModelSamplingAuraFlow</strong> — smooths the diffusion trajectory for more natural transitions.</p>
</li>
<li data-start="1819" data-end="1921">
<p data-start="1821" data-end="1921"><strong data-start="1821" data-end="1839">CFGNorm (BETA)</strong> — balances prompt adherence with photorealism, preventing overexposed textures.</p>
</li>
<li data-start="1922" data-end="2053">
<p data-start="1924" data-end="2053"><strong data-start="1924" data-end="1947">LoraLoaderModelOnly</strong> — injects a <em data-start="1960" data-end="1971">Lightning</em> LoRA (4-step or 8-step) for faster inference and higher-quality reconstruction.</p>
</li>
</ul>
<p data-start="2055" data-end="2120">These three nodes form the <em data-start="2082" data-end="2102">core control chain</em> of version 1.1:</p>
<div class="contain-inline-size rounded-2xl relative bg-token-sidebar-surface-primary">
<div class="overflow-y-auto p-4" dir="ltr"><code class="whitespace-pre!">ModelSamplingAuraFlow → <span class="hljs-built_in">CFGNorm</span> → LoraLoaderModelOnly<br />
</code></div>
</div>
<p data-start="2185" data-end="2433">This configuration produces more stable, consistent outputs while preserving prompt flexibility. It also enables <strong data-start="2298" data-end="2389">fine-tuning of how the model interprets text instructions versus existing image content</strong>—ideal for architectural and material edits. Before connecting the new nodes, you’ll first need to <strong data-start="214" data-end="249">download a Lightning LoRA model</strong> — an additional lightweight module that enhances reconstruction quality and speeds up inference.</p>
<p data-start="350" data-end="527">You can find all Lightning variants here:<br data-start="391" data-end="394" />🔗 <a class="decorated-link keychainify-checked" href="https://huggingface.co/lightx2v/Qwen-Image-Lightning/tree/main" target="_new" rel="noopener" data-start="397" data-end="525">https://huggingface.co/lightx2v/Qwen-Image-Lightning/tree/main</a></p>
<p data-start="529" data-end="607">Refer to the table below to choose the most appropriate file for your setup:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Goal</th>
<th align="left">Recommended File</th>
<th align="left">Notes</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Fast prototyping</td>
<td align="left">Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors</td>
<td align="left">Best speed/quality trade-off; ideal for quick previews and design iterations.</td>
</tr>
<tr>
<td align="left">Detailed scenes / architecture</td>
<td align="left">Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors</td>
<td align="left">Produces sharper edges, richer contrast, and more defined materials.</td>
</tr>
<tr>
<td align="left">Low VRAM system (≤ 8 GB)</td>
<td align="left">Qwen-Image-fp8-e4m3fn-Lightning-4steps-V1.0-bf16.safetensors</td>
<td align="left">Lightweight version with minimal memory usage and acceptable realism.</td>
</tr>
<tr>
<td align="left">High-end / CPU use</td>
<td align="left">Qwen-Image-fp8-e4m3fn-Lightning-4steps-V1.0-fp32.safetensors</td>
<td align="left">Maximum numerical precision; slower but most stable for benchmarking.</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-105 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1472" data-end="1698">Once downloaded, place your chosen <code data-start="1384" data-end="1398">.safetensors</code> file in the following directory:</p>
</div><div class="fusion-text fusion-text-106 fusion-text-no-margin" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="generic" data-enlighter-linenumbers="false">ComfyUI/models/loras/</pre>
</div><div class="fusion-text fusion-text-107 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="1472" data-end="1698">Then, return to ComfyUI and insert the <strong data-start="1504" data-end="1519">three nodes</strong> shown below:</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-35" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-35 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c.png" class="fusion-lightbox" data-rel="iLightbox[a7c7ece49f736841385]"><img decoding="async" width="1456" height="258" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c.png" alt class="img-responsive wp-image-1973" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-200x35.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-400x71.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-600x106.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-800x142.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c-1200x213.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/2f37176f-612b-4a8e-be50-b7583bb3240c.png 1456w" sizes="(max-width: 640px) 100vw, 1200px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">If you’re starting from 
the v1.0 graph, connect them sequentially as shown: GGUF Loader → ModelSamplingAuraFlow → CFGNorm → LoraLoaderModelOnly → KSampler</div></div></div></div><div class="fusion-text fusion-text-108 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="3478" data-end="3543">The new sampling nodes add subtle but powerful control options:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Node</th>
<th align="left">Parameter</th>
<th align="left">Description</th>
<th align="left">Recommended Range</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">ModelSamplingAuraFlow</td>
<td align="left">shift</td>
<td align="left">Controls how strongly the model moves through latent space during denoising. Higher = stronger edits.</td>
<td align="left">1.2 – 1.8</td>
</tr>
<tr>
<td align="left">CFGNorm</td>
<td align="left">strength</td>
<td align="left">Normalizes prompt adherence to maintain texture balance. Lower = more literal edits, higher = softer realism.</td>
<td align="left">0.8 – 1.2</td>
</tr>
<tr>
<td align="left">LoraLoaderModelOnly</td>
<td align="left">strength_model</td>
<td align="left">Defines how much the LoRA (Lightning) modifies the base model. 1.0 = full effect.</td>
<td align="left">0.8 – 1.0</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-35 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Dual-Image Editing: Adding Objects and Modifying Materials</h2></div><div class="fusion-text fusion-text-109 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="228" data-end="546">Version <strong data-start="236" data-end="243">1.1</strong> introduces a new input configuration that allows <strong data-start="293" data-end="343">two images to be used within the same workflow</strong>. This enhancement enables contextual or compositional edits where one image serves as the main canvas, and the other contributes visual information such as an object, texture, or architectural detail.</p>
<p data-start="548" data-end="899">In this setup, <strong data-start="563" data-end="597">Image 1 remains the base image</strong>. Its <strong data-start="605" data-end="642">dimensions define the output size</strong>, ensuring consistent framing and spatial coherence. The <strong data-start="699" data-end="725">second image (Image 2)</strong>, on the other hand, is <strong data-start="749" data-end="774">resized</strong> during processing, but only to prevent memory overload—particularly important for mid-range GPUs.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-22 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-110" style="--awb-content-alignment:justify;"><p data-start="102" data-end="421">This example shows how to extend the ComfyUI workflow to include <strong data-start="236" data-end="268">one or more secondary images</strong>. In the node <em data-start="282" data-end="311">TextEncodeQwenImageEditPlus</em>, you can now connect up to <strong data-start="339" data-end="386">three image inputs (image1, image2, image3)</strong> in addition to your text prompt.</p>
<p data-start="423" data-end="692">In this tutorial, we’ll only use <strong data-start="456" data-end="480">one additional image</strong> — for example, inserting a red car (<em data-start="517" data-end="525">image2</em>) into the street scene of <em data-start="552" data-end="560">image1</em>. However, the same structure allows you to use a third auxiliary image to modify materials, lighting, or other objects.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-23 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-36 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-1024x566.png" class="fusion-lightbox" data-rel="iLightbox[a80c6c59665c090e393]" data-title="genai" title="genai"><img decoding="async" width="1024" height="566" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-1024x566.png" alt class="img-responsive wp-image-2124" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-200x111.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-400x221.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-600x332.png 600w, 
https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-800x442.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai-1200x663.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/genai.png 1824w" sizes="(max-width: 640px) 100vw, 600px" /></a></span></div></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-36 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Experimentation with Multi-Image Conditioning</h2></div><div class="fusion-text fusion-text-111 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516">As shown in the examples below, you can combine a <strong data-start="396" data-end="410">base image</strong> (<em data-start="412" data-end="421">image 1</em>) with up to <strong data-start="434" data-end="459">two additional inputs</strong> (<em data-start="461" data-end="479">image 2, image 3</em>) to guide the edit more precisely. In this tutorial, we focus on using <strong data-start="554" data-end="578">one additional image</strong> — for instance, adding an object or transferring a material. 
In the first example, <em data-start="666" data-end="675">image 2</em> (the red car) is inserted into <em data-start="707" data-end="716">image 1</em> using the prompt: <em data-start="735" data-end="786">“add image 2 red car into the street of image 1.”</em> The second case changes the wall material of <em data-start="836" data-end="845">image 1</em> based on the texture of <em data-start="870" data-end="879">image 2</em> (a brick wall). Finally, the third example adds a bench into an urban scene using <em data-start="966" data-end="975">image 2</em> as the visual model reference.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-24 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-112 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>Base image 1</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-37 hover-type-none"><img decoding="async" width="1333" height="2000" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-scaled.jpg" alt class="img-responsive wp-image-1917" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-200x300.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-400x600.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-600x900.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-800x1200.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-1200x1800.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-scaled.jpg 1333w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-25 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-113 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>image 2</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-38 hover-type-none"><img decoding="async" width="2000" height="1281" title="red car" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-scaled.jpg" alt class="img-responsive wp-image-1990" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-200x128.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-400x256.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-600x384.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-800x512.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-1200x769.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-ahmad-ramadan-36559-131811-scaled.jpg 2000w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-26 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-114" style="--awb-content-alignment:center;--awb-margin-top:10px;"><p><em>Prompt: add image 2 red car into the street of image 1</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-27 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-115 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p><strong>Result</strong></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-39 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_.png" alt class="img-responsive wp-image-1991" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00449_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-28 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-116 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>Base image 1</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-40 hover-type-none"><img decoding="async" width="1500" height="2000" title="pexels-annavitoria-martinssousa-647627036-34627713" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-scaled.jpg" alt class="img-responsive wp-image-2000" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-200x267.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-400x533.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-600x800.jpg 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-800x1067.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-1200x1600.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-annavitoria-martinssousa-647627036-34627713-scaled.jpg 1500w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-29 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-117 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>image 2</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-41 hover-type-none"><img decoding="async" width="186" height="188" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/wall2.png" alt class="img-responsive wp-image-2003"/></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-30 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-118" style="--awb-content-alignment:center;--awb-margin-top:10px;"><p><em>Prompt: changes the walls of the house in image 1 by the brick wall material of image 2</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-31 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-119 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p><strong>Result</strong></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-42 hover-type-none"><img decoding="async" width="880" height="1176" title="ComfyUI_00453_" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_.png" alt class="img-responsive wp-image-2004" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-200x267.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-400x535.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-600x802.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_-800x1069.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00453_.png 880w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-32 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" 
style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-120 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>Base image 1</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-43 hover-type-none"><img decoding="async" width="500" height="750" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41.png" alt class="img-responsive wp-image-2009" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-41.png 500w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-33 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-121 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p>image 2</p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-44 hover-type-none"><img decoding="async" width="610" height="397" title="bench" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench.png" alt class="img-responsive wp-image-2010" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench-200x130.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench-400x260.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench-600x390.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/bench.png 610w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-34 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-122" style="--awb-content-alignment:center;--awb-margin-top:10px;"><p><em>Prompt: add a bench in image 1 using the bench model of image 2</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-35 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-123 fusion-text-no-margin" style="--awb-margin-bottom:-6px;"><p><strong>Result</strong></p>
</div><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-45 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_.png" alt class="img-responsive wp-image-2011" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00456_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-text fusion-text-124 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516">Each output remains consistent in perspective and lighting, showing that the model now integrates context more effectively. The improved accuracy comes from the <strong data-start="1167" data-end="1194">two cumulative upgrades</strong> introduced in v1.1: the <b>new core control chain </b>and the <strong>Dual-Image Editing. </strong>Despite the added complexity, the workflow remains extremely fast. 
Even when using the 8-step Lightning model, processing time never exceeds 130 seconds, while the 4-step variant typically completes in about 30-40 seconds on an RTX 4060 GPU. In the next update, we’ll introduce <strong data-start="1692" data-end="1724">inpainting with mask support</strong>, allowing users to define editable regions directly within the image — ideal for selective urban design modifications.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-37 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. To Go Further</h2></div><div class="fusion-text fusion-text-125 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516"><strong data-start="5132" data-end="5177">Lightning LoRA models:</strong> <a class="keychainify-checked" href="https://huggingface.co/lightx2v/Qwen-Image-Lightning">https://huggingface.co/lightx2v/Qwen-Image-Lightning</a></p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-38 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-126 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="170" data-end="516">Once again, for convenience, you can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph</strong> built in this post (<strong>Qwen Image Edit For Urbanism v1.1</strong>) from the link below and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-4 fusion-button-default-span fusion-button-default-type" target="_self" download="Qwen-Edit-UGA-v1.1.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/Qwen-Edit-UGA-v1.1.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.1</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.1</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-11 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-127"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--6" data-awb-toc-id="6" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-46 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/">Qwen Image Edit for Urbanism v1.1 — Editing using a Reference Image and Advanced Sampling</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/local-ai-image-editing-for-urbanism-v1-1/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Qwen Image Edit for Urbanism v1.0 — Building a Qwen Pipeline in ComfyUI</title>
		<link>https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/</link>
					<comments>https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Sun, 09 Nov 2025 18:51:15 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Diffusion Models]]></category>
		<category><![CDATA[Urbanism]]></category>
		<category><![CDATA[ComfyUI]]></category>
		<category><![CDATA[image editing]]></category>
		<category><![CDATA[Qwen]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=1888</guid>

					<description><![CDATA[<p>Learn how to build a fully local AI image-editing workflow for urbanism and architectural visualization using ComfyUI and Qwen-Image-Edit. This step-by-step guide runs entirely offline with GGUF models, providing fast, private, and realistic visual edits.</p>
<p>The post <a href="https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/">Qwen Image Edit for Urbanism v1.0 — Building a Qwen Pipeline in ComfyUI</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-7 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-12 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-47 hover-type-none"><img decoding="async" width="1536" height="1024" title="genai" 
src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-13_20_24.png" alt class="img-responsive wp-image-2098" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-13_20_24-300x200.png 300w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ChatGPT-Image-13-nov.-2025-13_20_24.png 1536w" sizes="(max-width: 1536px) 100vw, 1536px" /></span></div><div class="fusion-text fusion-text-128"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-129" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="109" data-end="132">Offline and Private</strong> — Runs entirely on your machine with no cloud or API dependencies, ideal for urbanism and architectural workflows.</li>
<li><strong data-start="251" data-end="275">Lightweight and Fast</strong> — The GGUF format keeps Qwen-Image-Edit efficient, producing realistic edits in under two minutes.</li>
<li><strong data-start="378" data-end="401">Full Visual Control</strong> — Adjust CFG, denoise, and steps to produce anything from subtle tweaks to bold scene changes.</li>
</ul>
</div><div class="fusion-text fusion-text-130 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="831" data-end="1377">Generative AI offers new possibilities for urbanism and architectural visualization. However, most workflows depend on cloud models, paid APIs, or GPU-heavy setups. Instead, this guide shows how to build a fully local image-editing pipeline using <em data-start="1305" data-end="1314">ComfyUI</em> and <em data-start="1319" data-end="1336">Qwen-Image-Edit</em>, a multimodal model that interprets both text and images. Moreover, the quantized GGUF version makes it light enough to run efficiently on GPUs with 8 GB VRAM—or even on CPU-only machines.</p>
<p data-start="974" data-end="1086">With this setup in place, you can make realistic, controllable visual edits to urban scenes directly from text prompts such as:</p>
<blockquote data-start="1088" data-end="1240">
<p data-start="1090" data-end="1240">“Add trees and benches along the sidewalk”<br data-start="1132" data-end="1135" />“Change this building to have shops on the ground floor”<br data-start="1193" data-end="1196" />“Replace the cars with a pedestrian plaza”</p>
</blockquote>
<p data-start="1140" data-end="1184">This guide is organized in three main parts: first, installing and preparing the models; second, building the ComfyUI workflow; and finally, experimenting with parameters such as CFG, denoise, and steps to refine quality.</p>
<p data-start="1423" data-end="1452">Let’s start with the setup.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-39 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">1. Installing Everything and Preparing the Models (Local Setup)</h2></div><div class="fusion-text fusion-text-131 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Before we dive into editing cityscapes and architectural images, let’s prepare a fully local environment. To begin with, this workflow runs entirely offline — no API keys, no cloud GPU, and no data uploads. As a result, you can experiment freely without any external dependencies.</p>
<p data-start="233" data-end="264"><strong>Step 1 — Installing ComfyUI</strong></p>
<p>ComfyUI is a <strong data-start="279" data-end="306">visual AI workflow tool</strong> that lets you build image-generation and editing pipelines by connecting nodes instead of writing code. Think of it as a visual editor for generative models — you drag boxes, connect them with lines, and watch your AI process unfold step by step. It’s extremely powerful for <strong data-start="586" data-end="644">image editing, concept design, and urban visualization</strong>, because you can control every part of the process: model loading, prompt conditioning, sampling, decoding, and saving. This makes it ideal for architects, urbanists, and researchers seeking privacy and control. On first launch, an empty grid appears as your workspace, ready to be filled with interconnected nodes for models, images, and prompts.</p>
<p data-start="64" data-end="107">ComfyUI can be installed in two ways:</p>
<ul data-start="108" data-end="540">
<li data-start="108" data-end="335">
<p data-start="110" data-end="335"><strong data-start="110" data-end="133">Standalone version:</strong> <a class="keychainify-checked" href="https://comfy.org">downloaded from the official website</a> and installed like a regular application. This version already includes the <strong data-start="247" data-end="263">Node Manager</strong>, so you can install missing custom nodes directly from the interface.</p>
</li>
<li data-start="336" data-end="540">
<p data-start="338" data-end="540"><strong data-start="338" data-end="354">Git version:</strong> installed by cloning the repository (e.g., for advanced users or custom setups). In this case, <strong data-start="450" data-end="496">you must install the Node Manager manually</strong>, because it is <strong data-start="512" data-end="528">not included</strong> by default.</p>
</li>
</ul>
<p>Next, open the <strong data-start="67" data-end="111">v1.0 workflow for Qwen Edit for Urbanism</strong>. You can find it at the end of the post or in our <a class="keychainify-checked" href="https://github.com/perezjoan/ComfyUI-QwenEdit-Urbanism-by-UGA">Git repository</a>. When the workflow loads, the Node Manager will automatically prompt you to install any missing custom nodes. Approve the installations, then close the workflow.</p>
<p><strong>Step 2 — Add GGUF and Qwen Image Edit Support</strong></p>
<p data-start="0" data-end="230"><em data-start="4694" data-end="4711">Qwen-Image-Edit</em> is a multimodal AI model capable of understanding both text and images, allowing image edits through natural language. The GGUF format makes it compact and memory-efficient, enabling faster processing on modest hardware.</p>
<p>Go to the <strong data-start="507" data-end="523">ComfyUI-GGUF</strong> and <strong>Qwen Edit Utils</strong> project pages:<br data-start="537" data-end="540" /><a class="decorated-link keychainify-checked" href="https://github.com/city96/ComfyUI-GGUF" target="_blank" rel="noopener" data-start="540" data-end="620">https://github.com/city96/ComfyUI-GGUF</a><br />
<a class="decorated-link keychainify-checked" href="https://github.com/lrzjason/Comfyui-QwenEditUtils" target="_blank" rel="noopener" data-start="1119" data-end="1221">https://github.com/lrzjason/Comfyui-QwenEditUtils</a></p>
<p data-start="622" data-end="794">Clone the repositories, or click the green <strong data-start="145" data-end="153">Code</strong> button and choose <strong data-start="172" data-end="190">“Download ZIP”</strong> for both projects. After downloading, unzip each folder and place it directly into your <code data-start="279" data-end="293">custom_nodes</code> directory—make sure the unzipped files are not nested inside an extra subfolder.</p>
</div><div class="fusion-text fusion-text-132 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="bash">ComfyUI/custom_nodes/</pre>
</div><div class="fusion-text fusion-text-133 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="233" data-end="264"><strong>Step 3 — Download the Required Models</strong></p>
<p data-start="233" data-end="264">You’ll need three model files, each serving a different purpose in the image-editing process. The first one is the <strong data-start="338" data-end="352">core model</strong> that performs the actual visual transformation based on your text prompt.</p>
<p data-start="233" data-end="264">You can find it here:</p>
<p data-start="233" data-end="264">👉 <a class="keychainify-checked" href="https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main">https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main</a></p>
<p data-start="630" data-end="822">On that page, you’ll see many versions of the same model — Q2, Q3, Q4, Q5, etc. These are <strong data-start="722" data-end="744">quantized variants</strong>, meaning they trade a little precision for higher speed and lower VRAM usage. Here’s a quick guide to help you pick the right one:</p>
</div>
<div class="table-1">
<table width="100%">
<thead>
<tr>
<th align="left">Model File</th>
<th align="left">Speed</th>
<th align="left">Image Quality</th>
<th align="left">Recommended For</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Qwen-Image-Edit-2509-Q2_K.gguf</td>
<td align="left">Very fast</td>
<td align="left">Low</td>
<td align="left">Very low-end PCs (not recommended)</td>
</tr>
<tr>
<td align="left">Qwen-Image-Edit-2509-Q3_K_M.gguf</td>
<td align="left">Fast</td>
<td align="left">Moderate</td>
<td align="left">CPU-only users needing speed</td>
</tr>
<tr>
<td align="left">Qwen-Image-Edit-2509-Q4_K_S.gguf</td>
<td align="left">Medium</td>
<td align="left">Good</td>
<td align="left">Mid-range GPUs (6–8 GB VRAM)</td>
</tr>
<tr>
<td align="left">Qwen-Image-Edit-2509-Q5_K_S.gguf</td>
<td align="left">Medium</td>
<td align="left">Excellent</td>
<td align="left">Recommended for most users</td>
</tr>
<tr>
<td align="left">Qwen-Image-Edit-2509-Q6_K_S.gguf</td>
<td align="left">Slow</td>
<td align="left">Highest</td>
<td align="left">High-end GPUs with 16+ GB VRAM</td>
</tr>
<tr>
<td align="left">Qwen-Image-Edit-2509-Q8_0.gguf</td>
<td align="left">Very slow</td>
<td align="left">Best</td>
<td align="left">Only for testing near-full precision</td>
</tr>
</tbody>
</table>
</div>
<div class="fusion-text fusion-text-134 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="209" data-end="401">The Qwen-Image-Edit model also needs two smaller files to work correctly — one for understanding your text prompts (CLIP) and one for turning the generated data back into a real image (VAE). You can download them from the official Comfy-Org repository on Hugging Face:</p>
<ul data-start="484" data-end="1057">
<li data-start="484" data-end="812">
<p data-start="486" data-end="693"><strong data-start="486" data-end="510">Text Encoder (CLIP):</strong><br data-start="510" data-end="513" /><a class="decorated-link keychainify-checked" href="https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders" target="_new" rel="noopener" data-start="515" data-end="693">https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders</a></p>
<p data-start="697" data-end="812">On that page, pick:<br data-start="716" data-end="719" />👉 <strong data-start="724" data-end="767"><code data-start="726" data-end="765">qwen_2.5_vl_7b_fp8_scaled.safetensors</code></strong> (about 9 GB — faster and works well locally)</p>
</li>
<li data-start="814" data-end="1057">
<p data-start="816" data-end="995"><strong data-start="816" data-end="832">VAE Decoder:</strong><br data-start="832" data-end="835" /><a class="decorated-link keychainify-checked" href="https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae" target="_new" rel="noopener" data-start="837" data-end="995">https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae</a></p>
<p data-start="999" data-end="1057">Download the file:<br data-start="1017" data-end="1020" />👉 <strong data-start="1025" data-end="1057"><code data-start="1027" data-end="1055">qwen_image_vae.safetensors</code></strong></p>
</li>
</ul>
<p data-start="1059" data-end="1090">Once downloaded, place all three models in:</p>
</div><div class="fusion-text fusion-text-135 fusion-text-no-margin" style="--awb-margin-top:5px;--awb-margin-bottom:5px;"><pre class="EnlighterJSRAW" data-enlighter-language="css">ComfyUI/
 └── models/
      ├── gguf/
      │    └── Qwen-Image-Edit-2509-Q5_K_S.gguf
      ├── clip/
      │    └── qwen_2.5_vl_7b_fp8_scaled.safetensors
      └── vae/
           └── qwen_image_vae.safetensors
</pre>
</div><div class="fusion-text fusion-text-136 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="2115" data-end="2139"><strong>Verifying Your Setup</strong></p>
<p data-start="2141" data-end="2234">Open ComfyUI, right-click anywhere in the workspace or search in the node library, and check that you can find these nodes:</p>
<ul data-start="2235" data-end="2364">
<li style="list-style-type: none;" data-start="2235" data-end="2286">
<ul>
<li data-start="316" data-end="393"><strong data-start="318" data-end="333">GGUF Loader</strong> – for loading the main Qwen-Image-Edit model (.gguf file)</li>
<li data-start="394" data-end="454"><strong data-start="396" data-end="416">Load CLIP</strong> – for loading the CLIP text encoder</li>
<li data-start="455" data-end="508"><strong data-start="457" data-end="476">Load VAE</strong> – for loading the VAE decoder</li>
<li data-start="509" data-end="592"><strong data-start="511" data-end="551">TextEncodeQwenImageEditPlus</strong> – for connecting text prompts and images</li>
</ul>
</li>
</ul>
<p data-start="2366" data-end="2466">If they appear and can detect your downloaded files, congratulations 🎉 — your local setup is ready!</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-40 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">2. Building the Qwen Image Editing Pipeline in ComfyUI</h2></div><div class="fusion-text fusion-text-137 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="291" data-end="562">Now that all the models are downloaded and organized, <strong data-start="204" data-end="250">you can start assembling the full workflow</strong> that makes <em data-start="262" data-end="279">Qwen-Image-Edit</em> operate inside ComfyUI.<br data-start="303" data-end="306" />At this stage, the goal is to connect every component — the GGUF model, the text encoder (CLIP), the VAE decoder, the input image, and the text prompt — into a single functional chain. Once everything is linked, you’ll be able to type instructions such as <em data-start="572" data-end="605">“add shops on the ground floor”</em> or <em data-start="609" data-end="652">“turn this street into a pedestrian zone”</em> and watch ComfyUI generate updated images automatically.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-36 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-138" style="--awb-content-alignment:justify;"><p>Right-click anywhere in the ComfyUI workspace and search for these three nodes:</p>
<ol data-start="860" data-end="1070">
<li data-start="860" data-end="919"><strong data-start="863" data-end="878">GGUF Loader</strong> – loads the main Qwen-Image-Edit model</li>
<li data-start="920" data-end="1003"><strong data-start="923" data-end="936">Load CLIP</strong> – loads the <code data-start="949" data-end="988">qwen_2.5_vl_7b_fp8_scaled.safetensors</code> text encoder</li>
<li data-start="1004" data-end="1070"><strong data-start="1007" data-end="1019">Load VAE</strong> – loads the <code data-start="1032" data-end="1060">qwen_image_vae.safetensors</code> decoder</li>
</ol>
<p>Your setup should look like the figure on the right, with the same parameters.</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-37 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-48 hover-type-none"><img decoding="async" width="400" height="468" title="GGUF ComfyUI" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/sss-400x468.png" alt class="img-responsive wp-image-1897" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/sss-200x234.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/sss-400x468.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/sss.png 437w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div><div class="fusion-text fusion-text-139 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="237" 
data-end="288"><strong>Connecting the Model to Your Images and Prompts</strong></p>
<p data-start="290" data-end="543">Now that your model nodes are ready, it’s time to make them work together. You’ll connect your <strong data-start="385" data-end="400">input image</strong>, <strong data-start="402" data-end="412">prompt</strong>, and the <strong data-start="422" data-end="441">Qwen-Image-Edit</strong> conditioning node so that ComfyUI can understand your instruction and modify the picture accordingly.</p>
<p data-start="545" data-end="578">Right-click again and search for:</p>
<ul data-start="580" data-end="842">
<li data-start="580" data-end="682"><strong data-start="582" data-end="596">Load Image</strong> – this node will import your input image (for example, a street or building photo).</li>
<li data-start="683" data-end="842"><strong data-start="685" data-end="725">TextEncodeQwenImageEditPlus</strong> – this node combines your image with your text prompt.</li>
</ul>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-38 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-140" style="--awb-content-alignment:justify;"><p data-start="844" data-end="874">Connect the elements like this:</p>
<ul>
<li data-start="878" data-end="980">The <strong data-start="899" data-end="907">CLIP</strong> and <strong data-start="913" data-end="920">VAE</strong> nodes feed into <strong data-start="937" data-end="977">TextEncodeQwenImageEditPlus</strong></li>
<li data-start="878" data-end="980">The <strong data-start="988" data-end="1002">Load Image</strong> node connects its output image into the same <strong data-start="1048" data-end="1079">TextEncodeQwenImageEditPlus</strong> node</li>
</ul>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-39 fusion_builder_column_inner_1_2 1_2 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:25px;--awb-spacing-right-large:3.84%;--awb-margin-bottom-large:25px;--awb-spacing-left-large:3.84%;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:3.84%;--awb-spacing-left-medium:3.84%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-49 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1.png" class="fusion-lightbox" data-rel="iLightbox[e4ef0f810e96541f938]" data-title="comfy" title="comfy"><img decoding="async" width="600" height="359" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1-600x359.png" alt class="img-responsive wp-image-1906" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1-200x120.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1-400x239.png 400w, 
https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1-600x359.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1-800x479.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1-1200x718.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/c9e67b8e-0b15-4822-872a-13ffb222b8c1.png 1246w" sizes="(max-width: 640px) 100vw, 600px" /></a></span></div></div></div></div><div class="fusion-text fusion-text-141 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="235" data-end="282"><strong>Adding the KSampler and the Other Required Elements</strong></p>
<p data-start="284" data-end="467">At this point, your setup has the model, encoders, and the prompt connection ready.<br data-start="367" data-end="370" />Now we’ll add the <strong data-start="388" data-end="400">KSampler</strong>, which is the part that actually produces your final edited image, and all the other required elements.</p>
<p>Right-click again and search for:</p>
<ul data-start="504" data-end="821">
<li data-start="504" data-end="654"><strong data-start="506" data-end="518">KSampler</strong> – this is ComfyUI’s main diffusion sampler. It takes in a latent (from EmptySD3LatentImage) and conditioning (from TextEncodeQwenImageEditPlus) to generate a new image.</li>
<li data-start="585" data-end="708"><strong data-start="587" data-end="618">Scale Image to Total Pixels</strong> – automatically resizes your image to a manageable resolution (set megapixels to 1.00).</li>
<li data-start="709" data-end="783"><strong data-start="711" data-end="729">Get Image Size</strong> – extracts width and height from the resized image.</li>
<li data-start="784" data-end="909"><strong data-start="786" data-end="809">EmptySD3LatentImage</strong> – creates an empty latent space matching the image dimensions, used as the canvas for generation.</li>
<li data-start="910" data-end="993"><strong data-start="912" data-end="926">VAE Decode</strong> – converts the generated latent output back into a normal image.</li>
<li data-start="910" data-end="993"><strong data-start="996" data-end="1010">Save Image</strong> – saves your result to the ComfyUI output folder.</li>
</ul>
<p data-start="315" data-end="389">Your connections should look like this (check the figure for reference). Start by linking the <strong data-start="412" data-end="426">Load Image</strong> node to <strong data-start="435" data-end="466">Scale Image to Total Pixels</strong>, and then feed its output into <strong data-start="498" data-end="516">Get Image Size</strong>. Next, connect the <strong data-start="538" data-end="547">width</strong> and <strong data-start="552" data-end="562">height</strong> outputs from <strong data-start="576" data-end="594" data-is-only-node="">Get Image Size</strong> to <strong data-start="598" data-end="621">EmptySD3LatentImage</strong>, creating the correct latent dimensions. From there, route the <strong data-start="687" data-end="704">latent output</strong> of <strong data-start="708" data-end="731">EmptySD3LatentImage</strong> into the <strong data-start="741" data-end="753">KSampler</strong>. In parallel, connect the <strong data-start="782" data-end="797">model input</strong> of <strong data-start="801" data-end="813">KSampler</strong> to the <strong data-start="821" data-end="836">GGUF Loader</strong>, and feed the <strong data-start="851" data-end="867">conditioning</strong> from <strong data-start="873" data-end="904">TextEncodeQwenImageEditPlus</strong>. Finally, send the <strong data-start="926" data-end="945">KSampler output</strong> into <strong data-start="951" data-end="965">VAE Decode</strong>, and link that to <strong data-start="984" data-end="998">Save Image</strong> to generate and store your final result.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-50" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-50 hover-type-none"><a href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image.png" class="fusion-lightbox" data-rel="iLightbox[94e5b7d4146608a5a52]"><img decoding="async" width="1521" height="774" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image.png" alt class="img-responsive wp-image-1911" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-200x102.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-400x204.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-600x305.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-800x407.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image-1200x611.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/image.png 1521w" sizes="(max-width: 640px) 100vw, 1521px" /></a></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title">Full Qwen-Image-Edit Pipeline in ComfyUI</div></div></div></div><div class="fusion-text fusion-text-142 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="235" data-end="282">Once these elements 
are connected, your workflow becomes fully functional. The scaled image is analyzed, encoded, transformed in the latent space according to your text prompt, and then decoded back into a visible edited image — all processed locally by your computer.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-41 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">3. Experimentation and Parameter Exploration</h2></div><div class="fusion-text fusion-text-143 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="232" data-end="541">The core of the image-editing process lies in the <strong data-start="282" data-end="294">KSampler</strong> node, where all generative inference occurs. Inside this component, several parameters control the quality, variability, and precision of the output. <strong data-start="445" data-end="475">Understanding their effect</strong> is essential for achieving consistent and reproducible results.</p>
<p data-start="543" data-end="1068">To begin with, the <em data-start="566" data-end="572">seed</em> parameter defines the random starting point of the diffusion process. Using the same seed with identical settings reproduces the same output, which helps when comparing the influence of other parameters. By contrast, changing the seed introduces controlled randomness, often generating new interpretations of the same instruction. Meanwhile, the <em data-start="923" data-end="947">control after generate</em> option—set to <em data-start="962" data-end="973">increment</em>—ensures each new image uses a slightly different seed, producing unique but related results.</p>
<p data-start="1070" data-end="1499">Next, the <em data-start="1084" data-end="1091">steps</em> value determines how many refinement iterations the sampler performs. Lower values such as five create quick previews and coarse adjustments, whereas higher counts (for instance, twenty or thirty) yield smoother and more detailed outcomes at the cost of longer processing time. In practice, Qwen-Image-Edit performs well even with fewer steps, since it relies heavily on prompt and image conditioning.</p>
<p data-start="1501" data-end="1831">Similarly, the <em data-start="1520" data-end="1525">cfg</em> parameter (classifier-free guidance) controls how closely the output follows the text prompt. A low value around 1.0 keeps changes subtle, while higher values push the model toward stronger, more literal transformations. Balancing this setting helps maintain realism without losing creative control.</p>
<p data-start="1833" data-end="2094">As for the <em data-start="1848" data-end="1862">sampler_name</em> and <em data-start="1867" data-end="1878">scheduler</em>, they define how noise is reduced during diffusion. The <em data-start="1935" data-end="1942">Euler</em> sampler offers an efficient trade-off between speed and visual quality, and the <em data-start="2023" data-end="2031">simple</em> scheduler keeps results stable across seeds and image sizes.</p>
<p data-start="2096" data-end="2465">Finally, the <em data-start="2113" data-end="2122">denoise</em> value adjusts how strongly the latent image is modified. A setting of 1.0 applies a full transformation, producing bold edits, whereas smaller values retain more of the original features for subtle modifications. This parameter directly shapes the intensity of your edit—from a light retouch to a complete visual overhaul.</p>
</div><div class="fusion-builder-row fusion-builder-row-inner fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="width:104% !important;max-width:104% !important;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-40 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-51 hover-type-none"><img decoding="async" width="1333" height="2000" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-scaled.jpg" alt class="img-responsive wp-image-1917" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-200x300.jpg 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-400x600.jpg 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-600x900.jpg 600w, 
https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-800x1200.jpg 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-1200x1800.jpg 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/pexels-taryn-elliott-4652004-scaled.jpg 1333w" sizes="(max-width: 640px) 100vw, 400px" /></span></div><div class="fusion-text fusion-text-144"><p>Base image</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-41 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-52 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00117_.png" alt class="img-responsive wp-image-1918" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00117_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00117_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00117_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00117_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00117_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div><div class="fusion-text fusion-text-145"><p>prompt: <em>&#8220;add 
a tree&#8221;</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-42 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-53 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00118_.png" alt class="img-responsive wp-image-1919" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00118_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00118_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00118_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00118_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00118_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div><div class="fusion-text fusion-text-146"><p>prompt: <em>&#8220;add 
shops on the ground floor of the buildings&#8221;</em></p>
</div></div></div><div class="fusion-layout-column fusion_builder_column_inner fusion-builder-nested-column-43 fusion_builder_column_inner_1_4 1_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element " style="--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-54 hover-type-none"><img decoding="async" width="832" height="1248" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00120_.png" alt class="img-responsive wp-image-1921" srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00120_-200x300.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00120_-400x600.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00120_-600x900.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00120_-800x1200.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/ComfyUI_00120_.png 832w" sizes="(max-width: 640px) 100vw, 400px" /></span></div><div class="fusion-text fusion-text-147"><p>prompt: 
<em>&#8220;night setting with more street lights&#8221;</em></p>
</div></div></div></div><div class="fusion-text fusion-text-148 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p data-start="52" data-end="407">These results show the outcomes obtained with the final Qwen-Edit workflow, each generated from the same base image using a simple text prompt. Depending on the complexity of the requested transformation and the parameters defined in the KSampler—particularly the number of steps and the denoise factor—the processing time varied between <strong data-start="338" data-end="372">30 seconds and about 2 minutes</strong> on an <strong data-start="379" data-end="395">RTX 4060 GPU</strong>.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-42 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">4. To Go Further</h2></div><div class="fusion-text fusion-text-149 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><ul>
<li data-start="778" data-end="945">
<p data-start="780" data-end="945"><strong data-start="780" data-end="820">Qwen-Image-Edit GGUF repository (QuantStack):</strong> <a class="decorated-link keychainify-checked" href="https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF" target="_new" rel="noopener" data-start="821" data-end="943">https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF</a></p>
</li>
<li data-start="946" data-end="1076">
<p data-start="948" data-end="1076"><strong data-start="948" data-end="987">ComfyUI documentation and examples:</strong> <a class="decorated-link keychainify-checked" href="https://github.com/comfyanonymous/ComfyUI" target="_new" rel="noopener" data-start="988" data-end="1074">https://github.com/comfyanonymous/ComfyUI</a></p>
</li>
<li data-start="1077" data-end="1239">
<p data-start="1079" data-end="1239"><strong data-start="1079" data-end="1130">Qwen-Image-ComfyUI utilities and text encoders:</strong> <a class="decorated-link keychainify-checked" href="https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI" target="_new" rel="noopener" data-start="1131" data-end="1237">https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI</a></p>
</li>
<li data-start="1240" data-end="1442">
<p data-start="1242" data-end="1442">Zhang, F., Salazar-Miranda, A., Duarte, F., Vale, L., Hack, G., Chen, M., Liu, Y., Batty, M. &amp; Ratti, C. (2024) ‘Urban Visual Intelligence: Studying Cities with Artificial Intelligence and Street-Level Imagery’, <em data-start="212" data-end="263">Annals of the American Association of Geographers</em>, 114 (5), pp. 876-897. <a class="decorated-link keychainify-checked" href="https://doi.org/10.1080/24694452.2024.2313515" target="_new" rel="noopener" data-start="287" data-end="332">https://doi.org/10.1080/24694452.2024.2313515</a></p>
</li>
<li data-start="1240" data-end="1442">Perez, J. &amp; Fusco, G. (2025) ‘Streetscape Analysis with Generative AI (SAGAI): Vision-language assessment and mapping of urban scenes’, <em data-start="136" data-end="171">Geomatica</em>, 100063. <a class="decorated-link keychainify-checked" href="https://doi.org/10.1016/j.geomat.2025.100063" target="_new" rel="noopener" data-start="181" data-end="225">https://doi.org/10.1016/j.geomat.2025.100063</a> (<a class="decorated-link keychainify-checked" href="https://www.sciencedirect.com/science/article/pii/S1195103625000199" target="_new" rel="noopener" data-start="227" data-end="311">ScienceDirect</a>)</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-43 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">5. Download the Workflow</h2></div><div class="fusion-text fusion-text-150 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>For convenience, you can download the ready-to-use <strong data-start="1530" data-end="1552">ComfyUI JSON graph </strong>that we built in this post from the link below and load it directly into your workspace using <strong data-start="1620" data-end="1646">File → Load → Workflow</strong>.</p>
</div><div style="text-align:center;"><a class="fusion-button button-flat fusion-button-default-size button-lightgray fusion-button-lightgray button-5 fusion-button-default-span fusion-button-default-type" target="_self" download="Qwen-Edit-UGA-v1.0.json" href="https://urbangeoanalytics.com/wp-content/uploads/2025/11/Qwen-Edit-UGA-v1.0-1.json"><div class="awb-button__hover-content awb-button__hover-content--default awb-button__hover-content--centered"><span class="fusion-button-text awb-button__text awb-button__text--default">DOWNLOAD &#8211; ComfyUI JSON graph &#8211; QWEN IMAGE EDIT v1.0</span><span class="fusion-button-text awb-button__text awb-button__text--hover">DOWNLOAD - ComfyUI JSON graph - QWEN IMAGE EDIT v1.0</span></div></a></div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-13 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-151"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--7" data-awb-toc-id="7" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-55 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/">Qwen Image Edit for Urbanism v1.0 — Building a Qwen Pipeline in ComfyUI</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/local-ai-image-editing-urbanism-comfyui-qwen-gguf/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
	</channel>
</rss>
