<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Package Archives - Urban Geo Analytics</title>
	<atom:link href="https://urbangeoanalytics.com/category/package/feed/" rel="self" type="application/rss+xml" />
	<link>https://urbangeoanalytics.com/category/package/</link>
	<description>Spatial Analysis, GeoAI &#38; Machine Learning</description>
	<lastBuildDate>Thu, 23 Apr 2026 07:25:41 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.9.4</generator>

<image>
	<url>https://urbangeoanalytics.com/wp-content/uploads/2025/11/cropped-logo-urban-geo_512-32x32.png</url>
	<title>Package Archives - Urban Geo Analytics</title>
	<link>https://urbangeoanalytics.com/category/package/</link>
	<width>32</width>
	<height>32</height>
</image> 
	<item>
		<title>UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</title>
		<link>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/</link>
					<comments>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/#respond</comments>
		
		<dc:creator><![CDATA[Joan Perez]]></dc:creator>
		<pubDate>Thu, 23 Apr 2026 07:25:41 +0000</pubDate>
				<category><![CDATA[Advanced]]></category>
		<category><![CDATA[Package]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Vision Language Model]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[Google Colab]]></category>
		<category><![CDATA[Image Analysis]]></category>
		<category><![CDATA[Jupyter Notebook]]></category>
		<category><![CDATA[Llava]]></category>
		<category><![CDATA[Qwen]]></category>
		<category><![CDATA[UVLM]]></category>
		<guid isPermaLink="false">https://urbangeoanalytics.com/?p=2442</guid>

					<description><![CDATA[<p>UVLM v3.0.0 turns a Colab notebook into a full Python package. Run vision-language models locally, in notebooks, or scripts with a simple API and no setup complexity.</p>
<p>The post <a href="https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/">UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></description>
										<content:encoded><![CDATA[<div class="fusion-fullwidth fullwidth-box fusion-builder-row-1 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling" style="--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;" id="contenu" ><div class="fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap" style="max-width:1248px;margin-left: calc(-4% / 2 );margin-right: calc(-4% / 2 );"><div class="fusion-layout-column fusion_builder_column fusion-builder-column-0 fusion_builder_column_3_4 3_4 fusion-flex-column" style="--awb-bg-size:cover;--awb-width-large:75%;--awb-margin-top-large:0px;--awb-spacing-right-large:2.56%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:2.56%;--awb-width-medium:75%;--awb-order-medium:0;--awb-spacing-right-medium:2.56%;--awb-spacing-left-medium:2.56%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;" id="contenu" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-1" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" 
fusion-imageframe imageframe-none imageframe-1 hover-type-none"><img fetchpriority="high" decoding="async" width="1619" height="971" title="flag fig" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig.png" alt class="img-responsive wp-image-2469" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-200x120.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-400x240.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-600x360.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-800x480.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig-1200x720.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/flag-fig.png 1619w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-1"><h5><strong>Highlights</strong></h5>
</div><div class="fusion-text fusion-text-2" style="--awb-margin-top:-30px;"><ul>
<li><strong data-start="64" data-end="88">UVLM is now a pip-installable Python package </strong>— no longer tied to Google Colab</li>
<li><strong data-start="64" data-end="88">Run on your own GPU </strong>with a local Jupyter notebook, or keep using Colab for free</li>
<li><strong data-start="64" data-end="88">Same tool, more flexibility </strong>— three lines of Python to load a model and analyse images</li>
</ul>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-text fusion-text-3 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>When we released UVLM in March 2026, it was a Google Colab notebook. You opened it in your browser, picked a model, typed your prompts, and ran your images — all without installing anything. That simplicity was the point: a tool that anyone could use to load and compare Vision-Language Models, regardless of their technical setup.</p>
<p>But we kept hearing the same requests. Can I run this on my own machine? Can I call UVLM from a script? Can I integrate it into an existing pipeline? The answer was always the same: not easily. The entire tool lived inside a single notebook, with all the logic packed into three massive code cells. Moving it anywhere else meant copy-pasting thousands of lines and untangling global variables.</p>
<p>Version 3.0.0 changes that. UVLM is now a proper Python package.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-1 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Changed</h2></div><div class="fusion-text fusion-text-4 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The core logic — model loading, dual-backend inference, response parsing, consensus validation, batch processing — has been extracted from the notebook into eight standalone Python modules. These modules have no dependency on Google Colab, no global variables, and no widget code. They are plain Python functions that accept arguments and return results.</p>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-2" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-2 hover-type-none"><img decoding="async" width="2000" height="1162" title="UVLM package blogpost figure 1" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-scaled.png" alt class="img-responsive wp-image-2444" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-200x116.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-400x232.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-600x349.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-800x465.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-1200x697.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-package-blogpost-figure-1-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div></div></div></div><div class="fusion-text fusion-text-5 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The package 
is installed from GitHub in one line:</p>
</div><div class="fusion-text fusion-text-6 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python1" data-enlighter-title="Python">pip install git+https://github.com/perezjoan/UVLM.git</pre>
</div><div class="fusion-text fusion-text-7 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>On Google Colab, this happens automatically in the first cell of the Colab notebook. On your local machine, you run it once in a terminal and you are done.</p>
<p>Nothing changed in how UVLM analyses images. The same 11 model checkpoints are supported (LLaVA-NeXT and Qwen2.5-VL, from 3B to 110B parameters). The same parsing logic, the same consensus validation, the same truncation detection. If you had a workflow built on v2.2.2, the outputs will be identical.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-2 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Three Ways to Use UVLM</h2></div><div class="fusion-text fusion-text-8 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>Google Colab — Zero Install</strong></p>
<p>This is the same experience as before. Open the Colab notebook, select a GPU runtime, and start working. The notebook installs the UVLM package automatically. Images are loaded from Google Drive. Nothing has changed for Colab users, except that the code running behind the widgets is now cleaner and easier to maintain.</p>
<p><strong>Local Jupyter Notebook — Your GPU, Your Data</strong></p>
<p>If you have an NVIDIA GPU on your workstation (or access to a GPU server), you can now run UVLM locally. The local Jupyter notebook provides the same widget-based interface — model selection dropdown, prompt builder form, batch execution button — but images are read from your local filesystem and results are saved locally. No Google account needed, no data leaves your machine.</p>
<p>This matters for researchers working with sensitive imagery (medical, security, proprietary datasets) or for anyone who wants faster and more reliable model loading than what Colab&#8217;s network provides.</p>
<p><strong>Python Script — Full Programmatic Control</strong></p>
<p>For integration into larger pipelines, UVLM now exposes a clean API. Three lines of code replace the entire notebook workflow:</p>
</div><div class="fusion-text fusion-text-9 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python2" data-enlighter-title="Python">from uvlm import load_model, run_inference, parse_response
ctx = load_model("[Qwen] Qwen2.5-VL 7B Instruct", precision="4bit")
raw, tokens = run_inference("photo.jpg", "Count the cars", ctx)
result = parse_response(raw, "numeric")</pre>
</div><div class="fusion-text fusion-text-10 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>The <code>load_model()</code> function returns a context dictionary containing the model, processor, backend type, and device information. This dictionary is passed to every subsequent function — no global state, no hidden side effects. You can load multiple models in the same session and switch between them by passing different context objects.</p>
<p>For batch processing, <code>run_batch()</code> handles the full pipeline:</p>
</div><div class="fusion-text fusion-text-11 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python3" data-enlighter-title="Python">from uvlm import load_model
from uvlm.batch import run_batch

ctx = load_model("[Qwen] Qwen2.5-VL 7B Instruct", precision="4bit")
df = run_batch(
    model_ctx=ctx,
    task_specs=my_tasks,
    image_folder="./images",
    output_path="./results.csv",
)
</pre>
</div><div class="fusion-image-element awb-imageframe-style awb-imageframe-style-below awb-imageframe-style-3" style="text-align:center;--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--body_typography-font-family);--awb-caption-title-font-weight:var(--body_typography-font-weight);--awb-caption-title-font-style:var(--body_typography-font-style);--awb-caption-title-size:var(--body_typography-font-size);--awb-caption-title-transform:var(--body_typography-text-transform);--awb-caption-title-line-height:var(--body_typography-line-height);--awb-caption-title-letter-spacing:var(--body_typography-letter-spacing);"><span class=" fusion-imageframe imageframe-none imageframe-3 hover-type-none"><img decoding="async" width="2000" height="926" title="UVLM deploy blogpost figure 2" src="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-scaled.png" alt class="img-responsive wp-image-2457" srcset="https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-200x93.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-400x185.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-600x278.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-800x370.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-1200x556.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2026/04/UVLM-deploy-blogpost-figure-2-scaled.png 2000w" sizes="(max-width: 640px) 100vw, 1200px" /></span><div class="awb-imageframe-caption-container" style="text-align:center;"><div class="awb-imageframe-caption"><div class="awb-imageframe-caption-title"> </div><p class="awb-imageframe-caption-text"> </p></div></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: 
auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-3 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Under the Hood: Package Structure</h2></div><div class="fusion-text fusion-text-12 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The monolithic notebook has been split into eight modules, each with a single responsibility:</p>
<p><em>registry.py</em> holds the model dictionary — 11 checkpoints with their backend type and <strong>HuggingFace checkpoint ID</strong>. Adding a new model is one line in a dictionary.</p>
<p><em>loader.py</em> contains the <code>load_model()</code> function. It handles quantisation configuration (4-bit, 8-bit, FP16), device placement (single GPU, auto, CPU offload), and the LLaVA vs Qwen branching logic. It returns a dictionary — not a set of global variables.</p>
<p><em>inference.py</em> contains <code>run_inference()</code>, the dual-backend forward pass. It accepts a model context dictionary and returns the raw response plus the exact token count as a tuple. The full LLaVA response cleaning logic and the full Qwen token-trimming pipeline are preserved exactly as they were.</p>
<p><em>parsers.py</em> holds the four response parsers (numeric, category, boolean, text) and the advanced reasoning parser. These are pure functions with zero dependencies beyond Python&#8217;s standard library.</p>
<p><em>consensus.py</em> contains the majority voting logic. <em>batch.py</em> handles folder iteration, CSV writing, resume mode, and schema upgrading. <em>prompts.py</em> stores the task type definitions and the chain-of-thought templates. <em>utils.py</em> provides seed management, environment detection, and <strong>HuggingFace token</strong> retrieval.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-4 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Getting Started</h2></div><div class="fusion-text fusion-text-13 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p><strong>On Colab</strong>: Open the notebook from GitHub and run the three blocks as before. The package installs itself.</p>
<p><strong>Locally</strong>: First, install PyTorch with CUDA support matching your GPU driver (check with <code>nvidia-smi</code>). For example, with CUDA 12.8+:</p>
</div><div class="fusion-text fusion-text-14 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python4" data-enlighter-title="Python">pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
pip install git+https://github.com/perezjoan/UVLM.git
</pre>
</div><div class="fusion-text fusion-text-15 fusion-text-no-margin" style="--awb-margin-top:1px;--awb-margin-bottom:25px;"><pre class="EnlighterJSRAW" data-enlighter-language="python" data-enlighter-theme="dracula" data-enlighter-group="Python4" data-enlighter-title="Python">pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
pip install git+https://github.com/perezjoan/UVLM.git
</pre>
</div><div class="fusion-text fusion-text-16 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:5px;--awb-margin-bottom:25px;"><p>Then open the local Jupyter notebook.</p>
<p>You get the same dropdown menus, the same prompt builder form, the same batch execution. The only difference is that you type a local path for your image folder instead of a Google Drive path.</p>
<p>For HuggingFace authentication (needed for some gated models like LLaMA3-based checkpoints), either set the <code>HF_TOKEN</code> environment variable or run <code>huggingface-cli login</code> once in your terminal.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-5 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">What Is Next</h2></div><div class="fusion-text fusion-text-17 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>The package architecture makes it much easier to add new VLM families. InternVL, BLIP-2, CogVLM, DeepSeek-VL, and Molmo are planned for future releases — each one requires implementing the backend-specific sections of the inference function and adding entries to the registry, without touching the rest of the codebase.</p>
<p>We are also working on multi-GPU batching for parallel inference across images, video frame analysis support, and integration with the SAGAI workflow for automated streetscape analysis.</p>
</div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:25px;margin-bottom:25px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-title title fusion-title-6 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Links</h2></div><div class="fusion-text fusion-text-18 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>Source code: <a class="keychainify-checked" href="https://github.com/perezjoan/UVLM">github.com/perezjoan/UVLM</a></p>
<p>Paper: <a class="keychainify-checked" href="https://arxiv.org/abs/2603.13893">arXiv preprint</a> — Perez &amp; Fusco (2026)</p>
<p>UVLM page on this site: urbangeoanalytics.com › Software &amp; Algorithms › <a class="keychainify-checked" href="https://urbangeoanalytics.com/algorithms-softwares/uvlm-universal-vision-language-model-loader/">UVLM</a></p>
<p>Previous blog post: <a class="keychainify-checked" href="https://urbangeoanalytics.com/introducing-uvlm-free-tool-compare-ai-vision-language-models/">Introducing UVLM: A Free Tool to Compare AI Models That Understand Images</a></p>
</div><div class="fusion-title title fusion-title-7 fusion-sep-none fusion-title-text fusion-title-size-two" style="--awb-margin-top:25px;--awb-margin-bottom:25px;"><h2 class="fusion-title-heading title-heading-left fusion-responsive-typography-calculated" style="margin:0;--fontSize:48;line-height:var(--awb-typography1-line-height);">Citation</h2></div><div class="fusion-text fusion-text-19 fusion-text-no-margin" style="--awb-content-alignment:justify;--awb-margin-top:25px;--awb-margin-bottom:25px;"><p>If you use UVLM in your work, please cite:</p>
<p>Perez, J. &amp; Fusco, G. (2026). <em>UVLM: A Universal Vision-Language Model Loader for Reproducible Multimodal Benchmarking.</em> arXiv:2603.13893</p>
</div></div></div><div class="fusion-layout-column fusion_builder_column fusion-builder-column-1 awb-sticky awb-sticky-medium awb-sticky-large fusion_builder_column_1_4 1_4 fusion-flex-column" style="--awb-padding-top:20px;--awb-padding-right:20px;--awb-padding-bottom:20px;--awb-padding-left:20px;--awb-bg-size:cover;--awb-border-color:var(--awb-color6);--awb-border-style:solid;--awb-width-large:25%;--awb-margin-top-large:0px;--awb-spacing-right-large:7.68%;--awb-margin-bottom-large:20px;--awb-spacing-left-large:7.68%;--awb-width-medium:25%;--awb-order-medium:0;--awb-spacing-right-medium:7.68%;--awb-spacing-left-medium:7.68%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;--awb-sticky-offset:150px;" data-scroll-devices="small-visibility,medium-visibility,large-visibility"><div class="fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column"><div class="fusion-text fusion-text-20"><p><span style="color: #143c4e;"><strong>Table of contents</strong></span></p>
</div><div class="awb-toc-el awb-toc-el--1" data-awb-toc-id="1" data-awb-toc-options="{&quot;allowed_heading_tags&quot;:{&quot;h2&quot;:0},&quot;ignore_headings&quot;:&quot;&quot;,&quot;ignore_headings_words&quot;:&quot;&quot;,&quot;enable_cache&quot;:&quot;no&quot;,&quot;highlight_current_heading&quot;:&quot;yes&quot;,&quot;hide_hidden_titles&quot;:&quot;no&quot;,&quot;limit_container&quot;:&quot;page_content&quot;,&quot;select_custom_headings&quot;:&quot;.contenu H2, .contenu H3&quot;,&quot;icon&quot;:&quot;fa-flag fas&quot;,&quot;counter_type&quot;:&quot;none&quot;}" style="--awb-item-padding-right:5px;--awb-item-padding-left:5px;"><div class="awb-toc-el__content"></div></div><div class="fusion-separator fusion-full-width-sep" style="align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;"><div class="fusion-separator-border sep-single sep-solid" style="--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color6);border-color:var(--awb-color6);border-top-width:1px;"></div></div><div class="fusion-image-element " style="--awb-margin-top:25px;--awb-margin-bottom:25px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);--awb-filter:saturate(100%);--awb-filter-transition:filter 0.3s ease;--awb-filter-hover:saturate(0%);"><span class=" fusion-imageframe imageframe-none imageframe-4 hover-type-zoomout"><img decoding="async" width="1536" height="1024" src="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png" alt class="img-responsive wp-image-1688" 
srcset="https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-200x133.png 200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-400x267.png 400w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-600x400.png 600w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-800x533.png 800w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3-1200x800.png 1200w, https://urbangeoanalytics.com/wp-content/uploads/2025/11/blog-lvl3.png 1536w" sizes="(max-width: 640px) 100vw, 400px" /></span></div></div></div></div></div>
<p>The post <a href="https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/">UVLM v3.0.0: From Colab Notebook to Python Package — Run Vision-Language Models Anywhere</a> appeared first on <a href="https://urbangeoanalytics.com">Urban Geo Analytics</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://urbangeoanalytics.com/uvlm-python-package-vision-language-models/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
	</channel>
</rss>
