diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 7354a67e67..bd384722fc 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -225,6 +225,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
     # See `hub_builder.bzl%hub_builder()` for `HubBuilder`
     pip_hub_map = {}
     simpleapi_cache = {}
+    facts = {}
 
     for mod in module_ctx.modules:
         for pip_attr in mod.tags.parse:
@@ -242,6 +243,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
                     evaluate_markers_fn = kwargs.get("evaluate_markers", None),
                     available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
                     logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name),
+                    facts = facts,
                 )
                 pip_hub_map[pip_attr.hub_name] = builder
             elif pip_hub_map[hub_name].module_name != mod.name:
@@ -288,6 +290,25 @@ You cannot use both the additive_build_content and additive_build_content_file a
         hub_group_map[hub.name] = out.group_map
         hub_whl_map[hub.name] = out.whl_map
 
+    facts = {
+        "fact_version": facts.get("fact_version"),
+    } | {
+        index_url: {
+            k: _sorted_dict(f.get(k))
+            for k in [
+                "dist_filenames",
+                "dist_hashes",
+                "dist_yanked",
+            ]
+            if f.get(k)
+        }
+        for index_url, f in facts.items()
+        if index_url not in ["fact_version"]
+    }
+    if len(facts) == 1:
+        # only version is present, skip writing
+        facts = None
+
     return struct(
         config = config,
         exposed_packages = exposed_packages,
@@ -296,6 +317,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
         hub_whl_map = hub_whl_map,
         whl_libraries = whl_libraries,
         whl_mods = whl_mods,
+        facts = facts,
         platform_config_settings = {
             hub_name: {
                 platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
@@ -305,6 +327,12 @@ You cannot use both the additive_build_content and additive_build_content_file a
         },
     )
 
+def _sorted_dict(d):
+    """Return a copy of `d` with its entries ordered by key, or `{}` if `d` is empty or None."""
+    if d:
+        return {key: d[key] for key in sorted(d.keys())}
+    return {}
+
 def _pip_impl(module_ctx):
     """Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.
 
@@ -393,9 +421,11 @@ def _pip_impl(module_ctx):
             groups = mods.hub_group_map.get(hub_name),
         )
 
-    return module_ctx.extension_metadata(
-        reproducible = True,
-    )
+    kwargs = {"reproducible": True}
+    if mods.facts:
+        kwargs["facts"] = mods.facts
+
+    return module_ctx.extension_metadata(**kwargs)
 
 _default_attrs = {
     "arch_name": attr.string(
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index f0aa6a73bc..2bc12bf7b6 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -31,6 +31,7 @@ def hub_builder(
         simpleapi_download_fn,
         evaluate_markers_fn,
         logger,
+        facts = None,
         simpleapi_cache = {}):
     """Return a hub builder instance
 
@@ -47,6 +48,7 @@ def hub_builder(
             used during the `repository_rule` and must be always compatible with the host.
         simpleapi_download_fn: the function used to download from SimpleAPI.
         simpleapi_cache: the cache for the download results.
+        facts: the facts if they are available.
         logger: the logger for this builder.
     """
 
@@ -89,6 +91,10 @@ def hub_builder(
         # Functions to download according to the config
         # dict[str python_version, callable]
         _get_index_urls = {},
+        # Contains the dict to store the facts to be written to the lockfile that
+        # can be safely cached for future invocations.
+        # dict[str, dict[str, str]]
+        _facts = facts,
         # Tells whether to use the downloader for a package.
         # dict[str python_version, dict[str package_name, bool use_downloader]]
         _use_downloader = {},
@@ -399,11 +405,16 @@ def _set_get_index_urls(self, pip_attr):
                 d
                 for d in distributions
                 if _use_downloader(self, python_version, d)
-            ],
+            ] if type(distributions) == "list" else {
+                d: versions
+                for d, versions in distributions.items()
+                if _use_downloader(self, python_version, d)
+            },
             envsubst = pip_attr.envsubst,
             # Auth related info
             netrc = pip_attr.netrc,
             auth_patterns = pip_attr.auth_patterns,
+            facts = self._facts,
         ),
         cache = self._simpleapi_cache,
         parallel_download = pip_attr.parallel_download,
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index 5c05c753fd..f9a6e672bf 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -170,16 +170,15 @@ def parse_requirements(
 
     index_urls = {}
     if get_index_urls:
-        index_urls = get_index_urls(
-            ctx,
-            # Use list({}) as a way to have a set
-            list({
-                req.distribution: None
-                for reqs in requirements_by_platform.values()
-                for req in reqs.values()
-                if not req.srcs.url
-            }),
-        )
+        distributions = {}
+        for reqs in requirements_by_platform.values():
+            for req in reqs.values():
+                if req.srcs.url:
+                    continue
+
+                distributions.setdefault(req.distribution, []).append(req.srcs.version)
+
+        index_urls = get_index_urls(ctx, distributions)
 
     ret = []
     for name, reqs in sorted(requirements_by_platform.items()):
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index a41f0750c4..da11a77635 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -16,12 +16,14 @@
 Parse SimpleAPI HTML in Starlark.
 """
 
-def parse_simpleapi_html(*, url, content):
+def parse_simpleapi_html(*, url, content, distribution = None, return_absolute = True):
     """Get the package URLs for given shas by parsing the Simple API HTML.
 
     Args:
         url(str): The URL that the HTML content can be downloaded from.
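+        distribution(str): The distribution name, used to extract versions from sdist filenames. Defaults to the last path segment of `url`.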
         content(str): The Simple API HTML content.
+        return_absolute: {type}`bool` Whether to resolve the distribution and metadata URLs against `url`; if False no resolution is done.
 
     Returns:
         A list of structs with:
@@ -33,6 +35,9 @@ def parse_simpleapi_html(*, url, content):
           present, then the 'metadata_url' is also present. Defaults to "".
         * metadata_url: The URL for the METADATA if we can download it. Defaults to "".
     """
+    if not distribution:
+        _, _, distribution = url.strip("/").rpartition("/")
+
     sdists = {}
     whls = {}
     lines = content.split("<a href=\"")
@@ -55,7 +60,8 @@ def parse_simpleapi_html(*, url, content):
     sha256s_by_version = {}
     for line in lines[1:]:
         dist_url, _, tail = line.partition("#sha256=")
-        dist_url = _absolute_url(url, dist_url)
+        if return_absolute:
+            dist_url = absolute_url(index_url = url, url = dist_url)
 
         sha256, _, tail = tail.partition("\"")
 
@@ -64,7 +70,7 @@ def parse_simpleapi_html(*, url, content):
 
         head, _, _ = tail.rpartition("</a>")
         maybe_metadata, _, filename = head.rpartition(">")
-        version = _version(filename)
+        version = pkg_version(filename, distribution)
         sha256s_by_version.setdefault(version, []).append(sha256)
 
         metadata_sha256 = ""
@@ -79,13 +85,17 @@ def parse_simpleapi_html(*, url, content):
                 break
 
         if filename.endswith(".whl"):
+            metadata_url = metadata_url or ""
+            if return_absolute and metadata_url:
+                metadata_url = absolute_url(index_url = url, url = metadata_url)
+
             whls[sha256] = struct(
                 filename = filename,
                 version = version,
                 url = dist_url,
                 sha256 = sha256,
                 metadata_sha256 = metadata_sha256,
-                metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
+                metadata_url = metadata_url,
                 yanked = yanked,
             )
         else:
@@ -110,18 +120,36 @@ _SDIST_EXTS = [
     ".zip",
 ]
 
-def _version(filename):
+def pkg_version(filename, distribution = None):
+    """pkg_version extracts the version from the filename.
+
+    TODO: move this to a different location
+
+    Args:
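+        filename: the file name of the distribution, either a wheel or an sdist archive.
+        distribution: the distribution name; mandatory for sdists, where its length is used to skip the `{name}-` prefix.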
+
+    Returns:
+        version string
+    """
     # See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
 
-    _, _, tail = filename.partition("-")
-    version, _, _ = tail.partition("-")
-    if version != tail:
-        # The format is {name}-{version}-{whl_specifiers}.whl
-        return version
+    if filename.endswith(".whl"):
+        _, _, tail = filename.partition("-")
+        version, _, _ = tail.partition("-")
+        if version != tail:
+            # The format is {name}-{version}-{whl_specifiers}.whl
+            return version
+
+    if not distribution:
+        fail("for parsing sdists passing 'distribution' is mandatory")
 
     # NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path
 
     # {name}-{version}.{ext}
+    # TODO @aignas 2026-01-20: test for handling dashes in names, can't think of any other way to
+    # get the version from the filename but to pass in the distribution name to this function.
+    version = filename[len(distribution) + 1:]
     for ext in _SDIST_EXTS:
         version, _, _ = version.partition(ext)  # build or name
 
@@ -147,21 +175,30 @@ def _is_downloadable(url):
     """
     return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")
 
-def _absolute_url(index_url, candidate):
-    if candidate == "":
-        return candidate
+def absolute_url(*, index_url, url):
+    """Return an absolute URL in case the url is not absolute.
+
+    Args:
+        index_url: {type}`str` The index_url.
+        url: {type}`str` The url of the artifact.
+
+    Returns:
+        `url` if it is absolute, or absolute URL based on the `index_url`.
+    """
+    if url == "":
+        return url
 
-    if _is_downloadable(candidate):
-        return candidate
+    if _is_downloadable(url):
+        return url
 
-    if candidate.startswith("/"):
+    if url.startswith("/"):
         # absolute path
         root_directory = _get_root_directory(index_url)
-        return "{}{}".format(root_directory, candidate)
+        return "{}{}".format(root_directory, url)
 
-    if candidate.startswith(".."):
+    if url.startswith(".."):
         # relative path with up references
-        candidate_parts = candidate.split("..")
+        candidate_parts = url.split("..")
         last = candidate_parts[-1]
         for _ in range(len(candidate_parts) - 1):
             index_url, _, _ = index_url.rstrip("/").rpartition("/")
@@ -169,4 +206,4 @@ def _absolute_url(index_url, candidate):
         return "{}/{}".format(index_url, last.strip("/"))
 
     # relative path without up-references
-    return "{}/{}".format(index_url.rstrip("/"), candidate)
+    return "{}/{}".format(index_url.rstrip("/"), url)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 52ff02a178..b1481f8981 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -21,7 +21,9 @@ load("//python/private:auth.bzl", _get_auth = "get_auth")
 load("//python/private:envsubst.bzl", "envsubst")
 load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:text_util.bzl", "render")
-load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")
+load(":parse_simpleapi_html.bzl", "absolute_url", "parse_simpleapi_html", "pkg_version")
+
+_FACT_VERSION = "v1"
 
 def simpleapi_download(
         ctx,
@@ -43,12 +45,13 @@ def simpleapi_download(
              separate packages.
            * extra_index_urls: Extra index URLs that will be looked up after
              the main is looked up.
-           * sources: list[str], the sources to download things for. Each value is
-             the contents of requirements files.
+           * sources: dict[str, list[str]], the distributions to download metadata for, each
+               mapped to the versions that are requested for it.
            * envsubst: list[str], the envsubst vars for performing substitution in index url.
            * netrc: The netrc parameter for ctx.download, see http_file for docs.
            * auth_patterns: The auth_patterns parameter for ctx.download, see
                http_file for docs.
+           * facts: The facts to write to if we support them.
         cache: A dictionary that can be used as a cache between calls during a
             single evaluation of the extension. We use a dictionary as a cache
             so that we can reuse calls to the simple API when evaluating the
@@ -81,27 +84,42 @@ def simpleapi_download(
     index_urls = [attr.index_url] + attr.extra_index_urls
     read_simpleapi = read_simpleapi or _read_simpleapi
 
+    if attr.facts:
+        ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
+    else:
+        ctx.report_progress("Fetch package lists from PyPI index")
+
+    cache = simpleapi_cache(
+        memory_cache = memory_cache(cache),
+        facts_cache = facts_cache(getattr(ctx, "facts", None), attr.facts),
+    )
+
     found_on_index = {}
     warn_overrides = False
-    ctx.report_progress("Fetch package lists from PyPI index")
+
+    # Normalize the inputs: only the dict form carries the versions that results are filtered by.
+    if type(attr.sources) == "list":
+        fail("simpleapi_download: 'sources' as a list[str] is not supported yet; pass a dict[str, list[str]] mapping each distribution to its requested versions")
+    else:
+        input_sources = attr.sources
+
     for i, index_url in enumerate(index_urls):
         if i != 0:
             # Warn the user about a potential fix for the overrides
             warn_overrides = True
 
         async_downloads = {}
-        sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
-        for pkg in sources:
+        sources = {pkg: versions for pkg, versions in input_sources.items() if pkg not in found_on_index}
+        for pkg, versions in sources.items():
             pkg_normalized = normalize_name(pkg)
             result = read_simpleapi(
                 ctx = ctx,
-                url = "{}/{}/".format(
-                    index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
-                    pkg,
-                ),
                 attr = attr,
                 cache = cache,
+                index_url = index_url_overrides.get(pkg_normalized, index_url),
+                distribution = pkg,
                 get_auth = get_auth,
+                requested_versions = {v: None for v in versions},
                 **download_kwargs
             )
             if hasattr(result, "wait"):
@@ -109,6 +127,7 @@ def simpleapi_download(
                 async_downloads[pkg] = struct(
                     pkg_normalized = pkg_normalized,
                     wait = result.wait,
+                    fns = result.fns,
                 )
             elif result.success:
                 contents[pkg_normalized] = result.output
@@ -164,49 +183,14 @@ If you would like to skip downloading metadata for these packages please add 'si
 
     return contents
 
-def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
-    """Read SimpleAPI.
-
-    Args:
-        ctx: The module_ctx or repository_ctx.
-        url: str, the url parameter that can be passed to ctx.download.
-        attr: The attribute that contains necessary info for downloading. The
-          following attributes must be present:
-           * envsubst: The envsubst values for performing substitutions in the URL.
-           * netrc: The netrc parameter for ctx.download, see http_file for docs.
-           * auth_patterns: The auth_patterns parameter for ctx.download, see
-               http_file for docs.
-        cache: A dict for storing the results.
-        get_auth: A function to get auth information. Used in tests.
-        **download_kwargs: Any extra params to ctx.download.
-            Note that output and auth will be passed for you.
-
-    Returns:
-        A similar object to what `download` would return except that in result.out
-        will be the parsed simple api contents.
-    """
-    # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
-    # the whl location and we cannot handle multiple URLs at once by passing
-    # them to ctx.download if we want to correctly handle the relative URLs.
-    # TODO: Add a test that env subbed index urls do not leak into the lock file.
-
-    real_url = strip_empty_path_segments(envsubst(
-        url,
-        attr.envsubst,
-        ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
-    ))
-
-    cache_key = real_url
-    if cache_key in cache:
-        return struct(success = True, output = cache[cache_key])
-
+def _download_simpleapi(*, ctx, url, real_url, attr_envsubst, get_auth, **kwargs):
     output_str = envsubst(
         url,
-        attr.envsubst,
+        attr_envsubst,
         # Use env names in the subst values - this will be unique over
         # the lifetime of the execution of this function and we also use
         # `~` as the separator to ensure that we don't get clashes.
-        {e: "~{}~".format(e) for e in attr.envsubst}.get,
+        {e: "~{}~".format(e) for e in attr_envsubst}.get,
     )
 
     # Transform the URL into a valid filename
@@ -217,22 +201,50 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
 
     get_auth = get_auth or _get_auth
 
-    # NOTE: this may have block = True or block = False in the download_kwargs
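+    # NOTE: kwargs may have block = True or block = False. NOTE(review): `attr` below is not a parameter of this function, so it resolves to the global `attr` module - pass the real attr in.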
     download = ctx.download(
         url = [real_url],
         output = output,
         auth = get_auth(ctx, [real_url], ctx_attr = attr),
         allow_fail = True,
-        **download_kwargs
+        **kwargs
     )
 
-    if download_kwargs.get("block") == False:
-        # Simulate the same API as ctx.download has
+    return _await(
+        download,
+        _read,
+        ctx = ctx,
+        output = output,
+    )
+
+def _await(download, fn, **kwargs):
+    if hasattr(download, "fns"):
+        download.fns.append(
+            lambda result: fn(result = result, **kwargs),
+        )
+        return download
+    elif hasattr(download, "wait"):
+        # Have a reference type which we can iterate later when aggregating the result
+        fns = [lambda result: fn(result = result, **kwargs)]
+
+        def wait():
+            result = download.wait()
+            for fn in fns:
+                result = fn(result = result)
+            return result
+
         return struct(
-            wait = lambda: _read_index_result(ctx, download.wait(), output, real_url, cache, cache_key),
+            wait = wait,
+            fns = fns,
         )
 
-    return _read_index_result(ctx, download, output, real_url, cache, cache_key)
+    return fn(result = download, **kwargs)
+
+def _read(ctx, result, output):
+    """Return `result` unchanged on failure, otherwise a success struct with the contents of `output`."""
+    if not result.success:
+        return result
+    return struct(success = True, output = ctx.read(output))
 
 def strip_empty_path_segments(url):
     """Removes empty path segments from a URL. Does nothing for urls with no scheme.
@@ -255,15 +267,349 @@ def strip_empty_path_segments(url):
     else:
         return "{}://{}".format(scheme, stripped)
 
-def _read_index_result(ctx, result, output, url, cache, cache_key):
-    if not result.success:
-        return struct(success = False)
+def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_versions, get_auth = None, **download_kwargs):
+    """Read SimpleAPI.
 
-    content = ctx.read(output)
+    Args:
+        ctx: The module_ctx or repository_ctx.
+        index_url: str, the PyPI SimpleAPI index URL
+        distribution: str, the distribution to download
+        attr: The attribute that contains necessary info for downloading. The
+          following attributes must be present:
+           * envsubst: The envsubst values for performing substitutions in the URL.
+           * netrc: The netrc parameter for ctx.download, see http_file for docs.
+           * auth_patterns: The auth_patterns parameter for ctx.download, see
+               http_file for docs.
+        cache: The cache struct with `get` and `setdefault` used for storing the results.
+        get_auth: A function to get auth information. Used in tests.
+        requested_versions: dict keyed by the requested versions, used to filter the results.
+        **download_kwargs: Any extra params to ctx.download.
+            Note that output and auth will be passed for you.
 
-    output = parse_simpleapi_html(url = url, content = content)
-    if output:
-        cache.setdefault(cache_key, output)
-        return struct(success = True, output = output, cache_key = cache_key)
-    else:
+    Returns:
+        A similar object to what `download` would return except that in result.out
+        will be the parsed simple api contents.
+    """
+
+    index_url = index_url.rstrip("/")
+
+    # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
+    # the whl location and we cannot handle multiple URLs at once by passing
+    # them to ctx.download if we want to correctly handle the relative URLs.
+    # TODO: Add a test that env subbed index urls do not leak into the lock file.
+
+    cached = cache.get(index_url, distribution, requested_versions)
+    if cached:
+        return struct(success = True, output = cached)
+
+    url = "{}/{}/".format(index_url, distribution)
+    real_url = strip_empty_path_segments(envsubst(
+        url,
+        attr.envsubst,
+        ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
+    ))
+
+    download = _download_simpleapi(
+        ctx = ctx,
+        url = url,
+        real_url = real_url,
+        attr_envsubst = attr.envsubst,
+        get_auth = get_auth,
+        **download_kwargs
+    )
+
+    return _await(
+        download,
+        _read_index_result,
+        index_url = index_url,
+        distribution = distribution,
+        real_url = real_url,
+        cache = cache,
+        requested_versions = requested_versions,
+    )
+
+def _read_index_result(*, result, index_url, distribution, real_url, cache, requested_versions):
+    """Parse the downloaded index page, cache it and return the dists of the requested versions.
+
+    Returns `struct(success = False)` if the download failed or if nothing could be parsed.
+    """
+    if not result.success or not result.output:
+        return struct(success = False)
+    # TODO @aignas 2026-02-08: make this the only behaviour, maybe can get rid of `real_url
+    parsed = parse_simpleapi_html(url = real_url, content = result.output, return_absolute = False)
+    if not parsed:
         return struct(success = False)
+
+    # Return what a later cache hit returns (filtered, absolute URLs); keep the full result if no version matches.
+    filtered = cache.setdefault(index_url, distribution, requested_versions, parsed)
+    return struct(success = True, output = filtered or parsed)
+
+def simpleapi_cache(memory_cache, facts_cache):
+    """SimpleAPI cache that layers the known facts on top of an in-memory cache.
+
+    Args:
+        memory_cache: the storage to store things in memory.
+        facts_cache: the storage to retrieve known facts.
+
+    Returns:
+        struct with 2 methods, `get` and `setdefault`.
+    """
+    return struct(
+        get = lambda index_url, distribution, versions: _cache_get(
+            memory_cache,
+            facts_cache,
+            index_url,
+            distribution,
+            versions,
+        ),
+        setdefault = lambda index_url, distribution, versions, value: _cache_setdefault(
+            memory_cache,
+            facts_cache,
+            index_url,
+            distribution,
+            versions,
+            value,
+        ),
+    )
+
+def _cache_get(cache, facts, index_url, distribution, versions):
+    """Look up the dists of a distribution, trying the facts before the in-memory cache."""
+    if not facts:
+        return cache.get(index_url, distribution, versions)
+    # The facts are only consulted when specific versions are requested.
+    if versions:
+        cached = facts.get(index_url, distribution, versions)
+        if cached:
+            return cached
+
+    cached = cache.get(index_url, distribution, versions)
+    if not cached:
+        return None
+    # Ensure that we write back to the facts, this happens if we request versions that
+    # we don't have facts for but we have in-memory cache of SimpleAPI query results
+    if versions:
+        facts.setdefault(index_url, distribution, cached)
+    return cached
+
+def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
+    """Store `value` in the in-memory cache and record the result it returns as facts."""
+    filtered = cache.setdefault(index_url, distribution, versions, value)
+    if facts and versions:
+        # Facts are only written when specific versions were requested.
+        facts.setdefault(index_url, distribution, filtered)
+    return filtered
+
+def memory_cache(cache = None):
+    """In-memory SimpleAPI cache keyed by index URL, distribution and requested versions.
+
+    Args:
+        cache: the storage to store things in memory.
+
+    Returns:
+        struct with 2 methods, `get` and `setdefault`.
+    """
+    if cache == None:
+        cache = {}
+
+    return struct(
+        get = lambda index_url, distribution, versions: _memcache_get(
+            cache,
+            index_url,
+            distribution,
+            versions,
+        ),
+        setdefault = lambda index_url, distribution, versions, value: _memcache_setdefault(
+            cache,
+            index_url,
+            distribution,
+            versions,
+            value,
+        ),
+    )
+
+def _vkey(versions):
+    """Build the cache key suffix for a collection of requested versions.
+
+    Args:
+        versions: list or dict keyed by version; may be empty or None.
+
+    Returns:
+        The versions sorted and joined with ",", or "" if there are none.
+    """
+    # Sorting a dict yields its keys, so lists and dicts of any size share one code path.
+    return ",".join(sorted(versions)) if versions else ""
+
+def _memcache_get(cache, index_url, distribution, versions):
+    """Return the cached dists for the versions, filtering the unfiltered entry if needed."""
+    if not versions:
+        # The unfiltered index contents are stored under the "" version key.
+        return cache.get((index_url, distribution, ""))
+
+    vkey = _vkey(versions)
+    filtered = cache.get((index_url, distribution, vkey))
+    if filtered:
+        return filtered
+    unfiltered = cache.get((index_url, distribution, ""))
+    if not unfiltered:
+        return None
+    filtered = _filter_packages(unfiltered, versions, index_url, distribution)
+    cache.setdefault((index_url, distribution, vkey), filtered)
+    return filtered
+
+def _memcache_setdefault(cache, index_url, distribution, versions, value):
+    """Cache the unfiltered `value` and, if versions are given, its filtered view which is returned."""
+    cache.setdefault((index_url, distribution, ""), value)
+    if not versions:
+        return value
+
+    filtered = _filter_packages(value, versions, index_url, distribution)
+    vkey = _vkey(versions)
+    cache.setdefault((index_url, distribution, vkey), filtered)
+    return filtered
+
+def _filter_packages(dists, requested_versions, index_url, distribution):
+    """Keep only the dists of the requested versions, passing each through `_with_absolute_url`.
+
+    Returns `dists` as is when no versions are requested and None when nothing matches.
+    """
+    if dists == None:
+        return None
+    if not requested_versions:
+        return dists
+
+    sha256s_by_version = {}
+    whls = {}
+    sdists = {}
+    for sha256, d in dists.sdists.items():
+        if d.version not in requested_versions:
+            continue
+        sdists[sha256] = _with_absolute_url(d, index_url, distribution)
+        sha256s_by_version.setdefault(d.version, []).append(sha256)
+
+    for sha256, d in dists.whls.items():
+        if d.version not in requested_versions:
+            continue
+        whls[sha256] = _with_absolute_url(d, index_url, distribution)
+        sha256s_by_version.setdefault(d.version, []).append(sha256)
+
+    if not whls and not sdists:
+        return None
+    return struct(
+        whls = whls,
+        sdists = sdists,
+        sha256s_by_version = sha256s_by_version,
+    )
+
+def facts_cache(known_facts, facts, facts_version = _FACT_VERSION):
+    """Cache backed by the facts persisted in the lock file.
+
+    Args:
+        known_facts: the facts of a previous evaluation, None if not available.
+        facts: the dict that the facts of this evaluation are written to.
+        facts_version: the version of the facts format to read and write.
+
+    Returns:
+        a struct with `get` and `setdefault`, or None if `known_facts` or `facts` is None.
+    """
+    if known_facts == None or facts == None:
+        return None  # nothing to read from, or nowhere to store to (storing into None would fail)
+
+    get = lambda index_url, distribution, versions: _get_from_facts(facts, known_facts, index_url, distribution, versions, facts_version)
+    setdefault = lambda url, distribution, value: _store_facts(facts, facts_version, url, value)
+    return struct(get = get, setdefault = setdefault, known_facts = known_facts, facts = facts)
+
+def _get_from_facts(facts, known_facts, index_url, distribution, requested_versions, facts_version):
+    """Rebuild the SimpleAPI result for `distribution` from the facts of a previous evaluation.
+
+    Args:
+        facts: dict, the facts of this evaluation; what is served from `known_facts` is stored here.
+        known_facts: dict, the facts of the previous evaluation.
+        index_url: str, the index URL that the facts are grouped by.
+        distribution: str, the distribution name, used to parse versions from filenames.
+        requested_versions: the versions to return dists for.
+        facts_version: str, the only `fact_version` of `known_facts` that is trusted.
+
+    Returns:
+        None if no dist is known, else a struct with `whls`, `sdists` and `sha256s_by_version`.
+    """
+    if known_facts.get("fact_version") != facts_version:
+        # cannot trust known facts, different version that we know how to parse
+        return None
+
+    known_facts = known_facts.get(index_url, {})
+    filenames = known_facts.get("dist_filenames", {})
+    index_url_for_distro = "{}/{}/".format(index_url, distribution)
+    whls, sdists, sha256s_by_version = {}, {}, {}
+    for url, sha256 in known_facts.get("dist_hashes", {}).items():
+        # `_store_facts` keys `dist_filenames` by the dist URL, so it has to be read by URL too.
+        filename = filenames.get(url)
+        if not filename:
+            _, _, filename = url.rpartition("/")
+        # NOTE(review): facts are grouped per index, so dists of other distributions are iterated here too; TODO confirm they need skipping.
+        version = pkg_version(filename, distribution)
+        if version not in requested_versions:
+            # TODO @aignas 2026-01-21: do the check by requested shas at some point
+            continue
+        dists = whls if filename.endswith(".whl") else sdists
+        sha256s_by_version.setdefault(version, []).append(sha256)
+        dists.setdefault(sha256, struct(
+            sha256 = sha256,
+            filename = filename,
+            version = version,
+            url = absolute_url(index_url = index_url_for_distro, url = url),
+            yanked = known_facts.get("dist_yanked", {}).get(sha256, False),
+        ))
+
+    if not whls and not sdists:
+        return None
+    output = struct(whls = whls, sdists = sdists, sha256s_by_version = sha256s_by_version)
+    _store_facts(facts, facts_version, index_url, output)
+    return output
+
+def _with_absolute_url(d, index_url, distribution):
+    """Return a copy of the dist struct `d` with `url` and `metadata_url` made absolute.
+
+    Args:
+        d: struct, a wheel or sdist entry; fields that it does not have are left out of the copy.
+        index_url: str, the index URL without the distribution path segment.
+        distribution: str, the distribution name that is appended to `index_url`.
+
+    Returns:
+        A new struct, relative URLs are resolved against `{index_url}/{distribution}/`.
+    """
+    index_url_for_distro = "{}/{}/".format(index_url.rstrip("/"), distribution)
+    # TODO @aignas 2026-02-08: think of a better way to do this
+    # TODO @aignas 2026-02-08: if the url is absolute, return d
+    kwargs = {}
+    for field in ["sha256", "filename", "version", "metadata_sha256", "metadata_url", "yanked", "url"]:
+        if hasattr(d, field):
+            # `metadata_url` may be relative as well; empty values are copied as they are.
+            value = getattr(d, field)
+            kwargs[field] = absolute_url(index_url = index_url_for_distro, url = value) if value and field in ["url", "metadata_url"] else value
+    return struct(**kwargs)
+
+def _store_facts(facts, fact_version, index_url, value):
+    """Store values as facts in the lock file.
+
+    The main idea is to ensure that the lock file is small and that it only stores what
+    we would otherwise need to fetch from the internet. Any information that can be
+    derived from the stored facts using pure Starlark functions should be computed in
+    Starlark instead of being stored.
+    """
+    if not value:
+        return value
+
+    facts["fact_version"] = fact_version
+
+    # Store the distributions by index URL that we find them on.
+    facts = facts.setdefault(index_url, {})
+
+    for sha256, d in (value.sdists | value.whls).items():
+        facts.setdefault("dist_hashes", {}).setdefault(d.url, sha256)
+        if not d.url.endswith(d.filename):
+            facts.setdefault("dist_filenames", {}).setdefault(d.url, d.filename)
+        if d.yanked:
+            # TODO @aignas 2026-01-21: store yank reason
+            facts.setdefault("dist_yanked", {}).setdefault(sha256, True)
+
+    return value
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index 03cefd13c5..f73e23ba37 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -1036,7 +1036,13 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef
                 index_url = "pypi.org",
                 index_url_overrides = {},
                 netrc = None,
-                sources = ["simple", "plat_pkg", "pip_fallback", "some_other_pkg"],
+                facts = None,
+                sources = {
+                    "pip_fallback": ["0.0.1"],
+                    "plat_pkg": ["0.0.4"],
+                    "simple": ["0.0.1"],
+                    "some_other_pkg": ["0.0.1"],
+                },
             ),
             "cache": {},
             "parallel_download": False,
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 8dc307235a..2a0b1f8811 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -15,19 +15,28 @@
 ""
 
 load("@rules_testing//lib:test_suite.bzl", "test_suite")
-load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments")  # buildifier: disable=bzl-visibility
+load("@rules_testing//lib:truth.bzl", "subjects")
+load(
+    "//python/private/pypi:simpleapi_download.bzl",
+    "memory_cache",
+    "simpleapi_download",
+    "strip_empty_path_segments",
+)  # buildifier: disable=bzl-visibility
 
 _tests = []
 
 def _test_simple(env):
     calls = []
 
-    def read_simpleapi(ctx, url, attr, cache, get_auth, block):
+    def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
         _ = ctx  # buildifier: disable=unused-variable
+        _ = distribution
+        _ = requested_versions
         _ = attr
         _ = cache
         _ = get_auth
         env.expect.that_bool(block).equals(False)
+        url = "{}/{}/".format(index_url, distribution)
         calls.append(url)
         if "foo" in url and "main" in url:
             return struct(
@@ -49,8 +58,9 @@ def _test_simple(env):
             index_url_overrides = {},
             index_url = "main",
             extra_index_urls = ["extra"],
-            sources = ["foo", "bar", "baz"],
+            sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
             envsubst = [],
+            facts = None,
         ),
         cache = {},
         parallel_download = True,
@@ -75,11 +85,14 @@ def _test_fail(env):
     calls = []
     fails = []
 
-    def read_simpleapi(ctx, url, attr, cache, get_auth, block):
+    def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
         _ = ctx  # buildifier: disable=unused-variable
+        _ = distribution
+        _ = requested_versions
         _ = attr
         _ = cache
         _ = get_auth
+        url = "{}/{}/".format(index_url, distribution)
         env.expect.that_bool(block).equals(False)
         calls.append(url)
         if "foo" in url:
@@ -109,8 +122,9 @@ def _test_fail(env):
             },
             index_url = "main",
             extra_index_urls = ["extra"],
-            sources = ["foo", "bar", "baz"],
+            sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
             envsubst = [],
+            facts = None,
         ),
         cache = {},
         parallel_download = True,
@@ -122,13 +136,13 @@ def _test_fail(env):
         """
 Failed to download metadata of the following packages from urls:
 {
-    "foo": "invalid",
     "bar": ["main", "extra"],
+    "foo": "invalid",
 }
 
 If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
-    "foo",
     "bar",
+    "foo",
 ]' to your 'pip.parse' call.
 """,
     ])
@@ -162,8 +176,9 @@ def _test_download_url(env):
             index_url_overrides = {},
             index_url = "https://example.com/main/simple/",
             extra_index_urls = [],
-            sources = ["foo", "bar", "baz"],
+            sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
             envsubst = [],
+            facts = None,
         ),
         cache = {},
         parallel_download = False,
@@ -198,8 +213,9 @@ def _test_download_url_parallel(env):
             index_url_overrides = {},
             index_url = "https://example.com/main/simple/",
             extra_index_urls = [],
-            sources = ["foo", "bar", "baz"],
+            sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
             envsubst = [],
+            facts = None,
         ),
         cache = {},
         parallel_download = True,
@@ -234,8 +250,9 @@ def _test_download_envsubst_url(env):
             index_url_overrides = {},
             index_url = "$INDEX_URL",
             extra_index_urls = [],
-            sources = ["foo", "bar", "baz"],
+            sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
             envsubst = ["INDEX_URL"],
+            facts = None,
         ),
         cache = {},
         parallel_download = False,
@@ -260,6 +277,68 @@ def _test_strip_empty_path_segments(env):
 
 _tests.append(_test_strip_empty_path_segments)
 
+def _expect_cache_result(env, cache, key, sdists, whls):  # assert helper: cache.get(*key) must hold exactly these sdists/whls
+    got = env.expect.that_struct(
+        cache.get(*key),  # `key` is unpacked into positional arguments
+        attrs = dict(
+            whls = subjects.dict,
+            sdists = subjects.dict,
+        ),
+    )
+    got.whls().contains_exactly(whls)
+    got.sdists().contains_exactly(sdists)
+
+def _test_memory_cache(env):  # checks which keys memory_cache writes into its backing dict on store and lookup
+    memory = {}  # backing dict, inspected directly by the assertions below
+    cache = memory_cache(memory)
+    all_packages = struct(
+        sdists = {
+            "aa": struct(version = "1.0"),
+            "ab": struct(version = "1.1"),
+        },
+        whls = {
+            "ba": struct(version = "1.0"),
+            "bb": struct(version = "1.1"),
+        },
+    )
+    cache.setdefault("index", "distro", None, all_packages)  # third arg presumably the requested versions — TODO confirm
+    env.expect.that_dict(memory).contains_exactly({
+        ("index", "distro", ""): all_packages,  # expected key has "" where None was passed
+    })
+    _expect_cache_result(
+        env,
+        cache,
+        ("index", "distro", ["1.0"]),  # lookup for version 1.0 only
+        sdists = {
+            "aa": struct(version = "1.0"),
+        },
+        whls = {
+            "ba": struct(version = "1.0"),
+        },
+    )
+    env.expect.that_dict(memory).keys().contains_exactly([  # the lookup above is expected to add a "1.0" entry
+        ("index", "distro", ""),
+        ("index", "distro", "1.0"),
+    ])
+    _expect_cache_result(
+        env,
+        cache,
+        ("index", "distro", ["1.1"]),  # lookup for version 1.1 only
+        sdists = {
+            "ab": struct(version = "1.1"),
+        },
+        whls = {
+            "bb": struct(version = "1.1"),
+        },
+    )
+    env.expect.that_dict(memory).keys().contains_exactly([  # and the second lookup a "1.1" entry
+        ("index", "distro", ""),
+        ("index", "distro", "1.0"),
+        ("index", "distro", "1.1"),
+    ])
+
+_tests.append(_test_memory_cache)  # registers the test in the module-level `_tests` list
+
 def simpleapi_download_test_suite(name):
     """Create the test suite.