Skip to content
36 changes: 33 additions & 3 deletions python/private/pypi/extension.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
# See `hub_builder.bzl%hub_builder()` for `HubBuilder`
pip_hub_map = {}
simpleapi_cache = {}
facts = {}

for mod in module_ctx.modules:
for pip_attr in mod.tags.parse:
Expand All @@ -242,6 +243,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
evaluate_markers_fn = kwargs.get("evaluate_markers", None),
available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name),
facts = facts,
)
pip_hub_map[pip_attr.hub_name] = builder
elif pip_hub_map[hub_name].module_name != mod.name:
Expand Down Expand Up @@ -288,6 +290,25 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_group_map[hub.name] = out.group_map
hub_whl_map[hub.name] = out.whl_map

facts = {
"fact_version": facts.get("fact_version"),
} | {
index_url: {
k: _sorted_dict(f.get(k))
for k in [
"dist_filenames",
"dist_hashes",
"dist_yanked",
]
if f.get(k)
}
for index_url, f in facts.items()
if index_url not in ["fact_version"]
}
if len(facts) == 1:
# only version is present, skip writing
facts = None

return struct(
config = config,
exposed_packages = exposed_packages,
Expand All @@ -296,6 +317,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_whl_map = hub_whl_map,
whl_libraries = whl_libraries,
whl_mods = whl_mods,
facts = facts,
platform_config_settings = {
hub_name: {
platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
Expand All @@ -305,6 +327,12 @@ You cannot use both the additive_build_content and additive_build_content_file a
},
)

def _sorted_dict(d):
    """Return a copy of the given dict with its entries in sorted order.

    Args:
        d: {type}`dict | None` The dict to sort.

    Returns:
        A new dict built from the sorted items of `d`, or an empty dict if
        `d` is empty or `None`.
    """
    return dict(sorted(d.items())) if d else {}

def _pip_impl(module_ctx):
"""Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.

Expand Down Expand Up @@ -393,9 +421,11 @@ def _pip_impl(module_ctx):
groups = mods.hub_group_map.get(hub_name),
)

return module_ctx.extension_metadata(
reproducible = True,
)
kwargs = {"reproducible": True}
if mods.facts:
kwargs["facts"] = mods.facts

return module_ctx.extension_metadata(**kwargs)

_default_attrs = {
"arch_name": attr.string(
Expand Down
13 changes: 12 additions & 1 deletion python/private/pypi/hub_builder.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def hub_builder(
simpleapi_download_fn,
evaluate_markers_fn,
logger,
facts = None,
simpleapi_cache = {}):
"""Return a hub builder instance

Expand All @@ -47,6 +48,7 @@ def hub_builder(
used during the `repository_rule` and must be always compatible with the host.
simpleapi_download_fn: the function used to download from SimpleAPI.
simpleapi_cache: the cache for the download results.
facts: the facts if they are available.
logger: the logger for this builder.
"""

Expand Down Expand Up @@ -89,6 +91,10 @@ def hub_builder(
# Functions to download according to the config
# dict[str python_version, callable]
_get_index_urls = {},
# Contains the dict to store the facts to be written to the lockfile that
# can be safely cached for future invocations.
# dict[str, dict[str, str]]
_facts = facts,
# Tells whether to use the downloader for a package.
# dict[str python_version, dict[str package_name, bool use_downloader]]
_use_downloader = {},
Expand Down Expand Up @@ -399,11 +405,16 @@ def _set_get_index_urls(self, pip_attr):
d
for d in distributions
if _use_downloader(self, python_version, d)
],
] if type(distributions) == "list" else {
d: versions
for d, versions in distributions.items()
if _use_downloader(self, python_version, d)
},
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
facts = self._facts,
),
cache = self._simpleapi_cache,
parallel_download = pip_attr.parallel_download,
Expand Down
19 changes: 9 additions & 10 deletions python/private/pypi/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,15 @@ def parse_requirements(

index_urls = {}
if get_index_urls:
index_urls = get_index_urls(
ctx,
# Use list({}) as a way to have a set
list({
req.distribution: None
for reqs in requirements_by_platform.values()
for req in reqs.values()
if not req.srcs.url
}),
)
distributions = {}
for reqs in requirements_by_platform.values():
for req in reqs.values():
if req.srcs.url:
continue

distributions.setdefault(req.distribution, []).append(req.srcs.version)

index_urls = get_index_urls(ctx, distributions)

ret = []
for name, reqs in sorted(requirements_by_platform.items()):
Expand Down
77 changes: 57 additions & 20 deletions python/private/pypi/parse_simpleapi_html.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
Parse SimpleAPI HTML in Starlark.
"""

def parse_simpleapi_html(*, url, content):
def parse_simpleapi_html(*, url, content, distribution = None, return_absolute = True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There are a few TODOs in the docstrings for new and modified functions in this file that should be addressed:

  • parse_simpleapi_html (lines 24, 26): The distribution and return_absolute parameters need descriptions.
  • pkg_version (lines 129, 130): The filename and distribution parameters need descriptions.

Please fill these in to improve the documentation.

"""Get the package URLs for given shas by parsing the Simple API HTML.

Args:
url(str): The URL that the HTML content can be downloaded from.
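distribution(str): The name of the distribution, used to extract the version from sdist filenames. If not set, it is derived from the last path segment of `url`.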
content(str): The Simple API HTML content.
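return_absolute: {type}`bool` Whether to convert the distribution and metadata URLs to absolute URLs based on `url`. If `False`, the URLs are left unmodified. Defaults to `True`.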

Returns:
A list of structs with:
Expand All @@ -33,6 +35,9 @@ def parse_simpleapi_html(*, url, content):
present, then the 'metadata_url' is also present. Defaults to "".
* metadata_url: The URL for the METADATA if we can download it. Defaults to "".
"""
if not distribution:
_, _, distribution = url.strip("/").rpartition("/")

sdists = {}
whls = {}
lines = content.split("<a href=\"")
Expand All @@ -55,7 +60,8 @@ def parse_simpleapi_html(*, url, content):
sha256s_by_version = {}
for line in lines[1:]:
dist_url, _, tail = line.partition("#sha256=")
dist_url = _absolute_url(url, dist_url)
if return_absolute:
dist_url = absolute_url(index_url = url, url = dist_url)

sha256, _, tail = tail.partition("\"")

Expand All @@ -64,7 +70,7 @@ def parse_simpleapi_html(*, url, content):

head, _, _ = tail.rpartition("</a>")
maybe_metadata, _, filename = head.rpartition(">")
version = _version(filename)
version = pkg_version(filename, distribution)
sha256s_by_version.setdefault(version, []).append(sha256)

metadata_sha256 = ""
Expand All @@ -79,13 +85,17 @@ def parse_simpleapi_html(*, url, content):
break

if filename.endswith(".whl"):
metadata_url = metadata_url or ""
if return_absolute and metadata_url:
metadata_url = absolute_url(index_url = url, url = metadata_url)

whls[sha256] = struct(
filename = filename,
version = version,
url = dist_url,
sha256 = sha256,
metadata_sha256 = metadata_sha256,
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
metadata_url = metadata_url,
yanked = yanked,
)
else:
Expand All @@ -110,18 +120,36 @@ _SDIST_EXTS = [
".zip",
]

def _version(filename):
def pkg_version(filename, distribution = None):
"""pkg_version extracts the version from the filename.

TODO: move this to a different location

Args:
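filename: {type}`str` The file name of the distribution (a wheel or an sdist).
distribution: {type}`str` The name of the distribution. Mandatory for sdists, where it is used to strip the `{name}-` prefix from the filename.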

Returns:
version string
"""
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format

_, _, tail = filename.partition("-")
version, _, _ = tail.partition("-")
if version != tail:
# The format is {name}-{version}-{whl_specifiers}.whl
return version
if filename.endswith(".whl"):
_, _, tail = filename.partition("-")
version, _, _ = tail.partition("-")
if version != tail:
# The format is {name}-{version}-{whl_specifiers}.whl
return version

if not distribution:
fail("for parsing sdists passing 'distribution' is mandatory")

# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path

# {name}-{version}.{ext}
# TODO @aignas 2026-01-20: test for handling dashes in names, can't think of any other way to
# get the version from the filename but to pass in the distribution name to this function.
version = filename[len(distribution) + 1:]
for ext in _SDIST_EXTS:
version, _, _ = version.partition(ext) # build or name

Expand All @@ -147,26 +175,35 @@ def _is_downloadable(url):
"""
return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")

def absolute_url(*, index_url, url):
    """Return an absolute URL in case the url is not absolute.

    Args:
        index_url: {type}`str` The index_url.
        url: {type}`str` The url of the artifact.

    Returns:
        `url` if it is empty or already absolute, otherwise an absolute URL
        based on the `index_url`.
    """
    if url == "":
        return url

    if _is_downloadable(url):
        return url

    if url.startswith("/"):
        # absolute path
        root_directory = _get_root_directory(index_url)
        return "{}{}".format(root_directory, url)

    # Count only the leading `..` path segments. Splitting on every `..`
    # occurrence would also count dots that are part of a file name (e.g.
    # `../foo..bar.whl`) or that appear later in the path, and would drop
    # the path components in between.
    segments = url.split("/")
    ups = 0
    for segment in segments:
        if segment != "..":
            break
        ups += 1

    if ups > 0:
        # relative path with up references: go up one level per leading `..`
        base = index_url
        for _ in range(ups):
            base, _, _ = base.rstrip("/").rpartition("/")

        remainder = "/".join(segments[ups:]).strip("/")
        return "{}/{}".format(base, remainder)

    # relative path without up-references
    return "{}/{}".format(index_url.rstrip("/"), url)
Loading