Source code for pyproject_metadata

# SPDX-License-Identifier: MIT

"""
This is pyproject_metadata, a library for working with PEP 621 metadata.

Example usage:

.. code-block:: python

   from pyproject_metadata import StandardMetadata

   metadata = StandardMetadata.from_pyproject(
       parsed_pyproject, allow_extra_keys=False, all_errors=True, metadata_version="2.3"
   )

   pkg_info = metadata.as_rfc822()
   with open("METADATA", "wb") as f:
       f.write(pkg_info.as_bytes())

   ep = metadata.entrypoints.copy()
   ep["console_scripts"] = metadata.scripts
   ep["gui_scripts"] = metadata.gui_scripts
   with open("entry_points.txt", "w", encoding="utf-8") as f:
       for group, entries in ep.items():
           if entries:
               print(f"[{group}]", file=f)
               for name, target in entries.items():
                   print(f"{name} = {target}", file=f)
               print(file=f)

"""

from __future__ import annotations

import copy
import dataclasses
import email.message
import email.policy
import email.utils
import os
import os.path
import pathlib
import re
import sys
import typing
import warnings

# Build backends may vendor this package, so all imports are relative.
from . import constants
from .errors import ConfigurationError, ConfigurationWarning, ErrorCollector
from .pyproject import License, PyProjectReader, Readme

if typing.TYPE_CHECKING:
    from collections.abc import Mapping
    from typing import Any

    from packaging.requirements import Requirement

    if sys.version_info < (3, 11):
        from typing_extensions import Self
    else:
        from typing import Self

    from .project_table import Dynamic, PyProjectTable

import packaging.markers
import packaging.specifiers
import packaging.utils
import packaging.version

# Version string of this package.
__version__ = "0.9.0"

# Public names of this module. ``ConfigurationError``, ``License`` and
# ``Readme`` are re-exported from the sibling ``errors`` / ``pyproject``
# modules imported above; the remaining names are defined in this file.
__all__ = [
    "ConfigurationError",
    "License",
    "RFC822Message",
    "RFC822Policy",
    "Readme",
    "StandardMetadata",
    "field_to_metadata",
    "extras_build_system",
    "extras_project",
    "extras_top_level",
]


def __dir__() -> list[str]:
    """
    Return the public names of this module, exactly as listed in ``__all__``.
    """
    return __all__


def field_to_metadata(field: str) -> frozenset[str]:
    """
    Look up the METADATA field names that a ``project`` table field maps to.

    The lookup is a plain index into ``constants.PROJECT_TO_METADATA``, so a
    field that is not a key of that mapping raises ``KeyError``.
    """
    metadata_names = constants.PROJECT_TO_METADATA[field]
    return frozenset(metadata_names)
def extras_top_level(pyproject_table: Mapping[str, Any]) -> set[str]:
    """
    Return the top-level keys of the pyproject table that are not listed in
    ``constants.KNOWN_TOPLEVEL_FIELDS``.
    """
    present_keys = set(pyproject_table)
    return present_keys - constants.KNOWN_TOPLEVEL_FIELDS
def extras_build_system(pyproject_table: Mapping[str, Any]) -> set[str]:
    """
    Return the keys of the ``build-system`` table that are not listed in
    ``constants.KNOWN_BUILD_SYSTEM_FIELDS``.

    A missing ``build-system`` table is treated as empty.
    """
    build_system_table = pyproject_table.get("build-system", [])
    present_keys = set(build_system_table)
    return present_keys - constants.KNOWN_BUILD_SYSTEM_FIELDS
def extras_project(pyproject_table: Mapping[str, Any]) -> set[str]:
    """
    Return the keys of the ``project`` table that are not listed in
    ``constants.KNOWN_PROJECT_FIELDS``.

    A missing ``project`` table is treated as empty.
    """
    project_table = pyproject_table.get("project", [])
    present_keys = set(project_table)
    return present_keys - constants.KNOWN_PROJECT_FIELDS
@dataclasses.dataclass
class _SmartMessageSetter:
    """
    Thin internal wrapper around a Message that cuts down on boilerplate when
    filling in headers: assigning a falsy value (``None`` or an empty string)
    is silently ignored instead of being written.
    """

    message: email.message.Message

    def __setitem__(self, name: str, value: str | None) -> None:
        # Only truthy values are forwarded to the wrapped message.
        if value:
            self.message[name] = value

    def set_payload(self, payload: str) -> None:
        self.message.set_payload(payload)


@dataclasses.dataclass
class _JSonMessageSetter:
    """
    Counterpart of the Message setter that fills a plain dict, so JSON output
    can be produced through the same calls as the classic Message. Values are
    stored unmodified, so line breaks are preserved.
    """

    data: dict[str, str | list[str]]

    def __setitem__(self, name: str, value: str | None) -> None:
        lowered = name.lower()
        # JSON keys are the lower-cased header name with "-" replaced by "_".
        json_key = lowered.replace("-", "_")

        if value is None:
            return

        if lowered == "keywords":
            # Keywords arrive as one comma-separated string; store them as a
            # list of stripped, non-empty entries.
            stripped = (part.strip() for part in value.split(","))
            self.data[json_key] = [part for part in stripped if part]
            return

        if lowered in constants.KNOWN_MULTIUSE:
            # Fields that may be set several times accumulate into a list.
            bucket = self.data.setdefault(json_key, [])
            assert isinstance(bucket, list)
            bucket.append(value)
            return

        self.data[json_key] = value

    def set_payload(self, payload: str) -> None:
        # The message body is stored under the "description" key.
        self["description"] = payload
class RFC822Policy(email.policy.EmailPolicy):
    """
    An :class:`email.policy.EmailPolicy` with a simplified
    ``header_store_parse`` that copes with multiline values, plus defaults
    suited to metadata files (UTF-8 output, no ``From`` mangling, no line
    length limit).
    """

    utf8 = True
    mangle_from_ = False
    max_line_length = 0

    def header_store_parse(self, name: str, value: str) -> tuple[str, str]:
        """
        Check the header name and indent continuation lines of the value.

        Raises ``ConfigurationError`` if the lower-cased name is not in
        ``constants.KNOWN_METADATA_FIELDS``.
        """
        is_known = name.lower() in constants.KNOWN_METADATA_FIELDS
        if not is_known:
            msg = f"Unknown field {name!r}"
            raise ConfigurationError(msg, key=name)
        # Every line after the first is indented by len(name) + 2 spaces
        # (presumably so it lines up under the value following "Name: ").
        continuation = "\n" + " " * (len(name) + 2)
        return (name, value.replace("\n", continuation))
class RFC822Message(email.message.EmailMessage):
    """
    An :class:`email.message.EmailMessage` with two small differences: it is
    always created with our `RFC822Policy`, and ``as_bytes()`` (and therefore
    ``bytes()``) produces the UTF-8 encoding of the string form.
    """

    def __init__(self) -> None:
        default_policy = RFC822Policy()
        super().__init__(policy=default_policy)

    def as_bytes(
        self, unixfrom: bool = False, policy: email.policy.Policy | None = None
    ) -> bytes:
        """
        Serialize the message as text and encode the result as UTF-8.
        """
        text = self.as_string(unixfrom, policy=policy)
        return text.encode("utf-8")
@dataclasses.dataclass
class StandardMetadata:
    """
    This class represents the standard metadata fields for a project. It can be
    used to read metadata from a pyproject.toml table, validate it, and write it
    to an RFC822 message or JSON.

    Instances are validated on construction (``__post_init__`` calls
    :meth:`validate`) and again, without warnings, whenever the metadata is
    written out.
    """

    name: str
    version: packaging.version.Version | None = None
    description: str | None = None
    license: License | str | None = None
    license_files: list[pathlib.Path] | None = None
    readme: Readme | None = None
    requires_python: packaging.specifiers.SpecifierSet | None = None
    dependencies: list[Requirement] = dataclasses.field(default_factory=list)
    optional_dependencies: dict[str, list[Requirement]] = dataclasses.field(
        default_factory=dict
    )
    entrypoints: dict[str, dict[str, str]] = dataclasses.field(default_factory=dict)
    authors: list[tuple[str, str | None]] = dataclasses.field(default_factory=list)
    maintainers: list[tuple[str, str | None]] = dataclasses.field(default_factory=list)
    urls: dict[str, str] = dataclasses.field(default_factory=dict)
    classifiers: list[str] = dataclasses.field(default_factory=list)
    keywords: list[str] = dataclasses.field(default_factory=list)
    scripts: dict[str, str] = dataclasses.field(default_factory=dict)
    gui_scripts: dict[str, str] = dataclasses.field(default_factory=dict)
    dynamic: list[Dynamic] = dataclasses.field(default_factory=list)
    """
    This field is used to track dynamic fields. You can't set a field not in this list.
    """
    dynamic_metadata: list[str] = dataclasses.field(default_factory=list)
    """
    This is a list of METADATA fields that can change in between SDist and wheel. Requires metadata_version 2.2+.
    """
    metadata_version: str | None = None
    """
    This is the target metadata version. If None, it will be computed as a minimum based on the fields set.
    """
    all_errors: bool = False
    """
    If True, all errors will be collected and raised in an ExceptionGroup.
    """

    def __post_init__(self) -> None:
        # Validate immediately so an inconsistent object is never handed out.
        self.validate()

    @property
    def auto_metadata_version(self) -> str:
        """
        This computes the metadata version based on the fields set in the object
        if ``metadata_version`` is None.

        An explicit ``metadata_version`` always wins. Otherwise the result is
        "2.4" when ``license`` is a string or ``license_files`` is set, "2.2"
        when ``dynamic_metadata`` is non-empty, and "2.1" in all other cases.
        """
        if self.metadata_version is not None:
            return self.metadata_version

        if isinstance(self.license, str) or self.license_files is not None:
            return "2.4"
        if self.dynamic_metadata:
            return "2.2"
        return "2.1"

    @property
    def canonical_name(self) -> str:
        """
        Return the canonical name of the project, as computed by
        ``packaging.utils.canonicalize_name``.
        """
        return packaging.utils.canonicalize_name(self.name)

    @classmethod
    def from_pyproject(  # noqa: C901
        cls,
        data: Mapping[str, Any],
        project_dir: str | os.PathLike[str] = os.path.curdir,
        metadata_version: str | None = None,
        dynamic_metadata: list[str] | None = None,
        *,
        allow_extra_keys: bool | None = None,
        all_errors: bool = False,
    ) -> Self:
        """
        Read metadata from a pyproject.toml table. This is the main method for
        creating an instance of this class. It also supports two additional
        fields: ``allow_extra_keys`` to control what happens when extra keys are
        present in the pyproject table, and ``all_errors``, to raise all errors
        in an ExceptionGroup instead of raising the first one.

        :param data: The parsed pyproject.toml contents; must contain a
            ``project`` table.
        :param project_dir: Directory handed to the reader when loading the
            license, license files and readme.
        :param metadata_version: Target metadata version, stored on the
            instance unchanged.
        :param dynamic_metadata: METADATA fields to mark as dynamic; ``None``
            is treated as an empty list.
        :param allow_extra_keys: If truthy, unknown keys in ``project`` are not
            checked. If ``False`` they are reported through the reader's
            ``config_error``; if ``None`` (default) they are reported with
            ``warn=True``.
        :param all_errors: Passed to the reader as ``collect_errors`` and
            stored on the instance.
        """
        pyproject = PyProjectReader(collect_errors=all_errors)

        pyproject_table: PyProjectTable = data  # type: ignore[assignment]
        if "project" not in pyproject_table:
            msg = "Section {key} missing in pyproject.toml"
            pyproject.config_error(msg, key="project")
            # Nothing else can be read without a project table, so stop here
            # even when errors are being collected.
            pyproject.finalize("Failed to parse pyproject.toml")
            msg = "Unreachable code"  # pragma: no cover
            raise AssertionError(msg)  # pragma: no cover

        project = pyproject_table["project"]
        project_dir = pathlib.Path(project_dir)

        if not allow_extra_keys:
            extra_keys = extras_project(data)
            if extra_keys:
                extra_keys_str = ", ".join(sorted(f"{k!r}" for k in extra_keys))
                msg = "Extra keys present in {key}: {extra_keys}"
                pyproject.config_error(
                    msg,
                    key="project",
                    extra_keys=extra_keys_str,
                    warn=allow_extra_keys is None,
                )

        dynamic = pyproject.get_dynamic(project)

        # A field may not be both listed as dynamic and given a value.
        for field in dynamic:
            if field in data["project"]:
                msg = 'Field {key} declared as dynamic in "project.dynamic" but is defined'
                pyproject.config_error(msg, key=f"project.{field}")

        # "UNKNOWN" is a placeholder so processing can continue after a name
        # error has been recorded.
        raw_name = project.get("name")
        name = "UNKNOWN"
        if raw_name is None:
            msg = "Field {key} missing"
            pyproject.config_error(msg, key="project.name")
        else:
            tmp_name = pyproject.ensure_str(raw_name, "project.name")
            if tmp_name is not None:
                name = tmp_name

        # NOTE(review): "0.0.0" is kept whenever no usable version is read,
        # including when "version" is listed as dynamic and absent - confirm
        # that callers are expected to overwrite it in that case.
        version: packaging.version.Version | None = packaging.version.Version("0.0.0")
        raw_version = project.get("version")
        if raw_version is not None:
            version_string = pyproject.ensure_str(raw_version, "project.version")
            if version_string is not None:
                try:
                    version = (
                        packaging.version.Version(version_string)
                        if version_string
                        else None
                    )
                except packaging.version.InvalidVersion:
                    msg = "Invalid {key} value, expecting a valid PEP 440 version"
                    pyproject.config_error(
                        msg, key="project.version", got=version_string
                    )
        elif "version" not in dynamic:
            msg = (
                "Field {key} missing and 'version' not specified in \"project.dynamic\""
            )
            pyproject.config_error(msg, key="project.version")

        # Description fills Summary, which cannot be multiline
        # However, throwing an error isn't backward compatible,
        # so leave it up to the users for now.
        project_description_raw = project.get("description")
        description = (
            pyproject.ensure_str(project_description_raw, "project.description")
            if project_description_raw is not None
            else None
        )

        requires_python_raw = project.get("requires-python")
        requires_python = None
        if requires_python_raw is not None:
            requires_python_string = pyproject.ensure_str(
                requires_python_raw, "project.requires-python"
            )
            if requires_python_string is not None:
                try:
                    requires_python = packaging.specifiers.SpecifierSet(
                        requires_python_string
                    )
                except packaging.specifiers.InvalidSpecifier:
                    msg = "Invalid {key} value, expecting a valid specifier set"
                    pyproject.config_error(
                        msg, key="project.requires-python", got=requires_python_string
                    )

        # Construction runs validation (via __post_init__); doing it inside
        # the reader's collect() context lets those errors be gathered too.
        self = None
        with pyproject.collect():
            self = cls(
                name=name,
                version=version,
                description=description,
                license=pyproject.get_license(project, project_dir),
                license_files=pyproject.get_license_files(project, project_dir),
                readme=pyproject.get_readme(project, project_dir),
                requires_python=requires_python,
                dependencies=pyproject.get_dependencies(project),
                optional_dependencies=pyproject.get_optional_dependencies(project),
                entrypoints=pyproject.get_entrypoints(project),
                authors=pyproject.ensure_people(
                    project.get("authors", []), "project.authors"
                ),
                maintainers=pyproject.ensure_people(
                    project.get("maintainers", []), "project.maintainers"
                ),
                urls=pyproject.ensure_dict(project.get("urls", {}), "project.urls")
                or {},
                classifiers=pyproject.ensure_list(
                    project.get("classifiers", []), "project.classifiers"
                )
                or [],
                keywords=pyproject.ensure_list(
                    project.get("keywords", []), "project.keywords"
                )
                or [],
                scripts=pyproject.ensure_dict(
                    project.get("scripts", {}), "project.scripts"
                )
                or {},
                gui_scripts=pyproject.ensure_dict(
                    project.get("gui-scripts", {}), "project.gui-scripts"
                )
                or {},
                dynamic=dynamic,
                dynamic_metadata=dynamic_metadata or [],
                metadata_version=metadata_version,
                all_errors=all_errors,
            )

        pyproject.finalize("Failed to parse pyproject.toml")
        assert self is not None
        return self

    def as_rfc822(self) -> RFC822Message:
        """
        Return an RFC822 message with the metadata.
        """
        message = RFC822Message()
        smart_message = _SmartMessageSetter(message)
        self._write_metadata(smart_message)
        return message

    def as_json(self) -> dict[str, str | list[str]]:
        """
        Return a JSON message with the metadata.
        """
        message: dict[str, str | list[str]] = {}
        smart_message = _JSonMessageSetter(message)
        self._write_metadata(smart_message)
        return message

    @staticmethod
    def _is_valid_project_name(name: str) -> bool:
        """
        Return True if ``name`` is a valid project name: only ASCII letters,
        digits, period, underscore and hyphen, starting and ending with a
        letter or digit.

        ``re.fullmatch`` is used instead of ``re.match`` with ``^...$`` because
        ``$`` also matches just before a trailing newline, and ``re.ASCII`` is
        set because ``[A-Z]`` combined with ``re.IGNORECASE`` otherwise also
        matches a few non-ASCII letters.
        """
        return (
            re.fullmatch(
                r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])",
                name,
                re.IGNORECASE | re.ASCII,
            )
            is not None
        )

    def validate(self, *, warn: bool = True) -> None:  # noqa: C901
        """
        Validate metadata for consistency and correctness. Will also produce
        warnings if ``warn`` is given. Respects ``all_errors``. This is called
        when loading a pyproject.toml, and when making metadata. Checks:

        - ``metadata_version`` is a known version or None
        - ``name`` is a valid project name
        - ``license_files`` can't be used with classic ``license``
        - License classifiers can't be used with SPDX license
        - ``description`` is a single line (warning)
        - ``license`` is not an SPDX license expression if metadata_version >= 2.4 (warning)
        - License classifiers deprecated for metadata_version >= 2.4 (warning)
        - ``license`` is an SPDX license expression if metadata_version >= 2.4
        - ``license_files`` is supported only for metadata_version >= 2.4
        - ``project_url`` can't contain keys over 32 characters
        """
        errors = ErrorCollector(collect_errors=self.all_errors)

        if self.auto_metadata_version not in constants.KNOWN_METADATA_VERSIONS:
            msg = "The metadata_version must be one of {versions} or None (default)"
            errors.config_error(msg, versions=constants.KNOWN_METADATA_VERSIONS)

        # See https://packaging.python.org/en/latest/specifications/core-metadata/#name and
        # https://packaging.python.org/en/latest/specifications/name-normalization/#name-format
        if not self._is_valid_project_name(self.name):
            msg = (
                "Invalid project name {name!r}. A valid name consists only of ASCII letters and "
                "numbers, period, underscore and hyphen. It must start and end with a letter or number"
            )
            errors.config_error(msg, key="project.name", name=self.name)

        if self.license_files is not None and isinstance(self.license, License):
            msg = '{key} must not be used when "project.license" is not a SPDX license expression'
            errors.config_error(msg, key="project.license-files")

        if isinstance(self.license, str) and any(
            c.startswith("License ::") for c in self.classifiers
        ):
            msg = "Setting {key} to an SPDX license expression is not compatible with 'License ::' classifiers"
            errors.config_error(msg, key="project.license")

        if warn:
            if self.description and "\n" in self.description:
                warnings.warn(
                    'The one-line summary "project.description" should not contain more than one line. Readers might merge or truncate newlines.',
                    ConfigurationWarning,
                    stacklevel=2,
                )
            if self.auto_metadata_version not in constants.PRE_SPDX_METADATA_VERSIONS:
                if isinstance(self.license, License):
                    warnings.warn(
                        'Set "project.license" to an SPDX license expression for metadata >= 2.4',
                        ConfigurationWarning,
                        stacklevel=2,
                    )
                elif any(c.startswith("License ::") for c in self.classifiers):
                    warnings.warn(
                        "'License ::' classifiers are deprecated for metadata >= 2.4, use a SPDX license expression for \"project.license\" instead",
                        ConfigurationWarning,
                        stacklevel=2,
                    )

        if (
            isinstance(self.license, str)
            and self.auto_metadata_version in constants.PRE_SPDX_METADATA_VERSIONS
        ):
            msg = "Setting {key} to an SPDX license expression is supported only when emitting metadata version >= 2.4"
            errors.config_error(msg, key="project.license")

        if (
            self.license_files is not None
            and self.auto_metadata_version in constants.PRE_SPDX_METADATA_VERSIONS
        ):
            msg = "{key} is supported only when emitting metadata version >= 2.4"
            errors.config_error(msg, key="project.license-files")

        for name in self.urls:
            if len(name) > 32:
                msg = "{key} names cannot be more than 32 characters long"
                errors.config_error(msg, key="project.urls", got=name)

        errors.finalize("Metadata validation failed")

    def _write_metadata(  # noqa: C901
        self, smart_message: _SmartMessageSetter | _JSonMessageSetter
    ) -> None:
        """
        Write the metadata to the message. Handles JSON or Message.

        The object is re-validated first (without warnings). Raises
        ``ConfigurationError`` if ``version`` is unset, or if
        ``dynamic_metadata`` names "name", "version" or "dynamic" or a field
        not in ``constants.KNOWN_METADATA_FIELDS`` (the latter two checks only
        run when the metadata version is not "2.1").
        """
        self.validate(warn=False)

        smart_message["Metadata-Version"] = self.auto_metadata_version
        smart_message["Name"] = self.name
        if not self.version:
            msg = "Missing version field"
            raise ConfigurationError(msg)
        smart_message["Version"] = str(self.version)
        # skip 'Platform'
        # skip 'Supported-Platform'
        if self.description:
            smart_message["Summary"] = self.description
        smart_message["Keywords"] = ",".join(self.keywords) or None
        # skip 'Home-page'
        # skip 'Download-URL'
        smart_message["Author"] = _name_list(self.authors)
        smart_message["Author-Email"] = _email_list(self.authors)
        smart_message["Maintainer"] = _name_list(self.maintainers)
        smart_message["Maintainer-Email"] = _email_list(self.maintainers)

        if isinstance(self.license, License):
            smart_message["License"] = self.license.text
        elif isinstance(self.license, str):
            smart_message["License-Expression"] = self.license

        if self.license_files is not None:
            # De-duplicate and sort so the output order is deterministic.
            for license_file in sorted(set(self.license_files)):
                smart_message["License-File"] = os.fspath(license_file.as_posix())
        elif (
            self.auto_metadata_version not in constants.PRE_SPDX_METADATA_VERSIONS
            and isinstance(self.license, License)
            and self.license.file
        ):
            smart_message["License-File"] = os.fspath(self.license.file.as_posix())

        for classifier in self.classifiers:
            smart_message["Classifier"] = classifier
        # skip 'Provides-Dist'
        # skip 'Obsoletes-Dist'
        # skip 'Requires-External'
        for name, url in self.urls.items():
            smart_message["Project-URL"] = f"{name}, {url}"
        if self.requires_python:
            smart_message["Requires-Python"] = str(self.requires_python)
        for dep in self.dependencies:
            smart_message["Requires-Dist"] = str(dep)
        for extra, requirements in self.optional_dependencies.items():
            norm_extra = extra.replace(".", "-").replace("_", "-").lower()
            smart_message["Provides-Extra"] = norm_extra
            for requirement in requirements:
                smart_message["Requires-Dist"] = str(
                    _build_extra_req(norm_extra, requirement)
                )
        if self.readme:
            if self.readme.content_type:
                smart_message["Description-Content-Type"] = self.readme.content_type
            smart_message.set_payload(self.readme.text)
        # Core Metadata 2.2
        if self.auto_metadata_version != "2.1":
            for field in self.dynamic_metadata:
                if field.lower() in {"name", "version", "dynamic"}:
                    msg = f"Field cannot be set as dynamic metadata: {field}"
                    raise ConfigurationError(msg)
                if field.lower() not in constants.KNOWN_METADATA_FIELDS:
                    msg = f"Field is not known: {field}"
                    raise ConfigurationError(msg)
                smart_message["Dynamic"] = field
def _name_list(people: list[tuple[str, str | None]]) -> str | None:
    """
    Join, with ", ", the names of the people that have no email address.

    Returns None when the joined string is empty.
    """
    names_only = [name for name, address in people if not address]
    return ", ".join(names_only) or None


def _email_list(people: list[tuple[str, str | None]]) -> str | None:
    """
    Join, with ", ", the formatted "name <email>" addresses of the people that
    have an email address.

    Returns None when the joined string is empty.
    """
    formatted = [
        email.utils.formataddr((name, address)) for name, address in people if address
    ]
    return ", ".join(formatted) or None


def _build_extra_req(
    extra: str,
    requirement: Requirement,
) -> Requirement:
    """
    Return a copy of the requirement whose marker additionally requires the
    given extra. The requirement passed in is left untouched.
    """
    tagged = copy.copy(requirement)
    existing = tagged.marker
    if not existing:
        marker_text = f"extra == {extra!r}"
    elif "or" in existing._markers:
        # A top-level "or" must be parenthesized before appending "and ...".
        marker_text = f"({existing}) and extra == {extra!r}"
    else:
        marker_text = f"{existing} and extra == {extra!r}"
    tagged.marker = packaging.markers.Marker(marker_text)
    return tagged