| import logging |
| import mimetypes |
| import os |
| import pathlib |
| from typing import Callable, Iterable, Optional, Tuple |
| |
| from pip._internal.models.candidate import InstallationCandidate |
| from pip._internal.models.link import Link |
| from pip._internal.utils.urls import path_to_url, url_to_path |
| from pip._internal.vcs import is_url |
| |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shorthand aliases used in the signatures throughout this module.
# Iterables produced by the ``page_candidates`` / ``file_links`` methods:
FoundCandidates = Iterable[InstallationCandidate]
FoundLinks = Iterable[Link]
# Callable that takes a link and reports a yes/no verdict on it.
PageValidator = Callable[[Link], bool]
# Callable that takes a link and produces the candidates found for it.
CandidatesFromPage = Callable[[Link], FoundCandidates]
| |
| |
class LinkSource:
    """Base interface for a source of links and installation candidates.

    Every member here only raises ``NotImplementedError``; concrete
    sources are expected to override all three.
    """

    @property
    def link(self) -> Optional[Link]:
        """The link backing this source, or ``None`` when it has none."""
        raise NotImplementedError

    def page_candidates(self) -> FoundCandidates:
        """Candidates obtained by parsing an HTML archive-listing page."""
        raise NotImplementedError

    def file_links(self) -> FoundLinks:
        """Links that point at archives which were specified directly."""
        raise NotImplementedError
| |
| |
| def _is_html_file(file_url: str) -> bool: |
| return mimetypes.guess_type(file_url, strict=False)[0] == "text/html" |
| |
| |
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Candidates listed on each HTML file in the
      directory.
    * ``file_links``: A link for every other (non-HTML) directory entry.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
    ) -> None:
        # The directory is stored with symlinks resolved (``realpath``).
        resolved_dir = os.path.realpath(path)
        self._path = pathlib.Path(resolved_dir)
        self._candidates_from_page = candidates_from_page

    @property
    def link(self) -> Optional[Link]:
        """A flat directory has no single underlying link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates found on each HTML entry of the directory."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if _is_html_file(entry_url):
                yield from self._candidates_from_page(Link(entry_url))

    def file_links(self) -> FoundLinks:
        """Yield a link for each directory entry that is not an HTML file."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if not _is_html_file(entry_url):
                yield Link(entry_url)
| |
| |
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Candidates listed on the file, if it is HTML.
    * ``file_links``: The file's own link, if it is not HTML.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._link = link
        self._candidates_from_page = candidates_from_page

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the file, but only when it is an HTML file."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the file's link, but only when it is not an HTML file."""
        if not _is_html_file(self._link.url):
            yield self._link
| |
| |
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Candidates listed on the page, provided the
      page validator accepts the link.
    * ``file_links``: The link itself, yielded unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._link = link
        self._page_validator = page_validator
        self._candidates_from_page = candidates_from_page

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the link when the validator approves it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself; no HTML check is made here."""
        yield self._link
| |
| |
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link. No direct file links
    are ever reported by this source.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._link = link
        self._candidates_from_page = candidates_from_page

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield whatever ``candidates_from_page`` produces for the link."""
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Return an empty tuple: a directory index has no direct files."""
        return tuple()
| |
| |
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Turn a user-supplied location into a ``(url, source)`` pair.

    :param location: An existing local path, a ``file:`` URL, or any other
        string that ``is_url`` recognises.
    :param candidates_from_page: Handed to whichever source gets created.
    :param page_validator: Handed to the source created for non-local URLs.
    :param expand_dir: For a local directory, choose between a flat
        directory source (true) and an index directory source (false).
    :param cache_link_parsing: Forwarded to every ``Link`` built here.
    :return: ``(None, None)`` when the location is neither an existing path
        nor a URL; ``(url, None)`` when it maps to a local path that is
        neither a file nor a directory; otherwise ``(url, source)``.
        A warning is logged in both of the ``None``-source cases.
    """
    resolved_url: str
    local_path: Optional[str]

    # The order matters: an existing path wins over URL interpretation.
    if os.path.exists(location):  # Is a local path.
        resolved_url, local_path = path_to_url(location), location
    elif location.startswith("file:"):  # A file: URL.
        resolved_url, local_path = location, url_to_path(location)
    elif is_url(location):
        resolved_url, local_path = location, None
    else:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    def make_link() -> Link:
        # Built on demand so only the branches that need a Link create one.
        return Link(resolved_url, cache_link_parsing=cache_link_parsing)

    # No local path means the location is a non-file URL.
    if local_path is None:
        remote_source = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=make_link(),
        )
        return (resolved_url, remote_source)

    source: Optional[LinkSource]
    if os.path.isdir(local_path):
        if expand_dir:
            source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=local_path,
            )
        else:
            source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=make_link(),
            )
    elif os.path.isfile(local_path):
        source = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=make_link(),
        )
    else:
        logger.warning(
            "Location '%s' is ignored: it is neither a file nor a directory.",
            location,
        )
        source = None
    return (resolved_url, source)