From 4ddfc3a077bd88d742d2d80c6c3e2a0a7f470878 Mon Sep 17 00:00:00 2001
From: Michal Szczepanski
Date: Mon, 15 Apr 2024 14:58:39 +0200
Subject: [PATCH] feat: add local pypi package mirror (#333)

---
Review notes (text between "---" and the diffstat is ignored by git am):
- Line breaks and indentation were restored; the indentation of context
  lines was reconstructed and should be checked against the base tree.
- pypiserver/mirror_cache.py was revised after the original commit, so the
  blob id on its "index" line is stale:
  * MirrorCache.cache is a real OrderedDict (popitem(last=False) raised
    TypeError on the plain dict once cache_limit was exceeded);
  * annotations naming requests / OrderedDict generics are quoted, so the
    module imports without the optional dependencies;
  * the import-failure message goes through the module logger;
  * upstream requests use a timeout, and failed or non-200 responses are
    neither cached nor written to disk;
  * anchors without href are skipped, relative hrefs are resolved, and
    data-requires-python / data-yanked are kept on rewritten links;
  * downloaded files are served through bottle's static_file.

 pypiserver/_app.py                         |   8 +-
 pypiserver/config.py                       |  11 ++
 pypiserver/mirror_cache.py                 | 156 +++++++++++++++++++++
 requirements/mirror-cache-requirements.txt |   2 +
 4 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 pypiserver/mirror_cache.py
 create mode 100644 requirements/mirror-cache-requirements.txt

diff --git a/pypiserver/_app.py b/pypiserver/_app.py
index b75e50f..6b0edef 100644
--- a/pypiserver/_app.py
+++ b/pypiserver/_app.py
@@ -14,6 +14,7 @@ from urllib.parse import urljoin, urlparse
 from pypiserver.config import RunConfig
 from . import __version__
 from . import core
+from . import mirror_cache
 from .bottle import (
     static_file,
     redirect,
@@ -286,7 +287,9 @@ def simple(project):
         key=lambda x: (x.parsed_version, x.relfn),
     )
     if not packages:
-        if not config.disable_fallback:
+        if config.mirror:
+            return mirror_cache.MirrorCache.add(project=project, config=config)
+        elif not config.disable_fallback:
             return redirect(f"{config.fallback_url.rstrip('/')}/{project}/")
         return HTTPError(404, f"Not Found ({normalized} does not exist)\n\n")
 
@@ -364,7 +367,8 @@ def server_static(filename):
                     "Cache-Control", f"public, max-age={config.cache_control}"
                 )
             return response
-
+    if config.mirror and mirror_cache.MirrorCache.has_project(filename):
+        return mirror_cache.MirrorCache.get_static_file(filename=filename, config=config)
     return HTTPError(404, f"Not Found ({filename} does not exist)\n\n")
 
 
diff --git a/pypiserver/config.py b/pypiserver/config.py
index 5850c90..e03f4ff 100644
--- a/pypiserver/config.py
+++ b/pypiserver/config.py
@@ -517,6 +517,14 @@ def get_parser() -> argparse.ArgumentParser:
             "to '%%s' to see them all."
         ),
     )
+    run_parser.add_argument(
+        "--mirror",
+        default=0,
+        action="count",
+        help=(
+            "Mirror packages to local disk"
+        ),
+    )
 
     update_parser = subparsers.add_parser(
         "update",
@@ -720,6 +728,7 @@ class RunConfig(_ConfigCommon):
         overwrite: bool,
         welcome_msg: str,
         cache_control: t.Optional[int],
+        mirror: bool,
         log_req_frmt: str,
         log_res_frmt: str,
         log_err_frmt: str,
@@ -745,6 +754,7 @@ class RunConfig(_ConfigCommon):
         # Derived properties
         self._derived_properties = self._derived_properties + ("auther",)
         self.auther = self.get_auther(auther)
+        self.mirror = mirror
 
     @classmethod
     def kwargs_from_namespace(
@@ -764,6 +774,7 @@ class RunConfig(_ConfigCommon):
             "overwrite": namespace.overwrite,
             "welcome_msg": namespace.welcome,
             "cache_control": namespace.cache_control,
+            "mirror": namespace.mirror,
             "log_req_frmt": namespace.log_req_frmt,
             "log_res_frmt": namespace.log_res_frmt,
             "log_err_frmt": namespace.log_err_frmt,
diff --git a/pypiserver/mirror_cache.py b/pypiserver/mirror_cache.py
new file mode 100644
index 0000000..0e13987
--- /dev/null
+++ b/pypiserver/mirror_cache.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Mirror packages of the fallback index: pages in memory, files on disk.
+
+The optional ``requests`` and ``beautifulsoup4`` packages are required;
+without them requests are redirected to the fallback index instead.
+"""
+import logging
+import os
+from collections import OrderedDict
+from urllib.parse import urljoin
+
+from pypiserver.bottle import HTTPError, redirect, static_file
+from pypiserver.config import RunConfig
+
+log = logging.getLogger(__name__)
+
+try:
+    import requests
+    from bs4 import BeautifulSoup
+
+    import_ok = True
+except ImportError:
+    import_ok = False
+    # logging.error() would implicitly call logging.basicConfig() at import
+    # time; the module logger does not touch the root logger setup.
+    log.error("mirror_cache import dependencies error")
+
+
+class CacheElement:
+    """Rewritten index page of one project and the upstream URL of each file."""
+
+    def __init__(self, project: str):
+        self.project = project
+        # Index page whose links point at this server.
+        self.html = ""
+        # Maps "project/file-name" to the upstream href of that file.
+        self.cache = dict()
+
+    def add(self, href: str):
+        """Remember *href* and return the local ``/packages/`` link for it.
+
+        A URL fragment (``#...``) stays in the returned link but is not part
+        of the cache key.
+        """
+        targz = href.split("/")[-1]
+        pkg_name = targz.split("#")[0]
+        self.cache[f"{self.project}/{pkg_name}"] = href
+        return f"/packages/{self.project}/{targz}"
+
+
+class MirrorCache:
+    """Class-level cache of project index pages fetched from the fallback."""
+
+    # Must be an OrderedDict: purging relies on popitem(last=False), which a
+    # plain dict rejects. Quoted: not subscriptable at runtime before 3.9.
+    cache: "OrderedDict[str, CacheElement]" = OrderedDict()
+    cache_limit = 10
+    # Seconds to wait for the upstream server before giving up.
+    request_timeout = 30
+    # Link attributes pip uses to select a file (PEP 503, PEP 592).
+    kept_link_attrs = ("data-requires-python", "data-yanked")
+
+    @classmethod
+    def _fetch(cls, url: str):
+        """Return the upstream response for *url*, or None on any failure."""
+        try:
+            resp = requests.get(url, timeout=cls.request_timeout)
+        except requests.RequestException as exc:
+            log.warning(f"mirror_cache request to '{url}' failed: {exc}")
+            return None
+        if resp.status_code != 200:
+            log.warning(f"mirror_cache status {resp.status_code} for '{url}'")
+            return None
+        return resp
+
+    @classmethod
+    def add(cls, project: str, config: RunConfig) -> str:
+        """Return the index page of *project* with links to this server.
+
+        The page is fetched from ``config.fallback_url`` on first use and kept
+        in ``cls.cache``; once ``cache_limit`` is exceeded the oldest entry is
+        dropped. Without the optional dependencies the client is redirected.
+        """
+        url = f"{config.fallback_url.rstrip('/')}/{project}/"
+        if not import_ok:
+            return redirect(url)
+
+        if project in cls.cache:
+            log.info(f"mirror_cache serve html from cache {project}")
+            return cls.cache[project].html
+
+        resp = cls._fetch(url)
+        if resp is None:
+            # Do not cache (and keep serving) an upstream error page.
+            return HTTPError(404, f"Not Found ({project} does not exist)\n\n")
+
+        element = CacheElement(project=project)
+        soup = BeautifulSoup(resp.content, "html.parser")
+        # href=True skips anchors without a target instead of raising KeyError.
+        for link in soup.find_all("a", href=True):
+            # new href with mapping to old href for later; relative upstream
+            # links are made absolute so they can be downloaded later
+            new_href = element.add(href=urljoin(url, link["href"]))
+            # create new link
+            new_link = soup.new_tag("a")
+            new_link.string = link.text.strip()
+            new_link["href"] = new_href
+            for attr in cls.kept_link_attrs:
+                if link.has_attr(attr):
+                    new_link[attr] = link[attr]
+            link.replace_with(new_link)
+        element.html = str(soup)
+        cls.cache[project] = element
+        log.info(f"mirror_cache add project '{project}' to cache")
+        # purge
+        if len(cls.cache) > cls.cache_limit:
+            item = cls.cache.popitem(last=False)
+            log.info(f"mirror_cache limit '{cls.cache_limit}' exceeded, purged last item - {item}")
+        return element.html
+
+    @classmethod
+    def has_project(cls, filename):
+        """Tell whether the project part of ``project/file`` is cached."""
+        project = filename.split("/")[0]
+        return project in cls.cache
+
+    @classmethod
+    def get_static_file(cls, filename, config: RunConfig):
+        """Download *filename* from upstream, store it and serve it."""
+        not_found = HTTPError(404, f"Not Found ({filename} does not exist)\n\n")
+        if not import_ok:
+            return not_found
+        project = filename.split("/")[0]
+        # The project may have been purged since has_project() was checked.
+        element = cls.cache.get(project)
+        if element is None or filename not in element.cache:
+            log.info(f"mirror_cache not found in cache {filename} ")
+            return not_found
+        resp = cls._fetch(element.cache[filename])
+        if resp is None:
+            # Never store an upstream error page as if it were the package.
+            return not_found
+        cls.add_to_cache(filename=filename, resp=resp, config=config)
+        return static_file(filename, root=config.package_root)
+
+    @classmethod
+    def add_to_cache(cls, filename: str, resp: "requests.Response", config: RunConfig):
+        """Write the body of *resp* to *filename* below ``config.package_root``."""
+        # The annotation is quoted so that this module still imports when the
+        # optional ``requests`` dependency is missing.
+        project = filename.split("/")[0]
+        os.makedirs(os.path.join(config.package_root, project), exist_ok=True)
+        log.info(f"mirror_cache add file '{filename}' to cache")
+        with open(os.path.join(config.package_root, filename), "wb") as f:
+            f.write(resp.content)
diff --git a/requirements/mirror-cache-requirements.txt b/requirements/mirror-cache-requirements.txt
new file mode 100644
index 0000000..daca14b
--- /dev/null
+++ b/requirements/mirror-cache-requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4==4.12.3
+requests==2.31.0