feat: add local pypi package mirror (#333)

This commit is contained in:
Michal Szczepanski 2024-04-15 14:58:39 +02:00
parent 2f0a56c380
commit 4ddfc3a077
4 changed files with 110 additions and 2 deletions

@ -14,6 +14,7 @@ from urllib.parse import urljoin, urlparse
from pypiserver.config import RunConfig from pypiserver.config import RunConfig
from . import __version__ from . import __version__
from . import core from . import core
from . import mirror_cache
from .bottle import ( from .bottle import (
static_file, static_file,
redirect, redirect,
@ -286,7 +287,9 @@ def simple(project):
key=lambda x: (x.parsed_version, x.relfn), key=lambda x: (x.parsed_version, x.relfn),
) )
if not packages: if not packages:
if not config.disable_fallback: if config.mirror:
return mirror_cache.MirrorCache.add(project=project, config=config)
elif not config.disable_fallback:
return redirect(f"{config.fallback_url.rstrip('/')}/{project}/") return redirect(f"{config.fallback_url.rstrip('/')}/{project}/")
return HTTPError(404, f"Not Found ({normalized} does not exist)\n\n") return HTTPError(404, f"Not Found ({normalized} does not exist)\n\n")
@ -364,7 +367,8 @@ def server_static(filename):
"Cache-Control", f"public, max-age={config.cache_control}" "Cache-Control", f"public, max-age={config.cache_control}"
) )
return response return response
if config.mirror and mirror_cache.MirrorCache.has_project(filename):
return mirror_cache.MirrorCache.get_static_file(filename=filename, config=config)
return HTTPError(404, f"Not Found ({filename} does not exist)\n\n") return HTTPError(404, f"Not Found ({filename} does not exist)\n\n")

@ -517,6 +517,14 @@ def get_parser() -> argparse.ArgumentParser:
"to '%%s' to see them all." "to '%%s' to see them all."
), ),
) )
run_parser.add_argument(
"--mirror",
default=0,
action="count",
help=(
"Mirror packages to local disk"
),
)
update_parser = subparsers.add_parser( update_parser = subparsers.add_parser(
"update", "update",
@ -720,6 +728,7 @@ class RunConfig(_ConfigCommon):
overwrite: bool, overwrite: bool,
welcome_msg: str, welcome_msg: str,
cache_control: t.Optional[int], cache_control: t.Optional[int],
mirror: bool,
log_req_frmt: str, log_req_frmt: str,
log_res_frmt: str, log_res_frmt: str,
log_err_frmt: str, log_err_frmt: str,
@ -745,6 +754,7 @@ class RunConfig(_ConfigCommon):
# Derived properties # Derived properties
self._derived_properties = self._derived_properties + ("auther",) self._derived_properties = self._derived_properties + ("auther",)
self.auther = self.get_auther(auther) self.auther = self.get_auther(auther)
self.mirror = mirror
@classmethod @classmethod
def kwargs_from_namespace( def kwargs_from_namespace(
@ -764,6 +774,7 @@ class RunConfig(_ConfigCommon):
"overwrite": namespace.overwrite, "overwrite": namespace.overwrite,
"welcome_msg": namespace.welcome, "welcome_msg": namespace.welcome,
"cache_control": namespace.cache_control, "cache_control": namespace.cache_control,
"mirror": namespace.mirror,
"log_req_frmt": namespace.log_req_frmt, "log_req_frmt": namespace.log_req_frmt,
"log_res_frmt": namespace.log_res_frmt, "log_res_frmt": namespace.log_res_frmt,
"log_err_frmt": namespace.log_err_frmt, "log_err_frmt": namespace.log_err_frmt,

@ -0,0 +1,91 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
from collections import OrderedDict
from urllib.parse import urljoin

from pypiserver.bottle import HTTPError, redirect
from pypiserver.config import RunConfig
log = logging.getLogger(__name__)

# ``requests`` and ``bs4`` are optional third-party dependencies. Record
# whether they could be imported in ``import_ok`` so the code in this module
# can check the flag before touching either library.
try:
    import requests
    from bs4 import BeautifulSoup

    import_ok = True
except ImportError:
    import_ok = False
    # Report through the module logger rather than the root-level
    # ``logging.error`` helper, so the record is attributed to this module
    # and importing it never configures the root logger as a side effect.
    log.error("mirror_cache import dependencies error")
class CacheElement:
    """Per-project record: the rewritten index HTML and a link lookup table.

    Attributes are plain and public:
      * ``project`` - the project name this record belongs to.
      * ``html``    - the index page text; starts out empty.
      * ``cache``   - maps ``"<project>/<file name>"`` to the href that was
                      passed to :meth:`add`.
    """

    def __init__(self, project: str):
        self.project = project
        self.html = ""
        self.cache = {}

    def add(self, href: str):
        """Register ``href`` and return the local ``/packages/...`` path for it.

        The last path segment of ``href`` is used as the file name. Anything
        from the first ``#`` onwards is dropped from the lookup key but is
        kept in the returned path.
        """
        # Everything after the final "/" (the whole string when there is none).
        last_segment = href.rpartition("/")[2]
        # Same segment without a trailing "#fragment", used only for the key.
        file_name = last_segment.partition("#")[0]
        self.cache[f"{self.project}/{file_name}"] = href
        return f"/packages/{self.project}/{last_segment}"
class MirrorCache:
    """Class-level cache of upstream index pages, plus package mirroring.

    ``add`` fetches a project's index page from ``config.fallback_url``,
    rewrites every link to a local ``/packages/...`` path and remembers the
    upstream URL behind each link. ``get_static_file`` later downloads the
    file behind such a link and stores a copy under ``config.package_root``.

    All state lives on the class, so it is shared by every caller in the
    process.
    """

    # Project name -> CacheElement, oldest insertion first. This has to be a
    # real OrderedDict: eviction uses ``popitem(last=False)``, which a plain
    # dict rejects with TypeError. The annotation is a string so it is never
    # evaluated at class-creation time.
    cache: "OrderedDict[str, CacheElement]" = OrderedDict()
    # Maximum number of project index pages kept in memory.
    cache_limit = 10
    # Seconds to wait on the upstream server; without a timeout a stalled
    # upstream would block the request indefinitely.
    request_timeout = 30

    @classmethod
    def add(cls, project: str, config: RunConfig) -> str:
        """Return the rewritten index page for ``project``, fetching it if needed.

        Args:
            project: project name as requested by the client.
            config: run configuration; ``fallback_url`` is the upstream index.

        Returns:
            The rewritten HTML page on success. When the optional
            dependencies are missing, the result of ``redirect(...)`` to the
            upstream page. When upstream answers with an error status, an
            ``HTTPError`` carrying that status (nothing is cached).

        Raises:
            Whatever ``requests.get`` raises on connection failure or timeout.
        """
        index_url = f"{config.fallback_url.rstrip('/')}/{project}/"
        if not import_ok:
            return redirect(index_url)

        cached = cls.cache.get(project)
        if cached is not None:
            log.info(f"mirror_cache serve html from cache {project}")
            return cached.html

        resp = requests.get(index_url, timeout=cls.request_timeout)
        if not resp.ok:
            # Never cache an upstream error page as if it were a project index.
            log.info(
                f"mirror_cache upstream returned {resp.status_code} for {project}"
            )
            return HTTPError(
                resp.status_code,
                f"Upstream index returned {resp.status_code} for {project}\n\n",
            )

        element = CacheElement(project=project)
        soup = BeautifulSoup(resp.content, "html.parser")
        # ``href=True`` skips anchors that carry no href attribute at all.
        for link in soup.find_all("a", href=True):
            # Resolve relative links against the page that was actually
            # served, so the stored URL can be downloaded later on its own.
            upstream_href = urljoin(resp.url, link["href"])
            # new href with mapping to old href for later
            new_href = element.add(href=upstream_href)
            # create new link
            new_link = soup.new_tag("a")
            new_link.string = link.text.strip()
            new_link["href"] = new_href
            link.replace_with(new_link)

        element.html = str(soup)
        cls.cache[project] = element
        log.info(f"mirror_cache add project '{project}' to cache")

        # purge: drop the oldest entry once the limit is exceeded
        if len(cls.cache) > cls.cache_limit:
            item = cls.cache.popitem(last=False)
            log.info(f"mirror_cache limit '{cls.cache_limit}' exceeded, purged last item - {item}")
        return element.html

    @classmethod
    def has_project(cls, filename):
        """Return True if the first path segment of ``filename`` is a cached project."""
        project = filename.split("/")[0]
        return project in cls.cache

    @classmethod
    def get_static_file(cls, filename, config: RunConfig):
        """Download ``filename`` from upstream, mirror it to disk and return it.

        Args:
            filename: ``"<project>/<file name>"`` as produced by the rewritten
                index page links.
            config: run configuration; ``package_root`` is the mirror target.

        Returns:
            The upstream ``requests`` response on success. Otherwise an
            ``HTTPError``: 404 when the dependencies are missing or the file
            is not known to the cache, or the upstream status when the
            download itself failed (nothing is written to disk then).

        Raises:
            Whatever ``requests.get`` raises on connection failure or timeout.
        """
        if not import_ok:
            return HTTPError(404, f"Not Found ({filename} does not exist)\n\n")

        project = filename.split("/")[0]
        # The project may have been evicted since the caller checked
        # ``has_project``; treat that like any other unknown file.
        element = cls.cache.get(project)
        href = element.cache.get(filename) if element is not None else None
        if href is None:
            log.info(f"mirror_cache not found in cache {filename} ")
            return HTTPError(404, f"Not Found ({filename} does not exist)\n\n")

        resp = requests.get(href, timeout=cls.request_timeout)
        if not resp.ok:
            # Do not store an upstream error body as a package file.
            log.info(
                f"mirror_cache upstream returned {resp.status_code} for {filename}"
            )
            return HTTPError(
                resp.status_code,
                f"Upstream returned {resp.status_code} for {filename}\n\n",
            )

        cls.add_to_cache(filename=filename, resp=resp, config=config)
        return resp

    @classmethod
    def add_to_cache(cls, filename: str, resp: "requests.Response", config: RunConfig):
        """Write the body of ``resp`` to ``<package_root>/<filename>``.

        The project directory is created when missing. The ``resp``
        annotation is a string so that defining this class does not need
        ``requests`` to be importable.
        """
        project = filename.split("/")[0]
        os.makedirs(os.path.join(config.package_root, project), exist_ok=True)
        log.info(f"mirror_cache add file '{filename}' to cache")
        with open(os.path.join(config.package_root, filename), "wb") as f:
            f.write(resp.content)

@ -0,0 +1,2 @@
beautifulsoup4==4.12.3
requests==2.31.0