From 8306de15db9914b3769df27019ad3188371e2ffb Mon Sep 17 00:00:00 2001 From: Matthew Planchard Date: Sun, 7 Feb 2021 17:04:06 -0600 Subject: [PATCH] Gunicorn/gevent docker, log fixes, cache busting (#371) Updates the Docker configuration to use the gunicorn server with gevent workers by default. Adds `waitress` to the docker container, so that if no server is specified, we will fall back to that rather than `wsgiref`. Making this happen brought a few other issues to light, which are also addressed here. - Docker log output not immediately being flushed to stdout (#358): resolved by setting the `PYTHONUNBUFFERED` env var to `t` in the docker container - When the WSGIRef server is selected, its access logs are written directly to stderr, rather than going through the logging machinery: resolved by adding a new `WsgiHandler` class and passing in to bottle's `run()` method when running the wsgi server. This required a new `ServerCheck` class to determine whether the wsgi server is selected when the `auto` option is used - When using `gunicorn` along with the watchdog cache, package uplaods were not being picked up by the watcher. Updated the `add_package` and `remove_package` methods on the `CachingFileBackend` to bust the cache --- .dockerignore | 1 + Dockerfile | 32 +++++--- docker/docker-requirements.txt | 14 +++- docker/entrypoint.sh | 4 +- docker/gunicorn.conf.py | 14 ++++ docker/test_docker.py | 91 +++++++++++++++++----- pypiserver/__main__.py | 134 +++++++++++++++++++++++++++++---- pypiserver/_app.py | 2 +- pypiserver/backend.py | 22 +++++- requirements/test.pip | 2 +- tests/test_main.py | 73 ++++++++++++++++-- 11 files changed, 334 insertions(+), 55 deletions(-) create mode 100644 docker/gunicorn.conf.py diff --git a/.dockerignore b/.dockerignore index e79794e..4c84ef5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,6 +2,7 @@ !pypiserver !requirements !docker/docker-requirements.txt +!docker/gunicorn.conf.py !docker/entrypoint.sh !README.rst !setup.cfg diff --git a/Dockerfile b/Dockerfile index 8f95333..852f874 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,26 +31,37 @@ RUN apk add --no-cache --virtual .build-deps \ FROM base AS builder_dependencies -COPY pypiserver /code/pypiserver -COPY requirements /code/requirements -COPY docker/docker-requirements.txt /code -COPY setup.cfg /code -COPY setup.py /code -COPY README.rst /code +WORKDIR /code +COPY docker/docker-requirements.txt . + +# Install requirements RUN apk add --no-cache --virtual .build-deps \ build-base \ libffi-dev \ && mkdir /install \ - && python -m pip install --no-warn-script-location \ - --prefix=/install \ - /code --requirement /code/docker-requirements.txt + && python -m pip install \ + --no-warn-script-location \ + --prefix=/install \ + --requirement docker-requirements.txt + +# Install pypiserver +# - do this separately from deps so that when developing, every change does not +# require reinstalling deps +COPY pypiserver pypiserver +COPY setup.cfg . +COPY setup.py . +COPY README.rst . +RUN python -m pip install --no-warn-script-location --prefix=/install . FROM base + +WORKDIR /data # Copy the libraries installed via pip COPY --from=builder_dependencies /install /usr/local COPY --from=builder_gosu /usr/local/bin/gosu /usr/local/bin/gosu COPY docker/entrypoint.sh /entrypoint.sh +COPY docker/gunicorn.conf.py /data # Use a consistent user and group ID so that linux users # can create a corresponding system user and set permissions @@ -64,10 +75,11 @@ RUN apk add bash \ && chmod +x /entrypoint.sh VOLUME /data/packages -WORKDIR /data ENV PYPISERVER_PORT=8080 # PORT is deprecated. Please use PYPISERVER_PORT instead ENV PORT=$PYPISERVER_PORT +# Flush logs immediately to stdout +ENV PYTHONUNBUFFERED=t EXPOSE $PYPISERVER_PORT ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/docker-requirements.txt b/docker/docker-requirements.txt index 2bdb0e4..ab6853a 100644 --- a/docker/docker-requirements.txt +++ b/docker/docker-requirements.txt @@ -1,3 +1,11 @@ -passlib==1.7.2 -bcrypt==3.1.7 -watchdog==0.10.3 +# We use gunicorn as the default server in the docker container, with gevent +# workers +gevent==21.1.2 +gunicorn==20.0.4 +passlib==1.7.4 +bcrypt==3.2.0 +# If a user overrides args but does not override the server arg, we fall back to +# whatever bottle chooses as a default. Since the wsgiref server is not +# production-ready, install waitress as a fallback for these cases. +waitress==1.4.4 +watchdog==1.0.2 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index f231b12..9fcfcb4 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -117,7 +117,9 @@ fi if [[ "$*" == "" ]]; then - CMD=("run" "-p" "${PYPISERVER_PORT:-$PORT}") + # Use the gunicorn server by default, since it's more performant than + # bottle's default server + CMD=("run" "-p" "${PYPISERVER_PORT:-$PORT}" "--server" "gunicorn") else # this reassigns the array to the CMD variable CMD=( "${@}" ) diff --git a/docker/gunicorn.conf.py b/docker/gunicorn.conf.py new file mode 100644 index 0000000..a307335 --- /dev/null +++ b/docker/gunicorn.conf.py @@ -0,0 +1,14 @@ +"""Default gunicorn config for the docker environment. + +To override, mount a new gunicorn config at /data/gunicorn.conf.py in your +Docker container. +""" + +# pylint: disable=invalid-name + +# Enable to log every request +# accesslog = "-" +errorlog = "-" +preload_app = True +workers = 1 +worker_class = "gevent" diff --git a/docker/test_docker.py b/docker/test_docker.py index 6229e6b..cbb0988 100644 --- a/docker/test_docker.py +++ b/docker/test_docker.py @@ -279,23 +279,50 @@ class TestPermissions: run("docker", "container", "rm", "-f", container_id) +class ContainerInfo(t.NamedTuple): + """Info about a running container""" + + container_id: str + port: int + args: tuple + + class TestBasics: """Test basic pypiserver functionality in a simple unauthed container.""" - HOST_PORT = get_socket() - - @pytest.fixture(scope="class") - def container(self, image: str) -> t.Iterator[str]: + # We want to automatically parametrize this class' tests with a variety of + # pypiserver args, since it should work the same in all of these cases + @pytest.fixture( + scope="class", + params=[ + # default (gunicorn) server with cached backend + (), + # default (gunicorn) server with non-cached backend + ("--backend", "simple-dir"), + # explicit gunicorn server with a non-cached backend + ("--server", "gunicorn", "--backend", "simple-dir"), + # explicit gunicorn server + ("--server", "gunicorn"), + # explicit waitress server + ("--server", "wsgiref"), + # explicit wsgiref server + ("--server", "wsgiref"), + ], + ) + def container( + self, request: pytest.FixtureRequest, image: str + ) -> t.Iterator[ContainerInfo]: """Run the pypiserver container. Returns the container ID. """ - res = run( + port = get_socket() + args = ( "docker", "run", "--rm", "--publish", - f"{self.HOST_PORT}:8080", + f"{port}:8080", "--detach", image, "run", @@ -303,17 +330,18 @@ class TestBasics: ".", "--authenticate", ".", - capture=True, + *request.param, # type: ignore ) - wait_for_container(self.HOST_PORT) + res = run(*args, capture=True) + wait_for_container(port) container_id = res.out.strip() - yield container_id + yield ContainerInfo(container_id, port, args) run("docker", "container", "rm", "-f", container_id) @pytest.fixture(scope="class") def upload_mypkg( self, - container: str, # pylint: disable=unused-argument + container: ContainerInfo, mypkg_paths: t.Dict[str, Path], ) -> None: """Upload mypkg to the container.""" @@ -323,7 +351,7 @@ class TestBasics: "twine", "upload", "--repository-url", - f"http://localhost:{self.HOST_PORT}", + f"http://localhost:{container.port}", "--username", "", "--password", @@ -332,7 +360,7 @@ class TestBasics: ) @pytest.mark.usefixtures("upload_mypkg") - def test_download(self) -> None: + def test_download(self, container: ContainerInfo) -> None: """Download mypkg from the container.""" with tempfile.TemporaryDirectory() as tmpdir: run( @@ -341,7 +369,7 @@ class TestBasics: "pip", "download", "--index-url", - f"http://localhost:{self.HOST_PORT}/simple", + f"http://localhost:{container.port}/simple", "--dest", tmpdir, "pypiserver_mypkg", @@ -352,7 +380,7 @@ class TestBasics: ) @pytest.mark.usefixtures("upload_mypkg", "cleanup") - def test_install(self) -> None: + def test_install(self, container: ContainerInfo) -> None: """Install mypkg from the container. Note this also ensures that name normalization is working, @@ -365,15 +393,40 @@ class TestBasics: "pip", "install", "--index-url", - f"http://localhost:{self.HOST_PORT}/simple", + f"http://localhost:{container.port}/simple", "pypiserver-mypkg", ) run("python", "-c", "'import pypiserver_mypkg; mypkg.pkg_name()'") - @pytest.mark.usefixtures("container") - def test_welcome(self) -> None: + def test_expected_server(self, container: ContainerInfo) -> None: + """Ensure we run the server we think we're running.""" + resp = httpx.get(f"http://localhost:{container.port}") + server = resp.headers["server"].lower() + arg_pairs = tuple(zip(container.args, container.args[1:])) + if ( + container.args[-1] == "pypiserver:test" + or ("--server", "gunicorn") in arg_pairs + ): + # We specified no overriding args, so we should run gunicorn, or + # we specified gunicorn in overriding args. + assert "gunicorn" in server + elif ("--server", "wsgiref") in arg_pairs: + # We explicitly specified the wsgiref server + assert "wsgiserver" in server + elif ("--server", "waitress") in arg_pairs: + # We explicitly specified the wsgiref server + assert "waitress" in server + else: + # We overrode args, so instead of using the gunicorn default, + # we use the `auto` option. Bottle won't choose gunicorn as an + # auto server, so we have waitress installed in the docker container + # as a fallback for these scenarios, since wsgiref is not production + # ready + assert "waitress" in server + + def test_welcome(self, container: ContainerInfo) -> None: """View the welcome page.""" - resp = httpx.get(f"http://localhost:{self.HOST_PORT}") + resp = httpx.get(f"http://localhost:{container.port}") assert resp.status_code == 200 assert "pypiserver" in resp.text @@ -485,7 +538,7 @@ class TestAuthed: "pip", "download", "--index-url", - f"http://localhost:{self.HOST_PORT}/simple", + f"http://foo:bar@localhost:{self.HOST_PORT}/simple", "--dest", tmpdir, "pypiserver_mypkg", diff --git a/pypiserver/__main__.py b/pypiserver/__main__.py index d138131..9611aea 100644 --- a/pypiserver/__main__.py +++ b/pypiserver/__main__.py @@ -1,26 +1,28 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 """Entrypoint for pypiserver.""" -from __future__ import print_function - +import enum +import importlib import logging import sys import typing as t +from pathlib import Path +from wsgiref.simple_server import WSGIRequestHandler import functools as ft from pypiserver.config import Config, UpdateConfig - log = logging.getLogger("pypiserver.main") def init_logging( - level=logging.NOTSET, - frmt=None, - filename=None, + level: int = logging.NOTSET, + frmt: str = None, + filename: t.Union[str, Path] = None, stream: t.Optional[t.IO] = sys.stderr, - logger=None, -): + logger: logging.Logger = None, +) -> None: + """Configure the specified logger, or the root logger otherwise.""" logger = logger or logging.getLogger() logger.setLevel(level) @@ -36,14 +38,90 @@ def init_logging( logger.addHandler(handler) -def main(argv=None): +class WsgiHandler(WSGIRequestHandler): + """A simple request handler to configure logging.""" + + # The default `FixedHandler` that bottle's `WSGIRefServer` uses does not + # log in a particularly predictable or configurable way. We'll pass this + # in to use instead. + def address_string(self) -> str: # Prevent reverse DNS lookups please. + # This method copied directly from bottle's `FixedHandler` and + # maintained on the Chesterton's fence principle (i.e. I don't know + # why it's important, so I'm not going to get rid of it) + return self.client_address[0] + + def log_message( + self, format: str, *args: t.Any # pylint: disable=redefined-builtin + ) -> None: + """Log a message.""" + # The log_message method on the `HttpRequestHandler` base class just + # writes directly to stderr. We'll use its same formatting, but pass + # it through the logger instead. + log.info( + "%s - - [%s] %s\n", + self.address_string(), + self.log_date_time_string(), + format % args, + ) + + +class AutoServer(enum.Enum): + """Expected servers that can be automaticlaly selected by bottle.""" + + Waitress = enum.auto() + Paste = enum.auto() + Twisted = enum.auto() + CherryPy = enum.auto() + WsgiRef = enum.auto() + + +# Possible automatically selected servers. This MUST match the available +# auto servers in bottle.py +AUTO_SERVER_IMPORTS = ( + (AutoServer.Waitress, "waitress"), + (AutoServer.Paste, "paste"), + (AutoServer.Twisted, "twisted.web"), + (AutoServer.CherryPy, "cheroot.wsgi"), + (AutoServer.CherryPy, "cherrypy.wsgiserver"), + # this should always be available because it's part of the stdlib + (AutoServer.WsgiRef, "wsgiref"), +) + + +def _can_import(name: str) -> bool: + """Attempt to import a module. Return a bool indicating success.""" + try: + importlib.import_module(name) + return True + except ImportError: + return False + + +def guess_auto_server() -> AutoServer: + """Guess which server bottle will use for the auto setting.""" + # Return the first server that can be imported. + server = next( + (s for s, i in AUTO_SERVER_IMPORTS if _can_import(i)), + None, + ) + if server is None: + raise RuntimeError( + "Unexpected error determining bottle auto server. There may be an " + "issue with this python environment. Please report this bug at " + "https://github.com/pypiserver/pypiserver/issues" + ) + return server + + +def main(argv: t.Sequence[str] = None) -> None: """Application entrypoint for pypiserver. This function drives the application (as opposed to the library) implementation of pypiserver. Usage from the commandline will result in this function being called. """ - import pypiserver + # pylint: disable=import-outside-toplevel + import pypiserver # pylint: disable=redefined-outer-name if argv is None: # The first item in sys.argv is the name of the python file being @@ -84,25 +162,55 @@ def main(argv=None): from pypiserver import bottle bottle.debug(config.verbosity > 1) - bottle._stderr = ft.partial( + bottle._stderr = ft.partial( # pylint: disable=protected-access _logwrite, logging.getLogger(bottle.__name__), logging.INFO ) # Here `app` is a Bottle instance, which we pass to bottle.run() to run # the server app = pypiserver.app_from_config(config) + + if config.server_method == "gunicorn": + # When bottle runs gunicorn, gunicorn tries to pull its arguments from + # sys.argv. Because pypiserver's arguments don't match gunicorn's, + # this leads to errors. + # Gunicorn can be configured by using a `gunicorn.conf.py` config file + # or by specifying the `GUNICORN_CMD_ARGS` env var. See gunicorn + # docs for more info. + sys.argv = ["gunicorn"] + + wsgi_kwargs = {"handler_class": WsgiHandler} + + if config.server_method == "auto": + expected_server = guess_auto_server() + extra_kwargs = ( + wsgi_kwargs if expected_server is AutoServer.WsgiRef else {} + ) + log.debug( + "Server 'auto' selected. Expecting bottle to run '%s'. " + "Passing extra keyword args: %s", + expected_server.name, + extra_kwargs, + ) + else: + extra_kwargs = wsgi_kwargs if config.server_method == "wsgiref" else {} + log.debug( + "Running bottle with selected server '%s'", config.server_method + ) + bottle.run( app=app, host=config.host, port=config.port, server=config.server_method, + **extra_kwargs, ) def _logwrite(logger, level, msg): if msg: line_endings = ["\r\n", "\n\r", "\n"] - for le in line_endings: + for le in line_endings: # pylint: disable=invalid-name if msg.endswith(le): msg = msg[: -len(le)] if msg: diff --git a/pypiserver/_app.py b/pypiserver/_app.py index fc40e55..f1b9794 100644 --- a/pypiserver/_app.py +++ b/pypiserver/_app.py @@ -223,7 +223,7 @@ def handle_rpc(): .childNodes[0] .wholeText.strip() ) - log.info(f"Processing RPC2 request for '{methodname}'") + log.debug(f"Processing RPC2 request for '{methodname}'") if methodname == "search": value = ( parser.getElementsByTagName("string")[0] diff --git a/pypiserver/backend.py b/pypiserver/backend.py index 1cf9588..4119f8c 100644 --- a/pypiserver/backend.py +++ b/pypiserver/backend.py @@ -2,6 +2,7 @@ import abc import functools import hashlib import itertools +import logging import os import typing as t from pathlib import Path @@ -18,6 +19,9 @@ if t.TYPE_CHECKING: from .config import _ConfigCommon as Configuration +log = logging.getLogger(__name__) + + PathLike = t.Union[str, os.PathLike] @@ -147,7 +151,15 @@ class SimpleFileBackend(Backend): def remove_package(self, pkg: PkgFile) -> None: if pkg.fn is not None: - os.remove(pkg.fn) + try: + os.remove(pkg.fn) + except FileNotFoundError: + log.warning( + "Tried to remove %s, but it is already gone", pkg.fn + ) + except OSError: + log.exception("Unexpected error removing package: %s", pkg.fn) + raise def exists(self, filename: str) -> bool: return any( @@ -167,6 +179,14 @@ class CachingFileBackend(SimpleFileBackend): self.cache_manager = cache_manager or CacheManager() # type: ignore + def add_package(self, filename: str, stream: t.BinaryIO) -> None: + super().add_package(filename, stream) + self.cache_manager.invalidate_root_cache(self.roots[0]) + + def remove_package(self, pkg: PkgFile) -> None: + super().remove_package(pkg) + self.cache_manager.invalidate_root_cache(pkg.root) + def get_all_packages(self) -> t.Iterable[PkgFile]: return itertools.chain.from_iterable( self.cache_manager.listdir(r, listdir) for r in self.roots diff --git a/requirements/test.pip b/requirements/test.pip index 2cca1a5..e452a2a 100644 --- a/requirements/test.pip +++ b/requirements/test.pip @@ -5,7 +5,7 @@ gevent>=1.1b4; python_version >= '3' httpx pip passlib>=1.6 -pytest>=6 +pytest>=6.2.2 pytest-cov setuptools tox diff --git a/tests/test_main.py b/tests/test_main.py index 846ceb3..3d10f83 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,13 +1,13 @@ import logging import os import pathlib -from pathlib import Path import sys import typing as t from unittest import mock import pytest +import pypiserver.bottle from pypiserver import __main__ from pypiserver.bottle import Bottle @@ -67,14 +67,18 @@ def test_noargs(main): # Assert we're calling with the default host, port, and server, and # assume that we've popped `app` off of the bottle args in our `main` # fixture. - assert main([]) == {"host": "0.0.0.0", "port": 8080, "server": "auto"} + exp_kwargs = {"host": "0.0.0.0", "port": 8080, "server": "auto"} + actual_kwargs = main([]) + # Only assert our expected are are present. We may pass extra kwargs + # for particular servers, depending on what is available in the python + # path. + assert all(map(lambda k: exp_kwargs[k] == actual_kwargs[k], exp_kwargs)) def test_port(main): - expected = dict(host="0.0.0.0", port=8081, server="auto") - assert main(["--port=8081"]) == expected - assert main(["--port", "8081"]) == expected - assert main(["-p", "8081"]) == expected + assert main(["--port=8081"])["port"] == 8081 + assert main(["--port", "8081"])["port"] == 8081 + assert main(["-p", "8081"])["port"] == 8081 def test_server(main): @@ -82,6 +86,26 @@ def test_server(main): assert main(["--server", "cherrypy"])["server"] == "cherrypy" +def test_wsgiserver_extra_args_present(monkeypatch, main): + """The wsgi server gets extra keyword arguments.""" + monkeypatch.setattr( + __main__, + "guess_auto_server", + lambda: __main__.AutoServer.WsgiRef, + ) + assert main([])["handler_class"] is __main__.WsgiHandler + + +def test_wsgiserver_extra_kwargs_absent(monkeypatch, main): + """Other servers don't get wsgiserver args.""" + monkeypatch.setattr( + __main__, + "guess_auto_server", + lambda: __main__.AutoServer.Waitress, + ) + assert "handler_class" not in main([]) + + def test_root_multiple(main): # Remember we're already setting THIS_DIR as a root in the `main` fixture main([str(THIS_DIR.parent)]) @@ -233,3 +257,40 @@ def test_blacklist_file(main): """ main(["-U", "--blacklist-file", str(IGNORELIST_FILE)]) assert main.update_kwargs["ignorelist"] == ["mypiserver", "something"] + + +def test_auto_servers() -> None: + """Test auto servers.""" + # A list of bottle ServerAdapters + bottle_adapters = tuple( + a.__name__.lower() for a in pypiserver.bottle.AutoServer.adapters + ) + # We are going to expect that our AutoServer enum names must match those + # at least closely enough to be recognizable. + our_mappings = tuple(map(str.lower, __main__.AutoServer.__members__)) + + # Assert that all of our mappings are represented in bottle adapters + assert all( + any(mapping in a for a in bottle_adapters) for mapping in our_mappings + ) + + # Assert that our import checking order matches the order in which the + # adapters are defined in the AutoServer + our_check_order = tuple(i[0] for i in __main__.AUTO_SERVER_IMPORTS) + + # Some of the servers have more than one check, so we need to rmeove + # duplicates before we check for identity with the AutoServer definition. + seen: t.Dict[__main__.AutoServer, __main__.AutoServer] = {} + our_check_order = tuple( + seen.setdefault(i, i) for i in our_check_order if i not in seen + ) + + # We should have the same number of deduped checkers as there are bottle + # adapters + assert len(our_check_order) == len(bottle_adapters) + + # And the order should be the same + assert all( + us.name.lower() in them + for us, them in zip(our_check_order, bottle_adapters) + )