Gunicorn/gevent docker, log fixes, cache busting (#371)

Updates the Docker configuration to use the gunicorn server with gevent
workers by default. Adds `waitress` to the docker container, so that if
no server is specified, we will fall back to that rather than `wsgiref`.

Making this happen brought a few other issues to light, which are also
addressed here.

- Docker log output not immediately being flushed to stdout (#358):
  resolved by setting the `PYTHONUNBUFFERED` env var to `t` in the
  docker container
- When the WSGIRef server is selected, its access logs are written
  directly to stderr, rather than going through the logging machinery:
  resolved by adding a new `WsgiHandler` class and passing in to
  bottle's `run()` method when running the wsgi server. This required a
  new `ServerCheck` class to determine whether the wsgi server is
  selected when the `auto` option is used
- When using `gunicorn` along with the watchdog cache, package uplaods
  were not being picked up by the watcher. Updated the `add_package`
  and `remove_package` methods on the `CachingFileBackend` to bust the
  cache
This commit is contained in:
Matthew Planchard 2021-02-07 17:04:06 -06:00 committed by GitHub
parent d868005e1f
commit 8306de15db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 334 additions and 55 deletions

@ -2,6 +2,7 @@
!pypiserver
!requirements
!docker/docker-requirements.txt
!docker/gunicorn.conf.py
!docker/entrypoint.sh
!README.rst
!setup.cfg

@ -31,26 +31,37 @@ RUN apk add --no-cache --virtual .build-deps \
FROM base AS builder_dependencies
COPY pypiserver /code/pypiserver
COPY requirements /code/requirements
COPY docker/docker-requirements.txt /code
COPY setup.cfg /code
COPY setup.py /code
COPY README.rst /code
WORKDIR /code
COPY docker/docker-requirements.txt .
# Install requirements
RUN apk add --no-cache --virtual .build-deps \
build-base \
libffi-dev \
&& mkdir /install \
&& python -m pip install --no-warn-script-location \
--prefix=/install \
/code --requirement /code/docker-requirements.txt
&& python -m pip install \
--no-warn-script-location \
--prefix=/install \
--requirement docker-requirements.txt
# Install pypiserver
# - do this separately from deps so that when developing, every change does not
# require reinstalling deps
COPY pypiserver pypiserver
COPY setup.cfg .
COPY setup.py .
COPY README.rst .
RUN python -m pip install --no-warn-script-location --prefix=/install .
FROM base
WORKDIR /data
# Copy the libraries installed via pip
COPY --from=builder_dependencies /install /usr/local
COPY --from=builder_gosu /usr/local/bin/gosu /usr/local/bin/gosu
COPY docker/entrypoint.sh /entrypoint.sh
COPY docker/gunicorn.conf.py /data
# Use a consistent user and group ID so that linux users
# can create a corresponding system user and set permissions
@ -64,10 +75,11 @@ RUN apk add bash \
&& chmod +x /entrypoint.sh
VOLUME /data/packages
WORKDIR /data
ENV PYPISERVER_PORT=8080
# PORT is deprecated. Please use PYPISERVER_PORT instead
ENV PORT=$PYPISERVER_PORT
# Flush logs immediately to stdout
ENV PYTHONUNBUFFERED=t
EXPOSE $PYPISERVER_PORT
ENTRYPOINT ["/entrypoint.sh"]

@ -1,3 +1,11 @@
passlib==1.7.2
bcrypt==3.1.7
watchdog==0.10.3
# We use gunicorn as the default server in the docker container, with gevent
# workers
gevent==21.1.2
gunicorn==20.0.4
passlib==1.7.4
bcrypt==3.2.0
# If a user overrides args but does not override the server arg, we fall back to
# whatever bottle chooses as a default. Since the wsgiref server is not
# production-ready, install waitress as a fallback for these cases.
waitress==1.4.4
watchdog==1.0.2

@ -117,7 +117,9 @@ fi
if [[ "$*" == "" ]]; then
CMD=("run" "-p" "${PYPISERVER_PORT:-$PORT}")
# Use the gunicorn server by default, since it's more performant than
# bottle's default server
CMD=("run" "-p" "${PYPISERVER_PORT:-$PORT}" "--server" "gunicorn")
else
# this reassigns the array to the CMD variable
CMD=( "${@}" )

14
docker/gunicorn.conf.py Normal file

@ -0,0 +1,14 @@
"""Default gunicorn config for the docker environment.
To override, mount a new gunicorn config at /data/gunicorn.conf.py in your
Docker container.
"""
# pylint: disable=invalid-name
# Enable to log every request
# accesslog = "-"
errorlog = "-"
preload_app = True
workers = 1
worker_class = "gevent"

@ -279,23 +279,50 @@ class TestPermissions:
run("docker", "container", "rm", "-f", container_id)
class ContainerInfo(t.NamedTuple):
"""Info about a running container"""
container_id: str
port: int
args: tuple
class TestBasics:
"""Test basic pypiserver functionality in a simple unauthed container."""
HOST_PORT = get_socket()
@pytest.fixture(scope="class")
def container(self, image: str) -> t.Iterator[str]:
# We want to automatically parametrize this class' tests with a variety of
# pypiserver args, since it should work the same in all of these cases
@pytest.fixture(
scope="class",
params=[
# default (gunicorn) server with cached backend
(),
# default (gunicorn) server with non-cached backend
("--backend", "simple-dir"),
# explicit gunicorn server with a non-cached backend
("--server", "gunicorn", "--backend", "simple-dir"),
# explicit gunicorn server
("--server", "gunicorn"),
# explicit waitress server
("--server", "wsgiref"),
# explicit wsgiref server
("--server", "wsgiref"),
],
)
def container(
self, request: pytest.FixtureRequest, image: str
) -> t.Iterator[ContainerInfo]:
"""Run the pypiserver container.
Returns the container ID.
"""
res = run(
port = get_socket()
args = (
"docker",
"run",
"--rm",
"--publish",
f"{self.HOST_PORT}:8080",
f"{port}:8080",
"--detach",
image,
"run",
@ -303,17 +330,18 @@ class TestBasics:
".",
"--authenticate",
".",
capture=True,
*request.param, # type: ignore
)
wait_for_container(self.HOST_PORT)
res = run(*args, capture=True)
wait_for_container(port)
container_id = res.out.strip()
yield container_id
yield ContainerInfo(container_id, port, args)
run("docker", "container", "rm", "-f", container_id)
@pytest.fixture(scope="class")
def upload_mypkg(
self,
container: str, # pylint: disable=unused-argument
container: ContainerInfo,
mypkg_paths: t.Dict[str, Path],
) -> None:
"""Upload mypkg to the container."""
@ -323,7 +351,7 @@ class TestBasics:
"twine",
"upload",
"--repository-url",
f"http://localhost:{self.HOST_PORT}",
f"http://localhost:{container.port}",
"--username",
"",
"--password",
@ -332,7 +360,7 @@ class TestBasics:
)
@pytest.mark.usefixtures("upload_mypkg")
def test_download(self) -> None:
def test_download(self, container: ContainerInfo) -> None:
"""Download mypkg from the container."""
with tempfile.TemporaryDirectory() as tmpdir:
run(
@ -341,7 +369,7 @@ class TestBasics:
"pip",
"download",
"--index-url",
f"http://localhost:{self.HOST_PORT}/simple",
f"http://localhost:{container.port}/simple",
"--dest",
tmpdir,
"pypiserver_mypkg",
@ -352,7 +380,7 @@ class TestBasics:
)
@pytest.mark.usefixtures("upload_mypkg", "cleanup")
def test_install(self) -> None:
def test_install(self, container: ContainerInfo) -> None:
"""Install mypkg from the container.
Note this also ensures that name normalization is working,
@ -365,15 +393,40 @@ class TestBasics:
"pip",
"install",
"--index-url",
f"http://localhost:{self.HOST_PORT}/simple",
f"http://localhost:{container.port}/simple",
"pypiserver-mypkg",
)
run("python", "-c", "'import pypiserver_mypkg; mypkg.pkg_name()'")
@pytest.mark.usefixtures("container")
def test_welcome(self) -> None:
def test_expected_server(self, container: ContainerInfo) -> None:
"""Ensure we run the server we think we're running."""
resp = httpx.get(f"http://localhost:{container.port}")
server = resp.headers["server"].lower()
arg_pairs = tuple(zip(container.args, container.args[1:]))
if (
container.args[-1] == "pypiserver:test"
or ("--server", "gunicorn") in arg_pairs
):
# We specified no overriding args, so we should run gunicorn, or
# we specified gunicorn in overriding args.
assert "gunicorn" in server
elif ("--server", "wsgiref") in arg_pairs:
# We explicitly specified the wsgiref server
assert "wsgiserver" in server
elif ("--server", "waitress") in arg_pairs:
# We explicitly specified the wsgiref server
assert "waitress" in server
else:
# We overrode args, so instead of using the gunicorn default,
# we use the `auto` option. Bottle won't choose gunicorn as an
# auto server, so we have waitress installed in the docker container
# as a fallback for these scenarios, since wsgiref is not production
# ready
assert "waitress" in server
def test_welcome(self, container: ContainerInfo) -> None:
"""View the welcome page."""
resp = httpx.get(f"http://localhost:{self.HOST_PORT}")
resp = httpx.get(f"http://localhost:{container.port}")
assert resp.status_code == 200
assert "pypiserver" in resp.text
@ -485,7 +538,7 @@ class TestAuthed:
"pip",
"download",
"--index-url",
f"http://localhost:{self.HOST_PORT}/simple",
f"http://foo:bar@localhost:{self.HOST_PORT}/simple",
"--dest",
tmpdir,
"pypiserver_mypkg",

@ -1,26 +1,28 @@
#! /usr/bin/env python
#! /usr/bin/env python3
"""Entrypoint for pypiserver."""
from __future__ import print_function
import enum
import importlib
import logging
import sys
import typing as t
from pathlib import Path
from wsgiref.simple_server import WSGIRequestHandler
import functools as ft
from pypiserver.config import Config, UpdateConfig
log = logging.getLogger("pypiserver.main")
def init_logging(
level=logging.NOTSET,
frmt=None,
filename=None,
level: int = logging.NOTSET,
frmt: str = None,
filename: t.Union[str, Path] = None,
stream: t.Optional[t.IO] = sys.stderr,
logger=None,
):
logger: logging.Logger = None,
) -> None:
"""Configure the specified logger, or the root logger otherwise."""
logger = logger or logging.getLogger()
logger.setLevel(level)
@ -36,14 +38,90 @@ def init_logging(
logger.addHandler(handler)
def main(argv=None):
class WsgiHandler(WSGIRequestHandler):
"""A simple request handler to configure logging."""
# The default `FixedHandler` that bottle's `WSGIRefServer` uses does not
# log in a particularly predictable or configurable way. We'll pass this
# in to use instead.
def address_string(self) -> str: # Prevent reverse DNS lookups please.
# This method copied directly from bottle's `FixedHandler` and
# maintained on the Chesterton's fence principle (i.e. I don't know
# why it's important, so I'm not going to get rid of it)
return self.client_address[0]
def log_message(
self, format: str, *args: t.Any # pylint: disable=redefined-builtin
) -> None:
"""Log a message."""
# The log_message method on the `HttpRequestHandler` base class just
# writes directly to stderr. We'll use its same formatting, but pass
# it through the logger instead.
log.info(
"%s - - [%s] %s\n",
self.address_string(),
self.log_date_time_string(),
format % args,
)
class AutoServer(enum.Enum):
"""Expected servers that can be automaticlaly selected by bottle."""
Waitress = enum.auto()
Paste = enum.auto()
Twisted = enum.auto()
CherryPy = enum.auto()
WsgiRef = enum.auto()
# Possible automatically selected servers. This MUST match the available
# auto servers in bottle.py
AUTO_SERVER_IMPORTS = (
(AutoServer.Waitress, "waitress"),
(AutoServer.Paste, "paste"),
(AutoServer.Twisted, "twisted.web"),
(AutoServer.CherryPy, "cheroot.wsgi"),
(AutoServer.CherryPy, "cherrypy.wsgiserver"),
# this should always be available because it's part of the stdlib
(AutoServer.WsgiRef, "wsgiref"),
)
def _can_import(name: str) -> bool:
"""Attempt to import a module. Return a bool indicating success."""
try:
importlib.import_module(name)
return True
except ImportError:
return False
def guess_auto_server() -> AutoServer:
"""Guess which server bottle will use for the auto setting."""
# Return the first server that can be imported.
server = next(
(s for s, i in AUTO_SERVER_IMPORTS if _can_import(i)),
None,
)
if server is None:
raise RuntimeError(
"Unexpected error determining bottle auto server. There may be an "
"issue with this python environment. Please report this bug at "
"https://github.com/pypiserver/pypiserver/issues"
)
return server
def main(argv: t.Sequence[str] = None) -> None:
"""Application entrypoint for pypiserver.
This function drives the application (as opposed to the library)
implementation of pypiserver. Usage from the commandline will result in
this function being called.
"""
import pypiserver
# pylint: disable=import-outside-toplevel
import pypiserver # pylint: disable=redefined-outer-name
if argv is None:
# The first item in sys.argv is the name of the python file being
@ -84,25 +162,55 @@ def main(argv=None):
from pypiserver import bottle
bottle.debug(config.verbosity > 1)
bottle._stderr = ft.partial(
bottle._stderr = ft.partial( # pylint: disable=protected-access
_logwrite, logging.getLogger(bottle.__name__), logging.INFO
)
# Here `app` is a Bottle instance, which we pass to bottle.run() to run
# the server
app = pypiserver.app_from_config(config)
if config.server_method == "gunicorn":
# When bottle runs gunicorn, gunicorn tries to pull its arguments from
# sys.argv. Because pypiserver's arguments don't match gunicorn's,
# this leads to errors.
# Gunicorn can be configured by using a `gunicorn.conf.py` config file
# or by specifying the `GUNICORN_CMD_ARGS` env var. See gunicorn
# docs for more info.
sys.argv = ["gunicorn"]
wsgi_kwargs = {"handler_class": WsgiHandler}
if config.server_method == "auto":
expected_server = guess_auto_server()
extra_kwargs = (
wsgi_kwargs if expected_server is AutoServer.WsgiRef else {}
)
log.debug(
"Server 'auto' selected. Expecting bottle to run '%s'. "
"Passing extra keyword args: %s",
expected_server.name,
extra_kwargs,
)
else:
extra_kwargs = wsgi_kwargs if config.server_method == "wsgiref" else {}
log.debug(
"Running bottle with selected server '%s'", config.server_method
)
bottle.run(
app=app,
host=config.host,
port=config.port,
server=config.server_method,
**extra_kwargs,
)
def _logwrite(logger, level, msg):
if msg:
line_endings = ["\r\n", "\n\r", "\n"]
for le in line_endings:
for le in line_endings: # pylint: disable=invalid-name
if msg.endswith(le):
msg = msg[: -len(le)]
if msg:

@ -223,7 +223,7 @@ def handle_rpc():
.childNodes[0]
.wholeText.strip()
)
log.info(f"Processing RPC2 request for '{methodname}'")
log.debug(f"Processing RPC2 request for '{methodname}'")
if methodname == "search":
value = (
parser.getElementsByTagName("string")[0]

@ -2,6 +2,7 @@ import abc
import functools
import hashlib
import itertools
import logging
import os
import typing as t
from pathlib import Path
@ -18,6 +19,9 @@ if t.TYPE_CHECKING:
from .config import _ConfigCommon as Configuration
log = logging.getLogger(__name__)
PathLike = t.Union[str, os.PathLike]
@ -147,7 +151,15 @@ class SimpleFileBackend(Backend):
def remove_package(self, pkg: PkgFile) -> None:
if pkg.fn is not None:
os.remove(pkg.fn)
try:
os.remove(pkg.fn)
except FileNotFoundError:
log.warning(
"Tried to remove %s, but it is already gone", pkg.fn
)
except OSError:
log.exception("Unexpected error removing package: %s", pkg.fn)
raise
def exists(self, filename: str) -> bool:
return any(
@ -167,6 +179,14 @@ class CachingFileBackend(SimpleFileBackend):
self.cache_manager = cache_manager or CacheManager() # type: ignore
def add_package(self, filename: str, stream: t.BinaryIO) -> None:
super().add_package(filename, stream)
self.cache_manager.invalidate_root_cache(self.roots[0])
def remove_package(self, pkg: PkgFile) -> None:
super().remove_package(pkg)
self.cache_manager.invalidate_root_cache(pkg.root)
def get_all_packages(self) -> t.Iterable[PkgFile]:
return itertools.chain.from_iterable(
self.cache_manager.listdir(r, listdir) for r in self.roots

@ -5,7 +5,7 @@ gevent>=1.1b4; python_version >= '3'
httpx
pip
passlib>=1.6
pytest>=6
pytest>=6.2.2
pytest-cov
setuptools
tox

@ -1,13 +1,13 @@
import logging
import os
import pathlib
from pathlib import Path
import sys
import typing as t
from unittest import mock
import pytest
import pypiserver.bottle
from pypiserver import __main__
from pypiserver.bottle import Bottle
@ -67,14 +67,18 @@ def test_noargs(main):
# Assert we're calling with the default host, port, and server, and
# assume that we've popped `app` off of the bottle args in our `main`
# fixture.
assert main([]) == {"host": "0.0.0.0", "port": 8080, "server": "auto"}
exp_kwargs = {"host": "0.0.0.0", "port": 8080, "server": "auto"}
actual_kwargs = main([])
# Only assert our expected are are present. We may pass extra kwargs
# for particular servers, depending on what is available in the python
# path.
assert all(map(lambda k: exp_kwargs[k] == actual_kwargs[k], exp_kwargs))
def test_port(main):
expected = dict(host="0.0.0.0", port=8081, server="auto")
assert main(["--port=8081"]) == expected
assert main(["--port", "8081"]) == expected
assert main(["-p", "8081"]) == expected
assert main(["--port=8081"])["port"] == 8081
assert main(["--port", "8081"])["port"] == 8081
assert main(["-p", "8081"])["port"] == 8081
def test_server(main):
@ -82,6 +86,26 @@ def test_server(main):
assert main(["--server", "cherrypy"])["server"] == "cherrypy"
def test_wsgiserver_extra_args_present(monkeypatch, main):
"""The wsgi server gets extra keyword arguments."""
monkeypatch.setattr(
__main__,
"guess_auto_server",
lambda: __main__.AutoServer.WsgiRef,
)
assert main([])["handler_class"] is __main__.WsgiHandler
def test_wsgiserver_extra_kwargs_absent(monkeypatch, main):
"""Other servers don't get wsgiserver args."""
monkeypatch.setattr(
__main__,
"guess_auto_server",
lambda: __main__.AutoServer.Waitress,
)
assert "handler_class" not in main([])
def test_root_multiple(main):
# Remember we're already setting THIS_DIR as a root in the `main` fixture
main([str(THIS_DIR.parent)])
@ -233,3 +257,40 @@ def test_blacklist_file(main):
"""
main(["-U", "--blacklist-file", str(IGNORELIST_FILE)])
assert main.update_kwargs["ignorelist"] == ["mypiserver", "something"]
def test_auto_servers() -> None:
"""Test auto servers."""
# A list of bottle ServerAdapters
bottle_adapters = tuple(
a.__name__.lower() for a in pypiserver.bottle.AutoServer.adapters
)
# We are going to expect that our AutoServer enum names must match those
# at least closely enough to be recognizable.
our_mappings = tuple(map(str.lower, __main__.AutoServer.__members__))
# Assert that all of our mappings are represented in bottle adapters
assert all(
any(mapping in a for a in bottle_adapters) for mapping in our_mappings
)
# Assert that our import checking order matches the order in which the
# adapters are defined in the AutoServer
our_check_order = tuple(i[0] for i in __main__.AUTO_SERVER_IMPORTS)
# Some of the servers have more than one check, so we need to rmeove
# duplicates before we check for identity with the AutoServer definition.
seen: t.Dict[__main__.AutoServer, __main__.AutoServer] = {}
our_check_order = tuple(
seen.setdefault(i, i) for i in our_check_order if i not in seen
)
# We should have the same number of deduped checkers as there are bottle
# adapters
assert len(our_check_order) == len(bottle_adapters)
# And the order should be the same
assert all(
us.name.lower() in them
for us, them in zip(our_check_order, bottle_adapters)
)