mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
Merge pull request #1024 from occupytheweb/913_inference_server_docker_setup
inference: Define container images and compose descriptor for the inference stack
This commit is contained in:
@@ -51,6 +51,10 @@ docker compose up --build
|
||||
Then, navigate to `http://localhost:3000` (It may take some time to boot up) and
|
||||
interact with the website.
|
||||
|
||||
> **Note:** If an issue occurs with the build, please head to the
|
||||
> [FAQ](https://projects.laion.ai/Open-Assistant/docs/faq) and check out the
|
||||
> entries about Docker.
|
||||
|
||||
> **Note:** When logging in via email, navigate to `http://localhost:1080` to
|
||||
> get the magic email login link.
|
||||
|
||||
|
||||
@@ -131,3 +131,61 @@ services:
|
||||
ports:
|
||||
- "3000:3000"
|
||||
command: bash wait-for-postgres.sh node server.js
|
||||
|
||||
# Inference API server; only part of the stack when the "inference" profile
# is selected.
inference-server:
  # Tag given to the image built from the `dev` stage below.
  image: oasst-inference-server:dev
  build:
    context: .
    dockerfile: docker/inference/Dockerfile.server
    target: dev
  profiles:
    - "inference"
  restart: unless-stopped
  # Start only once the `redis` service reports healthy.
  depends_on:
    redis:
      condition: service_healthy
  environment:
    - "PORT=8000"
    - "REDIS_HOST=redis"
  # Bind mounts of the local sources into the container.
  volumes:
    - "./oasst-shared:/opt/inference/lib/oasst-shared"
    - "./inference/server:/opt/inference/server"
|
||||
|
||||
# Inference worker; connects to the inference server and to the
# text-generation server via the URLs below.
inference-worker:
  image: oasst-inference-worker:dev
  build:
    context: .
    dockerfile: docker/inference/Dockerfile.worker
    target: dev
  profiles:
    - "inference"
  depends_on:
    - inference-server
  # Number of worker containers to run.
  deploy:
    replicas: 1
  environment:
    - "BACKEND_URL=ws://inference-server:8000"
    - "INFERENCE_SERVER_URL=http://inference-text-generation-server"
  # Bind mounts of the local sources into the container.
  volumes:
    - "./oasst-shared:/opt/inference/lib/oasst-shared"
    - "./inference/worker:/opt/inference/worker"
|
||||
|
||||
# Interactive text client for the inference server.
inference-text-client:
  build:
    dockerfile: docker/inference/Dockerfile.text-client
    context: .
  image: oasst-inference-text-client
  environment:
    - "BACKEND_URL=http://inference-server:8000"
  # Keep a TTY and stdin open so the client can be used via `docker attach`.
  tty: true
  stdin_open: true
  volumes:
    # Mount the text-client sources over the image's application root
    # (Dockerfile.text-client uses /opt/inference/text-client). This entry
    # previously mounted the worker sources, which the client never reads.
    - "./inference/text-client:/opt/inference/text-client"
  restart: unless-stopped
  depends_on:
    - inference-server
  profiles: ["inference"]
|
||||
|
||||
# Text-generation backend the worker reaches through INFERENCE_SERVER_URL;
# runs a prebuilt image, nothing is built locally.
inference-text-generation-server:
  image: ykilcher/text-generation-inference
  profiles:
    - "inference"
  environment:
    # Model identifier handed to the text-generation image.
    - "MODEL_ID=distilgpt2"
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
# syntax=docker/dockerfile:1

# Global build arguments. A stage must re-declare an ARG to read its value.
ARG MODULE="inference"
ARG SERVICE="server"

ARG APP_USER="${MODULE}-${SERVICE}"
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"


# --- build: install the service requirements into /build/lib ----------------
# Same base image as `base-env`, so the packages installed here match the
# Python version and libc of the image they are later copied into.
FROM python:3.10-alpine3.17 as build
ARG APP_RELATIVE_PATH

WORKDIR /build

COPY ./${APP_RELATIVE_PATH}/requirements.txt .

# The pip cache lives in a BuildKit cache mount so it persists across builds.
RUN --mount=type=cache,target=/var/cache/pip \
    pip install \
    --cache-dir=/var/cache/pip \
    --target=lib \
    -r requirements.txt


# --- base-env: runtime layout shared by the dev and prod stages -------------
FROM python:3.10-alpine3.17 as base-env
ARG APP_USER
ARG APP_RELATIVE_PATH
ARG MODULE
ARG SERVICE

ENV APP_BASE="/opt/${MODULE}"
ENV APP_ROOT="${APP_BASE}/${SERVICE}"
ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
ENV SHARED_LIBS_BASE="${APP_BASE}/lib"

# Make the packages installed with --target (and their scripts) discoverable.
ENV PATH="${PATH}:${APP_LIBS}/bin"
ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"

ENV PORT="8000"

# Unprivileged account the service runs as.
RUN adduser \
    --disabled-password \
    --no-create-home \
    "${APP_USER}"

USER ${APP_USER}

WORKDIR ${APP_ROOT}

COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .


# --- dev: editable install of oasst-shared, auto-reloading server -----------
FROM base-env as dev
ARG APP_USER

COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared

# The editable install runs as root, then the stage drops back to the
# unprivileged user.
USER root
RUN --mount=type=cache,target=/var/cache/pip,from=build \
    pip install \
    --cache-dir=/var/cache/pip \
    -e "${SHARED_LIBS_BASE}/oasst-shared"
USER ${APP_USER}

VOLUME [ "${APP_BASE}/lib/oasst-shared" ]

CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}"


# --- prod: oasst-shared installed next to the other libs, no reload ---------
FROM base-env as prod
ARG APP_USER

COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
RUN --mount=type=cache,target=/var/cache/pip,from=dev \
    pip install \
    --cache-dir=/var/cache/pip \
    --target="${APP_LIBS}" \
    /tmp/lib/oasst-shared

CMD uvicorn main:app --host 0.0.0.0 --port "${PORT}"
|
||||
@@ -0,0 +1,50 @@
|
||||
# syntax=docker/dockerfile:1

# Global build arguments; re-declared inside each stage that reads them.
ARG APP_USER="text-client"
ARG APP_RELATIVE_PATH="inference/text-client"


# Stage 1: install the client's requirements into /build/lib.
FROM python:3.10-alpine3.17 AS build
ARG APP_RELATIVE_PATH

WORKDIR /build
COPY ./${APP_RELATIVE_PATH}/requirements.txt .

RUN --mount=type=cache,target=/var/cache/pip \
    pip install --cache-dir=/var/cache/pip --target=lib -r requirements.txt


# Stage 2: runtime layout (paths, unprivileged user, application code).
FROM python:3.10-alpine3.17 AS base-env
ARG APP_RELATIVE_PATH
ARG APP_USER

# Application root and the directory holding the installed packages.
ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}" \
    APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"

# Expose the packages installed with --target and their scripts.
ENV PATH="${PATH}:${APP_LIBS}/bin" \
    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"

RUN adduser --disabled-password --no-create-home "${APP_USER}"

USER ${APP_USER}
WORKDIR ${APP_ROOT}

COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .


# Stage 3: final image; BACKEND_URL is read from the container environment.
FROM base-env AS prod

CMD python3 __main__.py --backend-url "${BACKEND_URL}"
|
||||
@@ -0,0 +1,85 @@
|
||||
# syntax=docker/dockerfile:1

# Global build arguments; re-declared inside each stage that reads them.
ARG MODULE="inference"
ARG SERVICE="worker"
ARG APP_USER="${MODULE}-${SERVICE}"
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"


# Stage `build`: install the worker's requirements into /build/lib.
FROM python:3.10-alpine3.17 AS build
ARG APP_RELATIVE_PATH

WORKDIR /build
COPY ./${APP_RELATIVE_PATH}/requirements.txt .

RUN --mount=type=cache,target=/var/cache/pip \
    pip install --cache-dir=/var/cache/pip --target=lib -r requirements.txt


# Stage `base-env`: runtime layout shared by the dev and prod stages.
FROM python:3.10-alpine3.17 AS base-env
ARG MODULE
ARG SERVICE
ARG APP_USER
ARG APP_RELATIVE_PATH

# APP_BASE must be set in its own instruction before the values derived
# from it.
ENV APP_BASE="/opt/${MODULE}" \
    APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
ENV APP_ROOT="${APP_BASE}/${SERVICE}" \
    SHARED_LIBS_BASE="${APP_BASE}/lib"

# Expose the packages installed with --target and their scripts.
ENV PATH="${PATH}:${APP_LIBS}/bin" \
    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"

RUN adduser --disabled-password --no-create-home "${APP_USER}"

USER ${APP_USER}
WORKDIR ${APP_ROOT}

COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .

# Declared here so that both dev and prod inherit the same start command.
CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"


# Stage `dev`: editable install of oasst-shared.
FROM base-env AS dev
ARG APP_USER

COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared

# Install as root, then switch back to the unprivileged user.
USER root
RUN --mount=type=cache,target=/var/cache/pip,from=build \
    pip install --cache-dir=/var/cache/pip -e "${SHARED_LIBS_BASE}/oasst-shared"
USER ${APP_USER}

VOLUME [ "${APP_BASE}/lib/oasst-shared" ]


# Stage `prod`: oasst-shared installed next to the other packages.
FROM base-env AS prod
ARG APP_USER

COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
RUN --mount=type=cache,target=/var/cache/pip,from=dev \
    pip install --cache-dir=/var/cache/pip --target="${APP_LIBS}" /tmp/lib/oasst-shared
|
||||
@@ -9,6 +9,33 @@ For more details and information check out
|
||||
[this SO thread](https://stackoverflow.com/questions/66514436/difference-between-docker-compose-and-docker-compose)
|
||||
that explains it all in detail.
|
||||
|
||||
### Enable Docker's BuildKit Backend
|
||||
|
||||
[BuildKit](https://docs.docker.com/build/buildkit/) is Docker's new and improved
|
||||
builder backend. In addition to being faster and more efficient, it supports
|
||||
many new features, among which is the ability to provide a persistent cache,
|
||||
which outlives builds, to compilers and package managers. This is very useful to
|
||||
speed up consecutive builds, and is used by some container images of
|
||||
OpenAssistant's stack.
|
||||
|
||||
The BuildKit backend is used by default by
|
||||
[Compose V2](https://www.docker.com/blog/announcing-compose-v2-general-availability/)
|
||||
(see above). <br/> But if you want to build an image with `docker build` instead
|
||||
of `docker compose build`, you might need to enable BuildKit.
|
||||
|
||||
To do so, just add `DOCKER_BUILDKIT=1` to your environment.
|
||||
|
||||
For instance:
|
||||
|
||||
```shell
|
||||
export DOCKER_BUILDKIT=1
|
||||
```
|
||||
|
||||
You could also, more conveniently,
|
||||
[enable BuildKit by default](https://docs.docker.com/build/buildkit/#:~:text=To%20enable%20docker%20BuildKit%20by%20default),
|
||||
or use
|
||||
[Docker Buildx](https://docs.docker.com/build/#:~:text=The%20new%20client%20Docker%20Buildx).
|
||||
|
||||
### Pre-commit
|
||||
|
||||
We are using pre-commit to ensure the quality of the code as well as the same
|
||||
|
||||
+53
-3
@@ -1,14 +1,64 @@
|
||||
# OpenAssitant Inference
|
||||
# OpenAssistant Inference
|
||||
|
||||
Preliminary implementation of the inference engine for OpenAssistant.
|
||||
|
||||
## Development Variant 1 (you'll need tmux)
|
||||
## Development Variant 1 (docker compose)
|
||||
|
||||
The services of the inference stack are prefixed with "inference-" in the
|
||||
[unified compose descriptor](../docker-compose.yaml). <br/> Prior to building
|
||||
those, please ensure that you have Docker's new
|
||||
[BuildKit](https://docs.docker.com/build/buildkit/) backend enabled. See the
|
||||
[FAQ](https://projects.laion.ai/Open-Assistant/docs/faq#enable-dockers-buildkit-backend)
|
||||
for more info.
|
||||
|
||||
To build the services, run:
|
||||
|
||||
```shell
|
||||
docker compose --profile inference build
|
||||
```
|
||||
|
||||
Spin up the stack:
|
||||
|
||||
```shell
|
||||
docker compose --profile inference up -d
|
||||
```
|
||||
|
||||
Tail the logs:
|
||||
|
||||
```shell
|
||||
docker compose logs -f \
|
||||
inference-server \
|
||||
inference-worker \
|
||||
inference-text-client \
|
||||
inference-text-generation-server
|
||||
```
|
||||
|
||||
Attach to the text-client, and start chatting:
|
||||
|
||||
```shell
|
||||
docker attach open-assistant-inference-text-client-1
|
||||
```
|
||||
|
||||
> **Note:** In the last step, `open-assistant-inference-text-client-1` refers to
|
||||
> the name of the `text-client` container created when the stack was spun up.
|
||||
|
||||
> **Note:** The compose file contains the bind mounts enabling you to develop on
|
||||
> the modules of the inference stack, and the `oasst-shared` package, without
|
||||
> rebuilding.
|
||||
|
||||
> **Note:** You can spin up any number of workers by adjusting the number of
|
||||
> replicas of the `inference-worker` service to your liking.
|
||||
|
||||
> **Note:** Please wait for the `inference-text-generation-server` service to
|
||||
> output `{"message":"Connected"}` before starting to chat.
|
||||
|
||||
## Development Variant 2 (you'll need tmux)
|
||||
|
||||
Run `./full-dev-setup.sh` to start the full development setup. Make sure to wait
|
||||
until the 2nd terminal is ready and says `{"message":"Connected"}` before
|
||||
entering input into the last terminal.
|
||||
|
||||
## Development Variant 2 (you'll need multiple terminals)
|
||||
## Development Variant 3 (you'll need multiple terminals)
|
||||
|
||||
Run a redis container (or use the one from the general docker compose file):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user