diff --git a/README.md b/README.md index 7cac0788..927314ff 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,10 @@ docker compose up --build Then, navigate to `http://localhost:3000` (It may take some time to boot up) and interact with the website. +> **Note:** If an issue occurs with the build, please head to the +> [FAQ](https://projects.laion.ai/Open-Assistant/docs/faq) and check out the +> entries about Docker. + > **Note:** When logging in via email, navigate to `http://localhost:1080` to > get the magic email login link. diff --git a/docker-compose.yaml b/docker-compose.yaml index 908457cd..b9c9c82f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -131,3 +131,61 @@ services: ports: - "3000:3000" command: bash wait-for-postgres.sh node server.js + + inference-server: + build: + dockerfile: docker/inference/Dockerfile.server + context: . + target: dev + image: oasst-inference-server:dev + environment: + - "PORT=8000" + - "REDIS_HOST=redis" + volumes: + - "./oasst-shared:/opt/inference/lib/oasst-shared" + - "./inference/server:/opt/inference/server" + restart: unless-stopped + depends_on: + redis: + condition: service_healthy + profiles: ["inference"] + + inference-worker: + build: + dockerfile: docker/inference/Dockerfile.worker + context: . + target: dev + image: oasst-inference-worker:dev + environment: + - "BACKEND_URL=ws://inference-server:8000" + - "INFERENCE_SERVER_URL=http://inference-text-generation-server" + volumes: + - "./oasst-shared:/opt/inference/lib/oasst-shared" + - "./inference/worker:/opt/inference/worker" + depends_on: + - inference-server + deploy: + replicas: 1 + profiles: ["inference"] + + inference-text-client: + build: + dockerfile: docker/inference/Dockerfile.text-client + context: . 
+    image: oasst-inference-text-client
+    environment:
+      - "BACKEND_URL=http://inference-server:8000"
+    tty: true
+    stdin_open: true
+    volumes:
+      - "./inference/text-client:/opt/inference/text-client" # APP_ROOT of Dockerfile.text-client
+    restart: unless-stopped
+    depends_on:
+      - inference-server
+    profiles: ["inference"]
+
+  inference-text-generation-server:
+    image: ykilcher/text-generation-inference
+    environment:
+      - "MODEL_ID=distilgpt2"
+    profiles: ["inference"]
diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server
new file mode 100644
index 00000000..0838a21e
--- /dev/null
+++ b/docker/inference/Dockerfile.server
@@ -0,0 +1,90 @@
+# syntax=docker/dockerfile:1
+
+ARG MODULE="inference"
+ARG SERVICE="server"
+
+ARG APP_USER="${MODULE}-${SERVICE}"
+ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
+
+# Build on the same image as base-env: the `pip --target` output is copied there as-is.
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        --target=lib \
+        -r requirements.txt
+
+
+
+FROM python:3.10-alpine3.17 as base-env
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+ARG MODULE
+ARG SERVICE
+
+ENV APP_BASE="/opt/${MODULE}"
+ENV APP_ROOT="${APP_BASE}/${SERVICE}"
+ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+ENV SHARED_LIBS_BASE="${APP_BASE}/lib"
+
+ENV PATH="${PATH}:${APP_LIBS}/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+ENV PORT="8000"
+
+
+RUN adduser \
+    --disabled-password \
+    --no-create-home \
+    "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .
+
+
+
+FROM base-env as dev
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared
+
+USER root
+RUN --mount=type=cache,target=/var/cache/pip,from=build \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        -e "${SHARED_LIBS_BASE}/oasst-shared"
+USER ${APP_USER}
+
+
+VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
+
+
+# exec: replace the `sh -c` wrapper so uvicorn is PID 1 and gets SIGTERM on `docker stop`.
+CMD exec uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}"
+
+
+
+FROM base-env as prod
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
+RUN --mount=type=cache,target=/var/cache/pip,from=dev \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        --target="${APP_LIBS}" \
+        /tmp/lib/oasst-shared
+
+# exec: replace the `sh -c` wrapper so uvicorn is PID 1 and gets SIGTERM on `docker stop`.
+CMD exec uvicorn main:app --host 0.0.0.0 --port "${PORT}"
diff --git a/docker/inference/Dockerfile.text-client b/docker/inference/Dockerfile.text-client
new file mode 100644
index 00000000..23a54abe
--- /dev/null
+++ b/docker/inference/Dockerfile.text-client
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1
+
+ARG APP_USER="text-client"
+ARG APP_RELATIVE_PATH="inference/text-client"
+
+
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        --target=lib \
+        -r requirements.txt
+
+
+
+FROM python:3.10-alpine3.17 as base-env
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+
+ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}"
+ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+
+ENV PATH="${PATH}:${APP_LIBS}/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+
+RUN adduser \
+    --disabled-password \
+    --no-create-home \
+    "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
+
+
+
+FROM base-env as prod
+
+# exec: run python in place of the `sh -c` wrapper so it receives container signals directly.
+CMD exec python3 __main__.py --backend-url "${BACKEND_URL}"
diff --git a/docker/inference/Dockerfile.worker b/docker/inference/Dockerfile.worker
new file mode 100644
index 00000000..06f040ab
--- /dev/null
+++ b/docker/inference/Dockerfile.worker
@@ -0,0 +1,85 @@
+# syntax=docker/dockerfile:1
+
+ARG MODULE="inference"
+ARG SERVICE="worker"
+
+ARG APP_USER="${MODULE}-${SERVICE}"
+ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
+
+
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        --target=lib \
+        -r requirements.txt
+
+
+
+FROM python:3.10-alpine3.17 as base-env
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+ARG MODULE
+ARG SERVICE
+
+ENV APP_BASE="/opt/${MODULE}"
+ENV APP_ROOT="${APP_BASE}/${SERVICE}"
+ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+ENV SHARED_LIBS_BASE="${APP_BASE}/lib"
+
+ENV PATH="${PATH}:${APP_LIBS}/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+
+RUN adduser \
+    --disabled-password \
+    --no-create-home \
+    "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
+
+# exec: run python in place of the `sh -c` wrapper so it receives container signals directly.
+CMD exec python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"
+
+
+
+FROM base-env as dev
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared
+
+USER root
+RUN --mount=type=cache,target=/var/cache/pip,from=build \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        -e "${SHARED_LIBS_BASE}/oasst-shared"
+USER ${APP_USER}
+
+
+VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
+
+
+
+FROM base-env as prod
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
+RUN --mount=type=cache,target=/var/cache/pip,from=dev \
+    pip install \
+        --cache-dir=/var/cache/pip \
+        --target="${APP_LIBS}" \
+        /tmp/lib/oasst-shared
diff --git a/docs/docs/faq/faq.md b/docs/docs/faq/faq.md
index 0db57d30..025ae02b 100644
--- a/docs/docs/faq/faq.md
+++ b/docs/docs/faq/faq.md
@@ -9,6 +9,33 @@ For more details and information check out
 [this SO thread](https://stackoverflow.com/questions/66514436/difference-between-docker-compose-and-docker-compose)
 that explains it all in detail.
 
+### Enable Docker's BuildKit Backend
+
+[BuildKit](https://docs.docker.com/build/buildkit/) is Docker's new and improved
+builder backend. In addition to being faster and more efficient, it supports
+many new features, among which is the ability to provide a persistent cache,
+which outlives builds, to compilers and package managers. This is very useful to
+speed up consecutive builds, and is used by some container images of
+OpenAssistant's stack.
+
+The BuildKit backend is used by
+[default by Compose V2](https://www.docker.com/blog/announcing-compose-v2-general-availability/)
+(see above).
But if you want to build an image with `docker build` instead +of `docker compose build`, you might need to enable BuildKit. + +To do so, just add `DOCKER_BUILDKIT=1` to your environment. + +For instance: + +```shell +export DOCKER_BUILDKIT=1 +``` + +You could also, more conveniently, +[enable BuildKit by default](https://docs.docker.com/build/buildkit/#:~:text=To%20enable%20docker%20BuildKit%20by%20default), +or use +[Docker Buildx](https://docs.docker.com/build/#:~:text=The%20new%20client%20Docker%20Buildx). + ### Pre-commit We are using pre-commit to ensure the quality of the code as well as the same diff --git a/inference/README.md b/inference/README.md index 6e1da2c7..0475c876 100644 --- a/inference/README.md +++ b/inference/README.md @@ -1,14 +1,64 @@ -# OpenAssitant Inference +# OpenAssistant Inference Preliminary implementation of the inference engine for OpenAssistant. -## Development Variant 1 (you'll need tmux) +## Development Variant 1 (docker compose) + +The services of the inference stack are prefixed with "inference-" in the +[unified compose descriptor](../docker-compose.yaml).
Prior to building +those, please ensure that you have Docker's new +[BuildKit](https://docs.docker.com/build/buildkit/) backend enabled. See the +[FAQ](https://projects.laion.ai/Open-Assistant/docs/faq#enable-dockers-buildkit-backend) +for more info. + +To build the services, run: + +```shell +docker compose --profile inference build +``` + +Spin up the stack: + +```shell +docker compose --profile inference up -d +``` + +Tail the logs: + +```shell +docker compose logs -f \ + inference-server \ + inference-worker \ + inference-text-client \ + inference-text-generation-server +``` + +Attach to the text-client, and start chatting: + +```shell +docker attach open-assistant-inference-text-client-1 +``` + +> **Note:** In the last step, `open-assistant-inference-text-client-1` refers to +> the name of the `text-client` container started in step 2. + +> **Note:** The compose file contains the bind mounts enabling you to develop on +> the modules of the inference stack, and the `oasst-shared` package, without +> rebuilding. + +> **Note:** You can spin up any number of workers by adjusting the number of +> replicas of the `inference-worker` service to your liking. + +> **Note:** Please wait for the `inference-text-generation-server` service to +> output `{"message":"Connected"}` before starting to chat. + +## Development Variant 2 (you'll need tmux) Run `./full-dev-setup.sh` to start the full development setup. Make sure to wait until the 2nd terminal is ready and says `{"message":"Connected"}` before entering input into the last terminal. -## Development Variant 2 (you'll need multiple terminals) +## Development Variant 3 (you'll need multiple terminals) Run a redis container (or use the one of the general docker compose file):