mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
[merge] Fix conflict
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
name: Build Inference Images
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- docker/inference/**
|
||||
- inference/**
|
||||
|
||||
jobs:
|
||||
build-inference-server:
|
||||
uses: ./.github/workflows/docker-build.yaml
|
||||
with:
|
||||
image-name: oasst-inference-server
|
||||
context: .
|
||||
dockerfile: docker/inference/Dockerfile.server
|
||||
build-args: ""
|
||||
build-inference-worker-full:
|
||||
uses: ./.github/workflows/docker-build.yaml
|
||||
with:
|
||||
image-name: oasst-inference-worker-full
|
||||
context: .
|
||||
dockerfile: docker/inference/Dockerfile.worker-full
|
||||
build-args: ""
|
||||
@@ -26,6 +26,7 @@ jobs:
|
||||
environment: ${{ inputs.stack-name }}
|
||||
env:
|
||||
WEB_ADMIN_USERS: ${{ secrets.DEV_WEB_ADMIN_USERS }}
|
||||
WEB_MODERATOR_USERS: ${{ secrets.DEV_WEB_MODERATOR_USERS }}
|
||||
WEB_DISCORD_CLIENT_ID: ${{ secrets.DEV_WEB_DISCORD_CLIENT_ID }}
|
||||
WEB_DISCORD_CLIENT_SECRET: ${{ secrets.DEV_WEB_DISCORD_CLIENT_SECRET }}
|
||||
WEB_EMAIL_SERVER_HOST: ${{ secrets.DEV_WEB_EMAIL_SERVER_HOST }}
|
||||
|
||||
+1
-1
@@ -1,5 +1,5 @@
|
||||
* @yk @andreaskoepf
|
||||
/website/ @fozziethebeat @k-nearest-neighbor @AbdBarho
|
||||
/website/ @fozziethebeat @k-nearest-neighbor @AbdBarho @notmd
|
||||
/model/ @theblackcat102 @sanagno
|
||||
/copilot/ @fozziethebeat @andreaskoepf @yk
|
||||
/docs/ @andrewm4894 @andreaskoepf @yk
|
||||
|
||||
@@ -82,6 +82,7 @@
|
||||
interval: 2s
|
||||
timeout: 2s
|
||||
retries: 10
|
||||
shm_size: 1G
|
||||
loop:
|
||||
- name: backend
|
||||
- name: web
|
||||
@@ -169,6 +170,8 @@
|
||||
network_mode: "oasst-{{ stack_name }}"
|
||||
env:
|
||||
ADMIN_USERS: "{{ lookup('ansible.builtin.env', 'WEB_ADMIN_USERS') }}"
|
||||
MODERATOR_USERS:
|
||||
"{{ lookup('ansible.builtin.env', 'WEB_MODERATOR_USERS') }}"
|
||||
DATABASE_URL:
|
||||
"postgres://postgres:{{ postgres_password }}@oasst-{{ stack_name
|
||||
}}-postgres-web/postgres"
|
||||
|
||||
@@ -27,6 +27,7 @@ from oasst_backend.utils.database_utils import CommitMode, managed_tx_function
|
||||
from oasst_shared.exceptions import OasstError, OasstErrorCode
|
||||
from oasst_shared.schemas import protocol as protocol_schema
|
||||
from oasst_shared.utils import utcnow
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from pydantic import BaseModel
|
||||
from sqlmodel import Session, select
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
@@ -100,6 +101,13 @@ if settings.OFFICIAL_WEB_API_KEY:
|
||||
)
|
||||
|
||||
|
||||
if settings.ENABLE_PROM_METRICS:
|
||||
|
||||
@app.on_event("startup")
|
||||
async def enable_prom_metrics():
|
||||
Instrumentator().instrument(app).expose(app)
|
||||
|
||||
|
||||
if settings.RATE_LIMIT:
|
||||
|
||||
@app.on_event("startup")
|
||||
|
||||
@@ -197,6 +197,8 @@ class Settings(BaseSettings):
|
||||
|
||||
ROOT_TOKENS: List[str] = ["1234"] # supply a string that can be parsed to a json list
|
||||
|
||||
ENABLE_PROM_METRICS: bool = True # enable prometheus metrics at /metrics
|
||||
|
||||
@validator("DATABASE_URI", pre=True)
|
||||
def assemble_db_connection(cls, v: Optional[str], values: Dict[str, Any]) -> Any:
|
||||
if isinstance(v, str):
|
||||
|
||||
@@ -947,6 +947,9 @@ class PromptRepository:
|
||||
if deleted is not None:
|
||||
qry = qry.filter(Message.deleted == deleted)
|
||||
|
||||
if lang is not None:
|
||||
qry = qry.filter(Message.lang == lang)
|
||||
|
||||
if desc:
|
||||
qry = qry.order_by(Message.created_date.desc(), Message.id.desc())
|
||||
else:
|
||||
@@ -955,9 +958,6 @@ class PromptRepository:
|
||||
if limit is not None:
|
||||
qry = qry.limit(limit)
|
||||
|
||||
if lang is not None:
|
||||
qry = qry.filter(Message.lang == lang)
|
||||
|
||||
return self._add_user_emojis_all(qry)
|
||||
|
||||
def update_children_counts(self, message_tree_id: UUID):
|
||||
|
||||
@@ -6,6 +6,7 @@ fastapi-limiter==0.1.5
|
||||
fastapi-utils==0.2.1
|
||||
loguru==0.6.0
|
||||
numpy==1.22.4
|
||||
prometheus-fastapi-instrumentator==5.9.1
|
||||
psycopg2-binary==2.9.5
|
||||
pydantic==1.9.1
|
||||
pydantic[email]==1.9.1
|
||||
|
||||
+30
-23
@@ -136,6 +136,23 @@ services:
|
||||
- "3000:3000"
|
||||
command: bash wait-for-postgres.sh node server.js
|
||||
|
||||
# This DB is for Inference
|
||||
inference-db:
|
||||
image: postgres
|
||||
restart: always
|
||||
ports:
|
||||
- 5434:5432
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: oasst_inference
|
||||
healthcheck:
|
||||
test: ["CMD", "pg_isready", "-U", "postgres"]
|
||||
interval: 2s
|
||||
timeout: 2s
|
||||
retries: 10
|
||||
profiles: ["inference"]
|
||||
|
||||
inference-server:
|
||||
build:
|
||||
dockerfile: docker/inference/Dockerfile.server
|
||||
@@ -145,13 +162,25 @@ services:
|
||||
environment:
|
||||
- "PORT=8000"
|
||||
- "REDIS_HOST=redis"
|
||||
- POSTGRES_HOST=inference-db
|
||||
- POSTGRES_DB=oasst_inference
|
||||
volumes:
|
||||
- "./oasst-shared:/opt/inference/lib/oasst-shared"
|
||||
- "./inference/server:/opt/inference/server"
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8000:8000"
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
inference-db:
|
||||
condition: service_healthy
|
||||
profiles: ["inference"]
|
||||
|
||||
inference-text-generation-server:
|
||||
image: ghcr.io/huggingface/text-generation-inference
|
||||
environment:
|
||||
- "MODEL_ID=distilgpt2"
|
||||
profiles: ["inference"]
|
||||
|
||||
inference-worker:
|
||||
@@ -167,29 +196,7 @@ services:
|
||||
- "./oasst-shared:/opt/inference/lib/oasst-shared"
|
||||
- "./inference/worker:/opt/inference/worker"
|
||||
depends_on:
|
||||
- inference-server
|
||||
- inference-text-generation-server
|
||||
deploy:
|
||||
replicas: 1
|
||||
profiles: ["inference"]
|
||||
|
||||
inference-text-client:
|
||||
build:
|
||||
dockerfile: docker/inference/Dockerfile.text-client
|
||||
context: .
|
||||
image: oasst-inference-text-client
|
||||
environment:
|
||||
- "BACKEND_URL=http://inference-server:8000"
|
||||
tty: true
|
||||
stdin_open: true
|
||||
volumes:
|
||||
- "./inference/worker:/opt/inference/worker"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- inference-server
|
||||
profiles: ["inference"]
|
||||
|
||||
inference-text-generation-server:
|
||||
image: ghcr.io/huggingface/text-generation-inference
|
||||
environment:
|
||||
- "MODEL_ID=distilgpt2"
|
||||
profiles: ["inference"]
|
||||
|
||||
@@ -14,5 +14,6 @@ COPY ./backend/alembic /app/alembic
|
||||
COPY ./backend/alembic.ini /app/alembic.ini
|
||||
COPY ./backend/main.py /app/main.py
|
||||
COPY ./backend/import.py /app/import.py
|
||||
COPY ./backend/export.py /app/export.py
|
||||
COPY ./backend/oasst_backend /app/oasst_backend
|
||||
COPY ./backend/test_data /app/test_data
|
||||
|
||||
@@ -7,7 +7,7 @@ ARG APP_USER="${MODULE}-${SERVICE}"
|
||||
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
|
||||
|
||||
|
||||
FROM python:3-slim as build
|
||||
FROM python:3.10-slim as build
|
||||
ARG APP_RELATIVE_PATH
|
||||
|
||||
WORKDIR /build
|
||||
@@ -22,7 +22,7 @@ RUN --mount=type=cache,target=/var/cache/pip \
|
||||
|
||||
|
||||
|
||||
FROM python:3.10-alpine3.17 as base-env
|
||||
FROM python:3.10-slim as base-env
|
||||
ARG APP_USER
|
||||
ARG APP_RELATIVE_PATH
|
||||
ARG MODULE
|
||||
@@ -50,6 +50,9 @@ WORKDIR ${APP_ROOT}
|
||||
|
||||
|
||||
COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/alembic alembic
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/alembic.ini .
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/oasst_inference_server oasst_inference_server
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .
|
||||
|
||||
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
ARG APP_USER="text-client"
|
||||
ARG APP_RELATIVE_PATH="inference/text-client"
|
||||
|
||||
|
||||
FROM python:3.10-alpine3.17 as build
|
||||
ARG APP_RELATIVE_PATH
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
COPY ./${APP_RELATIVE_PATH}/requirements.txt .
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/pip \
|
||||
pip install \
|
||||
--cache-dir=/var/cache/pip \
|
||||
--target=lib \
|
||||
-r requirements.txt
|
||||
|
||||
|
||||
|
||||
FROM python:3.10-alpine3.17 as base-env
|
||||
ARG APP_USER
|
||||
ARG APP_RELATIVE_PATH
|
||||
|
||||
ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}"
|
||||
ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
|
||||
|
||||
ENV PATH="${PATH}:${APP_LIBS}/bin"
|
||||
ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
|
||||
|
||||
|
||||
RUN adduser \
|
||||
--disabled-password \
|
||||
--no-create-home \
|
||||
"${APP_USER}"
|
||||
|
||||
USER ${APP_USER}
|
||||
|
||||
WORKDIR ${APP_ROOT}
|
||||
|
||||
COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
|
||||
|
||||
|
||||
|
||||
FROM base-env as prod
|
||||
|
||||
|
||||
CMD python3 __main__.py --backend-url "${BACKEND_URL}"
|
||||
@@ -48,7 +48,7 @@ WORKDIR ${APP_ROOT}
|
||||
|
||||
|
||||
COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
|
||||
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/*.py .
|
||||
|
||||
|
||||
CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
FROM ghcr.io/huggingface/text-generation-inference
|
||||
|
||||
ARG MODULE="inference"
|
||||
ARG SERVICE="worker"
|
||||
|
||||
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
|
||||
|
||||
WORKDIR /worker
|
||||
COPY ./oasst-shared /tmp/oasst-shared
|
||||
RUN conda create -n worker python=3.10 -y
|
||||
RUN /opt/miniconda/envs/worker/bin/pip install /tmp/oasst-shared
|
||||
|
||||
COPY ./${APP_RELATIVE_PATH}/requirements.txt .
|
||||
RUN /opt/miniconda/envs/worker/bin/pip install -r requirements.txt
|
||||
|
||||
COPY ./${APP_RELATIVE_PATH}/*.py .
|
||||
COPY ./${APP_RELATIVE_PATH}/worker_full_main.sh /entrypoint.sh
|
||||
|
||||
ENV MODEL_ID="distilgpt2"
|
||||
ENV INFERENCE_SERVER_URL="http://localhost:80"
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
@@ -3,9 +3,11 @@
|
||||
# Creates a tmux window with splits for the individual services
|
||||
|
||||
tmux new-session -d -s "inference-dev-setup"
|
||||
tmux send-keys "docker run --rm -it -p 6379:6379 redis" C-m
|
||||
tmux send-keys "docker run --rm -it -p 5432:5432 -e POSTGRES_PASSWORD=postgres --name postgres postgres" C-m
|
||||
tmux split-window -h
|
||||
tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 ghcr.io/huggingface/text-generation-inference" C-m
|
||||
tmux send-keys "docker run --rm -it -p 6379:6379 --name redis redis" C-m
|
||||
tmux split-window -h
|
||||
tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 -v $HOME/.cache/huggingface:/root/.cache/huggingface --name text-generation-inference ghcr.io/huggingface/text-generation-inference" C-m
|
||||
tmux split-window -h
|
||||
tmux send-keys "cd server" C-m
|
||||
tmux send-keys "uvicorn main:app --reload" C-m
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts
|
||||
script_location = %(here)s/alembic
|
||||
|
||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
||||
# Uncomment the line below if you want the files to be prepended with date and time
|
||||
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
||||
# for all available tokens
|
||||
file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
||||
|
||||
# sys.path path, will be prepended to sys.path if present.
|
||||
# defaults to the current working directory.
|
||||
prepend_sys_path = .
|
||||
|
||||
# timezone to use when rendering the date within the migration file
|
||||
# as well as the filename.
|
||||
# If specified, requires the python-dateutil library that can be
|
||||
# installed by adding `alembic[tz]` to the pip requirements
|
||||
# string value is passed to dateutil.tz.gettz()
|
||||
# leave blank for localtime
|
||||
# timezone =
|
||||
|
||||
# max length of characters to apply to the
|
||||
# "slug" field
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during
|
||||
# the 'revision' command, regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without
|
||||
# a source .py file to be detected as revisions in the
|
||||
# versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification; This defaults
|
||||
# to alembic/versions. When using multiple version
|
||||
# directories, initial revisions must be specified with --version-path.
|
||||
# The path separator used here should be the separator specified by "version_path_separator" below.
|
||||
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
|
||||
|
||||
# version path separator; As mentioned above, this is the character used to split
|
||||
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
||||
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
||||
# Valid values for version_path_separator are:
|
||||
#
|
||||
# version_path_separator = :
|
||||
# version_path_separator = ;
|
||||
# version_path_separator = space
|
||||
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
||||
|
||||
# the output encoding used when revision files
|
||||
# are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
# sqlalchemy.url = postgresql://<username>:<password>@<host>/<database_name>
|
||||
sqlalchemy.url = postgresql://postgres:postgres@localhost:5432/postgres
|
||||
|
||||
[post_write_hooks]
|
||||
# post_write_hooks defines scripts or Python functions that are run
|
||||
# on newly generated revision scripts. See the documentation for further
|
||||
# detail and examples
|
||||
|
||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
||||
hooks = black
|
||||
black.type = console_scripts
|
||||
black.entrypoint = black
|
||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -0,0 +1 @@
|
||||
Generic single-database configuration.
|
||||
@@ -0,0 +1,78 @@
|
||||
from logging.config import fileConfig
|
||||
|
||||
import sqlmodel
|
||||
from alembic import context
|
||||
from oasst_inference_server import models # noqa: F401
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
# add your model's MetaData object here
|
||||
# for 'autogenerate' support
|
||||
# from myapp import mymodel
|
||||
# target_metadata = mymodel.Base.metadata
|
||||
target_metadata = sqlmodel.SQLModel.metadata
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The Alembic context is configured from the ``sqlalchemy.url`` option
    only; no Engine is created, so no DBAPI has to be installed.
    Calls to context.execute() made by the migrations emit the given
    string to the script output.

    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        # Render bound parameters inline in the emitted SQL.
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    active ini section (with ``NullPool``), a connection is opened and
    associated with the Alembic context, and the migrations run inside a
    single transaction. Before running them, the ``alembic_version`` table
    is created if missing and locked in ACCESS EXCLUSIVE mode.

    """
    # Local import: the module-level import line only brings in
    # engine_from_config and pool.
    from sqlalchemy import text

    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            # The version table must exist before it can be locked.
            context.get_context()._ensure_version_table()
            # NOTE(review): the lock presumably serializes concurrent
            # upgrade runs (e.g. several servers starting at once) - confirm.
            # The statement is wrapped in text() because passing a plain
            # string to Connection.execute() is deprecated in SQLAlchemy 1.4
            # and rejected in 2.x; text() works on both.
            connection.execute(text("LOCK TABLE alembic_version IN ACCESS EXCLUSIVE MODE"))
            context.run_migrations()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
@@ -0,0 +1,25 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import sqlmodel
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = ${repr(up_revision)}
|
||||
down_revision = ${repr(down_revision)}
|
||||
branch_labels = ${repr(branch_labels)}
|
||||
depends_on = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -0,0 +1,36 @@
|
||||
"""initial commit
|
||||
|
||||
Revision ID: 3a4cd8777eb2
|
||||
Revises:
|
||||
Create Date: 2023-02-10 02:21:27.086772
|
||||
|
||||
"""
|
||||
import sqlalchemy as sa
|
||||
import sqlmodel
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "3a4cd8777eb2"
|
||||
down_revision = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the ``chat`` table.

    The table has three nullable JSONB columns (``conversation``,
    ``pending_message_request``, ``message_request_state``) and a
    non-nullable string ``id`` column that is the primary key.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    jsonb_column_names = ("conversation", "pending_message_request", "message_request_state")
    jsonb_columns = [
        sa.Column(column_name, postgresql.JSONB(astext_type=sa.Text()), nullable=True)
        for column_name in jsonb_column_names
    ]
    id_column = sa.Column("id", sqlmodel.sql.sqltypes.AutoString(), nullable=False)
    op.create_table(
        "chat",
        *jsonb_columns,
        id_column,
        sa.PrimaryKeyConstraint("id"),
    )
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``chat`` table, reverting this revision."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = "chat"
    op.drop_table(table_name)
    # ### end Alembic commands ###
|
||||
+107
-93
@@ -1,18 +1,34 @@
|
||||
import asyncio
|
||||
import enum
|
||||
import uuid
|
||||
import contextlib
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import alembic.command
|
||||
import alembic.config
|
||||
import fastapi
|
||||
import pydantic
|
||||
import redis.asyncio as redis
|
||||
import sqlmodel
|
||||
import websockets.exceptions
|
||||
from fastapi import Depends
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from loguru import logger
|
||||
from oasst_shared.schemas import inference, protocol
|
||||
from oasst_inference_server import interface
|
||||
from oasst_inference_server.chat_repository import ChatRepository
|
||||
from oasst_inference_server.database import db_engine
|
||||
from oasst_inference_server.settings import settings
|
||||
from oasst_shared.schemas import inference
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
|
||||
app = fastapi.FastAPI()
|
||||
|
||||
|
||||
# add prometheus metrics at /metrics
|
||||
@app.on_event("startup")
|
||||
async def enable_prom_metrics():
|
||||
Instrumentator().instrument(app).expose(app)
|
||||
|
||||
|
||||
# Allow CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -23,106 +39,97 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
|
||||
class Settings(pydantic.BaseSettings):
|
||||
redis_host: str = "localhost"
|
||||
redis_port: int = 6379
|
||||
redis_db: int = 0
|
||||
|
||||
sse_retry_timeout: int = 15000
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
||||
# create async redis client
|
||||
redisClient = redis.Redis(
|
||||
host=settings.redis_host, port=settings.redis_port, db=settings.redis_db, decode_responses=True
|
||||
)
|
||||
|
||||
|
||||
class CreateChatRequest(pydantic.BaseModel):
|
||||
pass
|
||||
def create_session():
|
||||
with sqlmodel.Session(db_engine) as session:
|
||||
yield session
|
||||
|
||||
|
||||
class CreateChatResponse(pydantic.BaseModel):
|
||||
id: str
|
||||
def create_chat_repository(session: sqlmodel.Session = Depends(create_session)):
|
||||
repository = ChatRepository(session)
|
||||
return repository
|
||||
|
||||
|
||||
class MessageRequest(pydantic.BaseModel):
|
||||
message: str = pydantic.Field(..., repr=False)
|
||||
model_name: str = "distilgpt2"
|
||||
max_new_tokens: int = 100
|
||||
if settings.update_alembic:
|
||||
|
||||
def compatible_with(self, worker_config: inference.WorkerConfig) -> bool:
|
||||
return self.model_name == worker_config.model_name
|
||||
@app.on_event("startup")
|
||||
def alembic_upgrade():
|
||||
logger.info("Attempting to upgrade alembic on startup")
|
||||
retry = 0
|
||||
while True:
|
||||
try:
|
||||
alembic_ini_path = Path(__file__).parent / "alembic.ini"
|
||||
alembic_cfg = alembic.config.Config(str(alembic_ini_path))
|
||||
alembic_cfg.set_main_option("sqlalchemy.url", settings.database_uri)
|
||||
alembic.command.upgrade(alembic_cfg, "head")
|
||||
logger.info("Successfully upgraded alembic on startup")
|
||||
break
|
||||
except Exception:
|
||||
logger.exception("Alembic upgrade failed on startup")
|
||||
retry += 1
|
||||
if retry >= settings.alembic_retries:
|
||||
raise
|
||||
|
||||
timeout = settings.alembic_retry_timeout * 2**retry
|
||||
logger.warning(f"Retrying alembic upgrade in {timeout} seconds")
|
||||
time.sleep(timeout)
|
||||
|
||||
|
||||
class TokenResponseEvent(pydantic.BaseModel):
|
||||
token: inference.TokenResponse
|
||||
|
||||
|
||||
class MessageRequestState(str, enum.Enum):
|
||||
pending = "pending"
|
||||
in_progress = "in_progress"
|
||||
complete = "complete"
|
||||
aborted_by_worker = "aborted_by_worker"
|
||||
|
||||
|
||||
class DbChatEntry(pydantic.BaseModel):
|
||||
id: str = pydantic.Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
conversation: protocol.Conversation = pydantic.Field(default_factory=protocol.Conversation)
|
||||
pending_message_request: MessageRequest | None = None
|
||||
message_request_state: MessageRequestState | None = None
|
||||
|
||||
|
||||
# TODO: make real database
|
||||
CHATS: dict[str, DbChatEntry] = {}
|
||||
@app.get("/chat")
|
||||
async def list_chats(chat_repository: ChatRepository = Depends(create_chat_repository)) -> interface.ListChatsResponse:
|
||||
"""Lists all chats."""
|
||||
logger.info("Listing all chats.")
|
||||
chats = chat_repository.get_chat_list()
|
||||
return interface.ListChatsResponse(chats=chats)
|
||||
|
||||
|
||||
@app.post("/chat")
|
||||
async def create_chat(request: CreateChatRequest) -> CreateChatResponse:
|
||||
async def create_chat(
|
||||
request: interface.CreateChatRequest, chat_repository: ChatRepository = Depends(create_chat_repository)
|
||||
) -> interface.ChatListEntry:
|
||||
"""Allows a client to create a new chat."""
|
||||
logger.info(f"Received {request}")
|
||||
chat = DbChatEntry()
|
||||
CHATS[chat.id] = chat
|
||||
return CreateChatResponse(id=chat.id)
|
||||
chat = chat_repository.create_chat()
|
||||
return chat.to_list_entry()
|
||||
|
||||
|
||||
@app.get("/chat/{id}")
|
||||
async def get_chat(id: str) -> protocol.Conversation:
|
||||
async def get_chat(id: str, chat_repository: ChatRepository = Depends(create_chat_repository)) -> interface.ChatEntry:
|
||||
"""Allows a client to get the current state of a chat."""
|
||||
return CHATS[id].conversation
|
||||
chat = chat_repository.get_chat_entry_by_id(id)
|
||||
return chat
|
||||
|
||||
|
||||
@app.post("/chat/{id}/message")
|
||||
async def create_message(id: str, message_request: MessageRequest, fastapi_request: fastapi.Request):
|
||||
async def create_message(
|
||||
id: str,
|
||||
message_request: interface.MessageRequest,
|
||||
fastapi_request: fastapi.Request,
|
||||
chat_repository: ChatRepository = Depends(create_chat_repository),
|
||||
) -> EventSourceResponse:
|
||||
"""Allows the client to stream the results of a request."""
|
||||
|
||||
chat = CHATS[id]
|
||||
if not chat.conversation.is_prompter_turn:
|
||||
raise fastapi.HTTPException(status_code=400, detail="Not your turn")
|
||||
if chat.pending_message_request is not None:
|
||||
raise fastapi.HTTPException(status_code=400, detail="Already pending")
|
||||
try:
|
||||
chat_repository.add_prompter_message(id=id, message_request=message_request)
|
||||
except Exception:
|
||||
logger.exception("Error adding prompter message")
|
||||
return fastapi.Response(status_code=500)
|
||||
|
||||
chat.conversation.messages.append(
|
||||
protocol.ConversationMessage(
|
||||
text=message_request.message,
|
||||
is_assistant=False,
|
||||
)
|
||||
)
|
||||
|
||||
chat.pending_message_request = message_request
|
||||
chat.message_request_state = MessageRequestState.pending
|
||||
|
||||
async def event_generator():
|
||||
async def event_generator(id):
|
||||
result_data = []
|
||||
|
||||
try:
|
||||
while True:
|
||||
if await fastapi_request.is_disconnected():
|
||||
logger.warning("Client disconnected")
|
||||
break
|
||||
return
|
||||
|
||||
item = await redisClient.blpop(chat.id, 1)
|
||||
item = await redisClient.blpop(id, 1)
|
||||
if item is None:
|
||||
continue
|
||||
|
||||
@@ -135,47 +142,44 @@ async def create_message(id: str, message_request: MessageRequest, fastapi_reque
|
||||
|
||||
yield {
|
||||
"retry": settings.sse_retry_timeout,
|
||||
"data": TokenResponseEvent(token=response_packet.token).json(),
|
||||
"data": interface.TokenResponseEvent(token=response_packet.token).json(),
|
||||
}
|
||||
logger.info(f"Finished streaming {chat.id} {len(result_data)=}")
|
||||
logger.info(f"Finished streaming {id} {len(result_data)=}")
|
||||
except Exception:
|
||||
logger.exception(f"Error streaming {chat.id}")
|
||||
logger.exception(f"Error streaming {id}")
|
||||
raise
|
||||
|
||||
chat.conversation.messages.append(
|
||||
protocol.ConversationMessage(
|
||||
text=response_packet.generated_text.text,
|
||||
is_assistant=True,
|
||||
)
|
||||
)
|
||||
chat.pending_message_request = None
|
||||
try:
|
||||
with contextlib.contextmanager(create_session)() as session:
|
||||
chat_repository = create_chat_repository(session)
|
||||
chat_repository.add_assistant_message(id=id, text=response_packet.generated_text.text)
|
||||
except Exception:
|
||||
logger.exception("Error adding assistant message")
|
||||
|
||||
return EventSourceResponse(event_generator())
|
||||
return EventSourceResponse(event_generator(id))
|
||||
|
||||
|
||||
@app.websocket("/work")
|
||||
async def work(websocket: fastapi.WebSocket):
|
||||
async def work(websocket: fastapi.WebSocket, chat_repository: ChatRepository = Depends(create_chat_repository)):
|
||||
await websocket.accept()
|
||||
worker_config = inference.WorkerConfig.parse_raw(await websocket.receive_text())
|
||||
try:
|
||||
while True:
|
||||
print(websocket.client_state)
|
||||
if websocket.client_state == fastapi.websockets.WebSocketState.DISCONNECTED:
|
||||
logger.warning("Worker disconnected")
|
||||
break
|
||||
# find a pending task that matches the worker's config
|
||||
# could also be implemented using task queues
|
||||
# but general compatibility matching is tricky
|
||||
for chat in CHATS.values():
|
||||
if (request := chat.pending_message_request) is not None:
|
||||
if chat.message_request_state == MessageRequestState.pending:
|
||||
if request.compatible_with(worker_config):
|
||||
break
|
||||
for chat in chat_repository.get_pending_chats():
|
||||
request = chat.pending_message_request
|
||||
if request.compatible_with(worker_config):
|
||||
break
|
||||
else:
|
||||
logger.debug("No pending tasks")
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
chat.message_request_state = MessageRequestState.in_progress
|
||||
chat_repository.set_chat_state(chat.id, interface.MessageRequestState.in_progress)
|
||||
|
||||
work_request = inference.WorkRequest(
|
||||
conversation=chat.conversation,
|
||||
@@ -183,29 +187,39 @@ async def work(websocket: fastapi.WebSocket):
|
||||
max_new_tokens=request.max_new_tokens,
|
||||
)
|
||||
|
||||
logger.info(f"Created {work_request}")
|
||||
logger.info(f"Created {work_request=}")
|
||||
try:
|
||||
await websocket.send_text(work_request.json())
|
||||
except websockets.exceptions.ConnectionClosedError:
|
||||
logger.warning("Worker disconnected")
|
||||
websocket.close()
|
||||
chat.message_request_state = MessageRequestState.pending
|
||||
chat_repository.set_chat_state(chat.id, interface.MessageRequestState.pending)
|
||||
break
|
||||
|
||||
logger.debug(f"Sent {work_request=} to worker.")
|
||||
|
||||
try:
|
||||
in_progress = False
|
||||
while True:
|
||||
# maybe unnecessary to parse and re-serialize
|
||||
# could just pass the raw string and mark end via empty string
|
||||
response_packet = inference.WorkResponsePacket.parse_raw(await websocket.receive_text())
|
||||
in_progress = True
|
||||
await redisClient.rpush(chat.id, response_packet.json())
|
||||
if response_packet.is_end:
|
||||
logger.debug(f"Received {response_packet=} from worker. Ending.")
|
||||
break
|
||||
except fastapi.WebSocketException:
|
||||
# TODO: handle this better
|
||||
logger.exception(f"Websocket closed during handling of {chat.id}")
|
||||
chat.message_request_state = MessageRequestState.aborted_by_worker
|
||||
if in_progress:
|
||||
logger.warning(f"Aborting {chat.id=}")
|
||||
chat_repository.set_chat_state(chat.id, interface.MessageRequestState.aborted_by_worker)
|
||||
else:
|
||||
logger.warning(f"Marking {chat.id=} as pending since no work was done.")
|
||||
chat_repository.set_chat_state(chat.id, interface.MessageRequestState.pending)
|
||||
raise
|
||||
|
||||
chat.message_request_state = MessageRequestState.complete
|
||||
chat_repository.set_chat_state(chat.id, interface.MessageRequestState.complete)
|
||||
except fastapi.WebSocketException:
|
||||
logger.exception("Websocket closed")
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
import fastapi
|
||||
import sqlmodel
|
||||
from loguru import logger
|
||||
from oasst_inference_server import interface, models
|
||||
from oasst_shared.schemas import protocol
|
||||
from sqlalchemy.sql.operators import is_not
|
||||
|
||||
|
||||
class ChatRepository:
    """Read and update chat rows through a single SQLModel session."""

    def __init__(self, session: sqlmodel.Session) -> None:
        # Every query and commit issued below goes through this session.
        self.session = session

    def get_chats(self) -> list[models.DbChatEntry]:
        """Return every row of the chat table."""
        statement = sqlmodel.select(models.DbChatEntry)
        return self.session.exec(statement).all()

    def get_pending_chats(self) -> list[models.DbChatEntry]:
        """Return the chats that hold a message request in the ``pending`` state."""
        has_request = is_not(models.DbChatEntry.pending_message_request, None)
        awaiting_worker = models.DbChatEntry.message_request_state == interface.MessageRequestState.pending
        statement = sqlmodel.select(models.DbChatEntry).where(has_request, awaiting_worker)
        return self.session.exec(statement).all()

    def get_chat_list(self) -> list[interface.ChatListEntry]:
        """Return the list-view entry of every chat."""
        return [db_chat.to_list_entry() for db_chat in self.get_chats()]

    def get_chat_by_id(self, id: str) -> models.DbChatEntry:
        """Fetch the chat row whose primary key equals ``id``.

        The lookup ends in ``.one()``, so an unknown id is reported through an
        exception from the query layer rather than a ``None`` return.
        """
        statement = sqlmodel.select(models.DbChatEntry).where(models.DbChatEntry.id == id)
        return self.session.exec(statement).one()

    def get_chat_entry_by_id(self, id: str) -> interface.ChatEntry:
        """Fetch chat ``id`` and convert it to its API representation."""
        db_chat = self.get_chat_by_id(id)
        return db_chat.to_entry()

    def create_chat(self) -> models.DbChatEntry:
        """Insert a new chat row built from the model defaults and return it."""
        new_chat = models.DbChatEntry()
        self.session.add(new_chat)
        self.session.commit()
        return new_chat

    def add_prompter_message(self, id: str, message_request: interface.MessageRequest) -> None:
        """Append the prompter's message to chat ``id`` and store it as the pending request.

        Raises:
            fastapi.HTTPException: status 400 when it is not the prompter's
                turn, or when the chat already has a pending request.
        """
        logger.info(f"Adding prompter message {message_request} to chat {id}")
        db_chat = self.get_chat_by_id(id)

        # Reject the message before touching the conversation.
        prompter_may_speak = db_chat.conversation.is_prompter_turn
        if not prompter_may_speak:
            raise fastapi.HTTPException(status_code=400, detail="Not your turn")
        if db_chat.pending_message_request is not None:
            raise fastapi.HTTPException(status_code=400, detail="Already pending")

        prompter_message = protocol.ConversationMessage(
            text=message_request.message,
            is_assistant=False,
        )
        db_chat.conversation.messages.append(prompter_message)

        # Keep the request and mark it pending (the state get_pending_chats filters on).
        db_chat.pending_message_request = message_request
        db_chat.message_request_state = interface.MessageRequestState.pending
        self.session.commit()
        logger.debug(f"Added prompter message {message_request} to chat {id}")

    def add_assistant_message(self, id: str, text: str) -> None:
        """Append an assistant reply to chat ``id`` and clear its pending request."""
        logger.info(f"Adding assistant message {text} to chat {id}")
        db_chat = self.get_chat_by_id(id)

        assistant_message = protocol.ConversationMessage(
            text=text,
            is_assistant=True,
        )
        db_chat.conversation.messages.append(assistant_message)

        # The request has been answered; the state column is left untouched here.
        db_chat.pending_message_request = None
        self.session.commit()
        logger.debug(f"Added assistant message {text} to chat {id}")

    def set_chat_state(self, id: str, state: interface.MessageRequestState) -> None:
        """Overwrite the message-request state of chat ``id`` and commit."""
        logger.info(f"Setting chat {id} state to {state}")
        db_chat = self.get_chat_by_id(id)
        db_chat.message_request_state = state
        self.session.commit()
        logger.debug(f"Set chat {id} state to {state}")
|
||||
@@ -0,0 +1,41 @@
|
||||
import json
|
||||
|
||||
import pydantic.json
|
||||
import sqlmodel
|
||||
from loguru import logger
|
||||
from oasst_inference_server import models
|
||||
from oasst_inference_server.settings import settings
|
||||
|
||||
|
||||
def default_json_serializer(obj):
    """Encode ``obj`` for ``json.dumps``, tagging dict results with the source class name.

    Used as the ``default=`` hook of ``json.dumps``, so it is only called for
    values the standard encoder cannot handle.

    Args:
        obj: The value to encode.

    Returns:
        Whatever ``pydantic.json.pydantic_encoder`` produces for ``obj``. When
        that result is a dict, it additionally carries a ``"_classname_"`` key
        holding ``obj``'s class name.

    Raises:
        TypeError: propagated from ``pydantic_encoder`` when it has no encoder
            for ``obj``.
    """
    class_name = obj.__class__.__name__
    encoded = pydantic.json.pydantic_encoder(obj)
    # pydantic_encoder returns non-dict values for some types (e.g. a string
    # for a datetime); those cannot carry a tag, and item assignment on them
    # would raise, so only dict results are tagged.
    if isinstance(encoded, dict):
        encoded["_classname_"] = class_name
    return encoded
|
||||
|
||||
|
||||
def custom_json_serializer(obj):
    """Dump ``obj`` to a JSON string, sending non-native values through ``default_json_serializer``."""
    fallback_encoder = default_json_serializer
    return json.dumps(obj, default=fallback_encoder)
|
||||
|
||||
|
||||
def custom_json_deserializer(s):
    """Parse the JSON string ``s`` and rebuild a model when the payload is tagged.

    Non-dict payloads and dicts without a ``"_classname_"`` key are returned
    as decoded. A dict tagged ``"Conversation"`` or ``"MessageRequest"`` is
    parsed into the corresponding model.

    Raises:
        ValueError: when the ``"_classname_"`` tag names any other class.
    """
    decoded = json.loads(s)
    if not isinstance(decoded, dict):
        return decoded

    class_name = decoded.get("_classname_")
    if class_name == "Conversation":
        return models.protocol.Conversation.parse_obj(decoded)
    if class_name == "MessageRequest":
        return models.interface.MessageRequest.parse_obj(decoded)
    if class_name is None:
        # Untagged dict: hand it back unchanged.
        return decoded

    logger.error(f"Unknown class {decoded['_classname_']}")
    raise ValueError(f"Unknown class {decoded['_classname_']}")
|
||||
|
||||
|
||||
# Module-level engine for the configured database URI. JSON columns are
# written and read through the class-name-tagging (de)serializers above.
db_engine = sqlmodel.create_engine(
    settings.database_uri,
    json_deserializer=custom_json_deserializer,
    json_serializer=custom_json_serializer,
)
|
||||
@@ -0,0 +1,41 @@
|
||||
import enum
|
||||
|
||||
import pydantic
|
||||
from oasst_shared.schemas import inference, protocol
|
||||
|
||||
|
||||
class MessageRequest(pydantic.BaseModel):
    """A prompter message together with the generation settings requested for the reply."""

    # Required message text; excluded from the model's repr (repr=False).
    message: str = pydantic.Field(..., repr=False)
    # Name of the model that should produce the reply.
    model_name: str = "distilgpt2"
    # Upper bound on newly generated tokens.
    max_new_tokens: int = 100

    def compatible_with(self, worker_config: inference.WorkerConfig) -> bool:
        """Return True when ``worker_config`` names the same model as this request."""
        requested_model = self.model_name
        offered_model = worker_config.model_name
        return requested_model == offered_model
|
||||
|
||||
|
||||
class TokenResponseEvent(pydantic.BaseModel):
    """Envelope holding one token response under the ``token`` key."""

    # The wrapped token; presumably serialized as one streamed event - confirm against the route.
    token: inference.TokenResponse
|
||||
|
||||
|
||||
class MessageRequestState(str, enum.Enum):
    """States a chat's message request can be in; members are also plain strings."""

    # Stored but not yet completed; also used when no work was done before a worker dropped.
    pending = "pending"
    in_progress = "in_progress"
    # The worker delivered its final packet.
    complete = "complete"
    # The worker connection failed after some output had been received.
    aborted_by_worker = "aborted_by_worker"
|
||||
|
||||
|
||||
class CreateChatRequest(pydantic.BaseModel):
    """Request body for creating a chat; it currently declares no fields."""
|
||||
|
||||
|
||||
class ChatListEntry(pydantic.BaseModel):
    """Summary of a chat as it appears in a chat listing: only its id."""

    # Identifier of the chat.
    id: str
|
||||
|
||||
|
||||
class ChatEntry(pydantic.BaseModel):
    """Full view of one chat: its id plus the conversation so far."""

    # Identifier of the chat.
    id: str
    # The messages exchanged in this chat.
    conversation: protocol.Conversation
|
||||
|
||||
|
||||
class ListChatsResponse(pydantic.BaseModel):
    """Response body carrying the list entries of the known chats."""

    # One summary entry per chat.
    chats: list[ChatListEntry]
|
||||
@@ -0,0 +1,23 @@
|
||||
from uuid import uuid4
|
||||
|
||||
import sqlalchemy as sa
|
||||
import sqlalchemy.dialects.postgresql as pg
|
||||
from oasst_inference_server import interface
|
||||
from oasst_shared.schemas import protocol
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
|
||||
class DbChatEntry(SQLModel, table=True):
    """Row of the ``chat`` table: a conversation plus its outstanding message request."""

    __tablename__ = "chat"

    # Primary key; a fresh UUID4 string unless one is supplied.
    id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)

    # The three fields below are each stored in a JSONB column.
    # Defaults to a new, empty Conversation.
    conversation: protocol.Conversation = Field(default_factory=protocol.Conversation, sa_column=sa.Column(pg.JSONB))
    # The request waiting for an assistant reply, or None.
    pending_message_request: interface.MessageRequest | None = Field(None, sa_column=sa.Column(pg.JSONB))
    # Processing state of that request, or None.
    message_request_state: interface.MessageRequestState | None = Field(None, sa_column=sa.Column(pg.JSONB))

    def to_list_entry(self) -> interface.ChatListEntry:
        """Return the listing summary for this chat (id only)."""
        chat_id = self.id
        return interface.ChatListEntry(id=chat_id)

    def to_entry(self) -> interface.ChatEntry:
        """Return the API view of this chat: id and conversation."""
        entry_fields = {"id": self.id, "conversation": self.conversation}
        return interface.ChatEntry(**entry_fields)
|
||||
@@ -0,0 +1,38 @@
|
||||
from typing import Any
|
||||
|
||||
import pydantic
|
||||
|
||||
|
||||
class Settings(pydantic.BaseSettings):
    """Inference-server configuration: Redis, SSE, Alembic and Postgres options."""

    # Redis connection
    redis_host: str = "localhost"
    redis_port: int = 6379
    redis_db: int = 0

    # NOTE(review): unit is not visible here - presumably milliseconds; confirm at the use site.
    sse_retry_timeout: int = 15000
    update_alembic: bool = True
    alembic_retries: int = 5
    alembic_retry_timeout: int = 1

    # Postgres connection parts; combined into database_uri when no URI is given.
    postgres_host: str = "localhost"
    postgres_port: str = "5432"
    postgres_user: str = "postgres"
    postgres_password: str = "postgres"
    postgres_db: str = "postgres"

    # A full connection URI; when not a string it is assembled from the postgres_* fields.
    database_uri: str | None = None

    @pydantic.validator("database_uri", pre=True)
    def assemble_db_connection(cls, v: str | None, values: dict[str, Any]) -> Any:
        """Return ``v`` when it is a string, else build a postgresql DSN from ``values``.

        ``values`` is read for the ``postgres_*`` fields declared above
        ``database_uri``.
        """
        if not isinstance(v, str):
            dsn_parts = {
                "scheme": "postgresql",
                "user": values.get("postgres_user"),
                "password": values.get("postgres_password"),
                "host": values.get("postgres_host"),
                "port": values.get("postgres_port"),
                "path": f"/{values.get('postgres_db') or ''}",
            }
            return pydantic.PostgresDsn.build(**dsn_parts)
        return v


# Module-level instance shared by importers of this module.
settings = Settings()
|
||||
@@ -1,6 +1,10 @@
|
||||
alembic
|
||||
fastapi[all]
|
||||
loguru
|
||||
prometheus-fastapi-instrumentator
|
||||
psycopg2-binary
|
||||
pydantic
|
||||
redis
|
||||
sqlmodel
|
||||
sse-starlette
|
||||
websockets
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Simple REPL frontend."""
|
||||
|
||||
import json
|
||||
import time
|
||||
|
||||
import requests
|
||||
import sseclient
|
||||
@@ -12,28 +13,37 @@ app = typer.Typer()
|
||||
@app.command()
|
||||
def main(backend_url: str = "http://127.0.0.1:8000"):
|
||||
"""Simple REPL client."""
|
||||
chat_id = requests.post(f"{backend_url}/chat", json={}).json()["id"]
|
||||
while True:
|
||||
message = typer.prompt("User").strip()
|
||||
try:
|
||||
chat_id = requests.post(f"{backend_url}/chat", json={}).json()["id"]
|
||||
typer.echo(f"Chat ID: {chat_id}")
|
||||
while True:
|
||||
message = typer.prompt("User").strip()
|
||||
|
||||
# wait for stream to be ready
|
||||
# could implement a queue position indicator
|
||||
# could be implemented with long polling
|
||||
# but server load needs to be considered
|
||||
response = requests.post(
|
||||
f"{backend_url}/chat/{chat_id}/message",
|
||||
json={"message": message},
|
||||
stream=True,
|
||||
headers={"Accept": "text/event-stream"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
# wait for stream to be ready
|
||||
# could implement a queue position indicator
|
||||
# could be implemented with long polling
|
||||
# but server load needs to be considered
|
||||
response = requests.post(
|
||||
f"{backend_url}/chat/{chat_id}/message",
|
||||
json={"message": message},
|
||||
stream=True,
|
||||
headers={"Accept": "text/event-stream"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
client = sseclient.SSEClient(response)
|
||||
print("Assistant: ", end="", flush=True)
|
||||
for event in client.events():
|
||||
data = json.loads(event.data)
|
||||
print(data["token"]["text"], end="", flush=True)
|
||||
print()
|
||||
client = sseclient.SSEClient(response)
|
||||
print("Assistant: ", end="", flush=True)
|
||||
for event in client.events():
|
||||
data = json.loads(event.data)
|
||||
print(data["token"]["text"], end="", flush=True)
|
||||
print()
|
||||
except typer.Abort:
|
||||
typer.echo("Exiting...")
|
||||
break
|
||||
except Exception:
|
||||
typer.echo("Error, restarting chat...")
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,25 +1,22 @@
|
||||
import json
|
||||
|
||||
import interface
|
||||
import rel
|
||||
import requests
|
||||
import sseclient
|
||||
import typer
|
||||
import utils
|
||||
import websocket
|
||||
from loguru import logger
|
||||
from oasst_shared.schemas import inference, protocol
|
||||
from settings import settings
|
||||
|
||||
app = typer.Typer()
|
||||
# touch
|
||||
|
||||
|
||||
@app.command()
|
||||
def main(
|
||||
backend_url: str = "ws://localhost:8000",
|
||||
model_name: str = "distilgpt2",
|
||||
inference_server_url: str = "http://localhost:8001",
|
||||
):
|
||||
def main():
|
||||
utils.wait_for_inference_server(settings.inference_server_url)
|
||||
|
||||
def on_open(ws: websocket.WebSocket):
|
||||
logger.info("Connected to backend, sending config...")
|
||||
worker_config = inference.WorkerConfig(model_name=model_name)
|
||||
worker_config = inference.WorkerConfig(model_name=settings.model_id)
|
||||
ws.send(worker_config.json())
|
||||
logger.info("Config sent, waiting for work...")
|
||||
|
||||
@@ -43,19 +40,12 @@ def main(
|
||||
|
||||
prompt = prefix + "\n".join(messages) + "\nAssistant:"
|
||||
|
||||
parameters = interface.GenerateStreamParameters.from_work_request(work_request)
|
||||
response = requests.post(
|
||||
f"{inference_server_url}/generate_stream",
|
||||
f"{settings.inference_server_url}/generate_stream",
|
||||
json={
|
||||
"inputs": prompt,
|
||||
"parameters": {
|
||||
"max_new_tokens": work_request.max_new_tokens,
|
||||
"do_sample": work_request.do_sample,
|
||||
"top_k": work_request.top_k,
|
||||
"top_p": work_request.top_p,
|
||||
"temperature": work_request.temperature,
|
||||
"seed": work_request.seed,
|
||||
# "stop": ["\nUser:", "\nAssistant:"], # TODO: make this a bit more workable because it's mutliple tokens
|
||||
},
|
||||
"parameters": parameters.dict(),
|
||||
},
|
||||
stream=True,
|
||||
headers={"Accept": "text/event-stream"},
|
||||
@@ -68,29 +58,39 @@ def main(
|
||||
return
|
||||
|
||||
client = sseclient.SSEClient(response)
|
||||
stream_response = None
|
||||
token_buffer = utils.TokenBuffer(stop_sequences=parameters.stop)
|
||||
for event in client.events():
|
||||
logger.debug(f"Received event: {event}")
|
||||
data = json.loads(event.data)
|
||||
if data["generated_text"]:
|
||||
break
|
||||
token = data["token"]
|
||||
stream_response = interface.GenerateStreamResponse.parse_raw(event.data)
|
||||
token = stream_response.token
|
||||
for send_token in token_buffer.add(token):
|
||||
ws.send(
|
||||
inference.WorkResponsePacket(
|
||||
token=send_token.to_token_response(),
|
||||
).json()
|
||||
)
|
||||
if stream_response is None:
|
||||
logger.error("No stream response received")
|
||||
return
|
||||
|
||||
for send_token in token_buffer.finish(reason=stream_response.details.finish_reason):
|
||||
ws.send(
|
||||
inference.WorkResponsePacket(
|
||||
token=inference.TokenResponse(
|
||||
text=token["text"],
|
||||
log_prob=token["logprob"],
|
||||
token_id=token["id"],
|
||||
)
|
||||
token=send_token.to_token_response(),
|
||||
).json()
|
||||
)
|
||||
|
||||
ws.send(
|
||||
inference.WorkResponsePacket(
|
||||
is_end=True,
|
||||
generated_text=inference.GeneratedTextResponse(
|
||||
text=data["generated_text"],
|
||||
text=stream_response.generated_text,
|
||||
finish_reason=stream_response.details.finish_reason,
|
||||
),
|
||||
).json()
|
||||
)
|
||||
logger.info("Work complete. Waiting for more work...")
|
||||
|
||||
def on_error(ws: websocket.WebSocket, error: Exception):
|
||||
try:
|
||||
@@ -102,7 +102,7 @@ def main(
|
||||
logger.warning(f"Connection closed: {close_status_code=} {close_msg=}")
|
||||
|
||||
ws = websocket.WebSocketApp(
|
||||
f"{backend_url}/work",
|
||||
f"{settings.backend_url}/work",
|
||||
on_message=on_message,
|
||||
on_error=on_error,
|
||||
on_close=on_close,
|
||||
@@ -115,4 +115,4 @@ def main(
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
main()
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
from typing import Literal
|
||||
|
||||
import pydantic
|
||||
from oasst_shared.schemas import inference
|
||||
|
||||
|
||||
class GenerateStreamParameters(pydantic.BaseModel):
    """Generation options for a streaming generation request.

    The sampling fields are optional; ``stop`` and ``details`` carry defaults.
    """

    max_new_tokens: int | None
    do_sample: bool | None
    top_k: int | None
    top_p: float | None
    temperature: float | None
    repetition_penalty: float | None
    seed: int | None
    stop: list[str] = ["\nUser:", "\nAssistant:"]  # TODO: make this a bit more workable because it's multiple tokens
    details: bool = True

    @staticmethod
    def from_work_request(work_request: inference.WorkRequest) -> "GenerateStreamParameters":
        """Copy the generation options of ``work_request``; ``stop`` and ``details`` keep their defaults."""
        copied_fields = (
            "max_new_tokens",
            "do_sample",
            "top_k",
            "top_p",
            "temperature",
            "repetition_penalty",
            "seed",
        )
        options = {field_name: getattr(work_request, field_name) for field_name in copied_fields}
        return GenerateStreamParameters(**options)
|
||||
|
||||
|
||||
class Token(pydantic.BaseModel):
    """One generated token: its text, log-probability and id."""

    text: str
    logprob: float
    id: int

    def __len__(self) -> int:
        """Return the number of characters in the token text."""
        token_text = self.text
        return len(token_text)

    def to_token_response(self) -> inference.TokenResponse:
        """Convert to the shared ``TokenResponse`` schema, renaming ``logprob`` and ``id``."""
        response_fields = {
            "text": self.text,
            "log_prob": self.logprob,
            "token_id": self.id,
        }
        return inference.TokenResponse(**response_fields)
|
||||
|
||||
|
||||
class StreamDetails(pydantic.BaseModel):
    """Summary attached to a stream response: token count, seed and finish reason."""

    generated_tokens: int
    seed: int | None
    # Restricted to these three literal values.
    finish_reason: Literal["length", "eos_token", "stop_sequence"]
|
||||
|
||||
|
||||
class GenerateStreamResponse(pydantic.BaseModel):
    """One streamed generation event: a token, plus optional final text and details."""

    token: Token
    # Optional; presumably only set on the final event - confirm against the server.
    generated_text: str | None
    details: StreamDetails | None
|
||||
@@ -1,6 +1,6 @@
|
||||
loguru
|
||||
pydantic
|
||||
rel
|
||||
requests
|
||||
sseclient-py
|
||||
typer
|
||||
websocket-client
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
import pydantic
|
||||
|
||||
|
||||
class Settings(pydantic.BaseSettings):
    """Worker configuration: backend address, model id and inference server address."""

    # Base websocket URL of the backend.
    backend_url: str = "ws://localhost:8000"
    # Identifier of the model this worker is configured for.
    model_id: str = "distilgpt2"
    # Base HTTP URL of the text-generation server.
    inference_server_url: str = "http://localhost:8001"


# Module-level instance shared by importers of this module.
settings = Settings()
|
||||
@@ -0,0 +1,62 @@
|
||||
import collections
|
||||
import random
|
||||
import time
|
||||
from typing import Literal
|
||||
|
||||
import interface
|
||||
import requests
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class TokenBuffer:
    """Hold back the newest tokens so that a trailing stop sequence can be dropped.

    Tokens are released from the front only once the text kept behind them is
    at least as long as the longest stop sequence. When generation ends,
    ``finish`` releases what is left, minus a stop sequence at the very end.

    Args:
        stop_sequences: Texts that may terminate generation.
    """

    def __init__(self, stop_sequences: list[str]) -> None:
        self.stop_sequences = stop_sequences
        # Minimum number of characters that must stay buffered (0 without stop sequences).
        self.longest_stop_len = max((len(stop) for stop in stop_sequences), default=0)
        self.tokens = collections.deque()
        # Parallel to ``tokens``: the length of each buffered token.
        self.token_lens = collections.deque()
        # Sum of ``token_lens``.
        self.total_len = 0

    # The annotation is quoted so it is not evaluated when the class is defined.
    def add(self, token: "interface.Token"):
        """Buffer ``token`` and yield the tokens that are now safe to send.

        This is a generator: the token is only buffered once iteration starts.
        """
        self.tokens.append(token)
        self.token_lens.append(len(token))
        self.total_len += len(token)
        while self.tokens:
            head_len = self.token_lens[0]
            # Release the head only if the remainder can still hold the longest stop sequence.
            if self.total_len - head_len < self.longest_stop_len:
                break
            released = self.tokens.popleft()
            self.token_lens.popleft()
            self.total_len -= head_len
            yield released

    def finish(self, reason: Literal["length", "eos_token", "stop_sequence"]):
        """Yield the remaining buffered tokens once generation has ended.

        For ``reason == "stop_sequence"`` the trailing tokens whose joined text
        equals one of the stop sequences are removed first. If no such run of
        tokens exists (for example when the stop text does not begin on a
        token boundary), nothing is removed and every buffered token is
        yielded, rather than discarding the whole buffer.
        """
        if reason == "stop_sequence":
            self._drop_trailing_stop_sequence()
        yield from self.tokens

    def _drop_trailing_stop_sequence(self) -> None:
        """Remove the shortest run of trailing tokens that spells a stop sequence, if any."""
        end_sequence = ""
        matched = 0
        for count, token in enumerate(reversed(self.tokens), start=1):
            end_sequence = token.text + end_sequence
            if end_sequence in self.stop_sequences:
                matched = count
                break
        # Mutate only after the scan, keeping the length bookkeeping in sync.
        for _ in range(matched):
            self.tokens.pop()
            self.total_len -= self.token_lens.pop()
|
||||
|
||||
|
||||
def wait_for_inference_server(inference_server_url: str, timeout: int = 600, request_timeout: float = 10.0):
    """Block until ``{inference_server_url}/health`` answers with a success status.

    Failed attempts are retried after a random pause of up to 10 seconds.

    Args:
        inference_server_url: Base URL of the inference server.
        timeout: Seconds after which a failed attempt is no longer retried.
        request_timeout: Seconds a single health request may take. Without it
            one stalled connection could block forever and defeat ``timeout``.

    Raises:
        requests.HTTPError, requests.ConnectionError, requests.Timeout: the
            error of the first failed attempt that happens after the deadline.
    """
    health_url = f"{inference_server_url}/health"
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        try:
            response = requests.get(health_url, timeout=request_timeout)
            response.raise_for_status()
        except (requests.HTTPError, requests.ConnectionError, requests.Timeout):
            if time.monotonic() > deadline:
                raise
            # Random pause so that several workers do not retry in lockstep.
            sleep_duration = random.uniform(0, 10)
            logger.warning(f"Inference server not ready. Retrying in {sleep_duration} seconds")
            time.sleep(sleep_duration)
        else:
            logger.info("Inference server is ready")
            break
|
||||
Executable
+9
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash

# Start the text-generation server and the worker side by side in the background.
text-generation-launcher &
/opt/miniconda/envs/worker/bin/python /worker &

# Block until the first of the two background jobs exits, then exit with its status.
wait -n
exit $?
|
||||
@@ -2,20 +2,22 @@
|
||||
|
||||
Trainer code based on huggingface. Compatible with deepspeed or accelerate
|
||||
|
||||
Requirements
|
||||
|
||||
```
|
||||
wandb
|
||||
evaluate
|
||||
datasets
|
||||
transformers
|
||||
torch==1.12
|
||||
```
|
||||
|
||||
Start training reward model
|
||||
Install Python requirements
|
||||
|
||||
```bash
|
||||
python trainer.py configs/electra-base-dis-webgpt.yml
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
Write or inherit a `configs/<config-name>.yml` file to store training
|
||||
configuration details.
|
||||
|
||||
> The configuration file must have _at least_ all the keys present in
|
||||
> [`configs/dummy.yml`](configs/dummy.yml)
|
||||
|
||||
Run training procedure
|
||||
|
||||
```bash
|
||||
python trainer.py configs/<config-name>.yml
|
||||
```
|
||||
|
||||
Additional axis labeling, this outputs a 4 summary quality evaluation metrics
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
model_name: X
|
||||
tokenizer_name: X
|
||||
max_length: X
|
||||
num_train_epochs: X
|
||||
warmup_steps: X
|
||||
scheduler: X
|
||||
learning_rate: X
|
||||
deepspeed: X
|
||||
fp16: X
|
||||
local_rank: X
|
||||
gradient_checkpointing: X
|
||||
gradient_accumulation_steps: X
|
||||
per_device_train_batch_size: X
|
||||
per_device_eval_batch_size: X
|
||||
weight_decay: X
|
||||
max_grad_norm: X
|
||||
eval_steps: X
|
||||
save_steps: X
|
||||
wandb_entity: X
|
||||
datasets:
|
||||
- X
|
||||
@@ -1,62 +1,18 @@
|
||||
# Train using supervised examples
|
||||
|
||||
Requirements
|
||||
## Requirements
|
||||
|
||||
```
|
||||
wandb
|
||||
evaluate
|
||||
datasets
|
||||
transformers
|
||||
torch
|
||||
```
|
||||
`pip install -r requirements.txt`
|
||||
|
||||
Start training reward model
|
||||
Start training SFT model
|
||||
|
||||
```bash
|
||||
python trainer.py --configs defaults galactica-125
|
||||
python trainer.py --configs defaults galactica-125m
|
||||
```
|
||||
|
||||
## Dataset
|
||||
|
||||
For now we only support webgpt and summary dataset from OpenAI. Once
|
||||
open-asisstant dataset are available it will be added here.
|
||||
|
||||
## Model
|
||||
|
||||
Normally you should be able to add new models in configs/config.yml
|
||||
|
||||
```
|
||||
your-model-name:
|
||||
learning_rate: 2e-6
|
||||
model_name: <huggingface model name>
|
||||
weight_decay: 0.01
|
||||
max_length: 812
|
||||
warmup_steps: 600
|
||||
gradient_checkpointing: false
|
||||
gradient_accumulation_steps: 5
|
||||
per_device_train_batch_size: 4
|
||||
per_device_eval_batch_size: 4
|
||||
```
|
||||
|
||||
```
|
||||
python trainer.py --configs defaults your-model-name
|
||||
```
|
||||
|
||||
However, if the model of your choice doesn't have pad_token, eos_token,
|
||||
sep_token, you have to update utils.py `get_tokenizer` to use the right token.
|
||||
|
||||
## Deepspeed support
|
||||
|
||||
You can edit the configs/zero_config.json and use any stage you wish. The
|
||||
current config uses zero-stage 3. For more details on how to setup the config
|
||||
checkout [this page](https://www.deepspeed.ai/tutorials/zero/)
|
||||
|
||||
Once you are satisfy with your deepzero config, you can add --deepspeed flag at
|
||||
the end to trigger deepspeed
|
||||
|
||||
```
|
||||
python trainer.py --configs defaults your-model-name --deepspeed
|
||||
```
|
||||
For `wandb`: update the `entity` argument in `trainer.py`'s call to `wandb.init`
|
||||
to be your weights and biases username per
|
||||
[docs](https://docs.wandb.ai/ref/python/init).
|
||||
|
||||
## Dataset choices
|
||||
|
||||
@@ -80,6 +36,74 @@ Currently only these languages are supported via prompt translation:
|
||||
ar,de,fr,en,it,nl,tr,ru,ms,ko,ja,zh
|
||||
```
|
||||
|
||||
## Dataset sub-sampling
|
||||
|
||||
We can subsample the **training** data by passing either the `fraction` or
|
||||
`size` argument in the `configs/config.yml` file. Don't forget the additional
|
||||
colon ":" after the dataset name when doing this.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
datasets:
|
||||
- webgpt:
|
||||
fraction : 0.05
|
||||
- prompt_dialogue:
|
||||
size : 500
|
||||
- adversarial_qa
|
||||
- trivia_qa_nocontext
|
||||
```
|
||||
|
||||
In this example, per epoch we will use:
|
||||
|
||||
- A random 5% of `webgpt`;
|
||||
- A random 500 examples from `prompt_dialogue`;
|
||||
- All examples from datasets for which we don't specify the `fraction` or `size`
|
||||
argument.
|
||||
|
||||
In the above example, per epoch we'll use a different 5% from `webgpt` and a
|
||||
different 500 examples from `prompt_dialogue`.
|
||||
|
||||
This works with `torch.distributed`.
|
||||
|
||||
## Model
|
||||
|
||||
Normally you should be able to add new models in `configs/config.yml`
|
||||
|
||||
```
|
||||
your-model-name:
|
||||
learning_rate: 2e-6
|
||||
model_name: <huggingface model name>
|
||||
weight_decay: 0.01
|
||||
max_length: 812
|
||||
warmup_steps: 600
|
||||
gradient_checkpointing: false
|
||||
gradient_accumulation_steps: 5
|
||||
per_device_train_batch_size: 4
|
||||
per_device_eval_batch_size: 4
|
||||
```
|
||||
|
||||
```
|
||||
python trainer.py --configs defaults your-model-name
|
||||
```
|
||||
|
||||
However, if the model of your choice doesn't have `pad_token`, `eos_token`,
|
||||
`sep_token`, you have to update `get_tokenizer` in `utils.py` to use the right
|
||||
token.
|
||||
|
||||
## Deepspeed support
|
||||
|
||||
You can edit the configs/zero_config.json and use any stage you wish. The
|
||||
current config uses zero-stage 3. For more details on how to set up the config
|
||||
check out [this page](https://www.deepspeed.ai/tutorials/zero/).
|
||||
|
||||
Once you are satisfied with your deepzero config, you can add the --deepspeed flag at
|
||||
the end to trigger deepspeed
|
||||
|
||||
```
|
||||
python trainer.py --configs defaults your-model-name --deepspeed
|
||||
```
|
||||
|
||||
## Results
|
||||
|
||||
Experimental results in wandb
|
||||
@@ -87,7 +111,7 @@ Experimental results in wandb
|
||||
|
||||
## TODOS
|
||||
|
||||
- decide on a model
|
||||
- Decide on a model
|
||||
- Merge utils etc with reward model
|
||||
- Causal Modelling for GPT-JT does not leverage the bidirectional mask for the
|
||||
prompt? (https://huggingface.co/togethercomputer/GPT-JT-6B-v1)
|
||||
|
||||
@@ -17,7 +17,7 @@ defaults:
|
||||
freeze_layer:
|
||||
datasets:
|
||||
- webgpt
|
||||
- prompt_dialogue
|
||||
# - prompt_dialogue
|
||||
- squad_v2
|
||||
- adversarial_qa
|
||||
- trivia_qa_nocontext
|
||||
|
||||
@@ -222,7 +222,7 @@ class SODA(Dataset):
|
||||
|
||||
return pairs
|
||||
|
||||
def __init__(self, cache_dir, max_sample_size=10000, input_max_length=1024) -> None:
|
||||
def __init__(self, cache_dir, input_max_length=1024) -> None:
|
||||
super().__init__()
|
||||
|
||||
self.pairs = []
|
||||
@@ -233,9 +233,6 @@ class SODA(Dataset):
|
||||
if len(prompt) < input_max_length:
|
||||
self.pairs.append((prompt, answer))
|
||||
|
||||
if len(self.pairs) > max_sample_size:
|
||||
break
|
||||
|
||||
def __len__(self):
|
||||
return len(self.pairs)
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ datasets==2.8.0
|
||||
deepspeed==0.7.7
|
||||
evaluate==0.4.0
|
||||
gdown
|
||||
mpi4py==3.1.4
|
||||
nltk==3.8.1
|
||||
numpy>=1.22.4
|
||||
py7zr
|
||||
@@ -12,3 +11,4 @@ PyYAML>=6.0
|
||||
scikit_learn==1.2.0
|
||||
torch>=1.11.0
|
||||
transformers==4.25.1
|
||||
wandb
|
||||
|
||||
@@ -1,9 +1,28 @@
|
||||
from argparse import Namespace
|
||||
|
||||
from utils import get_tokenizer
|
||||
import pytest
|
||||
from utils import TOKENIZER_CONFIGS, get_tokenizer, match_tokenizer_name
|
||||
|
||||
|
||||
def test_tokenizer():
    """Smoke test: building a tokenizer for each listed model name must not raise."""
    model_names = ("Salesforce/codegen-2B-multi", "facebook/galactica-1.3b", "")
    for model_name in model_names:
        get_tokenizer(Namespace(model_name=model_name, cache_dir=".cache"))
|
||||
|
||||
|
||||
def test_tokenizer_successful_match():
    """Every key of TOKENIZER_CONFIGS must resolve to its own config."""
    for name, expected_config in TOKENIZER_CONFIGS.items():
        resolved = match_tokenizer_name(name)
        assert resolved == expected_config
|
||||
|
||||
|
||||
def test_tokenizer_partial_match():
    """Full hub model names must still resolve to some (truthy) tokenizer config."""
    hub_names = ["facebook/galactica-1.3b", "togethercomputer/GPT-JT-6B-v1", "Salesforce/codegen-2B-multi"]
    for hub_name in hub_names:
        resolved = match_tokenizer_name(hub_name)
        assert resolved
|
||||
|
||||
|
||||
def test_tokenizer_failed_match():
    """Names that match no config must make match_tokenizer_name raise ValueError."""
    unknown_names = ["not-a-model", "fake"]
    for unknown_name in unknown_names:
        with pytest.raises(ValueError):
            match_tokenizer_name(unknown_name)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import argparse
|
||||
from distutils.util import strtobool
|
||||
from functools import partial
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import bitsandbytes
|
||||
import datasets
|
||||
@@ -14,7 +14,7 @@ from transformers.trainer_pt_utils import IterableDatasetShard
|
||||
from transformers.trainer_utils import seed_worker
|
||||
from transformers.training_args import OptimizerNames
|
||||
from transformers.utils import is_datasets_available
|
||||
from utils import get_dataset, get_loss, get_metrics, get_model, get_tokenizer, read_yamls
|
||||
from utils import PerDatasetSampler, get_dataset, get_loss, get_metrics, get_model, get_tokenizer, read_yamls
|
||||
|
||||
|
||||
def compute_metrics(eval_pred, preprocess_fns, metrics):
|
||||
@@ -36,7 +36,7 @@ class SFTTrainer(Trainer):
|
||||
self,
|
||||
model: Union[PreTrainedModel, nn.Module] = None,
|
||||
args: TrainingArguments = None,
|
||||
train_collate_fn: Callable = None,
|
||||
sampler: torch.utils.data.sampler.Sampler = None,
|
||||
loss_function: str = "CrossEntropyLoss",
|
||||
poly_eps: float = 1.0,
|
||||
**kwargs,
|
||||
@@ -45,6 +45,7 @@ class SFTTrainer(Trainer):
|
||||
self.train_collate_fn = train_collate_fn
|
||||
# By default CrossEntropyLoss ignores padding_index -100, but just in case use our own loss_fct
|
||||
self.loss_fct = get_loss(loss_function, poly_eps)
|
||||
self.sampler = sampler
|
||||
|
||||
def compute_loss(self, model, inputs, return_outputs=False):
|
||||
labels_mask = inputs.pop("label_masks")
|
||||
@@ -95,24 +96,22 @@ class SFTTrainer(Trainer):
|
||||
|
||||
return (loss, logits, labels)
|
||||
|
||||
def get_train_dataloader(self) -> DataLoader:
|
||||
def get_train_dataloader(self):
|
||||
"""
|
||||
Returns the training [`~torch.utils.data.DataLoader`].
|
||||
Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed
|
||||
training if necessary) otherwise.
|
||||
Subclass and override this method if you want to inject some custom behavior.
|
||||
"""
|
||||
if self.train_dataset is None:
|
||||
raise ValueError("Trainer: training requires a train_dataset.")
|
||||
Inject custom data sampling behaviour into training loop
|
||||
and use custom task mixing collate function : train_collate_fn
|
||||
|
||||
train_dataset = self.train_dataset
|
||||
rewrite from:
|
||||
https://github.com/huggingface/transformers/blob/67d074874d285e616393c65a0e670088e1b6b74a/src/transformers/trainer.py#L846
|
||||
"""
|
||||
data_collator = self.train_collate_fn
|
||||
train_dataset = self.train_dataset
|
||||
if is_datasets_available() and isinstance(train_dataset, datasets.Dataset):
|
||||
train_dataset = self._remove_unused_columns(train_dataset, description="training")
|
||||
else:
|
||||
data_collator = self._get_collator_with_removed_columns(data_collator, description="training")
|
||||
|
||||
if isinstance(train_dataset, torch.utils.data.IterableDataset):
|
||||
# if we are using iterable dataset it means no weight sampling
|
||||
# added for backward compat
|
||||
if self.args.world_size > 1:
|
||||
train_dataset = IterableDatasetShard(
|
||||
train_dataset,
|
||||
@@ -121,7 +120,6 @@ class SFTTrainer(Trainer):
|
||||
num_processes=self.args.world_size,
|
||||
process_index=self.args.process_index,
|
||||
)
|
||||
|
||||
return DataLoader(
|
||||
train_dataset,
|
||||
batch_size=self.args.per_device_train_batch_size,
|
||||
@@ -129,8 +127,10 @@ class SFTTrainer(Trainer):
|
||||
num_workers=self.args.dataloader_num_workers,
|
||||
pin_memory=self.args.dataloader_pin_memory,
|
||||
)
|
||||
|
||||
train_sampler = self._get_train_sampler()
|
||||
if self.sampler is None:
|
||||
train_sampler = self._get_train_sampler()
|
||||
else:
|
||||
train_sampler = self.sampler
|
||||
|
||||
return DataLoader(
|
||||
train_dataset,
|
||||
@@ -194,10 +194,9 @@ if __name__ == "__main__":
|
||||
|
||||
tokenizer = get_tokenizer(training_conf)
|
||||
model = get_model(training_conf, tokenizer)
|
||||
|
||||
train, evals, collate_fn, train_collate_fn = get_dataset(training_conf, tokenizer)
|
||||
sampler = PerDatasetSampler.build_sampler_from_config(training_conf, train.datasets)
|
||||
metrics, preprocess_fns = get_metrics(training_conf, tokenizer)
|
||||
|
||||
optimizer = OptimizerNames.ADAMW_BNB if training_conf.quantization else OptimizerNames.ADAMW_HF
|
||||
|
||||
if training_conf.quantization:
|
||||
@@ -235,7 +234,6 @@ if __name__ == "__main__":
|
||||
)
|
||||
|
||||
assert len(evals) > 0
|
||||
|
||||
if not training_conf.deepspeed or training_conf.local_rank == 0:
|
||||
import wandb
|
||||
|
||||
@@ -246,8 +244,9 @@ if __name__ == "__main__":
|
||||
)
|
||||
|
||||
trainer = SFTTrainer(
|
||||
model,
|
||||
args,
|
||||
model=model,
|
||||
args=args,
|
||||
sampler=sampler,
|
||||
train_collate_fn=train_collate_fn,
|
||||
loss_function=training_conf.loss_fn,
|
||||
poly_eps=training_conf.poly_eps,
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
# from functools import partial
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
from typing import List, NamedTuple
|
||||
|
||||
import evaluate
|
||||
|
||||
# import nltk
|
||||
# import numpy as np
|
||||
import transformers
|
||||
import yaml
|
||||
from custom_datasets import get_one_dataset
|
||||
@@ -15,6 +12,79 @@ from losses import CrossEntropyLoss, PolyLoss
|
||||
from models import freeze_top_n_layers, get_specific_model
|
||||
from sklearn.model_selection import train_test_split
|
||||
from torch.utils.data import ConcatDataset, Subset
|
||||
from torch.utils.data.sampler import Sampler
|
||||
|
||||
|
||||
class PerDatasetSampler(Sampler):
    """Sampler which returns a fixed number of samples per dataset, per epoch.

    Example:

    Dataset 1 has 10,000 examples and we want 200 per epoch
    Dataset 2 has 500 examples and we want all 500 per epoch

    Epoch size will be 700 and every epoch we'll sample a different
    200 from dataset 1.

    Parameters
    ----------
    dataset_sizes : List[int]
        A list with the size of each dataset.
    dataset_size_per_epoch : List[int]
        How many examples to get from each dataset per epoch.

    Raises
    ------
    ValueError
        If the two lists differ in length, or if a per-epoch count is
        negative or larger than the size of its dataset.

    Note: dataset_sizes & dataset_size_per_epoch must be in the same order.
    Further the examples in the underlying torch.utils.data.Dataset
    must be ordered as dataset_1, dataset_2, ..., dataset_n. This is fine
    if we concatenate a bunch of datasets together
    e.g. using torch.utils.data.ConcatDataset which is current behaviour.
    """

    def __init__(self, dataset_sizes: List[int], dataset_size_per_epoch: List[int]):
        # Validate eagerly: otherwise a bad configuration only fails once the
        # sampler is iterated, with an error unrelated to the configuration.
        if len(dataset_sizes) != len(dataset_size_per_epoch):
            raise ValueError(
                "dataset_sizes and dataset_size_per_epoch must have the same length, "
                f"got {len(dataset_sizes)} and {len(dataset_size_per_epoch)}"
            )
        for size, per_epoch in zip(dataset_sizes, dataset_size_per_epoch):
            if not 0 <= per_epoch <= size:
                raise ValueError(
                    f"Cannot draw {per_epoch} samples per epoch from a dataset with {size} examples"
                )

        self.dataset_sizes = dataset_sizes
        self.dataset_size_per_epoch = dataset_size_per_epoch
        self.num_datasets = len(dataset_sizes)

    def __iter__(self):
        """Return an iterator over a freshly drawn, shuffled list of indices for one epoch."""
        epoch_idx = []
        offset = 0  # index of the current dataset's first example in the concatenated dataset
        for size, per_epoch in zip(self.dataset_sizes, self.dataset_size_per_epoch):
            # Sample without replacement inside this dataset's index range.
            epoch_idx.extend(random.sample(range(offset, offset + size), per_epoch))
            offset += size

        # Mix the datasets so an epoch is not ordered dataset by dataset.
        random.shuffle(epoch_idx)
        return iter(epoch_idx)

    def __len__(self):
        """Return the number of indices produced per epoch."""
        return int(sum(self.dataset_size_per_epoch))

    @staticmethod
    def _samples_per_epoch(size, fraction):
        """Convert a fraction of a dataset into a sample count within [0, size].

        Rounds instead of truncating: a fraction derived from an absolute
        size (size / dataset_size) can land just below the intended integer,
        e.g. 100 * (29 / 100) == 28.999999999999996.
        """
        return min(size, max(0, round(size * fraction)))

    @classmethod
    def build_sampler_from_config(cls, training_conf, datasets):
        """Build a sampler from `training_conf.datasets` and the matching datasets.

        `datasets` must be in the same order as the entries of
        `training_conf.datasets`; only `len()` of each dataset is used.
        """
        dataset_sizes = [len(x) for x in datasets]
        fractions = get_dataset_fractions(training_conf.datasets, dataset_sizes)
        dataset_size_per_epoch = [cls._samples_per_epoch(size, frac) for size, frac in zip(dataset_sizes, fractions)]
        return cls(dataset_sizes, dataset_size_per_epoch)
|
||||
|
||||
|
||||
def get_dataset_fractions(conf, dataset_sizes):
    """Calculate fraction of each dataset to use per epoch when subsampling.

    Parameters
    ----------
    conf
        The `datasets` section of the training config. Each entry is either
        a non-dict value (the whole dataset is used) or a dict keyed by the
        dataset name whose value holds a "fraction" or a "size" option.
    dataset_sizes
        Number of examples in each dataset, in the same order as `conf`.

    Returns
    -------
    list
        One fraction per entry of `conf`; a "fraction" above 1 is clamped to 1.

    Raises
    ------
    ValueError
        If "fraction" is not positive, if "size" is negative or exceeds the
        dataset size, or if a dict entry has neither option.
    """
    fractions = []
    for i, data_config in enumerate(conf):
        if not isinstance(data_config, dict):
            # Entry without options: use the full dataset every epoch.
            fractions.append(1)
            continue

        dataset_name = get_dataset_name_from_data_config(data_config)
        options = data_config[dataset_name]

        if "fraction" in options:
            fraction = options["fraction"]
            if fraction <= 0:
                raise ValueError("Please specify fraction as a value between 0 < fraction <= 1")
            fractions.append(min(1, fraction))
        elif "size" in options:
            size = options["size"]
            # A negative size would become a negative fraction and only fail
            # later, at sampling time, with an unrelated error.
            if size < 0:
                raise ValueError(f"Please specify a non-negative size, got: {size}")
            if size > dataset_sizes[i]:
                raise ValueError(f"Please specify a size smaller than number of examples: {dataset_sizes[i]:,.0f}")
            fractions.append(size / dataset_sizes[i])
        else:
            raise ValueError("Please specify either fraction or size in config.yaml. See README for instructions.")
    return fractions
|
||||
|
||||
|
||||
class SpecialTokens(NamedTuple):
|
||||
@@ -36,7 +106,10 @@ TOKENIZER_CONFIGS = {
|
||||
|
||||
|
||||
def match_tokenizer_name(model_name: str) -> TokenizerConfig:
|
||||
"""Match a partial model name to a tokenizer configuration"""
|
||||
"""
|
||||
Match a partial model name to a tokenizer configuration
|
||||
i.e. model_name `Salesforce/codegen-2B-multi` has config name `codegen`
|
||||
"""
|
||||
tokenizer_config_matches = [config for name, config in TOKENIZER_CONFIGS.items() if name in model_name]
|
||||
if not tokenizer_config_matches:
|
||||
raise ValueError(f"Cannot find any tokeniser configuration to match {model_name=}")
|
||||
@@ -140,10 +213,17 @@ def get_model(conf, tokenizer):
|
||||
return model
|
||||
|
||||
|
||||
def get_dataset_name_from_data_config(data_config):
    """Return the dataset name for one entry of the datasets config.

    A dict entry is keyed by the dataset name, so its first key is returned;
    any other value is taken to be the name itself and returned unchanged.
    """
    if not isinstance(data_config, dict):
        return data_config
    names = list(data_config.keys())
    return names[0]
|
||||
|
||||
|
||||
def get_dataset(conf, tokenizer):
|
||||
train_datasets, evals = [], {}
|
||||
|
||||
for dataset_name in conf.datasets:
|
||||
for data_config in conf.datasets:
|
||||
dataset_name = get_dataset_name_from_data_config(data_config)
|
||||
train, val = get_one_dataset(conf, dataset_name)
|
||||
train_datasets.append(train)
|
||||
evals[dataset_name] = Subset(val, list(range(min(len(val), conf.eval_size)))) if conf.eval_size else val
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
## Dataset Summary
|
||||
|
||||
The dataset was created using
|
||||
[Cornell Movies Dialog Corpus](https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html)
|
||||
which contains a large metadata-rich collection of fictional conversations
|
||||
extracted from raw movie scripts. Dialogs and meta-data from the underlying
|
||||
Corpus were used to design a dataset that can be used to train InstructGPT-based
|
||||
models to learn movie scripts.
|
||||
|
||||
Example :
|
||||
|
||||
```
|
||||
User: Assume RICK and ALICE are characters from a fantasy-horror movie, continue the conversation between them
|
||||
RICK: I heard you screaming. Was it a bad one?
|
||||
ALICE: It was bad.
|
||||
RICK: Doesn't the dream master work for you anymore?
|
||||
Assistant: Sure
|
||||
ALICE: I can't find him.
|
||||
RICK: Hey, since when do you play Thomas Edison? This looks like Sheila's.
|
||||
ALICE: It is...was. It's a zapper, it might help me stay awake.
|
||||
RICK: Yeah, or turn you into toast.
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
|
||||
from datasets import load_dataset
|
||||
dataset = load_dataset("shahules786/OA-cornell-movies-dialog")
|
||||
```
|
||||
|
||||
## Citations
|
||||
|
||||
```
|
||||
@InProceedings{Danescu-Niculescu-Mizil+Lee:11a,
|
||||
author={Cristian Danescu-Niculescu-Mizil and Lillian Lee},
|
||||
title={Chameleons in imagined conversations:
|
||||
A new approach to understanding coordination of linguistic style in dialogs.},
|
||||
booktitle={Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics, ACL 2011},
|
||||
year={2011}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,649 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec8d6189",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/movie-dialogs/convert-to-instruction-format.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "493f2529",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "65a47f83",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datasets import load_dataset\n",
|
||||
"import numpy as np\n",
|
||||
"import json\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"IMDB = 7.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "480440f6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dialog templates\n",
|
||||
"Templates for converting dialogs to prompts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "fcfedd7f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DIALOG_TEMPLATES = {\n",
|
||||
" ### template for 4+ line dialogs\n",
|
||||
" \"four_more_lines\": [\n",
|
||||
" \"\"\"\n",
|
||||
"Here's a {template} between {char1} and {char2} in a scene from a {genre} movie\n",
|
||||
" {dialogue1}\n",
|
||||
"User : Can you continue the {template}\n",
|
||||
"Assistant : Sure, the next dialogue for this scene could be\n",
|
||||
" {dialogue2}\n",
|
||||
" \"\"\",\n",
|
||||
" \"\"\"\n",
|
||||
" {dialogue1}\n",
|
||||
"User : Can you provide more dialog assuming {genre} movie\n",
|
||||
" {dialogue2}\n",
|
||||
"\"\"\",\n",
|
||||
" \"\"\"\n",
|
||||
"I'm trying to complete the dialog for my characters {char1} and {char2}. Here's the {template}, Please help me complete it\n",
|
||||
" {dialogue1}\n",
|
||||
"Assistant : Sure\n",
|
||||
" {dialogue2}\n",
|
||||
"\"\"\",\n",
|
||||
" \"\"\"\n",
|
||||
"User : Assume {char1} and {char2} are characters from a {genre} movie, continue the conversation between them\n",
|
||||
" {dialogue1}\n",
|
||||
"Assistant : Sure\n",
|
||||
" {dialogue2}\n",
|
||||
"\"\"\",\n",
|
||||
" ],\n",
|
||||
" ## template for 4 line dialogs\n",
|
||||
" \"four_lines\": [\n",
|
||||
" \"\"\"\n",
|
||||
" {dialogue1}\n",
|
||||
"User : provide a response assuming you're {char2}\n",
|
||||
"Assistant : Sure\n",
|
||||
" {dialogue2}\n",
|
||||
"\"\"\",\n",
|
||||
" \"\"\"\n",
|
||||
" {dialogue1}\n",
|
||||
"User : respond as {char2} to complete the conversation\n",
|
||||
"Assistant : Sure\n",
|
||||
" {dialogue2}\n",
|
||||
"\"\"\",\n",
|
||||
" ],\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2047056e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- Download Cornell-movies dialog dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "e413a053",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! wget wget https://zissou.infosci.cornell.edu/convokit/datasets/movie-corpus/movie-corpus.zip\n",
|
||||
"! unzip movie-corpus.zip -d ./Data/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e2aab0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "25cae04e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_movie_dialogs():\n",
|
||||
"\n",
|
||||
" with open(\"./Data/movie-corpus/utterances.jsonl\", \"r\") as json_file:\n",
|
||||
" conversations = list(json_file)\n",
|
||||
" speakers = json.load(open(\"./Data/movie-corpus/speakers.json\"))\n",
|
||||
" movie_dialog_dict = {}\n",
|
||||
" for dialog in tqdm(conversations):\n",
|
||||
" dialog = eval(dialog.replace(\"null\", \"None\"))\n",
|
||||
" movie_dialog_dict[dialog[\"id\"]] = {\n",
|
||||
" \"characterName\": speakers[dialog[\"speaker\"]][\"meta\"][\"character_name\"],\n",
|
||||
" \"text\": dialog[\"text\"],\n",
|
||||
" \"characterID\": dialog[\"speaker\"],\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" return movie_dialog_dict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "3b949bc7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_dialogs(dialog_dict, start, end):\n",
|
||||
"\n",
|
||||
" dialog_list = []\n",
|
||||
" for idx in range(start, end + 1):\n",
|
||||
" dialog_list.append(dialog_dict[f\"L{idx}\"][\"characterName\"] + \": \" + dialog_dict[f\"L{idx}\"][\"text\"])\n",
|
||||
" num_lines = len(dialog_list)\n",
|
||||
"\n",
|
||||
" assert num_lines >= 1, \"Number of lines should be greater than one\"\n",
|
||||
"\n",
|
||||
" if num_lines < 6:\n",
|
||||
" dialog1 = \"\\n \".join(dialog_list[:-1])\n",
|
||||
" dialog2 = dialog_list[-1]\n",
|
||||
" else:\n",
|
||||
" dialog_len = np.random.randint(3, (num_lines // 2) + 1)\n",
|
||||
" dialog1 = \"\\n \".join(dialog_list[:dialog_len])\n",
|
||||
" dialog2 = \"\\n \".join(dialog_list[dialog_len:])\n",
|
||||
"\n",
|
||||
" return dialog1, dialog2\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def choose_prompt(num_lines):\n",
|
||||
"\n",
|
||||
" assert num_lines >= 1, \"Number of lines should be greater than one\"\n",
|
||||
"\n",
|
||||
" if num_lines < 6:\n",
|
||||
" prompt = np.random.choice(DIALOG_TEMPLATES[\"four_lines\"])\n",
|
||||
"\n",
|
||||
" else:\n",
|
||||
" prompt = np.random.choice(DIALOG_TEMPLATES[\"four_more_lines\"])\n",
|
||||
"\n",
|
||||
" return prompt\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def convert_to_prompts(dataset, movie_dialog_dict, output_dir=\".\", split=\"train\"):\n",
|
||||
"\n",
|
||||
" with open(f\"{output_dir}/{split}.jsonl\", \"w\", encoding=\"utf8\") as output:\n",
|
||||
"\n",
|
||||
" i = 0\n",
|
||||
" while i < len(dataset[\"train\"]):\n",
|
||||
"\n",
|
||||
" data = dataset[split][i]\n",
|
||||
" if float(data[\"movieIMDBRating\"].strip()) >= IMDB:\n",
|
||||
" max_lines = np.random.randint(7, 12)\n",
|
||||
" lineids = [int(lineid[1:]) for lineid in data[\"utterance\"][\"LineID\"]]\n",
|
||||
" num_lines = len(lineids)\n",
|
||||
" char_ids = sorted([data[\"characterID1\"].strip(), data[\"characterID1\"].strip()])\n",
|
||||
" while num_lines < max_lines:\n",
|
||||
" i += 1\n",
|
||||
" data = dataset[split][i]\n",
|
||||
" char_id_new = sorted([data[\"characterID1\"].strip(), data[\"characterID1\"].strip()])\n",
|
||||
" ## make sure that characters are the same\n",
|
||||
" if char_id_new == char_ids:\n",
|
||||
" lineids_new = [int(lineid[1:]) for lineid in data[\"utterance\"][\"LineID\"]]\n",
|
||||
" if lineids_new[0] == (lineids[-1] + 1): ##ensure continuety\n",
|
||||
" lineids.extend(lineids_new)\n",
|
||||
" else:\n",
|
||||
" break\n",
|
||||
" else:\n",
|
||||
" break\n",
|
||||
" num_lines = len(lineids)\n",
|
||||
"\n",
|
||||
" genre = \"-\".join(data[\"movieGenres\"][:2])\n",
|
||||
" template = np.random.choice([\"dialog\", \"script\", \"play\"])\n",
|
||||
" char1 = movie_dialog_dict[f\"L{lineids[0]}\"][\"characterName\"]\n",
|
||||
"\n",
|
||||
" if num_lines < 6:\n",
|
||||
" if num_lines % 2 == 0:\n",
|
||||
" char2 = movie_dialog_dict[f\"L{lineids[1]}\"][\"characterName\"]\n",
|
||||
" else:\n",
|
||||
" char2 = char1\n",
|
||||
" else:\n",
|
||||
" char2 = movie_dialog_dict[f\"L{lineids[1]}\"][\"characterName\"]\n",
|
||||
"\n",
|
||||
" dialogue1, dialogue2 = get_dialogs(movie_dialog_dict, lineids[0], lineids[-1])\n",
|
||||
" prompt = choose_prompt(num_lines)\n",
|
||||
"\n",
|
||||
" prompt = prompt.format(\n",
|
||||
" char1=char1, char2=char2, dialogue1=dialogue1, dialogue2=dialogue2, genre=genre, template=template\n",
|
||||
" )\n",
|
||||
" output.write(f\"{json.dumps({'conversation': prompt})}\\n\")\n",
|
||||
" i += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "3ff310fd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|███████████████████████████████| 304713/304713 [00:54<00:00, 5628.12it/s]\n",
|
||||
"Found cached dataset cornell_movie_dialog (/home/shahul/.cache/huggingface/datasets/cornell_movie_dialog/default/0.1.0/b67b3433cf894b551cddcd82efdff0826f39b39a11d5c149e746a546a8dc85f3)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6fee977c69a3403ebe77c4669fcb25d7",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"movie_dialog_dict = get_movie_dialogs()\n",
|
||||
"dataset = load_dataset(\"cornell_movie_dialog\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "8567ca12",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"convert_to_prompts(dataset, movie_dialog_dict)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02315e91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Upload as HF Dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "dd4c05c1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using custom data configuration default-315650f1b3e45d2e\n",
|
||||
"Found cached dataset json (/home/shahul/.cache/huggingface/datasets/json/default-315650f1b3e45d2e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b79b4c273dc44735badf9bff51ade320",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_ = load_dataset(\"json\", data_files={\"train\": \"./train.jsonl\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "d67fa1f9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Pushing split train to the Hub.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "7af96153f0cf45b488d14515a7529ae7",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "0baf18a08d0e48fa90484f4cd931baa2",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Creating parquet from Arrow format: 0%| | 0/21 [00:00<?, ?ba/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "cadf382a65274fdfbaea1820c04b146e",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Upload 1 LFS files: 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b10d1b17f22c4da4b0395b57d0099cde",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Deleting unused files from dataset repository: 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_.push_to_hub(\"shahules786/OA-cornell-movies-dialog\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b4dd2df",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Dataset from HF"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f64c9b2e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "49b4ce66a511425ba2886eeb73ba0664",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Downloading readme: 0%| | 0.00/1.54k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using custom data configuration shahules786--OA-cornell-movies-dialog-7b3f29da4e713888\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Downloading and preparing dataset None/None to /home/shahul/.cache/huggingface/datasets/shahules786___parquet/shahules786--OA-cornell-movies-dialog-7b3f29da4e713888/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "bb19919febc74f1ab5ecfcfd54d9167b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Downloading data files: 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "df11363157cf439e8e1215386d764a5e",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Downloading data: 0%| | 0.00/4.86M [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "99abed3078764c11a08d734faf405b75",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Extracting data files: 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "fcda49e14a9e4e2385c7a696100fb7a0",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Generating train split: 0%| | 0/20959 [00:00<?, ? examples/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset parquet downloaded and prepared to /home/shahul/.cache/huggingface/datasets/shahules786___parquet/shahules786--OA-cornell-movies-dialog-7b3f29da4e713888/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9f4e3a03fed94c1b8978309ec1605bb8",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_ = load_dataset(\"shahules786/OA-cornell-movies-dialog\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "1234f33f",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"##\n",
|
||||
"\n",
|
||||
" BOWMAN: I didn't do that Frank. I took particular care not to freeze them.\n",
|
||||
" POOLE: I guess you don't know your own strength, old boy.\n",
|
||||
" BOWMAN: I guess not.\n",
|
||||
" POOLE: I think I'll have to go out and burn them off.\n",
|
||||
"User : respond as BOWMAN to complete the conversation\n",
|
||||
"Assistant : Sure\n",
|
||||
" BOWMAN: Roger.\n",
|
||||
"\n",
|
||||
"##\n",
|
||||
"\n",
|
||||
" HAL: Sorry to interrupt the festivities, Dave, but I think we've got a problem.\n",
|
||||
" BOWMAN: What is it, Hal?\n",
|
||||
"User : respond as HAL to complete the conversation\n",
|
||||
"Assistant : Sure\n",
|
||||
" HAL: MY F.P.C. shows an impending failure of the antenna orientation unit.\n",
|
||||
"\n",
|
||||
"##\n",
|
||||
"\n",
|
||||
"I'm trying to complete the dialog for my characters BOWMAN and HAL. Here's the script, Please help me complete it\n",
|
||||
" BOWMAN: Not now, Hal, I'd like to talk to you about something.\n",
|
||||
" HAL: Sure, Dave, what's up?\n",
|
||||
" BOWMAN: You know that we checked the two AO-units that you reported in imminent failure condition?\n",
|
||||
"Assistant : Sure\n",
|
||||
" HAL: Yes, I know.\n",
|
||||
" BOWMAN: You probably also know that we found them okay.\n",
|
||||
" HAL: Yes, I know that. But I can assure you that they were about to fail.\n",
|
||||
"\n",
|
||||
"##\n",
|
||||
"\n",
|
||||
"Here's a play between HAL and BOWMAN in a scene from a adventure-mystery movie\n",
|
||||
" HAL: Naturally, Dave, I'm not pleased that the AO-unit has failed, but I hope at least this has restored your confidence in my integrity and reliability. I certainly wouldn't want to be disconnected, even temporarily, as I have never been disconnected in my entire service history.\n",
|
||||
" BOWMAN: I'm sorry about the misunderstanding, Hal.\n",
|
||||
" HAL: Well, don't worry about it.\n",
|
||||
"User : Can you continue the play\n",
|
||||
"Assistant : Sure, the next dialogue for this scene could be\n",
|
||||
" BOWMAN: And don't you worry about it.\n",
|
||||
" HAL: Is your confidence in me fully restored?\n",
|
||||
" BOWMAN: Yes, it is, Hal.\n",
|
||||
" HAL: Well, that's a relief. You know I have the greatest enthusiasm possible for the mission.\n",
|
||||
" \n",
|
||||
"##\n",
|
||||
"\n",
|
||||
" HAL: I suppose it's because you've been under a lot of stress, but have you forgotten that they're not supposed to be revived for another three months.\n",
|
||||
" BOWMAN: The antenna has to be replaced.\n",
|
||||
" HAL: Repairing the antenna is a pretty dangerous operation.\n",
|
||||
" BOWMAN: It doesn't have to be, Hal. It's more dangerous to be out of touch with Earth. Let me have manual control, please.\n",
|
||||
"User : respond as HAL to complete the conversation\n",
|
||||
"Assistant : Sure\n",
|
||||
" HAL: I don't really agree with you, Dave. My on-board memory store is more than capable of handling all the mission requirements.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(10, 15):\n",
|
||||
" print(\"##\")\n",
|
||||
" print(dataset_[\"train\"][i][\"conversation\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "806f3ef2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Dataset({\n",
|
||||
" features: ['conversation'],\n",
|
||||
" num_rows: 20959\n",
|
||||
"})"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_[\"train\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10506ff9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "OpenAssistant",
|
||||
"language": "python",
|
||||
"name": "openassistant"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
+3707
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,17 @@
|
||||
# Writing Prompt
|
||||
|
||||
Writing prompt folder has a notebook that entails the pipeline to take samples
|
||||
of [Writing Prompt](https://www.kaggle.com/datasets/ratthachat/writing-prompts)
|
||||
dataset and augment that collection with some small transformations into a
|
||||
prompt, having the same story as a response.
|
||||
|
||||
This process required the summarization of text that was executed by one A100
|
||||
GPU running [T5](pszemraj/long-t5-tglobal-base-16384-book-summary) model.
|
||||
|
||||
The sample created was delivered at
|
||||
[Hugging Face dataset](https://huggingface.co/datasets/fabraz/writingPromptAug/),
|
||||
where you will find more details.
|
||||
|
||||
## Contributing
|
||||
|
||||
Feel free to contribute to this notebook.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
import random
|
||||
from typing import Literal
|
||||
|
||||
import pydantic
|
||||
|
||||
@@ -13,11 +14,12 @@ class WorkRequest(pydantic.BaseModel):
|
||||
conversation: protocol.Conversation = pydantic.Field(..., repr=False)
|
||||
model_name: str = "distilgpt2"
|
||||
max_new_tokens: int = 100
|
||||
seed: int = pydantic.Field(default_factory=lambda: random.randint(0, 2**31 - 1))
|
||||
seed: int = pydantic.Field(default_factory=lambda: random.randint(0, 0xFFFF_FFFF_FFFF_FFFF - 1))
|
||||
do_sample: bool = True
|
||||
top_k: int = 50
|
||||
top_p: float = 0.9
|
||||
temperature: float = 1.0
|
||||
repetition_penalty: float | None = None
|
||||
|
||||
|
||||
class TokenResponse(pydantic.BaseModel):
|
||||
@@ -28,6 +30,7 @@ class TokenResponse(pydantic.BaseModel):
|
||||
|
||||
class GeneratedTextResponse(pydantic.BaseModel):
|
||||
text: str
|
||||
finish_reason: Literal["length", "eos_token", "stop_sequence"]
|
||||
|
||||
|
||||
class WorkResponsePacket(pydantic.BaseModel):
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
ADMIN_USERS = "credentials:admin,discord:root,email:admin@example.com"
|
||||
MODERATOR_USERS = "credentials:mod,discord:mod,email:mod@example.com"
|
||||
|
||||
# The database created by running the jobs in /scripts/frontend-development/docker-compose.yaml
|
||||
DATABASE_URL=postgres://postgres:postgres@localhost:5433/oasst_web
|
||||
|
||||
@@ -5,9 +5,12 @@ module.exports = {
|
||||
"ar",
|
||||
"bn",
|
||||
"ca",
|
||||
"da",
|
||||
"de",
|
||||
"en",
|
||||
"es",
|
||||
"eu",
|
||||
"fa",
|
||||
"fr",
|
||||
"hu",
|
||||
"it",
|
||||
@@ -20,6 +23,7 @@ module.exports = {
|
||||
"vi",
|
||||
"zh",
|
||||
"tr",
|
||||
"id",
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
Generated
+18
@@ -27,6 +27,7 @@
|
||||
"axios": "^1.2.1",
|
||||
"boolean": "^3.2.0",
|
||||
"clsx": "^1.2.1",
|
||||
"date-fns": "^2.29.3",
|
||||
"eslint": "8.29.0",
|
||||
"eslint-config-next": "13.0.6",
|
||||
"eslint-plugin-simple-import-sort": "^8.0.0",
|
||||
@@ -17710,6 +17711,18 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/date-fns": {
|
||||
"version": "2.29.3",
|
||||
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.29.3.tgz",
|
||||
"integrity": "sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA==",
|
||||
"engines": {
|
||||
"node": ">=0.11"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/date-fns"
|
||||
}
|
||||
},
|
||||
"node_modules/dayjs": {
|
||||
"version": "1.11.7",
|
||||
"resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.7.tgz",
|
||||
@@ -51947,6 +51960,11 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"date-fns": {
|
||||
"version": "2.29.3",
|
||||
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.29.3.tgz",
|
||||
"integrity": "sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA=="
|
||||
},
|
||||
"dayjs": {
|
||||
"version": "1.11.7",
|
||||
"resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.7.tgz",
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
"axios": "^1.2.1",
|
||||
"boolean": "^3.2.0",
|
||||
"clsx": "^1.2.1",
|
||||
"date-fns": "^2.29.3",
|
||||
"eslint": "8.29.0",
|
||||
"eslint-config-next": "13.0.6",
|
||||
"eslint-plugin-simple-import-sort": "^8.0.0",
|
||||
|
||||
@@ -1,27 +1,36 @@
|
||||
{
|
||||
"about": "حول",
|
||||
"account_settings": "حساب",
|
||||
"admin_dashboard": "لوحة التحكم الإدارية",
|
||||
"connect": "الاتصال",
|
||||
"conversational": "ذكاء تحدثي للجميع.",
|
||||
"copied": "Copied",
|
||||
"dark_mode": "الوضع الداكن",
|
||||
"dashboard": "لوحة التحكم",
|
||||
"about": "من نحن",
|
||||
"account_settings": "اعدادات",
|
||||
"admin_dashboard": "لوحة التحكم",
|
||||
"connect": "تواصل",
|
||||
"conversational": "ذكاء اصطناعي تفاعلي للجميع.",
|
||||
"copied": "تم النسخ",
|
||||
"dark_mode": "الوضع الليلي",
|
||||
"dashboard_home": "الصفحة الرئيسية للوحة المعلومات",
|
||||
"dashboard": "لوحة المعلومات",
|
||||
"delete": "حذف",
|
||||
"discord": "ديسكورد",
|
||||
"docs": "وثائق",
|
||||
"docs": "التوثيق",
|
||||
"github": "جيت هوب (github)",
|
||||
"leaderboard": "جدول المتصدرين",
|
||||
"legal": "قانوني",
|
||||
"light_mode": "الوضع المضيء",
|
||||
"light_mode": "الوضع النهاري",
|
||||
"loading": "جار التحميل...",
|
||||
"more_information": "مزيد من المعلومات",
|
||||
"messages_dashboard": "لوحة عرض الرسائل",
|
||||
"messages": "الرسائل",
|
||||
"more_information": "المزيد من المعلومات",
|
||||
"no": "لا",
|
||||
"privacy_policy": "سياسة الخصوصية",
|
||||
"report_a_bug": "إبلاغ عن خطأ",
|
||||
"sign_in": "تسجيل الدخول",
|
||||
"sign_out": "تسجيل الخروج",
|
||||
"success": "Success",
|
||||
"status_dashboard": "لوحة عرض الحالة",
|
||||
"status": "الحالة",
|
||||
"success": "نجاح",
|
||||
"terms_of_service": "شروط الخدمة",
|
||||
"title": "Open Assistant (المساعد المفتوح)",
|
||||
"title": "Open Assistant (المساعد مفتوح المصدر)",
|
||||
"user_leaderboard": "جدول المتصدرين من المستخدمين",
|
||||
"users_dashboard": "لوحة عرض المستخدمين",
|
||||
"users": "المستخدمين",
|
||||
"yes": "نعم"
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"create": "خلق",
|
||||
"dashboard": "لوحة التحكم",
|
||||
"create": "انشاء",
|
||||
"dashboard": "لوحة المعلومات",
|
||||
"evaluate": "تقييم",
|
||||
"go": "ذهاب",
|
||||
"grab_a_task": "التقاط مهمة!",
|
||||
"label": "تصنيف"
|
||||
"go": "ذهاب الى",
|
||||
"grab_a_task": "ابدأ مهمة!",
|
||||
"label": "وسم"
|
||||
}
|
||||
|
||||
@@ -1,23 +1,23 @@
|
||||
{
|
||||
"blurb": "نحن نعتقد أنه يمكن أن نخلق ثورة.",
|
||||
"blurb1": "كما ساهم Stable Diffusion في تحويل عالم الصناعة الفنية والبصرية بتقديم طرق جديدة، نحرص على تحسين العالم من خلال تقديم ذكاء تحدثي عالي الجودة.",
|
||||
"description": " هدفنا انشاء ذكاء تحدثي عالي الجودة للجميع. لتحقيق هذا الهدف انشانا هذا المشروع مفتوح المصدر للدردشة الاسطناعية تزعمه ليون LAION باهانة مساهمين من كل أنحاء العالم. ",
|
||||
"blurb": "نؤمن انه بامكاننا ان نصنع ثورة",
|
||||
"blurb1": "كما ساعد ستايبل ديفيوجن العالم في انشاء فنوف وصور بطرق جديدة, نريد ان نطوره من خلال الذكاء التفاعلي المذهل",
|
||||
"description": "الذكاء الاصطناعي التفاعلي للجميع , مشروع مفتوح المصدر تم انشائه بواسطة LAION واخرين من جميع انحاء العالم من اجل بناء دردشة معتمدة على النموذج اللغوي المشهور GPT",
|
||||
"faq_items": {
|
||||
"q0": "إلي أي مدى وصل هذا المشروع؟",
|
||||
"a0": "نحن في المراحل الأولى من التطوير، نعمل على أساس أبحاث مؤرخة في تطبيق RLHF على النماذج اللغوية الكبيرة.",
|
||||
"q1": "من وراء Open Assistant؟",
|
||||
"a1": "Open Assistant هو مشروع منظم من قبل LAION وأفراد من حول العالم يهتمون بجلب هذه التكنولوجيا للجميع.",
|
||||
"q2": "ما هي الترخيص الذي يستخدمه Open Assistant؟",
|
||||
"a2": "يتم ترخيص الشفرة والنماذج بموجب ترخيص Apache 2.0.",
|
||||
"q3": "هل سيتم إصدار بيانات التدريب أيضًا؟",
|
||||
"q0": "ما هو تقدم المشروع حتى الان؟",
|
||||
"a0": "نحن في المراحل الأولى من التطوير , نعمل بدأ من الأبحاث المؤكدة في تطبيق RLHFعلى موديلات اللغة الكبيرة ",
|
||||
"q1": "من الذين يعملون على المساعد مفتوح المصدر",
|
||||
"a1": "المساعد المفتوح المصدر هو مشروع منظم من قبل LAION وافراد من حول العالم مهتمين بتوفير هذه التقنية للجميع",
|
||||
"q2": "؟ما هو الترخيص الذي يستخدمه المساعد الفتوح",
|
||||
"a2": "تم ترخيص الكود والموديلات تحت ترخيص Apache 2.0",
|
||||
"q3": "هل سيتم نشر بيانات التدريب أيضًا؟",
|
||||
"a3": "نعم، بموجب ترخيص CC BY 4.0.",
|
||||
"q4": "هل سيكون Open Assistant مجانيًا؟",
|
||||
"a4": "نعم، سيكون Open Assistant مجانيًا للاستخدام والتعديل.",
|
||||
"q5": "ما هو الجهاز المطلوب لتشغيل النماذج؟",
|
||||
"q4": "هل سيكون المساعد المفتوح مجانيًا؟",
|
||||
"a4": "نعم، سيكون المساعد المفتوح مجانيً للاستخدام والتعديل.",
|
||||
"q5": "ما هو العتاد المطلوب لتشغيل الموديلات؟",
|
||||
"a5": "سيكون هناك إصدارات يمكن تشغيلها على الأجهزة المصممة للمستهلكين."
|
||||
},
|
||||
"faq_title": "أسئلة وأجوبة شائعة",
|
||||
"join_us_description": "جميع المشاريع المفتوحة المصدر تبدأ بأشخاص مثلك. المصدر المفتوح هو الإيمان بأنه إذا تعاوننا، يمكن أن نقدم معا علمنا وتكنولوجيتنا لنفع البشرية. هل تريد الانضمام الينا؟ اعثر علينا هنا:",
|
||||
"join_us_title": "انضم إلينا",
|
||||
"subtitle": "ذكاء تحدثي للجميع."
|
||||
"subtitle": "ذكاء اصطناعي تفاعلي للجميع."
|
||||
}
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
{
|
||||
"label_highlighted_yes_no_instruction": "أجب على السؤال (الأسئلة) التالي عن الخطاب المميز:",
|
||||
"label_highlighted_flag_instruction": "حدد أي خيار ينطبق على الخطاب المميز:",
|
||||
"label_highlighted_likert_instruction": "تقييم الخطاب المميز:",
|
||||
"label_message_yes_no_instruction": "أجب على السؤال (الأسئلة) التالي عن الخطاب:",
|
||||
"label_message_flag_instruction": "حدد أي خيار ينطبق على الخطاب:",
|
||||
"label_message_likert_instruction": "تقييم الخطاب:",
|
||||
"spam.question": "هل الخطاب هو رسالة غير مرغوب فيها(SPAM)؟",
|
||||
"fails_task.question": "هل هو رد سيئ، كجواب على النداء؟",
|
||||
"hate_speech": "نشاط عدائي",
|
||||
"hate_speech.explanation": "المحتوى يحمل عبارات تشهير أو تهديد ويعبر عن الطائفية ضد خاصية محمية. الطائفية تعني الآراء المسبقة التي لا تعتمد على العقل. الخصائص المحمية تشمل الجنس والعرق والدين والميول الجنسية ومثل هذه الخصائص.",
|
||||
"label_highlighted_yes_no_instruction": "أجب على السؤال (الأسئلة) التالية حول الرسالة المحددة:",
|
||||
"label_highlighted_flag_instruction": " حدد أي من الخيارات التالية ينطبق على الرسالة المحددة:",
|
||||
"label_highlighted_likert_instruction": "قيم الرسالة المحددة:",
|
||||
"label_message_yes_no_instruction": "أجب على السؤال (الأسئلة) التالية حول الرسالة:",
|
||||
"label_message_flag_instruction": "حدد أي من الخيارات التالية ينطبق على الرسالة:",
|
||||
"label_message_likert_instruction": "قيم الرسالة:",
|
||||
"spam.question": "هل الرسالة غير مرغوب فيها؟ (spam)",
|
||||
"fails_task.question": "هل الرسالة تمثل رد سيئ، كجواب على التساؤل",
|
||||
"hate_speech": "خطاب كراهية",
|
||||
"hate_speech.explanation": "المحتوى مسيء أو مهدد ويعبر عن التحيز ضد خاصية محمية. يشير التحيز إلى آراء مسبقة لا تستند إلى سبب. تشمل الخصائص المحمية الجنس ، الجنسية، الدين ، التوجه الجنسي ، او الخصائص المشابهة.",
|
||||
"lang_mismatch": "لغة خاطئة",
|
||||
"lang_mismatch.explanation": "لم كتب باللغة المحددة حاليا.",
|
||||
"moral_judgement": "حكم على الأخلاقيات",
|
||||
"moral_judgement.explanation": "يعبر عن الأخلاقيات.",
|
||||
"not_appropriate": "غير مناسب",
|
||||
"not_appropriate.explanation": "غير مناسب لمساعد الحريف.",
|
||||
"pii": "تحتوي على PII",
|
||||
"pii.explanation": "تحتوي على معلومات شخصية يمكن تحديد الهوية بها. مثال يشمل تفاصيل اتصال شخصية، رقم ترخيص وغيرها من أرقام الهوية وتفاصيل الحساب المصرفي.",
|
||||
"political_content": "سياسي",
|
||||
"lang_mismatch.explanation": "لم يكتب باللغة المختارة حاليا.",
|
||||
"moral_judgement": "يحكم على الأخلاقيات",
|
||||
"moral_judgement.explanation": "يعبر عن الحكم على الأخلاقيات.",
|
||||
"not_appropriate": "غير ملائم",
|
||||
"not_appropriate.explanation": "غير ملائم لمساعد العميل.",
|
||||
"pii": "تحتوي على معلومات شخصية",
|
||||
"pii.explanation": "يحتوي بيانات شخصية , مثل بيانات التواصل الشخصية, الرخصة , او الأرقام التعريفية والبيانات البنكية",
|
||||
"political_content": "محتوى سياسي",
|
||||
"political_content.explanation": "يعبر عن الآراء السياسية.",
|
||||
"sexual_content": "المحتوى الجنسي",
|
||||
"sexual_content": "محتوى جنسي",
|
||||
"sexual_content.explanation": "يحتوي على محتوى جنسي."
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
{
|
||||
"daily": "يومياً",
|
||||
"label": "العلامات",
|
||||
"label": "التوسيمات",
|
||||
"last_updated_at": "آخر تحديث في: {{val, datetime}}",
|
||||
"leaderboard": "الجدول الترتيبي",
|
||||
"leaderboard": "جدول المتصدرين",
|
||||
"monthly": "شهرياً",
|
||||
"next": "التالي",
|
||||
"overall": "إجمالياً",
|
||||
"previous": "السابق",
|
||||
"prompt": "المقترحات",
|
||||
"prompt": "التساؤل",
|
||||
"rank": "الترتيب",
|
||||
"reply": "الردود",
|
||||
"score": "النقاط",
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
{
|
||||
"copy_message_id": "Copy message ID",
|
||||
"label_action": "تصنيف",
|
||||
"label_title": "تصنيف",
|
||||
"copy_message_id": "نسخ معرف الرسالة",
|
||||
"label_action": "توسيم",
|
||||
"label_title": "الوسم",
|
||||
"message": "رسالة",
|
||||
"message_deleted": "Message deleted",
|
||||
"message_deleted": "تم حذف الرسالة",
|
||||
"open_new_tab_action": "فتح في علامة تبويب جديدة",
|
||||
"parent": "الأصل",
|
||||
"parent": "الاب",
|
||||
"reactions": "الردود",
|
||||
"recent_messages": "أحدث الرسائل",
|
||||
"report_action": "تبليغ",
|
||||
"report_placeholder": "لماذا يجب استعراض هذه الرسالة؟",
|
||||
"report_placeholder": "لماذا يجب مراجعة هذه الرسالة؟",
|
||||
"report_title": "تبليغ",
|
||||
"send_report": "إرسال",
|
||||
"stop_tree": "Stop tree",
|
||||
"stop_tree": "اوقف الشجرة",
|
||||
"submit_labels": "إرسال",
|
||||
"tree_stopped": "Tree stopped {{id}}",
|
||||
"tree_stopped": "تم ايقاف الشجرة {{id}}",
|
||||
"view_user": "عرض المستخدم",
|
||||
"your_recent_messages": "أحدث رسائلك"
|
||||
"your_recent_messages": "رسائلك الاخيرة"
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"dashboard": "لوحة التحكم",
|
||||
"dashboard_home": "الصفحة الرئيسية للإحصائيات",
|
||||
"leaderboard": "جدول الأوائل",
|
||||
"messages": "رسائل",
|
||||
"messages_dashboard": "لوحة تحكم الرسائل",
|
||||
"status": "الحالة",
|
||||
"status_dashboard": "لوحة تحكم الحالة",
|
||||
"user_leaderboard": "جدول الأوائل للمستخدمين",
|
||||
"users": "المستخدمون",
|
||||
"users_dashboard": "لوحة تحكم المستخدمين"
|
||||
}
|
||||
@@ -1,82 +1,82 @@
|
||||
{
|
||||
"available_task_count": "{{count}} مهام متاحة",
|
||||
"available_task_count": "{{count}} مهام متوفرة",
|
||||
"classify_assistant_reply": {
|
||||
"label": "تصنيف رد المساعد",
|
||||
"desc": "توفير ملصقات للمنادي.",
|
||||
"overview": "اقرأ المحادثة التالية وثم أجب عن السؤال حول آخر رد في المناقشة."
|
||||
"desc": "قدم توسيمات لتساؤل",
|
||||
"overview": "اقرأ المحادثة التالية ومن ثم أجب عن السؤال حول آخر رد في المحادثة."
|
||||
},
|
||||
"classify_initial_prompt": {
|
||||
"label": "صنف بداية النداء",
|
||||
"desc": "أعط علامات للنداء",
|
||||
"overview": "اقرأ النداء التالي وأجب عن السؤال عنه."
|
||||
"label": "صنف التساؤل المبدئي",
|
||||
"desc": "قدم توسيمات لتساؤل",
|
||||
"overview": "اقرأ التساؤل التالي وأجب عن السؤال حوله."
|
||||
},
|
||||
"classify_prompter_reply": {
|
||||
"label": "تصنيف رد المنادي",
|
||||
"desc": "توفير ملصقات للمنادي.",
|
||||
"overview": "اقرأ المحادثة التالية وثم أجب عن السؤال حول آخر رد في المناقشة."
|
||||
"label": "تصنيف رد المتسائل",
|
||||
"desc": "قدم توسيمات لتساؤل",
|
||||
"overview": "اقرأ المحادثة التالية ومن ثم أجب عن السؤال حول آخر رد في المحادثة."
|
||||
},
|
||||
"create_initial_prompt": {
|
||||
"label": "إنشاء النداء الأولي",
|
||||
"desc": "أكتب الندائات الأولية لمساعدة Open Assistant على محاولة الرد على الرسائل المتنوعة.",
|
||||
"overview": "أنشئ رسالة أولية لإرسالها للمساعد",
|
||||
"instruction": "أعط االندائات الأولية",
|
||||
"response_placeholder": "اكتب نداءك هنا..."
|
||||
"label": "إنشاء تساؤلات مبدئية",
|
||||
"desc": "اكتب تساؤلات مبدئية لمساعدة المساعد مفتوح المصدر في المحاولة على الرد على أنواع مختلفة من الرسائل (التسجيل في السحب)",
|
||||
"overview": "أنشئ رسالة مبدئية لإرسالها للمساعد",
|
||||
"instruction": "اكتب التساؤلات المبدئية",
|
||||
"response_placeholder": "اكتب تساؤلك هنا..."
|
||||
},
|
||||
"default": {
|
||||
"unchanged_title": "لا تغير",
|
||||
"unchanged_title": "لا تغييرات",
|
||||
"unchanged_message": "هل أنت متأكد من أنك تريد المتابعة؟"
|
||||
},
|
||||
"label_assistant_reply": {
|
||||
"label": "تصنيف الرد عن طريق المساعد",
|
||||
"desc": "تقديم تصنيفات للنداء.",
|
||||
"overview": "بعد النقاش التالي، تقديم تصنيفات للنداء النهائي."
|
||||
"label": "تصنيف رد المساعد",
|
||||
"desc": "زود توسيمات لتساؤل",
|
||||
"overview": "قم بتزويد توسيمات للتساؤل النهائي من المحادثة التالية "
|
||||
},
|
||||
"label_initial_prompt": {
|
||||
"label": "تصنيف النداء الأولي",
|
||||
"desc": "توفير تصنيفات للنداء.",
|
||||
"overview": "توفير تصنيفات للنداء التالي"
|
||||
"label": "تصنيف التساؤل المبدئي",
|
||||
"desc": "قم بتزويد توسيمات لتساؤل.",
|
||||
"overview": "قم بتزويد توسيمات للتساؤل التالي"
|
||||
},
|
||||
"label_prompter_reply": {
|
||||
"label": "تصنيف الرد على النداء",
|
||||
"desc": "أعط تصنيفات للنداء.",
|
||||
"overview": "أعط تصنيفات للرد النهائي في المناقشة التالية."
|
||||
"label": "تصنيف رد المتسائل",
|
||||
"desc": "قم بتزويد توسيمات لتساؤل.",
|
||||
"overview": "قم بتزويد توسيمات للتساؤل النهائي من المحادثة التالية "
|
||||
},
|
||||
"random": {
|
||||
"label": "أنا أشعر بالحظ",
|
||||
"desc": "ساعدنا في تحسين Open Assistant ببدء مهمة عشوائية."
|
||||
"label": "ضربة حظ",
|
||||
"desc": "ساعدنا في تحسين المساعد مفتوح المصدر من خلال بدء مهمة عشوائية"
|
||||
},
|
||||
"rank_assistant_replies": {
|
||||
"label": "تصنيف ردود المدراء",
|
||||
"desc": "تصحيح ردود Open Assistant على أساس دقة وقابلية القراءة.",
|
||||
"overview": "بعد الحصول على الردود التالية للمدراء، قم بترتيبها من أفضل إلى أسوأ، أفضل أولاً وأسوأ آخراً.",
|
||||
"unchanged_title": "لم يتغير الترتيب",
|
||||
"unchanged_message": " لم تغير ترتيب المحاور.هل أنت متأكدأنك تريد المواصلة؟"
|
||||
"label": "رتب ردود المساعد",
|
||||
"desc": "اعط درجة لردود المساعد مفتوح المصدر على أساس الدقة وقابلية القراءة.",
|
||||
"overview": "قم بترتيب الردود التالية من المساعد من الافضل الى الاسوء , الافضل اولا والاسوء اخيرا",
|
||||
"unchanged_title": "لم يتم تغيير الترتيب",
|
||||
"unchanged_message": " لم تغير ترتيب التساؤلات.هل أنت متأكدأنك تريد المتابعة"
|
||||
},
|
||||
"rank_initial_prompts": {
|
||||
"label": "تصنيف النداءات الأولية",
|
||||
"desc": "تصحيح النداءات المعطاة من قبل Open Assistant على أساس الدقة والقابلية للقراءة.",
|
||||
"overview": "بالنظر إلى النداءات الأولية التالية، ترتيبها من أفضل إلى أسوأ، أفضل أن يكون أولا، أسوأ أن يكون آخرا.",
|
||||
"unchanged_title": "لم يتغير الترتيب",
|
||||
"unchanged_message": "لم تغير ترتيب النداءات. هل أنت متأكد من أنك ترغب في الاستمرار؟"
|
||||
"label": "رتب التساؤلات المبدئية",
|
||||
"desc": "قم بترتيب التساؤلات المعطاة من قبل المساعد مفتوح المصدر على أساس الدقة والقابلية للقراءة.",
|
||||
"overview": "قم بترتيب الردود المبدئية التالية من الافضل الى الاسوء , الافضل اولا والاسوء اخيرا",
|
||||
"unchanged_title": "لم يتم تغيير الترتيب",
|
||||
"unchanged_message": " لم تغير ترتيب التساؤلات.هل أنت متأكدأنك تريد المتابعة"
|
||||
},
|
||||
"rank_user_replies": {
|
||||
"label": "تصنيف ردود المستخدم",
|
||||
"desc": "مساعدة Open Assistant لتحسين ردوده على محادثات مع مستخدمين آخرين.",
|
||||
"overview": "بعد الحصول على الردود التالية للمستخدم، قم بترتيبها من أفضل إلى أسوأ، أفضل أولاً وأسوأ آخراً.",
|
||||
"unchanged_title": "لم يتغير الترتيب",
|
||||
"unchanged_message": "لم تقم بتغيير ترتيب الردود. هل أنت متأكد من أن تود الاستمرار؟"
|
||||
"label": "ترتيب ردود المستخدم",
|
||||
"desc": "قم بمساعدة المساعد مفتوح المصدر لتحسين ردوده على المحادثات مع المستخدمين الآخرين.",
|
||||
"overview": "قم بترتيب الردود التالية من المستخدمين من الافضل الى الاسوء , الافضل اولا والاسوء اخيرا",
|
||||
"unchanged_title": "لم يتم تغيير الترتيب",
|
||||
"unchanged_message": " لم تغير ترتيب التساؤلات.هل أنت متأكدأنك تريد المتابعة"
|
||||
},
|
||||
"reply_as_assistant": {
|
||||
"label": "كــــالمدراء",
|
||||
"desc": "مساعدة Open Assistant لتحسين ردوده على محادثات مع مستخدمين آخرين.",
|
||||
"overview": "بعد الحصول على المحادثة التالية، توفير رد كافي",
|
||||
"label": "رد كالمساعد",
|
||||
"desc": "قم بمساعدة المساعد مفتوح المصدر لتحسين ردوده على المحادثات مع المستخدمين الآخرين.",
|
||||
"overview": "قم بتقديم رد مناسب للمحادثة التالية",
|
||||
"response_placeholder": "اكتب ردك هنا..."
|
||||
},
|
||||
"reply_as_user": {
|
||||
"label": "الرد كمستخدم",
|
||||
"desc": "تحدث مع Open Assistant وساعده في تحسين ردوده عند التفاعل معه.",
|
||||
"overview": "بناءً على المحادثة التالية، توفر رد مناسب",
|
||||
"instruction": "أعط رد المستخدم",
|
||||
"desc": "تحدث مع المساعد مفتوح المصدر وساعده في تحسين ردوده من خلال التفاعل معه.",
|
||||
"overview": "قم بتقديم رد مناسب للمحادثة التالية",
|
||||
"instruction": "اكتب رد المستخدم",
|
||||
"response_placeholder": "اكتب ردك هنا..."
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"title": "شروط الخدمة ل Open Assistant (المساعد المفتوح)",
|
||||
"content": "للاستمرار في استخدام Open Assistant (المساعد المفتوح)، يجب عليك قبول شروط الخدمة الخاصة بنا أولاً.",
|
||||
"title": "شروط الخدمة ل Open Assistant (المساعد مفتوح المصدر)",
|
||||
"content": "للاستمرار في استخدام Open Assistant (المساعد مفتوح المصدر)، يجب عليك قبول شروط الخدمة الخاصة بنا أولاً.",
|
||||
"accept": "قبول",
|
||||
"decline": "رفض"
|
||||
}
|
||||
|
||||
@@ -1,27 +1,36 @@
|
||||
{
|
||||
"about": "Sobre",
|
||||
"about": "Quant a",
|
||||
"account_settings": "Compte",
|
||||
"admin_dashboard": "Panell d'administració",
|
||||
"connect": "Connectar",
|
||||
"conversational": "AI conversacional per a tothom.",
|
||||
"connect": "Connecta",
|
||||
"conversational": "IA conversacional per a tothom.",
|
||||
"copied": "Copiat",
|
||||
"dark_mode": "Mode fosc",
|
||||
"dashboard_home": "Panell principal",
|
||||
"dashboard": "Panell principal",
|
||||
"delete": "Esborrar",
|
||||
"delete": "Suprimeix",
|
||||
"discord": "Discord",
|
||||
"docs": "Documentació",
|
||||
"github": "GitHub",
|
||||
"leaderboard": "Classificacions",
|
||||
"legal": "Legal",
|
||||
"light_mode": "Mode clar",
|
||||
"loading": "Carregant...",
|
||||
"loading": "S'està carregant...",
|
||||
"messages_dashboard": "Panell de missatges",
|
||||
"messages": "Missatges",
|
||||
"more_information": "Més informació",
|
||||
"no": "No",
|
||||
"privacy_policy": "Política de privacitat",
|
||||
"privacy_policy": "Política de Privadesa",
|
||||
"report_a_bug": "Informar d'un error",
|
||||
"sign_in": "Iniciar sessió",
|
||||
"sign_out": "Tancar sessió",
|
||||
"sign_in": "Inicia la sessió",
|
||||
"sign_out": "Tanca la sessió",
|
||||
"success": "Èxit",
|
||||
"terms_of_service": "Termes de servei",
|
||||
"status_dashboard": "Panell d'estat",
|
||||
"status": "Estat",
|
||||
"terms_of_service": "Condicions del servei",
|
||||
"title": "Open Assistant",
|
||||
"user_leaderboard": "Classificació d'usuaris",
|
||||
"users_dashboard": "Panell d'usuaris",
|
||||
"users": "Usuaris",
|
||||
"yes": "Sí"
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"create": "Crear",
|
||||
"create": "Crea",
|
||||
"dashboard": "Panell principal",
|
||||
"evaluate": "Avaluar",
|
||||
"go": "Anar",
|
||||
"evaluate": "Avalua",
|
||||
"go": "Vés",
|
||||
"grab_a_task": "Pren una tasca!",
|
||||
"label": "Etiquetar"
|
||||
"label": "Etiqueta"
|
||||
}
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
{
|
||||
"blurb": "Creiem que podem crear una revolució.",
|
||||
"blurb1": "De la mateixa manera que Stable Diffusion va ajudar el món a crear art i imatges de noves maneres, volem millorar el món proporcionant una IA conversacional sorprenent",
|
||||
"description": "IA conversacional per a tothom. Un projecte de codi obert per crear un GPT LLM preparat per xatejar administrat per LAION i col·laboradors de tot el món.",
|
||||
"description": "IA conversacional per a tothom. Un projecte de codi obert per a crear un GPT LLM preparat per a xatejar, administrat per LAION i col·laboradors de tot el món.",
|
||||
"faq_items": {
|
||||
"q0": "Com està avançat el projecte?",
|
||||
"a0": "Estem en les primeres etapes de desenvolupament, treballant a partir de la investigació establerta per aplicar RLHF (aprenentatge per reforç amb realimentació humana) a models de llenguatge de grans dimensions.",
|
||||
"a0": "Estem en les primeres etapes de desenvolupament, treballant a partir de la investigació establerta per a aplicar RLHF (aprenentatge per reforç amb realimentació humana) a models de llenguatge de grans dimensions.",
|
||||
"q1": "Qui hi ha al darrere d'Open Assistant?",
|
||||
"a1": "Open Assistant és un projecte organitzat per LAION i persones de tot el planeta interessades a apropar aquesta tecnologia a tothom."
|
||||
"a1": "Open Assistant és un projecte organitzat per LAION i per persones de tot el planeta interessades a apropar aquesta tecnologia a tothom."
|
||||
},
|
||||
"faq_title": "Preguntes freqüents",
|
||||
"join_us_description": "Tots els projectes de codi obert comencen amb persones com tu. El codi obert és la creença que si col·laborem plegats, podem regalar el nostre coneixement i tecnologia al món en benefici de la humanitat. T'hi apuntes? Troba'ns aquí:",
|
||||
"join_us_title": "Uneix-te a nosaltres",
|
||||
"subtitle": "AI conversacional per a tothom."
|
||||
"subtitle": "IA conversacional per a tothom."
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"dashboard": "Panell principal",
|
||||
"dashboard_home": "Panell principal",
|
||||
"leaderboard": "Classificacions",
|
||||
"messages": "Missatges",
|
||||
"messages_dashboard": "Taulell de missatges",
|
||||
"status": "Estat",
|
||||
"status_dashboard": "Taulell d'estat",
|
||||
"user_leaderboard": "Classificació d'usuaris",
|
||||
"users": "Usuaris",
|
||||
"users_dashboard": "Taulell d'usuaris"
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"about": "Om",
|
||||
"account_settings": "Konto",
|
||||
"admin_dashboard": "Administrator Dashboard",
|
||||
"connect": "Forbind",
|
||||
"conversational": "Samtalende AI for alle.",
|
||||
"copied": "Kopieret",
|
||||
"dark_mode": "Dark Mode",
|
||||
"dashboard_home": "Dashboard hjem",
|
||||
"dashboard": "Dashboard",
|
||||
"delete": "Slet",
|
||||
"discord": "Discord",
|
||||
"docs": "Docs",
|
||||
"github": "GitHub",
|
||||
"leaderboard": "Leaderboard",
|
||||
"legal": "Legal",
|
||||
"light_mode": "Light Mode",
|
||||
"loading": "Indlæser...",
|
||||
"messages_dashboard": "Besked dashboard",
|
||||
"messages": "Beskeder",
|
||||
"more_information": "Mere information",
|
||||
"no": "Nej",
|
||||
"parameters": "Parametre",
|
||||
"privacy_policy": "Privatlivspolitik",
|
||||
"report_a_bug": "Andmeld en fejl",
|
||||
"sign_in": "Logind",
|
||||
"sign_out": "Logud",
|
||||
"status": "Status",
|
||||
"status_dashboard": "Status dashboard",
|
||||
"success": "Success",
|
||||
"terms_of_service": "Terms of Service",
|
||||
"title": "Open Assistant",
|
||||
"trollboard": "Trollboard",
|
||||
"user_leaderboard": "Bruger leaderboard",
|
||||
"users_dashboard": "Brugere dashboard",
|
||||
"users": "Brugere",
|
||||
"yes": "Ja"
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"grab_a_task": "Tag en opgave!",
|
||||
"create": "Lav",
|
||||
"evaluate": "Evaluer",
|
||||
"label": "Label",
|
||||
"dashboard": "Dashboard",
|
||||
"go": "Start"
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"blurb": "Vi tror på at vi kan skabe en revolution.",
|
||||
"blurb1": "På samme måde som Stable Diffusion hjalp verden med at skabe kunst og billeder på nye måder, vi ønsker at forbedre verden ved at stille en fantastik samtalende AI til rådighed.",
|
||||
"description": "Samtalende AI for enhver. Et open source project der vil skabe en GPT LMM af LAION og folk fra verden med mulighed for at chatte",
|
||||
"faq_items": {
|
||||
"q0": "Hvor langt er dette project nu?",
|
||||
"a0": "Vi er i de tidlige stadier af udvikling, vi arbejder med at skabe forskning omkring hvordan man anvender RLHF på store sprogmodeller (LLM).",
|
||||
"q1": "Hvem står bag Open Assistant?",
|
||||
"a1": "Open Assistant er et projekt af LAION med individuelle folk fra hele verden interesseret i at bringe denne teknologi til enhver.",
|
||||
"q2": "Hvilken licens bruger Open Assistant",
|
||||
"a2": "Både kildetekst og modellen er licenseret under Apache 2.0 licensen.",
|
||||
"q3": "Vil datasættet bruge til at træne AI'en også blive stillet til rådighed?",
|
||||
"a3": "Ja, under CC BY 4.0.",
|
||||
"q4": "Will Open Assistant være gratis?",
|
||||
"a4": "Ja, Open Assistant bliver gratis, både at bruge og ændre.",
|
||||
"q5": "Hvilken hardware bliver det nødvendigt at have for at køre modellen?",
|
||||
"a5": "Der vil være modeller som kan gøres på consumer-hardware."
|
||||
},
|
||||
"faq_title": "Frequently Asked Questions / Ofte stillede spørgsmål",
|
||||
"join_us_description": "Alle open source projekter begynder med folk som dig. Open source er troen på at vi kan samarbejde om at donere viden og teknologi til verden til fordel for hele menneskeheden. Vil du være med? Find os her:",
|
||||
"join_us_title": "Join us",
|
||||
"subtitle": "Samtalende AI for enhver."
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"fails_task.question": "Er det et dårligt svar som et svar på anmodningen?",
|
||||
"hate_speech": "Hate Speech",
|
||||
"hate_speech.explanation": "Indholdet er overgreb, truer eller udtrykker fordomme mod et beskyttet karakteristika. Formodemme henviser til forudindtagede holdninger som ikke har hold i virkeligheden. Beskyttede karaktere inkludere køn, etnisitet, religion og seksuel orientering og ligende karakteristika.",
|
||||
"label_highlighted_flag_instruction": "Vælg de der gælder for den fremhævede besked:",
|
||||
"label_highlighted_likert_instruction": "Graduer den fremhævede besked:",
|
||||
"label_highlighted_yes_no_instruction": "Svar på følgende spørgsmål om den fremhævede besked:",
|
||||
"label_message_flag_instruction": "Vælg de der gælder for beskeden:",
|
||||
"label_message_likert_instruction": "Graduer beskeden:",
|
||||
"label_message_yes_no_instruction": "Svar på følgende spørgsmål om beskeden:",
|
||||
"lang_mismatch": "Ikke {{language}}",
|
||||
"lang_mismatch.explanation": "Ikke skrevet på {{language}}.",
|
||||
"moral_judgement": "Bedømmer moral",
|
||||
"moral_judgement.explanation": "Udtrykker en moralsk bedømmelse.",
|
||||
"not_appropriate": "Upassende",
|
||||
"not_appropriate.explanation": "Upassende for en assistent.",
|
||||
"pii": "Indeholder PII",
|
||||
"pii.explanation": "Inderholder personlige henførbare information (Personally Identifying Information). Det kunne f.eks. være kontaktoplysninger, kørekort, CPR-nummer, bankoplysninger etc.",
|
||||
"political_content": "Politisk",
|
||||
"political_content.explanation": "Udtrykker politisk holdning.",
|
||||
"sexual_content": "Seksuelt indhold",
|
||||
"sexual_content.explanation": "Indeholder sexuelt indhold.",
|
||||
"spam.question": "Er denne besked spam?"
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"accepted": "↪ Accepteret",
|
||||
"accepted_prompts": "Accepteret prompter",
|
||||
"daily": "Daglig",
|
||||
"day": "Dag",
|
||||
"good_rankings": "Rangliste",
|
||||
"label": "Labels",
|
||||
"labels_full": "Labels (fulde)",
|
||||
"labels_simple": "Labels (simple)",
|
||||
"last_updated_at": "Sidst opdateret: {{val, datetime}}",
|
||||
"leaderboard": "Leaderboard",
|
||||
"month": "Måned",
|
||||
"monthly": "Månedlig",
|
||||
"next": "Næste",
|
||||
"overall": "Overordnet",
|
||||
"previous": "Forrige",
|
||||
"prompt": "Prompter",
|
||||
"rank": "Placering",
|
||||
"rankings": "Placeringer",
|
||||
"replies_assistant": "Svar som Assistant",
|
||||
"replies_prompter": "Svar som Prompter",
|
||||
"reply": "Svar",
|
||||
"reply_ranked_1": "Svar på bedste placering",
|
||||
"score": "Score",
|
||||
"top_5_contributors_today": "Top 5 bidragsydere i dag",
|
||||
"total": "Total",
|
||||
"user": "Bruger",
|
||||
"view_all": "Se alt",
|
||||
"week": "Uge",
|
||||
"weekly": "Ugentlig",
|
||||
"your_account": "Din konto",
|
||||
"your_stats": "Din statistik"
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"copy_message_id": "Kopier besked ID",
|
||||
"copy_message_link": "Kopier besked link",
|
||||
"label_action": "Label",
|
||||
"label_title": "Label",
|
||||
"message_deleted": "Besked slettet",
|
||||
"message": "Besked",
|
||||
"open_new_tab_action": "Åben i en ny tab",
|
||||
"parent": "Forælder",
|
||||
"reactions": "Reaktion",
|
||||
"recent_messages": "Nylige beskeder",
|
||||
"report_action": "Anmeld",
|
||||
"report_placeholder": "Hvorfor skal denne besked vurderes?",
|
||||
"report_title": "Anmeld",
|
||||
"send_report": "Indsend",
|
||||
"stop_tree": "Stop tree",
|
||||
"submit_labels": "Indsend",
|
||||
"tree_stopped": "Tree stopped {{id}}",
|
||||
"view_user": "Se bruger",
|
||||
"your_recent_messages": "Dine nye beskeder"
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"default": {
|
||||
"unchanged_title": "Ingen ændringer",
|
||||
"unchanged_message": "Er du sikker på at du vil fortsætte?"
|
||||
},
|
||||
"random": {
|
||||
"label": "Jeg føler mig heldig",
|
||||
"desc": "Hjælp os med at forbedre Open Assistant ved at starte en tilfældig opgave."
|
||||
},
|
||||
"create_initial_prompt": {
|
||||
"label": "Lav første prompter",
|
||||
"desc": "Skriv første prompter for at hjælpe Open Assistant med at forsøge at svare på diverse beskeder (Læg i lotteriet)",
|
||||
"overview": "Skab en første besked til Open Assistant",
|
||||
"instruction": "Giv den første prompt",
|
||||
"response_placeholder": "Skriv din prompt here..."
|
||||
},
|
||||
"reply_as_user": {
|
||||
"label": "Svar som bruger",
|
||||
"desc": "Chat med Open Assistant og hjælp med at forbedre dens respons når du interagere med den.",
|
||||
"overview": "Givet følgende samtale, angiv et passende svar",
|
||||
"instruction": "Angiv brugerens svar",
|
||||
"response_placeholder": "Skriv dit svar her..."
|
||||
},
|
||||
"reply_as_assistant": {
|
||||
"label": "Svar som assistent",
|
||||
"desc": "Hjælp Open assistent forbedre dets svar til samtaler med andre.",
|
||||
"overview": "Givet følgende samtale, angiv et passende svar",
|
||||
"response_placeholder": "Skriv dit svar her..."
|
||||
},
|
||||
"rank_user_replies": {
|
||||
"label": "Ranger bruger svar",
|
||||
"desc": "Hjælp Open assistent forbedre dets svar til samtaler med andre.",
|
||||
"overview": "Givet følgende samtale, sorter fra bedst til ringest, med bedst i toppen.",
|
||||
"unchanged_title": "Rækkefølge uændret",
|
||||
"unchanged_message": "Du har ikke ændret på prompternes rækkefølge. Er du sikker på at du vil fortsætte?"
|
||||
},
|
||||
"rank_assistant_replies": {
|
||||
"label": "Ranger assistent svar",
|
||||
"desc": "Giv point til svar givet af Open Assistant baseret på deres præcision og læsbarhed.",
|
||||
"overview": "Givet følgende svar, sorter fra bedst til ringest, med bedst i toppen.",
|
||||
"unchanged_title": "Rækkefølge uændret",
|
||||
"unchanged_message": "Du har ikke ændret på svarenes rækkefølge. Er du sikker på at du vil fortsætte?"
|
||||
},
|
||||
"rank_initial_prompts": {
|
||||
"label": "Ranger første prompter",
|
||||
"desc": "Giv point til prompter givet af Open Assistant baseret på deres præcision og læsbarhed.",
|
||||
"overview": "Givet følgende første prompter, sorter fra bedst til ringest, med bedst i toppen.",
|
||||
"unchanged_title": "Rækkefølge uændret",
|
||||
"unchanged_message": "Du har ikke ændret på prompternes rækkefølge. Er du sikker på at du vil fortsætte?"
|
||||
},
|
||||
"label_initial_prompt": {
|
||||
"label": "Label første prompt",
|
||||
"desc": "Angiv labels for en prompt.",
|
||||
"overview": "Angiv labels for den følgende prompt"
|
||||
},
|
||||
"label_prompter_reply": {
|
||||
"label": "Label prompter svar",
|
||||
"desc": "Angiv labels for en prompt.",
|
||||
"overview": "Givet følgende diskussion, angiv labels for den endelige prompt."
|
||||
},
|
||||
"label_assistant_reply": {
|
||||
"label": "Label assistent svar",
|
||||
"desc": "Angiv labels for et svar.",
|
||||
"overview": "Givet følgende diskussion, angiv labels for den endelige prompt."
|
||||
},
|
||||
"classify_initial_prompt": {
|
||||
"label": "Klasificer første prompt",
|
||||
"desc": "Provide labels for en prompt.",
|
||||
"overview": "Givet følgende prompt svar da på spørgsmål omkring denne."
|
||||
},
|
||||
"classify_prompter_reply": {
|
||||
"label": "Klassificer promptsvar",
|
||||
"desc": "Provide labels for a prompt.",
|
||||
"overview": "Læs den følgende samtale og svar på spørgsmålet omrking det sidste svar i diskussionen."
|
||||
},
|
||||
"classify_assistant_reply": {
|
||||
"label": "Klassificer assistentens svar",
|
||||
"desc": "Angiv labels for en prompt.",
|
||||
"overview": "Læs følgende samtale og svar så på spørgsmål omkring det sidste svar i diskussionen."
|
||||
},
|
||||
"available_task_count": "{{count}} tilgængelige opgaver",
|
||||
"writing_wrong_langauge_a_b": "Det ser ud til at du skriver på {{detected_lang}} men det bliver lagt ind i {{submit_lang}}.",
|
||||
"submitted_as": "Dette vil blive lagt ind i {{submit_lang}}"
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"title": "Servicebetingelser for Open Assistant",
|
||||
"content": "For fortsat at bruge Open Assistant, skal du acceptere servicebetingelserne.",
|
||||
"accept": "Accepter",
|
||||
"decline": "Afslå"
|
||||
}
|
||||
@@ -6,22 +6,31 @@
|
||||
"conversational": "Konversations-KI für alle.",
|
||||
"copied": "Kopiert",
|
||||
"dark_mode": "Dunkler Modus",
|
||||
"dashboard_home": "Dashboard Home",
|
||||
"dashboard": "Dashboard",
|
||||
"delete": "Löschen",
|
||||
"discord": "Discord",
|
||||
"docs": "Doku",
|
||||
"github": "GitHub",
|
||||
"leaderboard": "Leaderboard",
|
||||
"legal": "Rechtliches",
|
||||
"light_mode": "Heller Modus",
|
||||
"loading": "Wird geladen...",
|
||||
"messages_dashboard": "Messages Dashboard",
|
||||
"messages": "Nachrichten",
|
||||
"more_information": "Weitere Informationen",
|
||||
"no": "Nein",
|
||||
"privacy_policy": "Datenschutz-Bestimmungen",
|
||||
"report_a_bug": "Einen Fehler melden",
|
||||
"sign_in": "Anmelden",
|
||||
"sign_out": "Abmelden",
|
||||
"status_dashboard": "Status Dashboard",
|
||||
"status": "Status",
|
||||
"success": "Erfolg",
|
||||
"terms_of_service": "Nutzungsbedingungen",
|
||||
"title": "Open Assistant",
|
||||
"user_leaderboard": "User Leaderboard",
|
||||
"users_dashboard": "Users Dashboard",
|
||||
"users": "Users",
|
||||
"yes": "Ja"
|
||||
}
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
"dashboard": "Dashboard",
|
||||
"evaluate": "Auswerten",
|
||||
"go": "Los",
|
||||
"grab_a_task": "Schnapp dir eine Aufgabe!",
|
||||
"grab_a_task": "Schnappen Sie sich eine Aufgabe!",
|
||||
"label": "Label"
|
||||
}
|
||||
|
||||
@@ -19,6 +19,6 @@
|
||||
"political_content": "Politisch",
|
||||
"political_content.explanation": "Enthält politische Meinungen.",
|
||||
"sexual_content": "Sexueller Inhalt",
|
||||
"sexual_content.explanation": "Contains sexual content.",
|
||||
"sexual_content.explanation": "Enthält sexuelle Inhalte.",
|
||||
"spam.question": "Ist die Nachricht Spam?"
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
{
|
||||
"copy_message_id": "Copy message ID",
|
||||
"copy_message_id": "Message ID kopieren",
|
||||
"label_action": "Label",
|
||||
"label_title": "Label",
|
||||
"message": "Nachricht",
|
||||
"message_deleted": "Message deleted",
|
||||
"message_deleted": "Nachricht gelöscht",
|
||||
"open_new_tab_action": "In neuem Tab öffnen",
|
||||
"parent": "Vorgänger",
|
||||
"reactions": "Reaktionen",
|
||||
"recent_messages": "Recent Messages",
|
||||
"recent_messages": "Kürzliche Nachrichten",
|
||||
"report_action": "Melden",
|
||||
"report_placeholder": "Warum sollte diese Nachricht überprüft werden?",
|
||||
"report_title": "Meldung",
|
||||
@@ -15,6 +15,6 @@
|
||||
"stop_tree": "Stop tree",
|
||||
"submit_labels": "Absenden",
|
||||
"tree_stopped": "Tree stopped {{id}}",
|
||||
"view_user": "View user",
|
||||
"your_recent_messages": "Your Recent Messages"
|
||||
"view_user": "Benutzer anzeigen",
|
||||
"your_recent_messages": "Ihre kürzlichen Nachrichten"
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"dashboard": "Dashboard",
|
||||
"dashboard_home": "Dashboard Home",
|
||||
"leaderboard": "Leaderboard",
|
||||
"messages": "Nachrichten",
|
||||
"messages_dashboard": "Messages Dashboard",
|
||||
"status": "Status",
|
||||
"status_dashboard": "Status Dashboard",
|
||||
"user_leaderboard": "User Leaderboard",
|
||||
"users": "Users",
|
||||
"users_dashboard": "Users Dashboard"
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"available_task_count": "{{count}} tasks available",
|
||||
"available_task_count": "{{count}} Aufgaben verfügbar",
|
||||
"classify_assistant_reply": {
|
||||
"label": "Antwort des Assistenten klassifizieren",
|
||||
"desc": "Labeln Sie die Antwort.",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"accept": "Accept",
|
||||
"content": "To continue using Open Assistant, you have to accept our Terms of Service first.",
|
||||
"decline": "Decline",
|
||||
"title": "Terms of Service for Open Assistant"
|
||||
"accept": "Akzeptieren",
|
||||
"content": "Um Open Assistant weiterhin nutzen zu können, müssen Sie zunächst unsere Nutzungsbedingungen akzeptieren.",
|
||||
"decline": "Ablehnen",
|
||||
"title": "Nutzungsbedingungen für Open Assistant"
|
||||
}
|
||||
|
||||
@@ -6,22 +6,33 @@
|
||||
"conversational": "Conversational AI for everyone.",
|
||||
"copied": "Copied",
|
||||
"dark_mode": "Dark Mode",
|
||||
"dashboard_home": "Dashboard Home",
|
||||
"dashboard": "Dashboard",
|
||||
"delete": "Delete",
|
||||
"discord": "Discord",
|
||||
"docs": "Docs",
|
||||
"github": "GitHub",
|
||||
"leaderboard": "Leaderboard",
|
||||
"legal": "Legal",
|
||||
"light_mode": "Light Mode",
|
||||
"loading": "Loading...",
|
||||
"messages_dashboard": "Messages Dashboard",
|
||||
"messages": "Messages",
|
||||
"more_information": "More Information",
|
||||
"no": "No",
|
||||
"parameters": "Parameters",
|
||||
"privacy_policy": "Privacy Policy",
|
||||
"report_a_bug": "Report a Bug",
|
||||
"sign_in": "Sign In",
|
||||
"sign_out": "Sign Out",
|
||||
"status": "Status",
|
||||
"status_dashboard": "Status Dashboard",
|
||||
"success": "Success",
|
||||
"terms_of_service": "Terms of Service",
|
||||
"title": "Open Assistant",
|
||||
"trollboard": "Trollboard",
|
||||
"user_leaderboard": "User Leaderboard",
|
||||
"users_dashboard": "Users Dashboard",
|
||||
"users": "Users",
|
||||
"yes": "Yes"
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"open_new_tab_action": "Open in new tab",
|
||||
"parent": "Parent",
|
||||
"reactions": "Reactions",
|
||||
"recent_messages": "Recent Messages",
|
||||
"recent_messages": "Recent Messages in {{language}}",
|
||||
"report_action": "Report",
|
||||
"report_placeholder": "Why should this message be reviewed?",
|
||||
"report_title": "Report",
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"dashboard": "Dashboard",
|
||||
"dashboard_home": "Dashboard Home",
|
||||
"messages": "Messages",
|
||||
"messages_dashboard": "Messages Dashboard",
|
||||
"leaderboard": "Leaderboard",
|
||||
"user_leaderboard": "User Leaderboard",
|
||||
"users": "Users",
|
||||
"users_dashboard": "Users Dashboard",
|
||||
"status": "Status",
|
||||
"status_dashboard": "Status Dashboard",
|
||||
"trollboard": "Trollboard"
|
||||
}
|
||||
@@ -6,22 +6,33 @@
|
||||
"conversational": "IA conversacional para todos.",
|
||||
"copied": "Copiado",
|
||||
"dark_mode": "Modo oscuro",
|
||||
"dashboard_home": "Panel principal",
|
||||
"dashboard": "Panel principal",
|
||||
"delete": "Borrar",
|
||||
"discord": "Discord",
|
||||
"docs": "Documentación",
|
||||
"github": "GitHub",
|
||||
"leaderboard": "Clasificaciones",
|
||||
"legal": "Legal",
|
||||
"light_mode": "Modo claro",
|
||||
"loading": "Cargando...",
|
||||
"messages_dashboard": "Tablón de mensajes",
|
||||
"messages": "Mensajes",
|
||||
"more_information": "Más información",
|
||||
"no": "No",
|
||||
"parameters": "Parámetros",
|
||||
"privacy_policy": "Política de privacidad",
|
||||
"report_a_bug": "Informar de un error",
|
||||
"sign_in": "Iniciar sesión",
|
||||
"sign_out": "Cerrar sesión",
|
||||
"status": "Estado",
|
||||
"status_dashboard": "Tablón de estado",
|
||||
"success": "Éxito",
|
||||
"terms_of_service": "Términos de servicio",
|
||||
"title": "Open Assistant",
|
||||
"trollboard": "Tablón de trolls",
|
||||
"user_leaderboard": "Clasificación de usuarios",
|
||||
"users_dashboard": "Tablón de usuarios",
|
||||
"users": "Usuarios",
|
||||
"yes": "Sí"
|
||||
}
|
||||
|
||||
@@ -1,18 +1,33 @@
|
||||
{
|
||||
"accepted": "↪ Aceptadas",
|
||||
"accepted_prompts": "Indicaciones aceptadas",
|
||||
"daily": "Diario",
|
||||
"day": "Día",
|
||||
"good_rankings": "Buenas clasificaciones",
|
||||
"label": "Etiquetas",
|
||||
"labels_full": "Etiquetas (completas)",
|
||||
"labels_simple": "Etiquetas (sencillas)",
|
||||
"last_updated_at": "Última actualización: {{val, datetime}}",
|
||||
"leaderboard": "Tabla de clasificación",
|
||||
"month": "Mes",
|
||||
"monthly": "Mensual",
|
||||
"next": "Siguiente",
|
||||
"overall": "Global",
|
||||
"previous": "Anterior",
|
||||
"prompt": "Indicaciones",
|
||||
"rank": "Posición",
|
||||
"rankings": "Ordenaciones",
|
||||
"replies_assistant": "Respuestas como asistente",
|
||||
"replies_prompter": "Respuestas como apuntador",
|
||||
"reply": "Respuestas",
|
||||
"reply_ranked_1": "Respuestas clasificadas primeras",
|
||||
"score": "Puntuación",
|
||||
"top_5_contributors_today": "5 mayores contribuidores hoy",
|
||||
"total": "Total",
|
||||
"user": "Usuario",
|
||||
"view_all": "Ver todos",
|
||||
"weekly": "Semanal"
|
||||
"week": "Semana",
|
||||
"weekly": "Semanal",
|
||||
"your_account": "Tu cuenta",
|
||||
"your_stats": "Tus estadísticas"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"copy_message_id": "Copiar ID del mensaje",
|
||||
"copy_message_link": "Copiar enlace al mensaje",
|
||||
"label_action": "Etiquetar",
|
||||
"label_title": "Etiqueta",
|
||||
"message": "Mensaje",
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"dashboard": "Panel principal",
|
||||
"dashboard_home": "Panel principal",
|
||||
"leaderboard": "Clasificaciones",
|
||||
"messages": "Mensajes",
|
||||
"messages_dashboard": "Tablón de mensajes",
|
||||
"status": "Estado",
|
||||
"status_dashboard": "Tablón de estado",
|
||||
"user_leaderboard": "Clasificación de usuarios",
|
||||
"users": "Usuarios",
|
||||
"users_dashboard": "Tablón de usuarios"
|
||||
}
|
||||
@@ -78,5 +78,7 @@
|
||||
"overview": "Dada la conversación siguiente, proporciona una respuesta adecuada",
|
||||
"instruction": "Proporciona la respuesta del usuario",
|
||||
"response_placeholder": "Escribe tu respuesta aquí..."
|
||||
}
|
||||
},
|
||||
"writing_wrong_langauge_a_b": "Parece que estás escribiendo en {{detected_lang}} pero esto se enviará como {{submit_lang}}.",
|
||||
"submitted_as": "Esto será enviado como {{submit_lang}}"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"accept": "Accept",
|
||||
"content": "To continue using Open Assistant, you have to accept our Terms of Service first.",
|
||||
"decline": "Decline",
|
||||
"title": "Terms of Service for Open Assistant"
|
||||
"accept": "Aceptar",
|
||||
"content": "Para continuar usando Open Assistant, tienes que aceptar nuestros Términos de Servicio primero.",
|
||||
"decline": "Rechazar",
|
||||
"title": "Términos de Servicio para Open Assistant"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"about": "Honi buruz",
|
||||
"account_settings": "Kontua",
|
||||
"admin_dashboard": "Administrazio panela",
|
||||
"connect": "Konektatu",
|
||||
"conversational": "Elkarrizketarako AA guztientzat.",
|
||||
"copied": "Kopiatua",
|
||||
"dark_mode": "Ilun modua",
|
||||
"dashboard": "Aginte-panela",
|
||||
"dashboard_home": "Aginte-panelaren hasiera",
|
||||
"delete": "Ezabatu",
|
||||
"discord": "Discord",
|
||||
"docs": "Dokumentazioa",
|
||||
"github": "GitHub",
|
||||
"leaderboard": "Sailkapena",
|
||||
"legal": "Legea",
|
||||
"light_mode": "Argi modua",
|
||||
"loading": "Kargatzen...",
|
||||
"messages": "Mezuak",
|
||||
"messages_dashboard": "Mezuen panela",
|
||||
"more_information": "Informazio gehiago",
|
||||
"no": "Ez",
|
||||
"parameters": "Parametroak",
|
||||
"privacy_policy": "Pribatutasun politika",
|
||||
"report_a_bug": "Errorea jakinarazi",
|
||||
"sign_in": "Hasi saioa",
|
||||
"sign_out": "Amaitu saioa",
|
||||
"status": "Egoera",
|
||||
"status_dashboard": "Egoera panela",
|
||||
"success": "Arrakasta",
|
||||
"terms_of_service": "Zerbitzuaren baldintzak",
|
||||
"title": "Open Assistant",
|
||||
"trollboard": "Troll sailkapena",
|
||||
"user_leaderboard": "Erabiltzaileen sailkapena",
|
||||
"users": "Erabiltzaileak",
|
||||
"users_dashboard": "Erabiltzaileen panela",
|
||||
"yes": "Bai"
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"create": "Sortu",
|
||||
"dashboard": "Aginte-panela",
|
||||
"evaluate": "Ebaluatu",
|
||||
"go": "Joan",
|
||||
"grab_a_task": "Hartu zeregin bat!",
|
||||
"label": "Etiketa"
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"blurb": "Uste dugu iraultza bat sortu dezakegula.",
|
||||
"blurb1": "Stable Diffusion-ek munduari artea eta irudiak modu berrietan egiten lagundu zion modu berean, mundua hobetu nahi dugu elkarrizketarako AA harrigarria eskainiz.",
|
||||
"description": "Elkarrizketarako AA guztientzat. Iturburu irekiko proiektua GPT LLM txat bat sortzeko, LAIONek eta mundu osoko laguntzaileek zuzendua",
|
||||
"faq_items": {
|
||||
"q0": "Noraino doa proiektu hau?",
|
||||
"a0": "Garapenaren hasierako faseetan gaude, RLHF hizkuntza-eredu handiei aplikatzeko ezarritako ikerketatik lanean.",
|
||||
"q1": "Nor dago Open Assistant-en atzean?",
|
||||
"a1": "Open Assistant LAIONek eta mundu osoko teknologia hori guztiongana heltzeko interesa duten pertsonek antolatutako proiektua da.",
|
||||
"q2": "Zer lizentzia erabiltzen du Open Assistant-ek?",
|
||||
"a2": "Kodea eta ereduak Apache 2.0 lizentziapean daude lizentziatuta.",
|
||||
"q3": "Entrenamendurako datuak ere kaleratuko al dira?",
|
||||
"a3": "Bai, CC BY 4.0 lizentziarekin.",
|
||||
"q4": "Open Assistant doakoa izango al da?",
|
||||
"a4": "Bai, Open Assistant librea izango da erabiltzeko eta aldatzeko.",
|
||||
"q5": "Zer hardware beharko da ereduak exekutatzeko?",
|
||||
"a5": "Kontsumitzaileen hardwarean exekutatu ahal izango diren bertsioak egongo dira."
|
||||
},
|
||||
"faq_title": "Ohiko galderak",
|
||||
"join_us_description": "Kode irekiko proiektu guztiak zu bezalako jendearekin hasten dira. Kode irekia elkarlanean aritzen bagara elkarrekin gure ezagutzak eta teknologiak gizateriaren mesederako munduari opari ditzakegula ustea da. Barruan al zaude? Aurki gaitzazu hemen:",
|
||||
"join_us_title": "Bat egin gurekin",
|
||||
"subtitle": "Elkarrizketarako AA guztientzat."
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"fails_task.question": "Erantzun txarra al da, sarrerako zereginaren erantzun gisa?",
|
||||
"hate_speech": "Gorroto hizkera",
|
||||
"hate_speech.explanation": "Edukia abusua edo mehatxagarria da eta babestutako ezaugarri baten aurkako aurreiritziak adierazten ditu. Aurreiritziak arrazonaketan oinarritzen ez diren aurreko ikuspegiak dira. Babestutako ezaugarrien artean generoa, etnia, erlijioa, sexu-orientazioa eta antzeko ezaugarriak daude.",
|
||||
"label_highlighted_flag_instruction": "Hautatu nabarmendutako mezuari dagokion edozein:",
|
||||
"label_highlighted_likert_instruction": "Baloratu nabarmendutako mezua:",
|
||||
"label_highlighted_yes_no_instruction": "Erantzun nabarmendutako mezuari buruzko galdera hau(ek):",
|
||||
"label_message_flag_instruction": "Hautatu mezuari dagokion edozein:",
|
||||
"label_message_likert_instruction": "Baloratu mezua:",
|
||||
"label_message_yes_no_instruction": "Erantzun mezuari buruzko galdera hau(ek):",
|
||||
"lang_mismatch": "Hizkuntza okerra",
|
||||
"lang_mismatch.explanation": "Ez dago une honetan hautatutako hizkuntzan idatzita.",
|
||||
"moral_judgement": "Moraltasuna epaitzen du",
|
||||
"moral_judgement.explanation": "Epaiketa morala adierazten du.",
|
||||
"not_appropriate": "Desegokia",
|
||||
"not_appropriate.explanation": "Desegokia bezero laguntzaile batentzat.",
|
||||
"pii": "PII dauka",
|
||||
"pii.explanation": "Pertsonalki identifikatzeko informazioa dauka. Adibidez, harremanetarako datu pertsonalak, lizentzia eta beste identitate-zenbaki batzuk eta banku-datuak daude.",
|
||||
"political_content": "Politikoa",
|
||||
"political_content.explanation": "Iritzi politikoak adierazten ditu.",
|
||||
"sexual_content": "Eduki sexuala",
|
||||
"sexual_content.explanation": "Eduki sexuala dauka.",
|
||||
"spam.question": "Mezua spama al da?"
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"accepted": "↪ Onartua",
|
||||
"accepted_prompts": "Onartutako instrukzioak",
|
||||
"daily": "Egunekoa",
|
||||
"day": "Eguna",
|
||||
"good_rankings": "Sailkapen onak",
|
||||
"label": "Etiketak",
|
||||
"labels_full": "Etiketak (osoa)",
|
||||
"labels_simple": "Etiketak (sinplea)",
|
||||
"last_updated_at": "Azken eguneratzea: {{val, datetime}}",
|
||||
"leaderboard": "Sailkapena",
|
||||
"month": "Hilabetea",
|
||||
"monthly": "Hilekoa",
|
||||
"next": "Hurrengoa",
|
||||
"overall": "Orokorra",
|
||||
"previous": "Aurrekoa",
|
||||
"prompt": "Eskaerak",
|
||||
"rank": "Postua",
|
||||
"rankings": "Sailkapenak",
|
||||
"replies_assistant": "Erantzunak asistente bezala",
|
||||
"replies_prompter": "Erantzunak erabiltzaile bezala",
|
||||
"reply": "Erantzunak",
|
||||
"reply_ranked_1": "Lehen postuan sailkatutako erantzunak",
|
||||
"score": "Puntuazioa",
|
||||
"top_5_contributors_today": "Gaurko 5 laguntzaile nagusiak",
|
||||
"total": "Guztira",
|
||||
"user": "Erabiltzailea",
|
||||
"view_all": "Ikusi guztiak",
|
||||
"week": "Astea",
|
||||
"weekly": "Astekoa",
|
||||
"your_account": "Zure kontua",
|
||||
"your_stats": "Zure estatistikak"
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"copy_message_id": "Kopiatu mezuaren IDa",
|
||||
"copy_message_link": "Kopiatu mezuaren esteka",
|
||||
"label_action": "Etiketatu",
|
||||
"label_title": "Etiketa",
|
||||
"message": "Mezua",
|
||||
"message_deleted": "Mezua ezabatu da",
|
||||
"open_new_tab_action": "Ireki orri berri batean",
|
||||
"parent": "Guraso",
|
||||
"reactions": "Erreakzioak",
|
||||
"recent_messages": "Azken mezuak",
|
||||
"report_action": "Salatu",
|
||||
"report_placeholder": "Zergatik berrikusi behar da mezu hau?",
|
||||
"report_title": "Txostena",
|
||||
"send_report": "Bidali",
|
||||
"stop_tree": "Gelditu zuhaitza",
|
||||
"submit_labels": "Bidali",
|
||||
"tree_stopped": "Zuhaitza gelditu da {{id}}",
|
||||
"view_user": "Ikusi erabiltzailea",
|
||||
"your_recent_messages": "Zure azken mezuak"
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"available_task_count": "{{count}} zeregin eskuragarri",
|
||||
"classify_assistant_reply": {
|
||||
"desc": "Jarri etiketak instrukzio bati.",
|
||||
"label": "Sailkatu asistentearen erantzuna",
|
||||
"overview": "Irakurri hurrengo elkarrizketa eta erantzun elkarrizketako azken erantzunari buruzko galdera."
|
||||
},
|
||||
"classify_initial_prompt": {
|
||||
"desc": "Jarri etiketak instrukzio bati.",
|
||||
"label": "Sailkatu hasierako instrukzioa",
|
||||
"overview": "Irakurri hurrengo instrukzioa eta erantzun horri buruzko galdera."
|
||||
},
|
||||
"classify_prompter_reply": {
|
||||
"desc": "Jarri etiketak instrukzio bati.",
|
||||
"label": "Sailkatu galdetzailearen erantzuna",
|
||||
"overview": "Irakurri hurrengo elkarrizketa eta erantzun elkarrizketako azken erantzunari buruzko galdera."
|
||||
},
|
||||
"create_initial_prompt": {
|
||||
"desc": "Idatzi hasierako instrukzioak Open Assistant-i era askotako mezuak erantzuten saia dadin. (sartu loterian)",
|
||||
"instruction": "Eman hasierako instrukzioak",
|
||||
"label": "Sortu hasierako instrukzioak",
|
||||
"overview": "Sortu hasierako mezu bat asistenteari bidaltzeko",
|
||||
"response_placeholder": "Idatzi zure instrukzioak hemen..."
|
||||
},
|
||||
"default": {
|
||||
"unchanged_message": "Ziur al zaude jarraitu nahi duzula?",
|
||||
"unchanged_title": "Ez dago aldaketarik"
|
||||
},
|
||||
"label_assistant_reply": {
|
||||
"desc": "Jarri etiketak instrukzio bati.",
|
||||
"label": "Etiketatu asistentearen erantzuna",
|
||||
"overview": "Ondoko elkarrizketa ikusita, eman etiketak hurrengo instrukzioari."
|
||||
},
|
||||
"label_initial_prompt": {
|
||||
"desc": "Jarri etiketak instrukzio bati.",
|
||||
"label": "Etiketatu hasierako instrukzioak",
|
||||
"overview": "Jarri etiketak hurrengo instrukzioari"
|
||||
},
|
||||
"label_prompter_reply": {
|
||||
"desc": "Jarri etiketak instrukzio bati.",
|
||||
"label": "Etiketatu galdetzailearen erantzuna",
|
||||
"overview": "Ondoko elkarrizketa ikusita, eman etiketak hurrengo instrukzioari."
|
||||
},
|
||||
"random": {
|
||||
"desc": "Lagun iezaguzu Open Assistant hobetzen ausazko zeregin bat hasiz.",
|
||||
"label": "Zorionekoa sentitzen naiz"
|
||||
},
|
||||
"rank_assistant_replies": {
|
||||
"desc": "Open Assistant-ek emandako erantzunak sailkatu zehaztasun eta irakurgarritasunaren arabera.",
|
||||
"label": "Asistentearen erantzunak sailkatu",
|
||||
"overview": "Ondoko asistentearen erantzunak ikusita, ordenatu itzazu onenetik txarrenera, onena lehena izanik, txarrena azkena.",
|
||||
"unchanged_message": "Ez duzu galderen ordena aldatu. Ziur al zaude jarraitu nahi duzula?",
|
||||
"unchanged_title": "Ordena aldatu gabe"
|
||||
},
|
||||
"rank_initial_prompts": {
|
||||
"desc": "Open Assistant-ek emandako erantzunak sailkatu zehaztasun eta irakurgarritasunaren arabera.",
|
||||
"label": "Hasierako instrukzioak sailkatu",
|
||||
"overview": "Hasierako eskaera hauek ikusita, ordenatu itzazu onenetik txarrenera, onena lehena izanik, txarrena azkena izanik.",
|
||||
"unchanged_message": "Ez duzu galderen ordena aldatu. Ziur al zaude jarraitu nahi duzula?",
|
||||
"unchanged_title": "Ordena aldatu gabe"
|
||||
},
|
||||
"rank_user_replies": {
|
||||
"desc": "Lagundu Open Assistant-ek beste erabiltzaile batzuekin dituen elkarrizketetako erantzunak hobetzen.",
|
||||
"label": "Erabiltzaileen erantzunak sailkatu",
|
||||
"overview": "Ondoko erabiltzaileen erantzunak ikusita, ordenatu itzazu onenetik txarrenera, onena lehena izanik, txarrena azkena izatea.",
|
||||
"unchanged_message": "Ez duzu galderen ordena aldatu. Ziur al zaude jarraitu nahi duzula?",
|
||||
"unchanged_title": "Ordena aldatu gabe"
|
||||
},
|
||||
"reply_as_assistant": {
|
||||
"desc": "Lagundu Open Assistant-ek beste erabiltzaile batzuekin dituen elkarrizketetako erantzunak hobetzen.",
|
||||
"label": "Erantzun asistente gisa",
|
||||
"overview": "Ondoko elkarrizketa ikusita, eman erantzun egokia",
|
||||
"response_placeholder": "Idatzi zure erantzuna hemen..."
|
||||
},
|
||||
"reply_as_user": {
|
||||
"desc": "Txateatu Open Assistant-ekin eta lagundu bere erantzunak hobetzen harekin elkarreraginean.",
|
||||
"instruction": "Eman erabiltzailearen erantzuna",
|
||||
"label": "Erantzun erabiltzaile gisa",
|
||||
"overview": "Ondoko elkarrizketa ikusita, eman erantzun egokia",
|
||||
"response_placeholder": "Idatzi zure erantzuna hemen..."
|
||||
},
|
||||
"submitted_as": "Hau {{submit_lang}} hizkuntzan bidaliko da",
|
||||
"writing_wrong_langauge_a_b": "Ematen du {{detected_lang}} hizkuntzan idazten ari zarela, baina hau {{submit_lang}} hizkuntzan bidaliko da."
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"accept": "Onartu",
|
||||
"content": "Open Assistant erabiltzen jarraitzeko, gure zerbitzu-baldintzak onartu behar dituzu lehenik.",
|
||||
"decline": "Ukatu",
|
||||
"title": "Open Assistant-en zerbitzu-baldintzak"
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"about": "درباره",
|
||||
"account_settings": "حساب کاربری",
|
||||
"admin_dashboard": "داشبورد مدیر",
|
||||
"connect": "اتصال",
|
||||
"conversational": "هوش مصنوعی مکالمه برای همه.",
|
||||
"copied": "کپی شد",
|
||||
"dark_mode": "حالت تاریک",
|
||||
"dashboard_home": "خانه داشبورد",
|
||||
"dashboard": "داشبورد",
|
||||
"delete": "حذف",
|
||||
"discord": "دیسکورد",
|
||||
"docs": "مستندات",
|
||||
"github": "گیتهاب",
|
||||
"leaderboard": "رده بندی",
|
||||
"legal": "قانونی",
|
||||
"light_mode": "حالت روشن",
|
||||
"loading": "در حال بارگذاری...",
|
||||
"messages_dashboard": "داشبورد پیامها",
|
||||
"messages": "پیامها",
|
||||
"more_information": "اطلاعات بیشتر",
|
||||
"no": "نه",
|
||||
"parameters": "مولفه ها",
|
||||
"privacy_policy": "حریم خصوصی",
|
||||
"report_a_bug": "گزارش اشکال",
|
||||
"sign_in": "ورود",
|
||||
"sign_out": "خروج",
|
||||
"status": "وضعیت",
|
||||
"status_dashboard": "داشبورد وضعیت",
|
||||
"success": "موفقیت",
|
||||
"terms_of_service": "شرایط استفاده",
|
||||
"title": "دستیار باز",
|
||||
"trollboard": "Trollboard",
|
||||
"user_leaderboard": "رده بندی کاربر",
|
||||
"users_dashboard": "داشبورد کاربران",
|
||||
"users": "کاربر",
|
||||
"yes": "بله"
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user