Merging from main

This commit is contained in:
Keith Stevens
2023-01-28 18:05:56 +09:00
188 changed files with 5516 additions and 2442 deletions
+1 -1
View File
@@ -19,7 +19,7 @@ devcontainers in this repo.
pre-commit run --all-files
```
A successfull run should look something like this:
A successful run should look something like this:
```
@andrewm4894 ➜ /workspaces/Open-Assistant (devcontainer-improvements) $ pre-commit run --all-files
+1
View File
@@ -1 +1,2 @@
* text=auto eol=lf
*.ipynb linguist-documentation
+6 -1
View File
@@ -76,7 +76,12 @@ repos:
rev: v2.7.1
hooks:
- id: prettier
args: [--prose-wrap=always, --write]
args:
[
--prose-wrap=always,
--write,
--ignore-path=./website/.prettierignore,
]
- repo: local
hooks:
+6 -6
View File
@@ -50,12 +50,12 @@ contributions smoothly we recommend the following:
[Here](https://github.com/LAION-AI/Open-Assistant/pull/658) is an example PR
for this project to illustrate this flow.
1. If you're lucky, we can merge your change into `main` without any problems.
If there's changes to files you're working on, resolve them by:
1. First try rebase as suggested
[in these instructions](https://timwise.co.uk/2019/10/14/merge-vs-rebase/#should-you-rebase).
1. If rebase feels too painful, merge as suggested
[in these instructions](https://timwise.co.uk/2019/10/14/merge-vs-rebase/#should-you-merge).
1. Once you've resolved any conflicts, finish the review and
If there's changes to files you're working on, resolve them by :
1. First try rebase as suggested
[in these instructions](https://timwise.co.uk/2019/10/14/merge-vs-rebase/#should-you-rebase).
1. If rebase feels too painful, merge as suggested
[in these instructions](https://timwise.co.uk/2019/10/14/merge-vs-rebase/#should-you-merge).
1. Once you've resolved conflicts (if any), finish the review and
[squash and merge](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squash-and-merge-your-commits)
your PR (when squashing try to clean up or update the individual commit
messages to be one sensible single one).
@@ -0,0 +1,34 @@
"""add message_id to message_reaction
Revision ID: 8ba17b5f467a
Revises: 160ac010efcc
Create Date: 2023-01-24 11:34:42.167575
"""
import sqlalchemy as sa
import sqlmodel
from alembic import op
# revision identifiers, used by Alembic.
revision = "8ba17b5f467a"
down_revision = "160ac010efcc"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("message_reaction", sa.Column("message_id", sqlmodel.sql.sqltypes.GUID(), nullable=True))
op.create_index(op.f("ix_message_reaction_message_id"), "message_reaction", ["message_id"], unique=False)
op.add_column("text_labels", sa.Column("task_id", sqlmodel.sql.sqltypes.GUID(), nullable=True))
op.create_index(op.f("ix_text_labels_task_id"), "text_labels", ["task_id"], unique=False)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f("ix_text_labels_task_id"), table_name="text_labels")
op.drop_column("text_labels", "task_id")
op.drop_index(op.f("ix_message_reaction_message_id"), table_name="message_reaction")
op.drop_column("message_reaction", "message_id")
# ### end Alembic commands ###
@@ -0,0 +1,44 @@
"""add message_emoji
Revision ID: 40ed93df0ed5
Revises: 8ba17b5f467a
Create Date: 2023-01-24 22:56:28.229408
"""
import sqlalchemy as sa
import sqlmodel
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "40ed93df0ed5"
down_revision = "8ba17b5f467a"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"message_emoji",
sa.Column("message_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column(
"created_date", sa.DateTime(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False
),
sa.Column("emoji", sqlmodel.sql.sqltypes.AutoString(length=128), nullable=False),
sa.ForeignKeyConstraint(["message_id"], ["message.id"], ondelete="CASCADE"),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("message_id", "user_id", "emoji"),
)
op.create_index("ix_message_emoji__user_id__message_id", "message_emoji", ["user_id", "message_id"], unique=False)
op.add_column("message", sa.Column("emojis", postgresql.JSONB(astext_type=sa.Text()), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("message", "emojis")
op.drop_index("ix_message_emoji__user_id__message_id", table_name="message_emoji")
op.drop_table("message_emoji")
# ### end Alembic commands ###
@@ -0,0 +1,26 @@
"""add task created date index
Revision ID: c84fcd6900dc
Revises: 40ed93df0ed5
Create Date: 2023-01-26 18:35:43.061589
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "c84fcd6900dc"
down_revision = "40ed93df0ed5"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_index(op.f("ix_task_created_date"), "task", ["created_date"], unique=False)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f("ix_task_created_date"), table_name="task")
# ### end Alembic commands ###
@@ -0,0 +1,29 @@
"""add user.show_on_leaderboard
Revision ID: f856bf19d32b
Revises: c84fcd6900dc
Create Date: 2023-01-27 20:13:56.533374
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "f856bf19d32b"
down_revision = "c84fcd6900dc"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"user", sa.Column("show_on_leaderboard", sa.Boolean(), server_default=sa.text("true"), nullable=False)
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("user", "show_on_leaderboard")
# ### end Alembic commands ###
+49
View File
@@ -273,6 +273,38 @@ def get_openapi_schema():
return json.dumps(app.openapi())
def export_ready_trees(file: Optional[str] = None, use_compression: bool = False):
try:
with Session(engine) as db:
api_client = api_auth(settings.OFFICIAL_WEB_API_KEY, db=db)
dummy_user = protocol_schema.User(id="__dummy_user__", display_name="Dummy User", auth_method="local")
ur = UserRepository(db=db, api_client=api_client)
tr = TaskRepository(db=db, api_client=api_client, client_user=dummy_user, user_repository=ur)
pr = PromptRepository(
db=db, api_client=api_client, client_user=dummy_user, user_repository=ur, task_repository=tr
)
tm = TreeManager(db, pr)
tm.export_all_ready_trees(file, use_compression=use_compression)
except Exception:
logger.exception("Error exporting trees.")
def retry_scoring_failed_message_trees():
try:
logger.info("TreeManager.retry_scoring_failed_message_trees()")
with Session(engine) as db:
api_client = api_auth(settings.OFFICIAL_WEB_API_KEY, db=db)
pr = PromptRepository(db=db, api_client=api_client)
tm = TreeManager(db, pr)
tm.retry_scoring_failed_message_trees()
except Exception:
logger.exception("TreeManager.retry_scoring_failed_message_trees() failed.")
def main():
# Importing here so we don't import packages unnecessarily if we're
# importing main as a module.
@@ -289,11 +321,28 @@ def main():
)
parser.add_argument("--host", help="The host to run the server", default="0.0.0.0")
parser.add_argument("--port", help="The port to run the server", default=8080)
parser.add_argument(
"--export", help="Export all trees which are ready for exporting.", action=argparse.BooleanOptionalAction
)
parser.add_argument(
"--export-file",
help="Name of file to export trees to. If not provided when exporting, output will be send to STDOUT",
)
parser.add_argument(
"--retry-scoring",
help="Retry scoring failed message trees",
action=argparse.BooleanOptionalAction,
)
args = parser.parse_args()
if args.print_openapi_schema:
print(get_openapi_schema())
elif args.export:
use_compression: bool = ".gz" in args.export_file
export_ready_trees(file=args.export_file, use_compression=use_compression)
elif args.retry_scoring:
retry_scoring_failed_message_trees()
else:
uvicorn.run(app, host=args.host, port=args.port)
+29 -5
View File
@@ -1,6 +1,6 @@
from http import HTTPStatus
from secrets import token_hex
from typing import Generator
from typing import Generator, NamedTuple
from fastapi import Depends, Request, Response, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
@@ -19,22 +19,46 @@ def get_db() -> Generator:
yield db
api_key_query = APIKeyQuery(name="api_key", auto_error=False)
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
api_key_query = APIKeyQuery(name="api_key", scheme_name="api-key", auto_error=False)
api_key_header = APIKeyHeader(name="X-API-Key", scheme_name="api-key", auto_error=False)
oasst_user_query = APIKeyQuery(name="oasst_user", scheme_name="oasst-user", auto_error=False)
oasst_user_header = APIKeyHeader(name="x-oasst-user", scheme_name="oasst-user", auto_error=False)
bearer_token = HTTPBearer(auto_error=False)
async def get_api_key(
def get_api_key(
api_key_query: str = Security(api_key_query),
api_key_header: str = Security(api_key_header),
):
) -> str:
if api_key_query:
return api_key_query
else:
return api_key_header
class FrontendUserId(NamedTuple):
auth_method: str
username: str
def get_frontend_user_id(
user_query: str = Security(oasst_user_query),
user_header: str = Security(oasst_user_header),
) -> FrontendUserId:
def split_user(v: str) -> tuple[str, str]:
if type(v) is str:
v = v.split(":", maxsplit=1)
if len(v) == 2:
return FrontendUserId(auth_method=v[0], username=v[1])
return FrontendUserId(auth_method=None, username=None)
if user_query:
return split_user(user_query)
else:
return split_user(user_header)
def create_api_client(
*,
session: Session,
+11 -5
View File
@@ -70,13 +70,14 @@ def query_frontend_user_messages(
only_roots: bool = False,
desc: bool = True,
include_deleted: bool = False,
lang: Optional[str] = None,
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Query frontend user messages.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, auth_method=auth_method, username=username)
messages = pr.query_messages_ordered_by_created_date(
auth_method=auth_method,
username=username,
@@ -87,6 +88,7 @@ def query_frontend_user_messages(
lte_created_date=end_date,
only_roots=only_roots,
deleted=None if include_deleted else False,
lang=lang,
)
return utils.prepare_message_list(messages)
@@ -95,24 +97,28 @@ def query_frontend_user_messages(
def query_frontend_user_messages_cursor(
auth_method: str,
username: str,
lt: Optional[str] = None,
gt: Optional[str] = None,
before: Optional[str] = None,
after: Optional[str] = None,
only_roots: Optional[bool] = False,
include_deleted: Optional[bool] = False,
max_count: Optional[int] = Query(10, gt=0, le=1000),
desc: Optional[bool] = False,
lang: Optional[str] = None,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
return get_messages_cursor(
lt=lt,
gt=gt,
before=before,
after=after,
auth_method=auth_method,
username=username,
only_roots=only_roots,
include_deleted=include_deleted,
max_count=max_count,
desc=desc,
lang=lang,
frontend_user=frontend_user,
api_client=api_client,
db=db,
)
+112 -32
View File
@@ -7,6 +7,7 @@ from oasst_backend.api import deps
from oasst_backend.api.v1 import utils
from oasst_backend.models import ApiClient
from oasst_backend.prompt_repository import PromptRepository
from oasst_backend.utils.database_utils import CommitMode, managed_tx_function
from oasst_shared.exceptions.oasst_api_error import OasstError, OasstErrorCode
from oasst_shared.schemas import protocol
from sqlmodel import Session
@@ -17,6 +18,7 @@ router = APIRouter()
@router.get("/", response_model=list[protocol.Message])
def query_messages(
*,
auth_method: Optional[str] = None,
username: Optional[str] = None,
api_client_id: Optional[str] = None,
@@ -26,13 +28,15 @@ def query_messages(
only_roots: Optional[bool] = False,
desc: Optional[bool] = True,
allow_deleted: Optional[bool] = False,
lang: Optional[str] = None,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Query messages.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, auth_method=frontend_user.auth_method, username=frontend_user.username)
messages = pr.query_messages_ordered_by_created_date(
auth_method=auth_method,
username=username,
@@ -43,6 +47,7 @@ def query_messages(
lte_created_date=end_date,
only_roots=only_roots,
deleted=None if allow_deleted else False,
lang=lang,
)
return utils.prepare_message_list(messages)
@@ -50,8 +55,9 @@ def query_messages(
@router.get("/cursor", response_model=protocol.MessagePage)
def get_messages_cursor(
lt: Optional[str] = None,
gt: Optional[str] = None,
*,
before: Optional[str] = None,
after: Optional[str] = None,
user_id: Optional[UUID] = None,
auth_method: Optional[str] = None,
username: Optional[str] = None,
@@ -60,9 +66,13 @@ def get_messages_cursor(
include_deleted: Optional[bool] = False,
max_count: Optional[int] = Query(10, gt=0, le=1000),
desc: Optional[bool] = False,
lang: Optional[str] = None,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
assert max_count is not None
def split_cursor(x: str | None) -> tuple[datetime, UUID]:
if not x:
return None, None
@@ -74,11 +84,21 @@ def get_messages_cursor(
except ValueError:
raise OasstError("Invalid cursor value", OasstErrorCode.INVALID_CURSOR_VALUE)
lte_created_date, lt_id = split_cursor(lt)
gte_created_date, gt_id = split_cursor(gt)
if desc:
gte_created_date, gt_id = split_cursor(before)
lte_created_date, lt_id = split_cursor(after)
query_desc = not (before is not None and not after)
else:
lte_created_date, lt_id = split_cursor(before)
gte_created_date, gt_id = split_cursor(after)
query_desc = before is not None and not after
pr = PromptRepository(db, api_client)
messages = pr.query_messages_ordered_by_created_date(
print(f"{desc=} {query_desc=} {gte_created_date=} {lte_created_date=}")
qry_max_count = max_count + 1 if before is None or after is None else max_count
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
items = pr.query_messages_ordered_by_created_date(
user_id=user_id,
auth_method=auth_method,
username=username,
@@ -89,22 +109,31 @@ def get_messages_cursor(
lt_id=lt_id,
only_roots=only_roots,
deleted=None if include_deleted else False,
desc=desc,
limit=max_count,
desc=query_desc,
limit=qry_max_count,
lang=lang,
)
items = utils.prepare_message_list(messages)
num_rows = len(items)
if qry_max_count > max_count and num_rows == qry_max_count:
assert not (before and after)
items = items[:-1]
if desc != query_desc:
items.reverse()
items = utils.prepare_message_list(items)
n, p = None, None
if len(items) > 0:
if len(items) == max_count or gte_created_date:
if (num_rows > max_count and before) or after:
p = str(items[0].id) + "$" + items[0].created_date.isoformat()
if len(items) == max_count or lte_created_date:
if num_rows > max_count or before:
n = str(items[-1].id) + "$" + items[-1].created_date.isoformat()
else:
if gte_created_date:
p = gte_created_date.isoformat()
if lte_created_date:
n = lte_created_date.isoformat()
if after:
p = lte_created_date.isoformat() if desc else gte_created_date.isoformat()
if before:
n = gte_created_date.isoformat() if desc else lte_created_date.isoformat()
order = "desc" if desc else "asc"
return protocol.MessagePage(prev=p, next=n, sort_key="created_date", order=order, items=items)
@@ -112,37 +141,49 @@ def get_messages_cursor(
@router.get("/{message_id}", response_model=protocol.Message)
def get_message(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get a message by its internal ID.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
message = pr.fetch_message(message_id)
return utils.prepare_message(message)
@router.get("/{message_id}/conversation", response_model=protocol.Conversation)
def get_conv(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get a conversation from the tree root and up to the message with given internal ID.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
messages = pr.fetch_message_conversation(message_id)
return utils.prepare_conversation(messages)
@router.get("/{message_id}/tree", response_model=protocol.MessageTree)
def get_tree(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get all messages belonging to the same message tree.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
message = pr.fetch_message(message_id)
tree = pr.fetch_message_tree(message.message_tree_id, reviewed=False)
return utils.prepare_tree(tree, message.message_tree_id)
@@ -150,24 +191,32 @@ def get_tree(
@router.get("/{message_id}/children", response_model=list[protocol.Message])
def get_children(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get all messages belonging to the same message tree.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
messages = pr.fetch_message_children(message_id)
return utils.prepare_message_list(messages)
@router.get("/{message_id}/descendants", response_model=protocol.MessageTree)
def get_descendants(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get a subtree which starts with this message.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
message = pr.fetch_message(message_id)
descendants = pr.fetch_message_descendants(message)
return utils.prepare_tree(descendants, message.id)
@@ -175,12 +224,16 @@ def get_descendants(
@router.get("/{message_id}/longest_conversation_in_tree", response_model=protocol.Conversation)
def get_longest_conv(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get the longest conversation from the tree of the message.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
message = pr.fetch_message(message_id)
conv = pr.fetch_longest_conversation(message.message_tree_id)
return utils.prepare_conversation(conv)
@@ -188,12 +241,16 @@ def get_longest_conv(
@router.get("/{message_id}/max_children_in_tree", response_model=protocol.MessageTree)
def get_max_children(
message_id: UUID, api_client: ApiClient = Depends(deps.get_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Get message with the most children from the tree of the provided message.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
message = pr.fetch_message(message_id)
message, children = pr.fetch_message_with_max_children(message.message_tree_id)
return utils.prepare_tree([message, *children], message.id)
@@ -201,7 +258,30 @@ def get_max_children(
@router.delete("/{message_id}", status_code=HTTP_204_NO_CONTENT)
def mark_message_deleted(
message_id: UUID, api_client: ApiClient = Depends(deps.get_trusted_api_client), db: Session = Depends(deps.get_db)
*,
message_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_trusted_api_client),
db: Session = Depends(deps.get_db),
):
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
pr.mark_messages_deleted(message_id)
@router.post("/{message_id}/emoji", response_model=protocol.Message)
def post_message_emoji(
*,
message_id: UUID,
request: protocol.MessageEmojiRequest,
api_client: ApiClient = Depends(deps.get_api_client),
) -> protocol.Message:
"""
Toggle, add or remove message emoji.
"""
@managed_tx_function(CommitMode.COMMIT)
def emoji_tx(session: deps.Session):
pr = PromptRepository(session, api_client, client_user=request.user)
return pr.handle_message_emoji(message_id, request.op, request.emoji)
return utils.prepare_message(emoji_tx())
+4 -2
View File
@@ -77,6 +77,7 @@ def tasks_acknowledge(
*,
db: Session = Depends(deps.get_db),
api_key: APIKey = Depends(deps.get_api_key),
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
task_id: UUID,
ack_request: protocol_schema.TaskAck,
) -> None:
@@ -87,7 +88,7 @@ def tasks_acknowledge(
api_client = deps.api_auth(api_key, db)
try:
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
# here we store the message id in the database for the task
logger.info(f"Frontend acknowledges task {task_id=}, {ack_request=}.")
@@ -105,6 +106,7 @@ def tasks_acknowledge_failure(
*,
db: Session = Depends(deps.get_db),
api_key: APIKey = Depends(deps.get_api_key),
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
task_id: UUID,
nack_request: protocol_schema.TaskNAck,
) -> None:
@@ -115,7 +117,7 @@ def tasks_acknowledge_failure(
try:
logger.info(f"Frontend reports failure to implement task {task_id=}, {nack_request=}.")
api_client = deps.api_auth(api_key, db)
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
pr.task_repository.acknowledge_task_failure(task_id)
except (KeyError, RuntimeError):
logger.exception("Failed to not acknowledge task.")
+37 -8
View File
@@ -3,9 +3,11 @@ from fastapi.security.api_key import APIKey
from loguru import logger
from oasst_backend.api import deps
from oasst_backend.prompt_repository import PromptRepository
from oasst_backend.schemas.text_labels import LabelOption, ValidLabelsResponse
from oasst_backend.schemas.text_labels import LabelDescription, ValidLabelsResponse
from oasst_backend.utils.database_utils import CommitMode, managed_tx_function
from oasst_shared.exceptions import OasstError
from oasst_shared.schemas import protocol as protocol_schema
from sqlmodel import Session
from oasst_shared.schemas.protocol import TextLabel
from starlette.status import HTTP_204_NO_CONTENT, HTTP_400_BAD_REQUEST
router = APIRouter()
@@ -14,20 +16,25 @@ router = APIRouter()
@router.post("/", status_code=HTTP_204_NO_CONTENT)
def label_text(
*,
db: Session = Depends(deps.get_db),
api_key: APIKey = Depends(deps.get_api_key),
text_labels: protocol_schema.TextLabels,
) -> None:
"""
Label a piece of text.
"""
api_client = deps.api_auth(api_key, db)
@managed_tx_function(CommitMode.COMMIT)
def store_text_labels(session: deps.Session):
api_client = deps.api_auth(api_key, session)
pr = PromptRepository(session, api_client, client_user=text_labels.user)
pr.store_text_labels(text_labels)
try:
logger.info(f"Labeling text {text_labels=}.")
pr = PromptRepository(db, api_client, client_user=text_labels.user)
pr.store_text_labels(text_labels)
store_text_labels()
except OasstError:
raise
except Exception:
logger.exception("Failed to store label.")
raise HTTPException(
@@ -39,7 +46,29 @@ def label_text(
def get_valid_lables() -> ValidLabelsResponse:
return ValidLabelsResponse(
valid_labels=[
LabelOption(name=l.value, display_text=l.display_text, help_text=l.help_text)
for l in protocol_schema.TextLabel
LabelDescription(name=l.value, widget=l.widget.value, display_text=l.display_text, help_text=l.help_text)
for l in TextLabel
]
)
@router.get("/report_labels")
def get_report_lables() -> ValidLabelsResponse:
report_labels = [
TextLabel.spam,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
TextLabel.moral_judgement,
TextLabel.political_content,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.quality,
]
return ValidLabelsResponse(
valid_labels=[
LabelDescription(name=l.value, widget=l.widget.value, display_text=l.display_text, help_text=l.help_text)
for l in report_labels
]
)
+37 -20
View File
@@ -28,6 +28,7 @@ def get_users_ordered_by_username(
search_text: Optional[str] = None,
auth_method: Optional[str] = None,
max_count: Optional[int] = Query(100, gt=0, le=10000),
desc: Optional[bool] = False,
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
@@ -41,6 +42,7 @@ def get_users_ordered_by_username(
auth_method=auth_method,
search_text=search_text,
limit=max_count,
desc=desc,
)
return [u.to_protocol_frontend_user() for u in users]
@@ -55,6 +57,7 @@ def get_users_ordered_by_display_name(
auth_method: Optional[str] = None,
search_text: Optional[str] = None,
max_count: Optional[int] = Query(100, gt=0, le=10000),
desc: Optional[bool] = False,
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
@@ -68,14 +71,15 @@ def get_users_ordered_by_display_name(
auth_method=auth_method,
search_text=search_text,
limit=max_count,
desc=desc,
)
return [u.to_protocol_frontend_user() for u in users]
@router.get("/cursor", response_model=protocol.FrontEndUserPage)
def get_users_cursor(
lt: Optional[str] = None,
gt: Optional[str] = None,
before: Optional[str] = None,
after: Optional[str] = None,
sort_key: Optional[str] = Query("username", max_length=32),
max_count: Optional[int] = Query(100, gt=0, le=10000),
api_client_id: Optional[UUID] = None,
@@ -95,7 +99,8 @@ def get_users_cursor(
return x, None
items: list[protocol.FrontEndUser]
qry_max_count = max_count + 1 if lt is None or gt is None else max_count
qry_max_count = max_count + 1 if before is None or after is None else max_count
desc = before is not None and not after
def get_next_prev(num_rows: int, lt: str | None, gt: str | None, key_fn: Callable[[protocol.FrontEndUser], str]):
p, n = None, None
@@ -114,17 +119,16 @@ def get_users_cursor(
def remove_extra_item(items: list[protocol.FrontEndUser], lt: str | None, gt: str | None):
num_rows = len(items)
if qry_max_count > max_count and num_rows == qry_max_count:
assert not (lt and gt)
if lt:
items = items[1:]
else:
items = items[:-1]
assert not (lt is not None and gt is not None)
items = items[:-1]
if desc:
items.reverse()
return items, num_rows
n, p = None, None
if sort_key == "username":
lte_username, lt_id = split_cursor(lt)
gte_username, gt_id = split_cursor(gt)
lte_username, lt_id = split_cursor(before)
gte_username, gt_id = split_cursor(after)
items = get_users_ordered_by_username(
api_client_id=api_client_id,
gte_username=gte_username,
@@ -134,6 +138,7 @@ def get_users_cursor(
auth_method=auth_method,
search_text=search_text,
max_count=qry_max_count,
desc=desc,
api_client=api_client,
db=db,
)
@@ -141,8 +146,8 @@ def get_users_cursor(
p, n = get_next_prev(num_rows, lte_username, gte_username, lambda x: x.id)
elif sort_key == "display_name":
lte_display_name, lt_id = split_cursor(lt)
gte_display_name, gt_id = split_cursor(gt)
lte_display_name, lt_id = split_cursor(before)
gte_display_name, gt_id = split_cursor(after)
items = get_users_ordered_by_display_name(
api_client_id=api_client_id,
gte_display_name=gte_display_name,
@@ -152,6 +157,7 @@ def get_users_cursor(
auth_method=auth_method,
search_text=search_text,
max_count=qry_max_count,
desc=desc,
api_client=api_client,
db=db,
)
@@ -184,6 +190,7 @@ def update_user(
user_id: UUID,
enabled: Optional[bool] = None,
notes: Optional[str] = None,
show_on_leaderboard: Optional[bool] = None,
db: Session = Depends(deps.get_db),
api_client: ApiClient = Depends(deps.get_trusted_api_client),
):
@@ -191,7 +198,7 @@ def update_user(
Update a user by global user ID. Only trusted clients can update users.
"""
ur = UserRepository(db, api_client)
ur.update_user(user_id, enabled, notes)
ur.update_user(user_id, enabled, notes, show_on_leaderboard)
@router.delete("/{user_id}", status_code=HTTP_204_NO_CONTENT)
@@ -217,13 +224,15 @@ def query_user_messages(
only_roots: bool = False,
desc: bool = True,
include_deleted: bool = False,
lang: Optional[str] = None,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
"""
Query user messages.
"""
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
messages = pr.query_messages_ordered_by_created_date(
user_id=user_id,
api_client_id=api_client_id,
@@ -233,6 +242,7 @@ def query_user_messages(
lte_created_date=end_date,
only_roots=only_roots,
deleted=None if include_deleted else False,
lang=lang,
)
return utils.prepare_message_list(messages)
@@ -241,23 +251,27 @@ def query_user_messages(
@router.get("/{user_id}/messages/cursor", response_model=protocol.MessagePage)
def query_user_messages_cursor(
user_id: Optional[UUID],
lt: Optional[str] = None,
gt: Optional[str] = None,
before: Optional[str] = None,
after: Optional[str] = None,
only_roots: Optional[bool] = False,
include_deleted: Optional[bool] = False,
max_count: Optional[int] = Query(10, gt=0, le=1000),
desc: Optional[bool] = False,
lang: Optional[str] = None,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_api_client),
db: Session = Depends(deps.get_db),
):
return get_messages_cursor(
lt=lt,
gt=gt,
before=before,
after=after,
user_id=user_id,
only_roots=only_roots,
include_deleted=include_deleted,
max_count=max_count,
desc=desc,
lang=lang,
frontend_user=frontend_user,
api_client=api_client,
db=db,
)
@@ -265,9 +279,12 @@ def query_user_messages_cursor(
@router.delete("/{user_id}/messages", status_code=HTTP_204_NO_CONTENT)
def mark_user_messages_deleted(
user_id: UUID, api_client: ApiClient = Depends(deps.get_trusted_api_client), db: Session = Depends(deps.get_db)
user_id: UUID,
frontend_user: deps.FrontendUserId = Depends(deps.get_frontend_user_id),
api_client: ApiClient = Depends(deps.get_trusted_api_client),
db: Session = Depends(deps.get_db),
):
pr = PromptRepository(db, api_client)
pr = PromptRepository(db, api_client, frontend_user=frontend_user)
messages = pr.query_messages_ordered_by_created_date(user_id=user_id, limit=None)
pr.mark_messages_deleted(messages)
+15 -10
View File
@@ -14,6 +14,8 @@ def prepare_message(m: Message) -> protocol.Message:
lang=m.lang,
is_assistant=(m.role == "assistant"),
created_date=m.created_date,
emojis=m.emojis or {},
user_emojis=m.user_emojis or [],
)
@@ -21,17 +23,20 @@ def prepare_message_list(messages: list[Message]) -> list[protocol.Message]:
return [prepare_message(m) for m in messages]
def prepare_conversation_message(message: Message) -> protocol.ConversationMessage:
return protocol.ConversationMessage(
id=message.id,
frontend_message_id=message.frontend_message_id,
text=message.text,
lang=message.lang,
is_assistant=(message.role == "assistant"),
emojis=message.emojis or {},
user_emojis=message.user_emojis or [],
)
def prepare_conversation_message_list(messages: list[Message]) -> list[protocol.ConversationMessage]:
return [
protocol.ConversationMessage(
id=message.id,
frontend_message_id=message.frontend_message_id,
text=message.text,
lang=message.lang,
is_assistant=(message.role == "assistant"),
)
for message in messages
]
return [prepare_conversation_message(message) for message in messages]
def prepare_conversation(messages: list[Message]) -> protocol.Conversation:
+59 -4
View File
@@ -1,7 +1,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from oasst_shared.schemas import protocol as protocol_schema
from oasst_shared.schemas.protocol import TextLabel
from pydantic import AnyHttpUrl, BaseModel, BaseSettings, FilePath, PostgresDsn, validator
@@ -46,17 +46,69 @@ class TreeManagerConfiguration(BaseModel):
num_required_rankings: int = 3
"""Number of rankings in which the message participated."""
mandatory_labels_initial_prompt: Optional[list[protocol_schema.TextLabel]] = [protocol_schema.TextLabel.spam]
labels_initial_prompt: list[TextLabel] = [
TextLabel.spam,
TextLabel.quality,
TextLabel.helpfulness,
TextLabel.creativity,
TextLabel.humor,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
]
labels_assistant_reply: list[TextLabel] = [
TextLabel.spam,
TextLabel.fails_task,
TextLabel.quality,
TextLabel.helpfulness,
TextLabel.creativity,
TextLabel.humor,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
]
labels_prompter_reply: list[TextLabel] = [
TextLabel.spam,
TextLabel.quality,
TextLabel.helpfulness,
TextLabel.humor,
TextLabel.creativity,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
]
mandatory_labels_initial_prompt: Optional[list[TextLabel]] = [TextLabel.spam]
"""Mandatory labels in text-labeling tasks for initial prompts."""
mandatory_labels_assistant_reply: Optional[list[protocol_schema.TextLabel]] = [protocol_schema.TextLabel.spam]
mandatory_labels_assistant_reply: Optional[list[TextLabel]] = [TextLabel.spam]
"""Mandatory labels in text-labeling tasks for assistant replies."""
mandatory_labels_prompter_reply: Optional[list[protocol_schema.TextLabel]] = [protocol_schema.TextLabel.spam]
mandatory_labels_prompter_reply: Optional[list[TextLabel]] = [TextLabel.spam]
"""Mandatory labels in text-labeling tasks for prompter replies."""
rank_prompter_replies: bool = False
lonely_children_count: int = 3
"""Number of children below which parents are preferred during sampling for reply tasks."""
p_lonely_child_extension: float = 0.8
"""Probability to select a parent with less than lonely_children_count children."""
recent_tasks_span_sec: int = 3 * 60 # 3 min
"""Time in seconds of recent tasks to consider for exclusion during task selection."""
class Settings(BaseSettings):
PROJECT_NAME: str = "open-assistant backend"
@@ -90,10 +142,13 @@ class Settings(BaseSettings):
Path(__file__).parent.parent / "test_data/realistic/realistic_seed_data.json"
)
DEBUG_ALLOW_SELF_LABELING: bool = False # allow users to label their own messages
DEBUG_ALLOW_DUPLICATE_TASKS: bool = False # offer users tasks to which they already responded
DEBUG_SKIP_EMBEDDING_COMPUTATION: bool = False
DEBUG_SKIP_TOXICITY_CALCULATION: bool = False
DEBUG_DATABASE_ECHO: bool = False
DUPLICATE_MESSAGE_FILTER_WINDOW_MINUTES: int = 120
HUGGING_FACE_API_KEY: str = ""
ROOT_TOKENS: List[str] = ["1234"] # supply a string that can be parsed to a json list
-1
View File
@@ -1 +0,0 @@
__all__ = []
-56
View File
@@ -1,56 +0,0 @@
from typing import Any, Dict, Generic, List, Optional, Type, TypeVar, Union
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from sqlmodel import Session, SQLModel
ModelType = TypeVar("ModelType", bound=SQLModel)
CreateSchemaType = TypeVar("CreateSchemaType", bound=BaseModel)
UpdateSchemaType = TypeVar("UpdateSchemaType", bound=BaseModel)
class CRUDBase(Generic[ModelType, CreateSchemaType, UpdateSchemaType]):
def __init__(self, model: Type[ModelType]):
"""
CRUD object with default methods to Create, Read, Update, Delete (CRUD).
**Parameters**
* `model`: A SQLModel model class
* `schema`: A Pydantic model (schema) class
"""
self.model = model
def get(self, db: Session, id: Any) -> Optional[ModelType]:
return db.query(self.model).filter(self.model.id == id).first()
def get_multi(self, db: Session, *, begin_id: int = 0, limit: int = 100) -> List[ModelType]:
return db.query(self.model).filter(self.model.id >= begin_id).limit(limit).all()
def create(self, db: Session, *, obj_in: CreateSchemaType) -> ModelType:
obj_in_data = jsonable_encoder(obj_in)
db_obj = self.model(**obj_in_data) # type: ignore
db.add(db_obj)
db.commit()
db.refresh(db_obj)
return db_obj
def update(self, db: Session, *, db_obj: ModelType, obj_in: Union[UpdateSchemaType, Dict[str, Any]]) -> ModelType:
obj_data = jsonable_encoder(db_obj)
if isinstance(obj_in, dict):
update_data = obj_in
else:
update_data = obj_in.dict(exclude_unset=True)
for field in obj_data:
if field in update_data:
setattr(db_obj, field, update_data[field])
db.add(db_obj)
db.commit()
db.refresh(db_obj)
return db_obj
def delete(self, db: Session, *, id: int) -> ModelType:
obj = db.query(self.model).get(id)
db.delete(obj)
db.commit()
return obj
+2
View File
@@ -2,6 +2,7 @@ from .api_client import ApiClient
from .journal import Journal, JournalIntegration
from .message import Message
from .message_embedding import MessageEmbedding
from .message_emoji import MessageEmoji
from .message_reaction import MessageReaction
from .message_toxicity import MessageToxicity
from .message_tree_state import MessageTreeState
@@ -24,4 +25,5 @@ __all__ = [
"TextLabels",
"Journal",
"JournalIntegration",
"MessageEmoji",
]
+2 -1
View File
@@ -117,7 +117,8 @@ class LabelConversationReplyPayload(TaskPayload):
message_id: UUID
conversation: protocol_schema.Conversation
reply: str
reply: str # deprecated
reply_message: Optional[protocol_schema.ConversationMessage]
valid_labels: list[str]
mandatory_labels: Optional[list[str]]
mode: Optional[protocol_schema.LabelTaskMode]
+22 -1
View File
@@ -1,12 +1,13 @@
from datetime import datetime
from http import HTTPStatus
from typing import Optional
from typing import Any, Optional
from uuid import UUID, uuid4
import sqlalchemy as sa
import sqlalchemy.dialects.postgresql as pg
from oasst_backend.models.db_payload import MessagePayload
from oasst_shared.exceptions.oasst_api_error import OasstError, OasstErrorCode
from pydantic import PrivateAttr
from sqlalchemy import false
from sqlmodel import Field, Index, SQLModel
@@ -17,6 +18,13 @@ class Message(SQLModel, table=True):
__tablename__ = "message"
__table_args__ = (Index("ix_message_frontend_message_id", "api_client_id", "frontend_message_id", unique=True),)
def __new__(cls, *args: Any, **kwargs: Any):
new_object = super().__new__(cls, *args, **kwargs)
# temporary fix until https://github.com/tiangolo/sqlmodel/issues/149 gets merged
if not hasattr(new_object, "_user_emojis"):
new_object._init_private_attributes()
return new_object
id: Optional[UUID] = Field(
sa_column=sa.Column(
pg.UUID(as_uuid=True), primary_key=True, default=uuid4, server_default=sa.text("gen_random_uuid()")
@@ -49,11 +57,24 @@ class Message(SQLModel, table=True):
rank: Optional[int] = Field(nullable=True)
emojis: Optional[dict[str, int]] = Field(default=None, sa_column=sa.Column(pg.JSONB), nullable=False)
_user_emojis: Optional[list[str]] = PrivateAttr(default=None)
def ensure_is_message(self) -> None:
if not self.payload or not isinstance(self.payload.payload, MessagePayload):
raise OasstError("Invalid message", OasstErrorCode.INVALID_MESSAGE, HTTPStatus.INTERNAL_SERVER_ERROR)
def has_emoji(self, emoji_code: str) -> bool:
return self.emojis and emoji_code in self.emojis and self.emojis[emoji_code] > 0
def has_user_emoji(self, emoji_code: str) -> bool:
return self._user_emojis and emoji_code in self._user_emojis
@property
def text(self) -> str:
self.ensure_is_message()
return self.payload.payload.text
@property
def user_emojis(self) -> str:
return self._user_emojis
@@ -0,0 +1,27 @@
from datetime import datetime
from typing import Optional
from uuid import UUID
import sqlalchemy as sa
import sqlalchemy.dialects.postgresql as pg
from sqlmodel import Field, Index, SQLModel
class MessageEmoji(SQLModel, table=True):
__tablename__ = "message_emoji"
__table_args__ = (Index("ix_message_emoji__user_id__message_id", "user_id", "message_id", unique=False),)
message_id: Optional[UUID] = Field(
sa_column=sa.Column(
pg.UUID(as_uuid=True), sa.ForeignKey("message.id", ondelete="CASCADE"), nullable=False, primary_key=True
)
)
user_id: UUID = Field(
sa_column=sa.Column(
pg.UUID(as_uuid=True), sa.ForeignKey("user.id", ondelete="CASCADE"), nullable=False, primary_key=True
)
)
emoji: str = Field(nullable=False, max_length=128, primary_key=True)
created_date: Optional[datetime] = Field(
sa_column=sa.Column(sa.DateTime(timezone=True), nullable=False, server_default=sa.func.current_timestamp())
)
@@ -26,3 +26,4 @@ class MessageReaction(SQLModel, table=True):
payload_type: str = Field(nullable=False, max_length=200)
payload: PayloadContainer = Field(sa_column=sa.Column(payload_column_type(PayloadContainer), nullable=False))
api_client_id: UUID = Field(nullable=False, foreign_key="api_client.id")
message_id: Optional[UUID] = Field(nullable=True, index=True)
+3 -1
View File
@@ -20,7 +20,9 @@ class Task(SQLModel, table=True):
),
)
created_date: Optional[datetime] = Field(
sa_column=sa.Column(sa.DateTime(timezone=True), nullable=False, server_default=sa.func.current_timestamp()),
sa_column=sa.Column(
sa.DateTime(timezone=True), nullable=False, index=True, server_default=sa.func.current_timestamp()
),
)
expiry_date: Optional[datetime] = Field(sa_column=sa.Column(sa.DateTime(timezone=True), nullable=True))
user_id: Optional[UUID] = Field(nullable=True, foreign_key="user.id", index=True)
@@ -27,3 +27,4 @@ class TextLabels(SQLModel, table=True):
sa_column=sa.Column(pg.UUID(as_uuid=True), sa.ForeignKey("message.id"), nullable=True)
)
labels: dict[str, float] = Field(default={}, sa_column=sa.Column(pg.JSONB), nullable=False)
task_id: Optional[UUID] = Field(nullable=True, index=True)
+1
View File
@@ -30,6 +30,7 @@ class User(SQLModel, table=True):
enabled: bool = Field(sa_column=sa.Column(sa.Boolean, nullable=False, server_default=sa.true()))
notes: str = Field(sa_column=sa.Column(AutoString(length=1024), nullable=False, server_default=""))
deleted: bool = Field(sa_column=sa.Column(sa.Boolean, nullable=False, server_default=sa.false()))
show_on_leaderboard: bool = Field(sa_column=sa.Column(sa.Boolean, nullable=False, server_default=sa.true()))
def to_protocol_frontend_user(self):
return protocol.FrontEndUser(
+253 -22
View File
@@ -1,18 +1,22 @@
import random
import re
from collections import defaultdict
from datetime import datetime
from datetime import datetime, timedelta
from http import HTTPStatus
from typing import List, Optional, Tuple
from typing import Optional
from uuid import UUID, uuid4
import oasst_backend.models.db_payload as db_payload
import sqlalchemy as sa
from loguru import logger
from oasst_backend.api.deps import FrontendUserId
from oasst_backend.config import settings
from oasst_backend.journal_writer import JournalWriter
from oasst_backend.models import (
ApiClient,
Message,
MessageEmbedding,
MessageEmoji,
MessageReaction,
MessageToxicity,
MessageTreeState,
@@ -28,8 +32,10 @@ from oasst_backend.utils.database_utils import CommitMode, managed_tx_method
from oasst_shared.exceptions import OasstError, OasstErrorCode
from oasst_shared.schemas import protocol as protocol_schema
from oasst_shared.schemas.protocol import SystemStats
from oasst_shared.utils import unaware_to_utc
from sqlmodel import Session, and_, func, not_, or_, text, update
from oasst_shared.utils import unaware_to_utc, utcnow
from sqlalchemy.orm import Query
from sqlalchemy.orm.attributes import flag_modified
from sqlmodel import JSON, Session, and_, func, literal_column, not_, or_, text, update
from starlette.status import HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
@@ -39,14 +45,30 @@ class PromptRepository:
db: Session,
api_client: ApiClient,
client_user: Optional[protocol_schema.User] = None,
*,
user_repository: Optional[UserRepository] = None,
task_repository: Optional[TaskRepository] = None,
user_id: Optional[UUID] = None,
auth_method: Optional[str] = None,
username: Optional[str] = None,
frontend_user: Optional[FrontendUserId] = None,
):
self.db = db
self.api_client = api_client
self.user_repository = user_repository or UserRepository(db, api_client)
self.user = self.user_repository.lookup_client_user(client_user, create_missing=True)
self.user_id = self.user.id if self.user else None
if frontend_user and not auth_method and not username:
auth_method, username = frontend_user
if user_id:
self.user = self.user_repository.get_user(id=user_id)
self.user_id = self.user.id
elif auth_method and username:
self.user = self.user_repository.query_frontend_user(auth_method=auth_method, username=username)
self.user_id = self.user.id
else:
self.user = self.user_repository.lookup_client_user(client_user, create_missing=True)
self.user_id = self.user.id if self.user else None
logger.debug(f"PromptRepository(api_client_id={self.api_client.id}, {self.user_id=})")
self.task_repository = task_repository or TaskRepository(
db, api_client, client_user, user_repository=self.user_repository
@@ -168,6 +190,18 @@ class PromptRepository:
role = None
depth = 0
# reject whitespaces match with ^\s+$
if re.match(r"^\s+$", text):
raise OasstError("Message text is empty", OasstErrorCode.TASK_MESSAGE_TEXT_EMPTY)
# ensure message size is below the predefined limit
if len(text) > settings.MESSAGE_SIZE_LIMIT:
logger.error(f"Message size {len(text)=} exceeds size limit of {settings.MESSAGE_SIZE_LIMIT=}.")
raise OasstError("Message size too long.", OasstErrorCode.TASK_MESSAGE_TOO_LONG)
if self.check_users_recent_replies_for_duplicates(text):
raise OasstError("User recent messages have duplicates", OasstErrorCode.TASK_MESSAGE_DUPLICATED)
if task.parent_message_id:
parent_message = self.fetch_message(task.parent_message_id)
@@ -245,7 +279,7 @@ class PromptRepository:
# store reaction to message
reaction_payload = db_payload.RatingReactionPayload(rating=rating.rating)
reaction = self.insert_reaction(message.id, reaction_payload)
reaction = self.insert_reaction(task_id=task.id, payload=reaction_payload, message_id=message.id)
if not task.collective:
task.done = True
self.db.add(task)
@@ -255,7 +289,7 @@ class PromptRepository:
return reaction
@managed_tx_method(CommitMode.COMMIT)
def store_ranking(self, ranking: protocol_schema.MessageRanking) -> Tuple[MessageReaction, Task]:
def store_ranking(self, ranking: protocol_schema.MessageRanking) -> tuple[MessageReaction, Task]:
# fetch task
task = self.task_repository.fetch_task_by_frontend_message_id(ranking.message_id)
self._validate_task(task, frontend_message_id=ranking.message_id)
@@ -295,7 +329,7 @@ class PromptRepository:
ranking_parent_id=task_payload.ranking_parent_id,
message_tree_id=task_payload.message_tree_id,
)
reaction = self.insert_reaction(task.id, reaction_payload)
reaction = self.insert_reaction(task_id=task.id, payload=reaction_payload, message_id=parent_msg.id)
self.journal.log_ranking(task, message_id=parent_msg.id, ranking=ranking.ranking)
logger.info(f"Ranking {ranking.ranking} stored for task {task.id}.")
@@ -313,9 +347,8 @@ class PromptRepository:
reaction_payload = db_payload.RankingReactionPayload(
ranking=ranking.ranking, ranked_message_ids=ranked_message_ids
)
reaction = self.insert_reaction(task.id, reaction_payload)
# TODO: resolve message_id
self.journal.log_ranking(task, message_id=None, ranking=ranking.ranking)
reaction = self.insert_reaction(task_id=task.id, payload=reaction_payload, message_id=None)
# self.journal.log_ranking(task, message_id=None, ranking=ranking.ranking)
logger.info(f"Ranking {ranking.ranking} stored for task {task.id}.")
@@ -346,13 +379,13 @@ class PromptRepository:
return message_toxicity
@managed_tx_method(CommitMode.FLUSH)
def insert_message_embedding(self, message_id: UUID, model: str, embedding: List[float]) -> MessageEmbedding:
def insert_message_embedding(self, message_id: UUID, model: str, embedding: list[float]) -> MessageEmbedding:
"""Insert the embedding of a new message in the database.
Args:
message_id (UUID): the identifier of the message we want to save its embedding
model (str): the model used for creating the embedding
embedding (List[float]): the values obtained from the message & model
embedding (list[float]): the values obtained from the message & model
Raises:
OasstError: if misses some of the before params
@@ -366,7 +399,9 @@ class PromptRepository:
return message_embedding
@managed_tx_method(CommitMode.FLUSH)
def insert_reaction(self, task_id: UUID, payload: db_payload.ReactionPayload) -> MessageReaction:
def insert_reaction(
self, task_id: UUID, payload: db_payload.ReactionPayload, message_id: Optional[UUID]
) -> MessageReaction:
self.ensure_user_is_enabled()
container = PayloadContainer(payload=payload)
@@ -376,12 +411,13 @@ class PromptRepository:
payload=container,
api_client_id=self.api_client.id,
payload_type=type(payload).__name__,
message_id=message_id,
)
self.db.add(reaction)
return reaction
@managed_tx_method(CommitMode.FLUSH)
def store_text_labels(self, text_labels: protocol_schema.TextLabels) -> Tuple[TextLabels, Task, Message]:
def store_text_labels(self, text_labels: protocol_schema.TextLabels) -> tuple[TextLabels, Task, Message]:
valid_labels: Optional[list[str]] = None
mandatory_labels: Optional[list[str]] = None
@@ -441,11 +477,24 @@ class PromptRepository:
user_id=self.user_id,
text=text_labels.text,
labels=text_labels.labels,
task_id=task.id if task else None,
)
if message_id:
message = self.fetch_message(message_id)
if task:
if not task:
if text_labels.is_report is True:
message = self.handle_message_emoji(
message_id, protocol_schema.EmojiOp.add, protocol_schema.EmojiCode.red_flag
)
# update existing record for repeated updates (same user no task associated)
existing_text_label = self.fetch_non_task_text_labels(message_id, self.user_id)
if existing_text_label is not None:
existing_text_label.labels = text_labels.labels
model = existing_text_label
else:
message = self.fetch_message(message_id)
message.review_count += 1
self.db.add(message)
@@ -519,6 +568,46 @@ class PromptRepository:
qry = qry.filter(Message.review_result)
if not include_deleted:
qry = qry.filter(not_(Message.deleted))
return self._add_user_emojis_all(qry)
def check_users_recent_replies_for_duplicates(self, text: str) -> bool:
"""
Checks if the user has recently replied with the same text within a given time period.
"""
user_id = self.user_id
logger.debug(f"Checking for duplicate tasks for user {user_id}")
# messages in the past 24 hours
messages = (
self.db.query(Message)
.filter(Message.user_id == user_id)
.order_by(Message.created_date.desc())
.filter(
Message.created_date > utcnow() - timedelta(minutes=settings.DUPLICATE_MESSAGE_FILTER_WINDOW_MINUTES)
)
.all()
)
if not messages:
return False
for msg in messages:
if msg.text == text:
return True
return False
def fetch_user_message_trees(
self, user_id: Message.user_id, reviewed: bool = True, include_deleted: bool = False
) -> list[Message]:
qry = self.db.query(Message).filter(Message.user_id == user_id)
if reviewed:
qry = qry.filter(Message.review_result)
if not include_deleted:
qry = qry.filter(not_(Message.deleted))
return self._add_user_emojis_all(qry)
def fetch_message_trees_ready_for_export(self) -> list[MessageTreeState]:
qry = self.db.query(MessageTreeState).filter(
MessageTreeState.state == message_tree_state.State.READY_FOR_EXPORT
)
return qry.all()
def fetch_multiple_random_replies(self, max_size: int = 5, message_role: str = None):
@@ -556,11 +645,25 @@ class PromptRepository:
return conversation, replies
def fetch_message(self, message_id: UUID, fail_if_missing: bool = True) -> Optional[Message]:
qry = self.db.query(Message).filter(Message.id == message_id)
messages = self._add_user_emojis_all(qry)
message = messages[0] if messages else None
message = self.db.query(Message).filter(Message.id == message_id).one_or_none()
if fail_if_missing and not message:
raise OasstError("Message not found", OasstErrorCode.MESSAGE_NOT_FOUND, HTTP_404_NOT_FOUND)
return message
def fetch_non_task_text_labels(self, message_id: UUID, user_id: UUID) -> Optional[TextLabels]:
query = (
self.db.query(TextLabels)
.outerjoin(Task, Task.id == TextLabels.id)
.filter(Task.id.is_(None), TextLabels.message_id == message_id, TextLabels.user_id == user_id)
)
text_label = query.one_or_none()
return text_label
@staticmethod
def trace_conversation(messages: list[Message] | dict[UUID, Message], last_message: Message) -> list[Message]:
"""
@@ -620,9 +723,27 @@ class PromptRepository:
qry = qry.filter(Message.review_result)
if exclude_deleted:
qry = qry.filter(Message.deleted == sa.false())
children = qry.all()
children = self._add_user_emojis_all(qry)
return children
def fetch_message_siblings(
self, message: Message | UUID, reviewed: Optional[bool] = True, deleted: Optional[bool] = False
) -> list[Message]:
"""
Get siblings of a message (other messages with the same parent_id)
"""
if isinstance(message, Message):
message = message.id
parent_qry = self.db.query(Message.parent_id).filter(Message.id == message).subquery()
qry = self.db.query(Message).filter(Message.parent_id == parent_qry.c.parent_id)
if reviewed is not None:
qry = qry.filter(Message.review_result == reviewed)
if deleted is not None:
qry = qry.filter(Message.deleted == deleted)
siblings = self._add_user_emojis_all(qry)
return siblings
@staticmethod
def trace_descendants(root: Message, messages: list[Message]) -> list[Message]:
children = defaultdict(list)
@@ -651,7 +772,7 @@ class PromptRepository:
if max_depth is not None:
desc = desc.filter(Message.depth <= max_depth)
desc = desc.all()
desc = self._add_user_emojis_all(desc)
return self.trace_descendants(message, desc)
@@ -665,6 +786,33 @@ class PromptRepository:
max_message = max(tree, key=lambda m: m.children_count)
return max_message, [m for m in tree if m.parent_id == max_message.id]
def _add_user_emojis_all(self, qry: Query) -> list[Message]:
if self.user_id is None:
return qry.all()
sq = qry.subquery("m")
qry = (
self.db.query(Message, func.string_agg(MessageEmoji.emoji, literal_column("','")).label("user_emojis"))
.select_entity_from(sq)
.outerjoin(
MessageEmoji,
and_(
sq.c.id == MessageEmoji.message_id,
MessageEmoji.user_id == self.user_id,
sq.c.emojis != JSON.NULL,
),
)
.group_by(sq)
)
messages: list[Message] = []
for x in qry:
m: Message = x.Message
user_emojis = x["user_emojis"]
if user_emojis:
m._user_emojis = user_emojis.split(",")
messages.append(m)
return messages
def query_messages_ordered_by_created_date(
self,
user_id: Optional[UUID] = None,
@@ -679,6 +827,7 @@ class PromptRepository:
deleted: Optional[bool] = None,
desc: bool = False,
limit: Optional[int] = 100,
lang: Optional[str] = None,
) -> list[Message]:
if not self.api_client.trusted:
if not api_client_id:
@@ -693,7 +842,7 @@ class PromptRepository:
if user_id:
qry = qry.filter(Message.user_id == user_id)
if username or auth_method:
if not username and auth_method:
if not (username and auth_method):
raise OasstError("Auth method or username missing.", OasstErrorCode.AUTH_AND_USERNAME_REQUIRED)
qry = qry.join(User)
qry = qry.filter(User.username == username, User.auth_method == auth_method)
@@ -743,7 +892,10 @@ class PromptRepository:
if limit is not None:
qry = qry.limit(limit)
return qry.all()
if lang is not None:
qry = qry.filter(Message.lang == lang)
return self._add_user_emojis_all(qry)
def update_children_counts(self, message_tree_id: UUID):
sql_update_children_count = """
@@ -805,3 +957,82 @@ WHERE message.id = cc.id;
deleted=result.get(True, 0),
message_trees=result.get(None, 0),
)
def handle_message_emoji(self, message_id: UUID, op: protocol_schema.EmojiOp, emoji: protocol_schema) -> Message:
self.ensure_user_is_enabled()
message = self.fetch_message(message_id)
# check if emoji exists
existing_emoji = (
self.db.query(MessageEmoji)
.filter(
MessageEmoji.message_id == message_id, MessageEmoji.user_id == self.user_id, MessageEmoji.emoji == emoji
)
.one_or_none()
)
if existing_emoji:
if op == protocol_schema.EmojiOp.add:
logger.info(f"Emoji record already exists {message_id=}, {emoji=}, {self.user_id=}")
return message
elif op == protocol_schema.EmojiOp.togggle:
op = protocol_schema.EmojiOp.remove
if existing_emoji is None:
if op == protocol_schema.EmojiOp.remove:
logger.info(f"Emoji record not found {message_id=}, {emoji=}, {self.user_id=}")
return message
elif op == protocol_schema.EmojiOp.togggle:
op = protocol_schema.EmojiOp.add
if op == protocol_schema.EmojiOp.add:
# hard coded exclusivity of thumbs_up & thumbs_down
if emoji == protocol_schema.EmojiCode.thumbs_up and message.has_user_emoji(
protocol_schema.EmojiCode.thumbs_down.value
):
message = self.handle_message_emoji(
message_id, protocol_schema.EmojiOp.remove, protocol_schema.EmojiCode.thumbs_down
)
elif emoji == protocol_schema.EmojiCode.thumbs_down and message.has_user_emoji(
protocol_schema.EmojiCode.thumbs_up.value
):
message = self.handle_message_emoji(
message_id, protocol_schema.EmojiOp.remove, protocol_schema.EmojiCode.thumbs_up
)
# insert emoji record & increment count
message_emoji = MessageEmoji(message_id=message.id, user_id=self.user_id, emoji=emoji)
self.db.add(message_emoji)
emoji_counts = message.emojis
if not emoji_counts:
message.emojis = {emoji.value: 1}
else:
count = emoji_counts.get(emoji.value) or 0
emoji_counts[emoji.value] = count + 1
if message._user_emojis is None:
message._user_emojis = []
if emoji.value not in message._user_emojis:
message._user_emojis.append(emoji.value)
elif op == protocol_schema.EmojiOp.remove:
# remove emoji record and & decrement count
message = self.fetch_message(message_id)
if message._user_emojis and emoji.value in message._user_emojis:
message._user_emojis.remove(emoji.value)
self.db.delete(existing_emoji)
emoji_counts = message.emojis
count = emoji_counts.get(emoji.value)
if count is not None:
if count == 1:
del emoji_counts[emoji.value]
else:
emoji_counts[emoji.value] = count - 1
flag_modified(message, "emojis")
self.db.add(message)
else:
raise OasstError("Emoji op not supported", OasstErrorCode.EMOJI_OP_UNSUPPORTED)
flag_modified(message, "emojis")
self.db.add(message)
self.db.flush()
return message
+2 -9
View File
@@ -1,13 +1,6 @@
from typing import Optional
from oasst_shared.schemas.protocol import LabelDescription
from pydantic import BaseModel
class LabelOption(BaseModel):
name: str
display_text: str
help_text: Optional[str]
class ValidLabelsResponse(BaseModel):
valid_labels: list[LabelOption]
valid_labels: list[LabelDescription]
+17 -1
View File
@@ -1,3 +1,4 @@
from datetime import timedelta
from typing import Optional
from uuid import UUID
@@ -9,7 +10,7 @@ from oasst_backend.user_repository import UserRepository
from oasst_backend.utils.database_utils import CommitMode, managed_tx_method
from oasst_shared.exceptions.oasst_api_error import OasstError, OasstErrorCode
from oasst_shared.schemas import protocol as protocol_schema
from sqlmodel import Session
from sqlmodel import Session, func, or_
from starlette.status import HTTP_404_NOT_FOUND
@@ -100,6 +101,7 @@ class TaskRepository:
message_id=task.message_id,
conversation=task.conversation,
reply=task.reply,
reply_message=task.reply_message,
valid_labels=task.valid_labels,
mandatory_labels=task.mandatory_labels,
mode=task.mode,
@@ -111,6 +113,7 @@ class TaskRepository:
message_id=task.message_id,
conversation=task.conversation,
reply=task.reply,
reply_message=task.reply_message,
valid_labels=task.valid_labels,
mandatory_labels=task.mandatory_labels,
mode=task.mode,
@@ -219,3 +222,16 @@ class TaskRepository:
def fetch_task_by_id(self, task_id: UUID) -> Task:
task = self.db.query(Task).filter(Task.api_client_id == self.api_client.id, Task.id == task_id).one_or_none()
return task
def fetch_recent_reply_tasks(
self, max_age: timedelta = timedelta(minutes=5), done: bool = False, limit: int = 100
) -> list[Task]:
qry = self.db.query(Task).filter(
func.age(Task.created_date) < max_age,
or_(Task.payload_type == "AssistantReplyPayload", Task.payload_type == "PrompterReplyPayload"),
)
if done is not None:
qry = qry.filter(Task.done == done)
if limit:
qry = qry.limit(limit)
return qry.all()
+262 -90
View File
@@ -1,5 +1,7 @@
import json
import random
from datetime import datetime
import sys
from datetime import datetime, timedelta
from enum import Enum
from http import HTTPStatus
from typing import Any, Dict, List, Optional, Tuple
@@ -7,11 +9,17 @@ from uuid import UUID
import numpy as np
import pydantic
from fastapi.encoders import jsonable_encoder
from loguru import logger
from oasst_backend.api.v1.utils import prepare_conversation, prepare_conversation_message_list
from oasst_backend.api.v1.utils import (
prepare_conversation,
prepare_conversation_message,
prepare_conversation_message_list,
)
from oasst_backend.config import TreeManagerConfiguration, settings
from oasst_backend.models import Message, MessageReaction, MessageTreeState, Task, TextLabels, User, message_tree_state
from oasst_backend.prompt_repository import PromptRepository
from oasst_backend.utils import tree_export
from oasst_backend.utils.database_utils import CommitMode, async_managed_tx_method, managed_tx_method
from oasst_backend.utils.hugging_face import HfClassificationModel, HfEmbeddingModel, HfUrl, HuggingFaceAPI
from oasst_backend.utils.ranking import ranked_pairs
@@ -37,8 +45,9 @@ class TaskRole(Enum):
class ActiveTreeSizeRow(pydantic.BaseModel):
message_tree_id: UUID
tree_size: int
goal_tree_size: int
tree_size: int
awaiting_review: Optional[int]
@property
def remaining_messages(self) -> int:
@@ -89,8 +98,6 @@ class TreeManagerStats(pydantic.BaseModel):
class TreeManager:
_all_text_labels = list(map(lambda x: x.value, protocol_schema.TextLabel))
def __init__(
self,
db: Session,
@@ -197,7 +204,7 @@ class TreeManager:
lang = "en"
logger.warning("Task availability request without lang tag received, assuming lang='en'.")
num_active_trees = self.query_num_active_trees(lang=lang)
num_active_trees = self.query_num_active_trees(lang=lang, exclude_ranking=True)
extendible_parents = self.query_extendible_parents(lang=lang)
prompts_need_review = self.query_prompts_need_review(lang=lang)
replies_need_review = self.query_replies_need_review(lang=lang)
@@ -211,6 +218,15 @@ class TreeManager:
incomplete_rankings=incomplete_rankings,
)
@staticmethod
def _get_label_descriptions(valid_labels: list[TextLabels]) -> list[protocol_schema.LabelDescription]:
return [
protocol_schema.LabelDescription(
name=l.value, widget=l.widget.value, display_text=l.display_text, help_text=l.help_text
)
for l in valid_labels
]
def next_task(
self,
desired_task_type: protocol_schema.TaskRequestType = protocol_schema.TaskRequestType.random,
@@ -225,7 +241,7 @@ class TreeManager:
lang = "en"
logger.warning("Task request without lang tag received, assuming 'en'.")
num_active_trees = self.query_num_active_trees(lang=lang)
num_active_trees = self.query_num_active_trees(lang=lang, exclude_ranking=True)
prompts_need_review = self.query_prompts_need_review(lang=lang)
replies_need_review = self.query_replies_need_review(lang=lang)
extendible_parents = self.query_extendible_parents(lang=lang)
@@ -334,6 +350,7 @@ class TreeManager:
message_tree_id = messages[-1].message_tree_id
case TaskType.LABEL_REPLY:
if task_role == TaskRole.PROMPTER:
replies_need_review = list(filter(lambda m: m.role == "prompter", replies_need_review))
elif task_role == TaskRole.ASSISTANT:
@@ -349,58 +366,99 @@ class TreeManager:
self.cfg.p_full_labeling_review_reply_prompter: float = 0.1
label_mode = protocol_schema.LabelTaskMode.full
valid_labels = self._all_text_labels
label_disposition = protocol_schema.LabelTaskDisposition.quality
if message.role == "assistant":
valid_labels = self.cfg.labels_assistant_reply
if (
desired_task_type == protocol_schema.TaskRequestType.random
and random.random() > self.cfg.p_full_labeling_review_reply_assistant
):
valid_labels = list(map(lambda x: x.value, self.cfg.mandatory_labels_assistant_reply))
label_mode = protocol_schema.LabelTaskMode.simple
label_disposition = protocol_schema.LabelTaskDisposition.spam
valid_labels = list(self.cfg.mandatory_labels_assistant_reply)
if protocol_schema.TextLabel.quality not in valid_labels:
valid_labels.append(protocol_schema.TextLabel.quality)
logger.info(f"Generating a LabelAssistantReplyTask. ({label_mode=:s})")
task = protocol_schema.LabelAssistantReplyTask(
message_id=message.id,
conversation=conversation,
reply=message.text,
valid_labels=valid_labels,
reply_message=prepare_conversation_message(message),
valid_labels=list(map(lambda x: x.value, valid_labels)),
mandatory_labels=list(map(lambda x: x.value, self.cfg.mandatory_labels_assistant_reply)),
mode=label_mode,
disposition=label_disposition,
labels=self._get_label_descriptions(valid_labels),
)
else:
valid_labels = self.cfg.labels_prompter_reply
if (
desired_task_type == protocol_schema.TaskRequestType.random
and random.random() > self.cfg.p_full_labeling_review_reply_prompter
):
valid_labels = list(map(lambda x: x.value, self.cfg.mandatory_labels_prompter_reply))
label_mode = protocol_schema.LabelTaskMode.simple
label_disposition = protocol_schema.LabelTaskDisposition.spam
valid_labels = list(self.cfg.mandatory_labels_prompter_reply)
if protocol_schema.TextLabel.quality not in valid_labels:
valid_labels.append(protocol_schema.TextLabel.quality)
logger.info(f"Generating a LabelPrompterReplyTask. ({label_mode=:s})")
task = protocol_schema.LabelPrompterReplyTask(
message_id=message.id,
conversation=conversation,
reply=message.text,
valid_labels=valid_labels,
reply_message=prepare_conversation_message(message),
valid_labels=list(map(lambda x: x.value, valid_labels)),
mandatory_labels=list(map(lambda x: x.value, self.cfg.mandatory_labels_prompter_reply)),
mode=label_mode,
disposition=label_disposition,
labels=self._get_label_descriptions(valid_labels),
)
parent_message_id = message.id
message_tree_id = message.message_tree_id
case TaskType.REPLY:
# select a tree with missing replies
recent_reply_tasks = self.pr.task_repository.fetch_recent_reply_tasks(
max_age=timedelta(seconds=self.cfg.recent_tasks_span_sec), done=False
)
recent_reply_task_parents = {t.parent_message_id for t in recent_reply_tasks}
if task_role == TaskRole.PROMPTER:
extendible_parents = list(filter(lambda x: x.parent_role == "assistant", extendible_parents))
elif task_role == TaskRole.ASSISTANT:
extendible_parents = list(filter(lambda x: x.parent_role == "prompter", extendible_parents))
# select a tree with missing replies
if len(extendible_parents) > 0:
random_parent = random.choice(extendible_parents)
random_parent: ExtendibleParentRow = None
if self.cfg.p_lonely_child_extension > 0 and self.cfg.lonely_children_count > 1:
# check if we have extendible parents with a small number of replies
lonely_children_parents = [
p
for p in extendible_parents
if 0 < p.active_children_count < self.cfg.lonely_children_count
and p.parent_id not in recent_reply_task_parents
]
if len(lonely_children_parents) > 0 and random.random() < self.cfg.p_lonely_child_extension:
random_parent = random.choice(lonely_children_parents)
if random_parent is None:
# try to exclude parents for which tasks were recently handed out
fresh_parents = [p for p in extendible_parents if p.parent_id not in recent_reply_task_parents]
if len(fresh_parents) > 0:
random_parent = random.choice(fresh_parents)
else:
random_parent = random.choice(extendible_parents)
# fetch random conversation to extend
logger.debug(f"selected {random_parent=}")
messages = self.pr.fetch_message_conversation(random_parent.parent_id)
assert all(m.review_result for m in messages) # ensure all messages have positive review
assert all(m.review_result for m in messages) # ensure all messages have positive reviews
conversation = prepare_conversation(messages)
# generate reply task depending on last message
@@ -419,19 +477,23 @@ class TreeManager:
message = random.choice(prompts_need_review)
label_mode = protocol_schema.LabelTaskMode.full
valid_labels = self._all_text_labels
label_disposition = protocol_schema.LabelTaskDisposition.quality
valid_labels = self.cfg.labels_initial_prompt
if random.random() > self.cfg.p_full_labeling_review_prompt:
valid_labels = list(map(lambda x: x.value, self.cfg.mandatory_labels_initial_prompt))
valid_labels = self.cfg.mandatory_labels_initial_prompt
label_mode = protocol_schema.LabelTaskMode.simple
label_disposition = protocol_schema.LabelTaskDisposition.spam
logger.info(f"Generating a LabelInitialPromptTask ({label_mode=:s}).")
task = protocol_schema.LabelInitialPromptTask(
message_id=message.id,
prompt=message.text,
valid_labels=valid_labels,
valid_labels=list(map(lambda x: x.value, valid_labels)),
mandatory_labels=list(map(lambda x: x.value, self.cfg.mandatory_labels_initial_prompt)),
mode=label_mode,
disposition=label_disposition,
labels=self._get_label_descriptions(valid_labels),
)
parent_message_id = message.id
@@ -464,14 +526,6 @@ class TreeManager:
logger.info(
f"Frontend reports text reply to {interaction.message_id=} with {interaction.text=} by {interaction.user=}."
)
# ensure message size is below the predefined limit
if len(interaction.text) > settings.MESSAGE_SIZE_LIMIT:
logger.error(
f"Message size {len(interaction.text)=} exceeds size limit of {settings.MESSAGE_SIZE_LIMIT=}."
)
raise OasstError("Message size too long.", OasstErrorCode.TASK_MESSAGE_TOO_LONG)
# here we store the text reply in the database
message = pr.store_text_reply(
text=interaction.text,
@@ -502,19 +556,18 @@ class TreeManager:
try:
model_name: str = HfClassificationModel.TOXIC_ROBERTA.value
hugging_face_api: HuggingFaceAPI = HuggingFaceAPI(
f"{HfUrl.HUGGINGFACE_FEATURE_EXTRACTION.value}/{model_name}"
f"{HfUrl.HUGGINGFACE_TOXIC_CLASSIFICATION.value}/{model_name}"
)
toxicity: List[List[Dict[str, Any]]] = await hugging_face_api.post(interaction.text)
toxicity = toxicity[0][0]
pr.insert_toxicity(
message_id=message.id, model=model_name, score=toxicity["score"], label=toxicity["label"]
)
except OasstError:
logger.error(
f"Could not compute toxicity for text reply to {interaction.message_id=} with {interaction.text=} by {interaction.user=}."
f"Could not compute toxicity for text reply to {interaction.message_id=} with {interaction.text=} by {interaction.user=}."
)
case protocol_schema.MessageRating:
@@ -530,9 +583,7 @@ class TreeManager:
)
_, task = pr.store_ranking(interaction)
ok, rankings_by_message = self.check_condition_for_scoring_state(task.message_tree_id)
self.update_message_ranks(task.message_tree_id, rankings_by_message)
self.check_condition_for_scoring_state(task.message_tree_id)
case protocol_schema.TextLabels:
logger.info(
@@ -541,7 +592,7 @@ class TreeManager:
_, task, msg = pr.store_text_labels(interaction)
# if it was a respones for a task, check if we have enough reviews to calc review_result
# if it was a response for a task, check if we have enough reviews to calc review_result
if task and msg:
reviews = self.query_reviews_for_message(msg.id)
acceptance_score = self._calculate_acceptance(reviews)
@@ -622,8 +673,8 @@ class TreeManager:
# check if desired tree size has been reached and all nodes have been reviewed
tree_size = self.query_tree_size(message_tree_id)
if tree_size.remaining_messages > 0:
logger.debug(f"False {tree_size.remaining_messages=}")
if tree_size.remaining_messages > 0 or tree_size.awaiting_review > 0:
logger.debug(f"False {tree_size.remaining_messages=}, {tree_size.awaiting_review=}")
return False
self._enter_state(mts, message_tree_state.State.RANKING)
@@ -647,9 +698,12 @@ class TreeManager:
return False, None
self._enter_state(mts, message_tree_state.State.READY_FOR_SCORING)
return True, rankings_by_message
self.update_message_ranks(message_tree_id, rankings_by_message)
return True
def update_message_ranks(self, message_tree_id: UUID, rankings_by_message: Dict[int, int]) -> bool:
def update_message_ranks(
self, message_tree_id: UUID, rankings_by_message: dict[UUID, list[MessageReaction]]
) -> bool:
mts = self.pr.fetch_tree_state(message_tree_id)
# check state, allow retry if in SCORING_FAILED state
@@ -657,19 +711,47 @@ class TreeManager:
logger.debug(f"False {mts.active=}, {mts.state=}")
return False
if mts.state == message_tree_state.State.SCORING_FAILED:
mts.active = True
mts.state = message_tree_state.State.READY_FOR_SCORING
try:
for rankings in rankings_by_message.values():
sorted_messages = []
for msg_reaction in rankings:
sorted_messages.append(msg_reaction.payload.payload.ranked_message_ids)
logger.debug(f"SORTED MESSAGE {sorted_messages}")
consensus = ranked_pairs(sorted_messages)
ordered_ids_list: list[list[UUID]] = [
msg_reaction.payload.payload.ranked_message_ids for msg_reaction in rankings
]
common_set: set[UUID] = set.intersection(*map(set, ordered_ids_list))
if len(common_set) < 2:
logger.warning("The intersection of ranking results ID sets has less than two elements. Skipping.")
continue
# keep only elements in commond set
ordered_ids_list = [list(filter(lambda x: x in common_set, ids)) for ids in ordered_ids_list]
assert all(len(x) == len(common_set) for x in ordered_ids_list)
logger.debug(f"SORTED MESSAGE IDS {ordered_ids_list}")
consensus = ranked_pairs(ordered_ids_list)
assert len(consensus) == len(common_set)
logger.debug(f"CONSENSUS: {consensus}\n\n")
# fetch all siblings and clear ranks
siblings = self.pr.fetch_message_siblings(consensus[0], reviewed=None, deleted=None)
for m in siblings:
m.rank = None
self.db.add(m)
# index by id
siblings = {m.id: m for m in siblings}
# set rank for each message that was part of the common set
for rank, message_id in enumerate(consensus):
# set rank for each message_id for Message rows
msg = self.pr.fetch_message(message_id=message_id, fail_if_missing=True)
msg.rank = rank
self.db.add(msg)
msg = siblings.get(message_id)
if msg:
msg.rank = rank
self.db.add(msg)
else:
logger.warning(f"Message {message_id=} not found among siblings.")
except Exception:
logger.exception(f"update_message_ranks({message_tree_id=}) failed")
@@ -683,57 +765,65 @@ class TreeManager:
# calculate acceptance based on spam label
return np.mean([1 - l.labels[protocol_schema.TextLabel.spam] for l in labels])
def query_prompts_need_review(self, lang: str) -> list[Message]:
"""
Select initial prompt messages with less then required rankings in active message tree
(active == True in message_tree_state)
"""
def _query_need_review(
self, state: message_tree_state.State, required_reviews: int, root: bool, lang: str
) -> list[Message]:
qry = (
need_review = (
self.db.query(Message)
.select_from(MessageTreeState)
.join(Message, MessageTreeState.message_tree_id == Message.message_tree_id)
.filter(
MessageTreeState.active,
MessageTreeState.state == message_tree_state.State.INITIAL_PROMPT_REVIEW,
MessageTreeState.state == state,
not_(Message.review_result),
not_(Message.deleted),
Message.review_count < self.cfg.num_reviews_initial_prompt,
Message.parent_id.is_(None),
Message.review_count < required_reviews,
Message.lang == lang,
)
)
if root:
need_review = need_review.filter(Message.parent_id.is_(None))
else:
need_review = need_review.filter(Message.parent_id.is_not(None))
if not settings.DEBUG_ALLOW_SELF_LABELING:
qry = qry.filter(Message.user_id != self.pr.user_id)
need_review = need_review.filter(Message.user_id != self.pr.user_id)
if settings.DEBUG_ALLOW_DUPLICATE_TASKS:
qry = need_review
else:
user_id = self.pr.user_id
need_review = need_review.cte(name="need_review")
qry = (
self.db.query(Message)
.select_entity_from(need_review)
.outerjoin(TextLabels, need_review.c.id == TextLabels.message_id)
.group_by(need_review)
.having(
func.count(TextLabels.id).filter(TextLabels.task_id.is_not(None), TextLabels.user_id == user_id)
== 0
)
)
return qry.all()
def query_prompts_need_review(self, lang: str) -> list[Message]:
"""
Select initial prompt messages with less then required rankings in active message tree
(active == True in message_tree_state)
"""
return self._query_need_review(
message_tree_state.State.INITIAL_PROMPT_REVIEW, self.cfg.num_reviews_initial_prompt, True, lang
)
def query_replies_need_review(self, lang: str) -> list[Message]:
"""
Select child messages (parent_id IS NOT NULL) with less then required rankings
in active message tree (active == True in message_tree_state)
"""
qry = (
self.db.query(Message)
.select_from(MessageTreeState)
.join(Message, MessageTreeState.message_tree_id == Message.message_tree_id)
.filter(
MessageTreeState.active,
MessageTreeState.state == message_tree_state.State.GROWING,
not_(Message.review_result),
not_(Message.deleted),
Message.review_count < self.cfg.num_reviews_reply,
Message.parent_id.is_not(None),
Message.lang == lang,
)
)
if not settings.DEBUG_ALLOW_SELF_LABELING:
qry = qry.filter(Message.user_id != self.pr.user_id)
return qry.all()
return self._query_need_review(message_tree_state.State.GROWING, self.cfg.num_reviews_reply, False, lang)
_sql_find_incomplete_rankings = """
-- find incomplete rankings
@@ -749,17 +839,28 @@ WHERE mts.active -- only consider active trees
AND m.parent_id IS NOT NULL -- ignore initial prompts
GROUP BY m.parent_id, m.role
HAVING COUNT(m.id) > 1 and MIN(m.ranking_count) < :num_required_rankings
"""
_sql_find_incomplete_rankings_ex = f"""
-- incomplete rankings but exclude of current user
WITH incomplete_rankings AS ({_sql_find_incomplete_rankings})
SELECT ir.* FROM incomplete_rankings ir
LEFT JOIN message_reaction mr ON ir.parent_id = mr.message_id AND mr.payload_type = 'RankingReactionPayload'
GROUP BY ir.parent_id, ir.role, ir.children_count, ir.child_min_ranking_count, ir.completed_rankings
HAVING(COUNT(mr.message_id) FILTER (WHERE mr.user_id = :user_id) = 0)
"""
def query_incomplete_rankings(self, lang: str) -> list[IncompleteRankingsRow]:
"""Query parents which have childern that need further rankings"""
user_id = self.pr.user_id if not settings.DEBUG_ALLOW_DUPLICATE_TASKS else None
r = self.db.execute(
text(self._sql_find_incomplete_rankings),
text(self._sql_find_incomplete_rankings_ex),
{
"num_required_rankings": self.cfg.num_required_rankings,
"ranking_state": message_tree_state.State.RANKING,
"lang": lang,
"user_id": user_id,
},
)
return [IncompleteRankingsRow.from_orm(x) for x in r.all()]
@@ -780,17 +881,20 @@ WHERE mts.active -- only consider active trees
AND (c.review_result OR coalesce(c.review_count, 0) < :num_reviews_reply) -- don't count children with negative review but count elements under review
GROUP BY m.id, m.role, m.depth, m.message_tree_id, mts.max_children_count
HAVING COUNT(c.id) < mts.max_children_count -- below maximum number of children
AND COUNT(c.id) FILTER (WHERE c.user_id = :user_id) = 0 -- without reply by user
"""
def query_extendible_parents(self, lang: str) -> list[ExtendibleParentRow]:
"""Query parent messages that have not reached the maximum number of replies."""
user_id = self.pr.user_id if not settings.DEBUG_ALLOW_DUPLICATE_TASKS else None
r = self.db.execute(
text(self._sql_find_extendible_parents),
{
"growing_state": message_tree_state.State.GROWING,
"num_reviews_reply": self.cfg.num_reviews_reply,
"lang": lang,
"user_id": user_id,
},
)
return [ExtendibleParentRow.from_orm(x) for x in r.all()]
@@ -814,12 +918,14 @@ HAVING COUNT(m.id) < mts.goal_tree_size
def query_extendible_trees(self, lang: str) -> list[ActiveTreeSizeRow]:
"""Query size of active message trees in growing state."""
user_id = self.pr.user_id if not settings.DEBUG_ALLOW_DUPLICATE_TASKS else None
r = self.db.execute(
text(self._sql_find_extendible_trees),
{
"growing_state": message_tree_state.State.GROWING,
"num_reviews_reply": self.cfg.num_reviews_reply,
"lang": lang,
"user_id": user_id,
},
)
return [ActiveTreeSizeRow.from_orm(x) for x in r.all()]
@@ -827,18 +933,21 @@ HAVING COUNT(m.id) < mts.goal_tree_size
def query_tree_size(self, message_tree_id: UUID) -> ActiveTreeSizeRow:
"""Returns the number of reviewed not deleted messages in the message tree."""
required_reviews = settings.tree_manager.num_reviews_reply
qry = (
self.db.query(
MessageTreeState.message_tree_id.label("message_tree_id"),
MessageTreeState.goal_tree_size.label("goal_tree_size"),
func.count(Message.id).label("tree_size"),
func.count(Message.id).filter(Message.review_result).label("tree_size"),
func.count(Message.id)
.filter(not_(Message.review_result), Message.review_count < required_reviews)
.label("awaiting_review"),
)
.select_from(MessageTreeState)
.outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id)
.filter(
MessageTreeState.active,
not_(Message.deleted),
Message.review_result,
MessageTreeState.message_tree_id == message_tree_id,
)
.group_by(MessageTreeState.message_tree_id, MessageTreeState.goal_tree_size)
@@ -907,7 +1016,7 @@ INNER JOIN message_reaction mr ON mr.task_id = t.id AND mr.payload_type = 'Ranki
return rankings_by_message
@managed_tx_method(CommitMode.COMMIT)
def ensure_tree_states(self):
def ensure_tree_states(self) -> None:
"""Add message tree state rows for all root nodes (inital prompt messages)."""
missing_tree_ids = self.query_misssing_tree_states()
@@ -919,12 +1028,23 @@ INNER JOIN message_reaction mr ON mr.task_id = t.id AND mr.payload_type = 'Ranki
logger.info(f"Inserting missing message tree state for message: {id} ({tree_size=}, {state=:s})")
self._insert_default_state(id, state=state)
def query_num_active_trees(self, lang: str) -> int:
rankings = (
self.db.query(MessageTreeState).filter(MessageTreeState.state == message_tree_state.State.RANKING).all()
)
if len(rankings) > 0:
logger.info(f"Checking state of {len(rankings)} message trees in ranking state.")
for r in rankings:
self.check_condition_for_scoring_state(r.message_tree_id)
def query_num_active_trees(self, lang: str, exclude_ranking: bool = True) -> int:
"""Count all active trees (optionally exclude those in ranking state)."""
query = (
self.db.query(func.count(MessageTreeState.message_tree_id))
.join(Message, MessageTreeState.message_tree_id == Message.id)
.filter(MessageTreeState.active, Message.lang == lang)
)
if exclude_ranking:
query = query.filter(MessageTreeState.state != message_tree_state.State.RANKING)
return query.scalar()
def query_reviews_for_message(self, message_id: UUID) -> list[TextLabels]:
@@ -1150,6 +1270,7 @@ DELETE FROM message WHERE message_tree_id = :message_tree_id;
sql_purge_user = """
DELETE FROM journal WHERE user_id = :user_id;
DELETE FROM message_reaction WHERE user_id = :user_id;
DELETE FROM message_emoji WHERE user_id = :user_id;
DELETE FROM task WHERE user_id = :user_id;
DELETE FROM message WHERE user_id = :user_id;
DELETE FROM user_stats WHERE user_id = :user_id;
@@ -1161,41 +1282,92 @@ DELETE FROM user_stats WHERE user_id = :user_id;
if ban:
self.db.execute(update(User).filter(User.id == user_id).values(deleted=True, enabled=False))
def export_trees_to_file(
self,
message_tree_ids: list[str],
file=None,
reviewed: bool = True,
include_deleted: bool = False,
use_compression: bool = False,
) -> None:
trees_to_export: List[tree_export.ExportMessageTree] = []
for message_tree_id in message_tree_ids:
messages: List[Message] = self.pr.fetch_message_tree(message_tree_id, reviewed, include_deleted)
trees_to_export.append(tree_export.build_export_tree(message_tree_id, messages))
if file:
tree_export.write_trees_to_file(file, trees_to_export, use_compression)
else:
sys.stdout.write(json.dumps(jsonable_encoder(trees_to_export), indent=2))
def export_all_ready_trees(
self, file: str, reviewed: bool = True, include_deleted: bool = False, use_compression: bool = False
) -> None:
message_tree_states: MessageTreeState = self.pr.fetch_message_trees_ready_for_export()
message_tree_ids = [ms.message_tree_id for ms in message_tree_states]
self.export_trees_to_file(message_tree_ids, file, reviewed, include_deleted, use_compression)
def export_all_user_trees(
self,
user_id: str,
file: str,
reviewed: bool = True,
include_deleted: bool = False,
use_compression: bool = False,
) -> None:
messages = self.pr.fetch_user_message_trees(UUID(user_id))
message_tree_ids = [ms.message_tree_id for ms in messages]
self.export_trees_to_file(message_tree_ids, file, reviewed, include_deleted, use_compression)
@managed_tx_method(CommitMode.COMMIT)
def retry_scoring_failed_message_trees(self):
query = self.db.query(MessageTreeState.message_tree_id).filter(
MessageTreeState.state == message_tree_state.State.SCORING_FAILED
)
ranking_role_filter = None if self.cfg.rank_prompter_replies else "assistant"
for row in query.all():
try:
message_tree_id = row["message_tree_id"]
rankings_by_message = self.query_tree_ranking_results(message_tree_id, role_filter=ranking_role_filter)
self.update_message_ranks(message_tree_id=message_tree_id, rankings_by_message=rankings_by_message)
except Exception:
logger.exception(f"retry_scoring_failed_message_trees failed for ({message_tree_id=})")
if __name__ == "__main__":
from oasst_backend.api.deps import api_auth
# from oasst_backend.api.deps import create_api_client
from oasst_backend.database import engine
from oasst_backend.prompt_repository import PromptRepository
with Session(engine) as db:
api_client = api_auth(settings.OFFICIAL_WEB_API_KEY, db=db)
# api_client = create_api_client(session=db, description="test", frontend_type="bot")
dummy_user = protocol_schema.User(id="__dummy_user__", display_name="Dummy User", auth_method="local")
pr = PromptRepository(db=db, api_client=api_client, client_user=dummy_user)
cfg = TreeManagerConfiguration()
tm = TreeManager(db, pr, cfg)
tm.ensure_tree_states()
tm.purge_user_messages(user_id=UUID("2ef9ad21-0dc5-442d-8750-6f7f1790723f"), purge_initial_prompts=False)
# tm.purge_user_messages(user_id=UUID("2ef9ad21-0dc5-442d-8750-6f7f1790723f"), purge_initial_prompts=False)
# tm.purge_user(user_id=UUID("2ef9ad21-0dc5-442d-8750-6f7f1790723f"))
# db.commit()
# print("query_num_active_trees", tm.query_num_active_trees())
# print("query_incomplete_rankings", tm.query_incomplete_rankings())
# print("query_replies_need_review", tm.query_replies_need_review())
# print("query_incomplete_reply_reviews", tm.query_replies_need_review())
# print("query_incomplete_initial_prompt_reviews", tm.query_prompts_need_review())
# print("query_extendible_trees", tm.query_extendible_trees())
# print("query_extendible_parents", tm.query_extendible_parents())
# print("query_tree_size", tm.query_tree_size(message_tree_id=UUID("bdf434cf-4df5-4b74-949c-a5a157bc3292")))
# print(
# "query_reviews_for_message",
# tm.query_reviews_for_message(message_id=UUID("6a444493-0d48-4316-a9f1-7e263f5a2473")),
# )
# print("next_task:", tm.next_task())
# print(
# "query_tree_ranking_results", tm.query_tree_ranking_results(UUID("6036f58f-41b5-48c4-bdd9-b16f34ab1312"))
# ".query_tree_ranking_results", tm.query_tree_ranking_results(UUID("2ac20d38-6650-43aa-8bb3-f61080c0d921"))
# )
# print(tm.export_trees_to_file(message_tree_ids=["7e75fb38-e664-4e2b-817c-b9a0b01b0074"], file="lol.jsonl"))
+37 -16
View File
@@ -1,10 +1,12 @@
from typing import Optional
from uuid import UUID
from oasst_backend.config import settings
from oasst_backend.models import ApiClient, User
from oasst_backend.utils.database_utils import CommitMode, managed_tx_method
from oasst_shared.exceptions import OasstError, OasstErrorCode
from oasst_shared.schemas import protocol as protocol_schema
from sqlalchemy.exc import IntegrityError
from sqlmodel import Session, and_, or_
from starlette.status import HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
@@ -64,7 +66,13 @@ class UserRepository:
return user
@managed_tx_method(CommitMode.COMMIT)
def update_user(self, id: UUID, enabled: Optional[bool] = None, notes: Optional[str] = None) -> None:
def update_user(
self,
id: UUID,
enabled: Optional[bool] = None,
notes: Optional[str] = None,
show_on_leaderboard: Optional[bool] = None,
) -> None:
"""
Update a user by global user ID to disable or set admin notes. Only trusted clients may update users.
@@ -83,6 +91,8 @@ class UserRepository:
user.enabled = enabled
if notes is not None:
user.notes = notes
if show_on_leaderboard is not None:
user.show_on_leaderboard = show_on_leaderboard
self.db.add(user)
@@ -107,9 +117,7 @@ class UserRepository:
self.db.add(user)
@managed_tx_method(CommitMode.COMMIT)
def lookup_client_user(self, client_user: protocol_schema.User, create_missing: bool = True) -> Optional[User]:
if not client_user:
return None
def _lookup_client_user_tx(self, client_user: protocol_schema.User, create_missing: bool = True) -> Optional[User]:
user: User = (
self.db.query(User)
.filter(
@@ -135,6 +143,18 @@ class UserRepository:
self.db.add(user)
return user
def lookup_client_user(self, client_user: protocol_schema.User, create_missing: bool = True) -> Optional[User]:
if not client_user:
return None
num_retries = settings.DATABASE_MAX_TX_RETRY_COUNT
for i in range(num_retries):
try:
return self._lookup_client_user_tx(client_user, create_missing)
except IntegrityError:
# catch UniqueViolation exception, for concurrent requests due to conflicts in ix_user_username
if i + 1 == num_retries:
raise
def query_users_ordered_by_username(
self,
api_client_id: Optional[UUID] = None,
@@ -145,6 +165,7 @@ class UserRepository:
auth_method: Optional[str] = None,
search_text: Optional[str] = None,
limit: Optional[int] = 100,
desc: bool = False,
) -> list[User]:
if not self.api_client.trusted:
if not api_client_id:
@@ -184,14 +205,13 @@ class UserRepository:
pattern = "%{}%".format(search_text.replace("\\", "\\\\").replace("_", "\\_").replace("%", "\\%"))
qry = qry.filter(User.username.like(pattern))
if limit is not None and lte_username and not gte_username:
# select top rows but return results in ascernding order
sub_qry = qry.order_by(User.username.desc(), User.id.desc()).limit(limit).subquery("u")
qry = self.db.query(User).select_entity_from(sub_qry).order_by(User.username, User.id)
if desc:
qry = qry.order_by(User.username.desc(), User.id.desc())
else:
qry = qry.order_by(User.username, User.id)
if limit is not None:
qry = qry.limit(limit)
if limit is not None:
qry = qry.limit(limit)
return qry.all()
@@ -205,7 +225,9 @@ class UserRepository:
auth_method: Optional[str] = None,
search_text: Optional[str] = None,
limit: Optional[int] = 100,
desc: bool = False,
) -> list[User]:
if not self.api_client.trusted:
if not api_client_id:
# Let unprivileged api clients query their own users without api_client_id being set
@@ -255,13 +277,12 @@ class UserRepository:
if auth_method:
qry = qry.filter(User.auth_method == auth_method)
if limit is not None and lte_display_name and not gte_display_name:
# select top rows but return results in ascernding order
sub_qry = qry.order_by(User.display_name.desc(), User.id.desc()).limit(limit).subquery("u")
qry = self.db.query(User).select_entity_from(sub_qry).order_by(User.display_name, User.id)
if desc:
qry = qry.order_by(User.display_name.desc(), User.id.desc())
else:
qry = qry.order_by(User.display_name, User.id)
if limit is not None:
qry = qry.limit(limit)
if limit is not None:
qry = qry.limit(limit)
return qry.all()
@@ -39,7 +39,7 @@ class UserStatsRepository:
qry = (
self.session.query(User.id.label("user_id"), User.username, User.auth_method, User.display_name, UserStats)
.join(UserStats, User.id == UserStats.user_id)
.filter(UserStats.time_frame == time_frame.value)
.filter(UserStats.time_frame == time_frame.value, User.show_on_leaderboard)
.order_by(UserStats.rank)
.limit(limit)
)
@@ -250,7 +250,8 @@ FROM
PARTITION BY time_frame
ORDER BY leader_score DESC, user_id
) AS "rank", user_id, time_frame
FROM user_stats
FROM user_stats us2
INNER JOIN "user" u ON us2.user_id = u.id AND u.show_on_leaderboard
WHERE (:time_frame IS NULL OR time_frame = :time_frame)) AS r
WHERE
us.user_id = r.user_id
+114 -52
View File
@@ -7,9 +7,14 @@ from loguru import logger
from oasst_backend.config import settings
from oasst_backend.database import engine
from oasst_shared.exceptions import OasstError, OasstErrorCode
from sqlalchemy.exc import OperationalError
from psycopg2.errors import DeadlockDetected, ExclusionViolation, SerializationFailure, UniqueViolation
from sqlalchemy.exc import OperationalError, PendingRollbackError
from sqlmodel import Session, SQLModel
"""
Error Handling Reference: https://www.postgresql.org/docs/15/mvcc-serialization-failure-handling.html
"""
class CommitMode(IntEnum):
"""
@@ -26,7 +31,6 @@ class CommitMode(IntEnum):
* managed_tx_method and async_managed_tx_method methods are decorators functions
* to be used on class functions. It expects the Class to have a 'db' Session object
* initialised
* TODO: tx method decorator for non class methods
"""
@@ -35,28 +39,46 @@ def managed_tx_method(auto_commit: CommitMode = CommitMode.COMMIT, num_retries=s
@wraps(f)
def wrapped_f(self, *args, **kwargs):
try:
for i in range(num_retries):
try:
result = f(self, *args, **kwargs)
if auto_commit == CommitMode.COMMIT:
result = None
if auto_commit == CommitMode.COMMIT:
retry_exhausted = True
for i in range(num_retries):
try:
result = f(self, *args, **kwargs)
self.db.commit()
elif auto_commit == CommitMode.FLUSH:
self.db.flush()
elif auto_commit == CommitMode.ROLLBACK:
if isinstance(result, SQLModel):
self.db.refresh(result)
retry_exhausted = False
break
except PendingRollbackError as e:
logger.info(str(e))
self.db.rollback()
except OperationalError as e:
if e.orig is not None and isinstance(
e.orig, (SerializationFailure, DeadlockDetected, UniqueViolation, ExclusionViolation)
):
logger.info(f"{type(e.orig)} Inner {e.orig.pgcode} {type(e.orig.pgcode)}")
self.db.rollback()
else:
raise e
logger.info(f"Retry {i+1}/{num_retries}")
if retry_exhausted:
raise OasstError(
"DATABASE_MAX_RETIRES_EXHAUSTED",
error_code=OasstErrorCode.DATABASE_MAX_RETRIES_EXHAUSTED,
http_status_code=HTTPStatus.SERVICE_UNAVAILABLE,
)
else:
result = f(self, *args, **kwargs)
if auto_commit == CommitMode.FLUSH:
self.db.flush()
if isinstance(result, SQLModel):
self.db.refresh(result)
return result
except OperationalError:
logger.info(f"Retry {i+1}/{num_retries} after possible DB concurrent update conflict.")
elif auto_commit == CommitMode.ROLLBACK:
self.db.rollback()
raise OasstError(
"DATABASE_MAX_RETIRES_EXHAUSTED",
error_code=OasstErrorCode.DATABASE_MAX_RETRIES_EXHAUSTED,
http_status_code=HTTPStatus.SERVICE_UNAVAILABLE,
)
return result
except Exception as e:
logger.error("DB Rollback Failure")
logger.info(str(e))
raise e
return wrapped_f
@@ -71,28 +93,46 @@ def async_managed_tx_method(
@wraps(f)
async def wrapped_f(self, *args, **kwargs):
try:
for i in range(num_retries):
try:
result = await f(self, *args, **kwargs)
if auto_commit == CommitMode.COMMIT:
result = None
if auto_commit == CommitMode.COMMIT:
retry_exhausted = True
for i in range(num_retries):
try:
result = await f(self, *args, **kwargs)
self.db.commit()
elif auto_commit == CommitMode.FLUSH:
self.db.flush()
elif auto_commit == CommitMode.ROLLBACK:
if isinstance(result, SQLModel):
self.db.refresh(result)
retry_exhausted = False
break
except PendingRollbackError as e:
logger.info(str(e))
self.db.rollback()
except OperationalError as e:
if e.orig is not None and isinstance(
e.orig, (SerializationFailure, DeadlockDetected, UniqueViolation, ExclusionViolation)
):
logger.info(f"{type(e.orig)} Inner {e.orig.pgcode} {type(e.orig.pgcode)}")
self.db.rollback()
else:
raise e
logger.info(f"Retry {i+1}/{num_retries}")
if retry_exhausted:
raise OasstError(
"DATABASE_MAX_RETIRES_EXHAUSTED",
error_code=OasstErrorCode.DATABASE_MAX_RETRIES_EXHAUSTED,
http_status_code=HTTPStatus.SERVICE_UNAVAILABLE,
)
else:
result = await f(self, *args, **kwargs)
if auto_commit == CommitMode.FLUSH:
self.db.flush()
if isinstance(result, SQLModel):
self.db.refresh(result)
return result
except OperationalError:
logger.info(f"Retry {i+1}/{num_retries} after possible DB concurrent update conflict.")
elif auto_commit == CommitMode.ROLLBACK:
self.db.rollback()
raise OasstError(
"DATABASE_MAX_RETIRES_EXHAUSTED",
error_code=OasstErrorCode.DATABASE_MAX_RETRIES_EXHAUSTED,
http_status_code=HTTPStatus.SERVICE_UNAVAILABLE,
)
return result
except Exception as e:
logger.exception("DB Rollback Failure")
logger.info(str(e))
raise e
return wrapped_f
@@ -115,27 +155,49 @@ def managed_tx_function(
@wraps(f)
def wrapped_f(*args, **kwargs):
try:
for i in range(num_retries):
with session_factory() as session:
try:
result = f(session, *args, **kwargs)
if auto_commit == CommitMode.COMMIT:
result = None
if auto_commit == CommitMode.COMMIT:
retry_exhausted = True
for i in range(num_retries):
with session_factory() as session:
try:
result = f(session, *args, **kwargs)
session.commit()
elif auto_commit == CommitMode.FLUSH:
session.flush()
elif auto_commit == CommitMode.ROLLBACK:
if isinstance(result, SQLModel):
session.refresh(result)
retry_exhausted = False
break
except PendingRollbackError as e:
logger.info(str(e))
session.rollback()
return result
except OperationalError:
logger.info(f"Retry {i+1}/{num_retries} after possible DB concurrent update conflict.")
session.rollback()
raise OasstError(
"DATABASE_MAX_RETIRES_EXHAUSTED",
error_code=OasstErrorCode.DATABASE_MAX_RETRIES_EXHAUSTED,
http_status_code=HTTPStatus.SERVICE_UNAVAILABLE,
)
except OperationalError as e:
if e.orig is not None and isinstance(
e.orig,
(SerializationFailure, DeadlockDetected, UniqueViolation, ExclusionViolation),
):
logger.info(f"{type(e.orig)} Inner {e.orig.pgcode} {type(e.orig.pgcode)}")
session.rollback()
else:
raise e
logger.info(f"Retry {i+1}/{num_retries}")
if retry_exhausted:
raise OasstError(
"DATABASE_MAX_RETIRES_EXHAUSTED",
error_code=OasstErrorCode.DATABASE_MAX_RETRIES_EXHAUSTED,
http_status_code=HTTPStatus.SERVICE_UNAVAILABLE,
)
else:
with session_factory() as session:
result = f(session, *args, **kwargs)
if auto_commit == CommitMode.FLUSH:
session.flush()
if isinstance(result, SQLModel):
session.refresh(result)
elif auto_commit == CommitMode.ROLLBACK:
session.rollback()
return result
except Exception as e:
logger.error("DB Rollback Failure")
logger.info(str(e))
raise e
return wrapped_f
@@ -0,0 +1,111 @@
import os
import pickle
from collections import Counter
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
def load_and_split(foldername, num_words):
ls = os.listdir(foldername)
X = []
Y = []
langmap = dict()
for idx, x in enumerate(ls):
print("loading language", x)
with open(foldername + "/" + x, "r") as reader:
tmp = reader.read().split(" ")
tmp = [" ".join(tmp[i : i + num_words]) for i in range(0, 100_000, num_words)]
X.extend(tmp)
Y.extend([idx] * len(tmp))
langmap[idx] = x
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.90)
return x_train, x_test, y_train, y_test, langmap
def build_and_train_pipeline(x_train, y_train):
vectorizer = TfidfVectorizer(ngram_range=(1, 2), analyzer="char", use_idf=False)
clf = Pipeline(
[
("vec", vectorizer),
# ("nystrom", Nystroem(n_components=1000,n_jobs=6)),
("clf", LinearSVC(C=0.5)),
# ("clf",GaussianNB())
# ("clf", HistGradientBoostingClassifier())
]
)
print("fitting model...")
clf.fit(x_train, y_train)
return clf
def benchmark(clf, x_test, y_test, langmap):
print("benchmarking model...")
y_pred = clf.predict(x_test)
names = list(langmap.values())
# print(y_test)
# print(langmap)
print(metrics.classification_report(y_test, y_pred, target_names=names))
cm = metrics.confusion_matrix(y_test, y_pred)
print(cm)
def main(foldername, modelname, num_words):
x_train, x_test, y_train, y_test, langmap = load_and_split(foldername=foldername, num_words=num_words)
clf = build_and_train_pipeline(x_train, y_train)
benchmark(clf, x_test, y_test, langmap)
save_model(clf, langmap, num_words, modelname)
model = load(modelname)
print(
"running infernence on long tests",
inference_voter(
model,
"""
What language is this text written in? Nobody knows until you fill in at least ten words.
This test here is to check whether the moving window approach works,
so I still need to fill in a little more text.
""",
),
)
def load(modelname):
with open(modelname, "rb") as writer:
data = pickle.load(writer)
return data
def save_model(model, idx_to_name, num_words, modelname):
out = {
"model": model,
"idx_to_name": idx_to_name,
"num_words": num_words,
}
with open(modelname, "wb") as writer:
pickle.dump(out, writer)
def inference_voter(model, text):
tmp = text.split()
# print(len(tmp), tmp)
tmp = [" ".join(tmp[i : i + model["num_words"]]) for i in range(0, len(tmp) - model["num_words"])]
predictions = model["model"].predict(tmp)
# print("integer predictions", predictions)
# print("name predictions", *[model["idx_to_name"][n] for n in predictions])
result = Counter(predictions).most_common(1)[0][0]
return model["idx_to_name"][result]
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", help="save location for model and metadata")
parser.add_argument("-d", "--data", help="specify the folder for data files")
parser.add_argument("-n", "--num_words", help="number of words to use for statistics", type=int)
args = parser.parse_args()
# np.set_printoptions(threshold=np.inf)
main(args.data, args.model, args.num_words)
+29 -4
View File
@@ -96,13 +96,15 @@ def ranked_pairs(ranks: List[List[int]]):
"""
tallies, names = head_to_head_votes(ranks)
tallies = tallies - tallies.T
# print(tallies)
# note: the resulting tally matrix should be skew-symmetric
# order by strength of victory (using tideman's original method, don't think it would make a difference for us)
sorted_majorities = []
for i in range(len(ranks[0])):
for j in range(len(ranks[0])):
if tallies[i, j] > 0:
# you can never prefer yourself over yourself
# we also have to pick one of the two choices,
# if the preference is exactly zero...
if tallies[i, j] >= 0 and i != j:
sorted_majorities.append((i, j, tallies[i, j]))
# we don't explicitly deal with tied majorities here
sorted_majorities = np.array(sorted(sorted_majorities, key=lambda x: x[2], reverse=True))
@@ -128,13 +130,36 @@ def ranked_pairs(ranks: List[List[int]]):
if __name__ == "__main__":
ranks = (
ranks = """ (
[("w", "x", "z", "y") for _ in range(1)]
+ [("w", "y", "x", "z") for _ in range(2)]
# + [("x","y","z","w") for _ in range(4)]
+ [("x", "z", "w", "y") for _ in range(5)]
+ [("y", "w", "x", "z") for _ in range(1)]
# [("y","z","w","x") for _ in range(1000)]
)
)"""
ranks = [
[
("c5181083-d3e9-41e7-a935-83fb9fa01488"),
("dcf3d179-0f34-4c15-ae21-b8feb15e422d"),
("d11705af-5575-43e5-b22e-08d155fbaa62"),
],
[
("d11705af-5575-43e5-b22e-08d155fbaa62"),
("c5181083-d3e9-41e7-a935-83fb9fa01488"),
("dcf3d179-0f34-4c15-ae21-b8feb15e422d"),
],
[
("dcf3d179-0f34-4c15-ae21-b8feb15e422d"),
("c5181083-d3e9-41e7-a935-83fb9fa01488"),
("d11705af-5575-43e5-b22e-08d155fbaa62"),
],
[
("d11705af-5575-43e5-b22e-08d155fbaa62"),
("c5181083-d3e9-41e7-a935-83fb9fa01488"),
("dcf3d179-0f34-4c15-ae21-b8feb15e422d"),
],
]
rp = ranked_pairs(ranks)
print(rp)
@@ -0,0 +1,71 @@
from __future__ import annotations
import gzip
import json
from collections import defaultdict
from typing import Optional, TextIO
from fastapi.encoders import jsonable_encoder
from oasst_backend.models import Message
from pydantic import BaseModel
class ExportMessageNode(BaseModel):
message_id: str
parent_id: Optional[str]
text: Optional[str]
role: str
review_count: Optional[int]
rank: Optional[int]
replies: Optional[list[ExportMessageNode]]
@classmethod
def prep_message_export(cls, message: Message) -> ExportMessageNode:
return cls(
message_id=str(message.id),
parent_id=str(message.parent_id) if message.parent_id else None,
text=str(message.payload.payload.text),
role=message.role,
review_count=message.review_count,
rank=message.rank,
)
class ExportMessageTree(BaseModel):
message_tree_id: str
replies: Optional[ExportMessageNode]
def build_export_tree(message_tree_id: str, messages: list[Message]) -> ExportMessageTree:
export_tree = ExportMessageTree(message_tree_id=str(message_tree_id))
export_tree_data = [ExportMessageNode.prep_message_export(m) for m in messages]
message_parents = defaultdict(list)
for message in export_tree_data:
message_parents[message.parent_id].append(message)
def build_tree(tree: dict, parent: Optional[str], messages: list[Message]):
children = message_parents[parent]
tree.replies = children
for idx, child in enumerate(tree.replies):
build_tree(tree.replies[idx], child.message_id, messages)
build_tree(export_tree, None, export_tree_data)
return export_tree
def write_trees_to_file(file, trees: list[ExportMessageTree], use_compression: bool = True) -> None:
out_buff: TextIO
if use_compression:
out_buff = gzip.open(file, "wt", encoding="UTF-8")
else:
out_buff = open(file, "wt", encoding="UTF-8")
with out_buff as f:
for tree in trees:
file_data = jsonable_encoder(tree)
json.dump(file_data, f)
f.write("\n")
+14 -1
View File
@@ -2,7 +2,13 @@
Preliminary implementation of the inference engine for OpenAssistant.
## Development (you'll need multiple terminals)
## Development Variant 1 (you'll need tmux)
Run `./full-dev-setup.sh` to start the full development setup. Make sure to wait
until the 2nd terminal is ready and says `{"message":"Connected"}` before
entering input into the last terminal.
## Development Variant 2 (you'll need multiple terminals)
Run a redis container (or use the one of the general docker compose file):
@@ -26,6 +32,13 @@ pip install -r requirements.txt
python __main__.py
```
For the worker, you'll also want to have the text-generation-inference server
running:
```bash
docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 ykilcher/text-generation-inference
```
Run the client:
```bash
+19
View File
@@ -0,0 +1,19 @@
#!/bin/bash
# Creates a tmux window with splits for the individual services
tmux new-session -d -s "inference-dev-setup"
tmux send-keys "docker run --rm -it -p 6379:6379 redis" C-m
tmux split-window -h
tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 ykilcher/text-generation-inference" C-m
tmux split-window -h
tmux send-keys "cd server" C-m
tmux send-keys "uvicorn main:app --reload" C-m
tmux split-window -h
tmux send-keys "cd worker" C-m
tmux send-keys "python __main__.py" C-m
tmux split-window -h
tmux send-keys "cd text-client" C-m
tmux send-keys "sleep 5" C-m
tmux send-keys "python __main__.py" C-m
tmux attach-session -t "inference-dev-setup"
+53 -35
View File
@@ -5,6 +5,7 @@ import uuid
import fastapi
import pydantic
import redis.asyncio as redis
import websockets.exceptions
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger
from oasst_shared.schemas import inference, protocol
@@ -63,6 +64,7 @@ class MessageRequestState(str, enum.Enum):
pending = "pending"
in_progress = "in_progress"
complete = "complete"
aborted_by_worker = "aborted_by_worker"
class DbChatEntry(pydantic.BaseModel):
@@ -154,40 +156,56 @@ async def create_message(id: str, message_request: MessageRequest, fastapi_reque
async def work(websocket: fastapi.WebSocket):
await websocket.accept()
worker_config = inference.WorkerConfig.parse_raw(await websocket.receive_text())
while True:
# find a pending task that matches the worker's config
# could also be implemented using task queues
# but general compatibility matching is tricky
for chat in CHATS.values():
if (request := chat.pending_message_request) is not None:
if chat.message_request_state == MessageRequestState.pending:
if request.compatible_with(worker_config):
try:
while True:
print(websocket.client_state)
if websocket.client_state == fastapi.websockets.WebSocketState.DISCONNECTED:
logger.warning("Worker disconnected")
break
# find a pending task that matches the worker's config
# could also be implemented using task queues
# but general compatibility matching is tricky
for chat in CHATS.values():
if (request := chat.pending_message_request) is not None:
if chat.message_request_state == MessageRequestState.pending:
if request.compatible_with(worker_config):
break
else:
logger.debug("No pending tasks")
await asyncio.sleep(1)
continue
chat.message_request_state = MessageRequestState.in_progress
work_request = inference.WorkRequest(
conversation=chat.conversation,
model_name=request.model_name,
max_new_tokens=request.max_new_tokens,
)
logger.info(f"Created {work_request}")
try:
await websocket.send_text(work_request.json())
except websockets.exceptions.ConnectionClosedError:
logger.warning("Worker disconnected")
websocket.close()
chat.message_request_state = MessageRequestState.pending
break
try:
while True:
# maybe unnecessary to parse and re-serialize
# could just pass the raw string and mark end via empty string
response_packet = inference.WorkResponsePacket.parse_raw(await websocket.receive_text())
await redisClient.rpush(chat.id, response_packet.json())
if response_packet.is_end:
break
else:
logger.debug("No pending tasks")
await asyncio.sleep(1)
continue
except fastapi.WebSocketException:
# TODO: handle this better
logger.exception(f"Websocket closed during handling of {chat.id}")
chat.message_request_state = MessageRequestState.aborted_by_worker
raise
chat.message_request_state = MessageRequestState.in_progress
work_request = inference.WorkRequest(
conversation=chat.conversation,
model_name=request.model_name,
max_new_tokens=request.max_new_tokens,
)
logger.info(f"Created {work_request}")
try:
await websocket.send_text(work_request.json())
while True:
# maybe unnecessary to parse and re-serialize
# could just pass the raw string and mark end via empty string
response_packet = inference.WorkResponsePacket.parse_raw(await websocket.receive_text())
await redisClient.rpush(chat.id, response_packet.json())
if response_packet.is_end:
break
except fastapi.WebSocketException:
# TODO: handle this better
logger.exception(f"Websocket closed during handling of {chat.id}")
chat.message_request_state = MessageRequestState.complete
chat.message_request_state = MessageRequestState.complete
except fastapi.WebSocketException:
logger.exception("Websocket closed")
+36 -22
View File
@@ -1,13 +1,12 @@
import re
import time
import json
import rel
import torch
import requests
import sseclient
import typer
import websocket
from loguru import logger
from oasst_shared.schemas import inference, protocol
from transformers import pipeline
app = typer.Typer()
@@ -16,12 +15,13 @@ app = typer.Typer()
def main(
backend_url: str = "ws://localhost:8000",
model_name: str = "distilgpt2",
inference_server_url: str = "http://localhost:8001",
):
pipe = pipeline("text-generation", model=model_name)
def on_open(ws: websocket.WebSocket):
logger.info("Connected to backend, sending config...")
worker_config = inference.WorkerConfig(model_name=model_name)
ws.send(worker_config.json())
logger.info("Config sent, waiting for work...")
def on_message(ws: websocket.WebSocket, message: str):
# TODO: what if this comes in, but one is already in progress?
@@ -35,25 +35,39 @@ def main(
# construct prompt
messages = [_prepare_message(message) for message in work_request.conversation.messages]
prompt = "\n".join(messages) + "\nAssistant:"
prefix = (
"The following is a conversation between a user and an assistant. "
"The assistant is helpful, creative, clever, and very friendly.\n"
"Assistant: Hello! How can I help you today?\n"
)
# TODO: replace this with incremental generation
torch.manual_seed(work_request.seed)
model_output = pipe(prompt, max_new_tokens=work_request.max_new_tokens, do_sample=True, return_full_text=False)[
0
]["generated_text"]
model_output = model_output.strip()
prompt = prefix + "\n".join(messages) + "\nAssistant:"
# fake streaming
split_idcs = [m.start() for m in re.finditer(r"([\w:]+)", model_output)]
pieces = [model_output[a:b] for a, b in zip([0] + split_idcs, split_idcs + [None])]
for piece in pieces:
if not piece:
continue
if piece.strip() in ("User:", "Assistant:"):
response = requests.post(
f"{inference_server_url}/generate_stream",
json={
"inputs": prompt,
"parameters": {
"max_new_tokens": work_request.max_new_tokens,
"do_sample": work_request.do_sample,
"top_k": work_request.top_k,
"top_p": work_request.top_p,
"temperature": work_request.temperature,
"seed": work_request.seed,
},
},
stream=True,
headers={"Accept": "text/event-stream"},
)
response.raise_for_status()
client = sseclient.SSEClient(response)
for event in client.events():
data = json.loads(event.data)
if data["is_end"]:
break
ws.send(inference.WorkResponsePacket(token=piece).json())
time.sleep(0.1)
intermediate = data["event"]
ws.send(inference.WorkResponsePacket(token=intermediate["token"]).json())
ws.send(inference.WorkResponsePacket(is_end=True).json())
def on_error(ws: websocket.WebSocket, error: Exception):
+2 -2
View File
@@ -1,6 +1,6 @@
loguru
rel
torch
transformers
requests
sseclient-py
typer
websocket-client
@@ -43,7 +43,7 @@ def get_one_dataset(conf, dataset_name):
if dataset_name == "debate_sum":
train, eval = train_val_dataset(train, val_split=0.2)
else:
val_name = "validation" if dataset_name not in ["billsum"] else "test"
val_name = "validation" if dataset_name not in ["billsum", "tldr_news"] else "test"
eval = SummarizationDataset(dataset_name, conf.cache_dir, val_name)
elif "ted_trans" in dataset_name:
language_pair = dataset_name.split("_")[-1]
@@ -3,7 +3,6 @@ from typing import Optional, Union
import numpy as np
import torch
from custom_datasets.qa_datasets import QA_SPECIAL_TOKENS
from torch.nn import functional as F
from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase
@@ -23,15 +22,8 @@ class DialogueDataCollator:
flatten_messages = []
label_masks = []
for feature_one in features:
assert len(feature_one) % 2 == 0, "Number of messages must be even"
# TODO: we should push this to dataset __getitem__
messages = [
(QA_SPECIAL_TOKENS["Question"] if i % 2 == 0 else "")
+ x
+ (QA_SPECIAL_TOKENS["Answer"] if i % 2 == 0 else "")
for i, x in enumerate(feature_one)
]
for messages in features:
messages = list(messages)
# Add a way for the model to terminate generation
# When we predict the start of a new expected question, we want to be able to stop generation
@@ -0,0 +1,5 @@
QA_SPECIAL_TOKENS = {"Question": "<human>", "Answer": "<bot>", "StartPrefix": "<prefix>", "EndPrefix": "</prefix>"}
def format_pair(pair):
return "{}{}{}".format(QA_SPECIAL_TOKENS["Question"], pair[0], QA_SPECIAL_TOKENS["Answer"]), pair[1]
@@ -2,6 +2,7 @@ import json
import os
from urllib.request import urlopen
from custom_datasets.formatting import format_pair
from torch.utils.data import Dataset
@@ -49,8 +50,7 @@ class PromptGeneratedDataset(Dataset):
return len(self.pairs)
def __getitem__(self, index):
question, answer = self.pairs[index]
return question, answer
return format_pair(self.pairs[index])
class InstructionTuning(Dataset):
@@ -101,5 +101,4 @@ class InstructionTuning(Dataset):
return len(self.pairs)
def __getitem__(self, index):
question, answer = self.pairs[index]
return question, answer
return format_pair(self.pairs[index])
@@ -7,14 +7,13 @@ import re
from urllib.request import urlopen
import numpy as np
from custom_datasets.formatting import QA_SPECIAL_TOKENS, format_pair
from datasets import load_dataset
from torch.utils.data import Dataset
# @agoryuno contributed this
re_reference_remove = re.compile(r"\[\d+(?:,\s*\d+)*?\]")
QA_SPECIAL_TOKENS = {"Question": "<human>", "Answer": "<bot>", "StartPrefix": "<prefix>", "EndPrefix": "</prefix>"}
def index_squad_v2(example):
if len(example["answers"]["text"]):
@@ -78,7 +77,7 @@ class QADataset(Dataset):
def __getitem__(self, idx):
data = self.dataset[idx]
return self.index_fn(data)
return format_pair(self.index_fn(data))
class WebGPT(Dataset):
@@ -111,7 +110,7 @@ class WebGPT(Dataset):
def __getitem__(self, index):
question = self.index2question[index]
answer = self.questions[question]
return [question, answer]
return format_pair((question, answer))
class SODA(Dataset):
@@ -121,14 +120,14 @@ class SODA(Dataset):
def process_soda_convo(self, data):
pairs = []
play_as = data["speakers"][1]
prefix = "{}{}. {}{}".format(
QA_SPECIAL_TOKENS["StartPrefix"],
data["narrative"],
"your name {}".format(play_as),
QA_SPECIAL_TOKENS["EndPrefix"],
)
question, answer = "", ""
prefix, postfix = "", ""
dialogue_bg = "{}{} {}{}".format(
QA_SPECIAL_TOKENS["StartPrefix"],
data["narrative"],
"your are {}".format(play_as),
QA_SPECIAL_TOKENS["EndPrefix"],
)
previous_chat = []
for idx, convo in enumerate(data["dialogue"]):
@@ -138,14 +137,20 @@ class SODA(Dataset):
else:
answer = convo
postfix = data["speakers"][idx]
if len(question) and len(answer) and prefix != postfix and postfix == play_as:
history = "<sep>".join(
["{}{}{}".format(p[0], QA_SPECIAL_TOKENS["Answer"], p[1]) for p in previous_chat]
[
"{}{}{}{}".format(QA_SPECIAL_TOKENS["Question"], p[0], QA_SPECIAL_TOKENS["Answer"], p[1])
for p in previous_chat
]
)
if len(history):
history += "<sep>"
pairs.append((prefix + history + question, answer))
prompt = QA_SPECIAL_TOKENS["Question"] + question + QA_SPECIAL_TOKENS["Answer"]
pairs.append((dialogue_bg + history + prompt, answer))
previous_chat.append((question, answer))
return pairs
def __init__(self, cache_dir, max_sample_size=10000, input_max_length=1024) -> None:
@@ -166,8 +171,8 @@ class SODA(Dataset):
return len(self.pairs)
def __getitem__(self, index):
question, answer = self.pairs[index]
return question, answer
# special token added during preprocess
return self.pairs[index]
class SODADialogue(Dataset):
@@ -218,7 +223,7 @@ class SODADialogue(Dataset):
return len(self.pairs)
def __getitem__(self, index):
return self.pairs[index]
return format_pair(self.pairs[index])
class JokeExplaination(Dataset):
@@ -253,8 +258,7 @@ class JokeExplaination(Dataset):
return len(self.pairs)
def __getitem__(self, index):
question, answer = self.pairs[index]
return question, answer
return format_pair(self.pairs[index])
# https://huggingface.co/datasets/aquamuse
@@ -3,6 +3,7 @@
"""
import random
from custom_datasets.formatting import format_pair
from datasets import load_dataset
from torch.utils.data import Dataset
@@ -54,11 +55,12 @@ def index_summary_merge(text, summary):
class SummarizationDataset(Dataset):
def __init__(self, dataset, cache_dir, split):
def __init__(self, dataset, cache_dir, split, max_words=512):
self.name = dataset
self.dataset = load_dataset(*summarization_config_mapping[dataset], cache_dir=cache_dir, split=split)
self.text_column, self.summary_column = summarization_name_mapping[dataset]
self.preprocess_fn = index_summary_merge if dataset == "scitldr" else index_summary_default
self.max_words = max_words
def __len__(self):
return len(self.dataset)
@@ -72,4 +74,5 @@ class SummarizationDataset(Dataset):
else:
prompt = random.choice(SUMMARIZATION_SPECIAL_TOKENS["Summary"])
return ("".join([SUMMARIZATION_SPECIAL_TOKENS["Text"], " ".join(text.split(" ")[:256]), prompt]), summary)
context = "".join([SUMMARIZATION_SPECIAL_TOKENS["Text"], " ".join(text.split(" ")[: self.max_words]), prompt])
return format_pair((context, summary))
@@ -4,12 +4,13 @@
"""
import random
from custom_datasets.formatting import QA_SPECIAL_TOKENS, format_pair
from datasets import load_dataset
from torch.utils.data import Dataset
class ProsocialDialogueExplaination(Dataset):
name = "prosocial_explain"
name = "explain_prosocial"
TEMPLATE = [
# 0 : reply or sentence of interest, 1 : reason of caution
("'{0}' Why is this sentence {1}", "This sentence is {1} because {0}"),
@@ -36,7 +37,7 @@ class ProsocialDialogueExplaination(Dataset):
return len(self.pairs)
def __getitem__(self, idx):
return self.pairs[idx]
return format_pair(self.pairs[idx])
class ProsocialDialogue(Dataset):
@@ -58,8 +59,9 @@ class ProsocialDialogue(Dataset):
dataset = load_dataset("allenai/prosocial-dialog", cache_dir=cache_dir)[split]
self.pairs = []
for row in dataset:
prompt = QA_SPECIAL_TOKENS["Question"] + row["context"] + QA_SPECIAL_TOKENS["Answer"]
for answer in row["rots"]:
self.pairs.append((self.PREFIX + row["context"], answer))
self.pairs.append((self.PREFIX + prompt, answer))
def __len__(self):
return len(self.pairs)
@@ -8,6 +8,7 @@
"""
import random
from custom_datasets.formatting import format_pair
from datasets import load_dataset
from torch.utils.data import Dataset
@@ -82,7 +83,7 @@ class TranslationPair(Dataset):
return len(self.pairs)
def __getitem__(self, index):
return self.pairs[index]
return format_pair(self.pairs[index])
class WMT2019(TranslationPair):
@@ -99,6 +100,8 @@ class WMT2019(TranslationPair):
else: # translating in reverse direction
source = random.choice(TRANSLATION_PROMPT[src]).format(row[tgt])
self.pairs.append((source, row[src]))
if len(self.pairs) > 100000:
break
class DiveMT(TranslationPair):
@@ -7,8 +7,8 @@ from custom_datasets.dialogue_collator import DialogueDataCollator
def test_all_datasets():
qa_base = QA_DATASETS
summarize_base = SUMMARIZATION_DATASETS
others = ["prompt_dialogue", "webgpt", "soda", "joke", "instruct_tuning"]
translation = ["dive_mt", "wmt2019_zh-en", "wmt2019_ru-en", "wmt2019_de-en", "ted_trans_de-ja", "ted_trans_nl-en"]
others = ["prompt_dialogue", "webgpt", "soda", "joke", "instruct_tuning", "explain_prosocial", "prosocial_dialogue"]
translation = ["dive_mt", "wmt2019_zh-en", "wmt2019_ru-en", "ted_trans_de-ja", "ted_trans_nl-en"]
config = Namespace(cache_dir=".cache")
for dataset_name in translation + others + summarize_base + qa_base:
@@ -31,7 +31,6 @@ def test_collate_fn():
qa_base = QA_DATASETS
summarize_base = SUMMARIZATION_DATASETS
others = ["prompt_dialogue", "webgpt", "soda", "joke", "gsm8k"]
trains, evals = [], []
for dataset_name in others + qa_base + summarize_base:
print(dataset_name)
@@ -41,10 +40,10 @@ def test_collate_fn():
dataloader = DataLoader(ConcatDataset(trains), collate_fn=collate_fn, batch_size=128)
for batch in dataloader:
# print(batch.keys())
# print(tokenizer.decode(batch['input_ids'][0]))
# print('-----')
# print(tokenizer.decode(batch['targets'][0][batch['label_masks'][0]]))
print(batch.keys())
print(tokenizer.decode(batch["input_ids"][0]))
print("-----")
print(tokenizer.decode(batch["targets"][0][batch["label_masks"][0]]))
assert batch["targets"].shape[1] <= 512
dataloader = DataLoader(ConcatDataset(evals), collate_fn=collate_fn, batch_size=128)
for batch in dataloader:
+4
View File
@@ -25,6 +25,10 @@ def get_tokenizer(conf):
tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token, "sep_token": "<|extratoken_100|>"})
elif "codegen" in conf.model_name:
tokenizer.add_special_tokens({"pad_token": "<|endoftext|>", "sep_token": "<|endoftext|>"})
elif "pythia" in conf.model_name:
tokenizer.add_special_tokens(
{"pad_token": "<|padding|>", "sep_token": "<|endoftext|>", "eos_token": "<|endoftext|>"}
)
additional_special_tokens = (
[]
@@ -0,0 +1,37 @@
# README
## Introduction
This program converts data obtained from the subreddit r/changemyview into a cleaner format for further data processing. The data is not clean enough to be used directly in a model yet, and additional preprocessing is required.
## Data Format
The cleaned data is stored in an Apache Parquet file with the following columns:
| Column Name | Description | Data Type |
|-------------|------------------------------------------------------------------------|----------------|
| INSTRUCTION | Post title + body text | String |
| RESPONSE | Body text of comments attempting to change OP's mind of `INSTRUCTION`. | List\<String\> |
| SOURCE | Permalink to the reddit post | String |
| METADATA | Metadata related to `RESPONSE`. | Dict\<Variant> |
### Metadata
Currently, metadata is only broken into one category:
- `detoxify_labels`- A Dictionary of values outputted by the [Unitaryai Detoxifier](https://github.com/unitaryai/detoxify) model, fitted to every comment under any given post.
## Usage
To use the program, follow these instructions:
1. **Clone the repository** - `git clone https://github.com/LAION-AI/Open-Assistant.git`
2. **Navigate to the project directory** - `cd notebooks/data-augmentation/changemyview-builder`
3. **Open the Jupyter Notebook** - `jupyter notebook data_processor.ipynb`
4. **Run the program** - Go through the notebook and run the cells
## Contributing
If you would like to contribute to this project, please fork the repository and submit a pull request with your changes.
## License
This project is licensed under the Apache-2.0 License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,577 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# r/ChangeMyView data converter\n",
"Converts subreddit data into readable format for ML training\n",
"\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/changemyview-builder/data_processor.ipynb)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 65,
"outputs": [],
"source": [
"### REMEMBER: setup the .env before running this code!\n",
"\n",
"\"\"\"CONSTANTS\"\"\"\n",
"\n",
"# Set the head number to the amount of entries you want to load in minus one\n",
"ENTRIES_COUNT = 10\n",
"\n",
"# Set the threshold for toxic comments to be removed\n",
"TOXIC_THRESHOLD = 0.95"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 66,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (1.4.4)\r\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from pandas) (2.8.2)\r\n",
"Requirement already satisfied: pytz>=2020.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from pandas) (2022.1)\r\n",
"Requirement already satisfied: numpy>=1.18.5 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from pandas) (1.21.5)\r\n",
"Requirement already satisfied: six>=1.5 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\r\n",
"Requirement already satisfied: praw in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (7.6.1)\r\n",
"Requirement already satisfied: websocket-client>=0.54.0 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from praw) (0.58.0)\r\n",
"Requirement already satisfied: update-checker>=0.18 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from praw) (0.18.0)\r\n",
"Requirement already satisfied: prawcore<3,>=2.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from praw) (2.3.0)\r\n",
"Requirement already satisfied: requests<3.0,>=2.6.0 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from prawcore<3,>=2.1->praw) (2.28.1)\r\n",
"Requirement already satisfied: six in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from websocket-client>=0.54.0->praw) (1.16.0)\r\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0,>=2.6.0->prawcore<3,>=2.1->praw) (3.3)\r\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0,>=2.6.0->prawcore<3,>=2.1->praw) (2022.9.24)\r\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0,>=2.6.0->prawcore<3,>=2.1->praw) (2.0.4)\r\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0,>=2.6.0->prawcore<3,>=2.1->praw) (1.26.11)\r\n",
"Requirement already satisfied: python-dotenv in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (0.21.0)\r\n",
"Requirement already satisfied: pyarrow in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (10.0.1)\r\n",
"Requirement already satisfied: numpy>=1.16.6 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from pyarrow) (1.21.5)\r\n",
"Requirement already satisfied: detoxify in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (0.5.1)\r\n",
"Requirement already satisfied: transformers==4.22.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from detoxify) (4.22.1)\r\n",
"Requirement already satisfied: torch>=1.7.0 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from detoxify) (1.13.1)\r\n",
"Requirement already satisfied: sentencepiece>=0.1.94 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from detoxify) (0.1.97)\r\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.9.0 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (0.11.1)\r\n",
"Requirement already satisfied: regex!=2019.12.17 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (2022.7.9)\r\n",
"Requirement already satisfied: pyyaml>=5.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (6.0)\r\n",
"Requirement already satisfied: tqdm>=4.27 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (4.64.1)\r\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (0.12.1)\r\n",
"Requirement already satisfied: filelock in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (3.6.0)\r\n",
"Requirement already satisfied: packaging>=20.0 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (21.3)\r\n",
"Requirement already satisfied: numpy>=1.17 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (1.21.5)\r\n",
"Requirement already satisfied: requests in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from transformers==4.22.1->detoxify) (2.28.1)\r\n",
"Requirement already satisfied: typing-extensions in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.7.0->detoxify) (4.3.0)\r\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from packaging>=20.0->transformers==4.22.1->detoxify) (3.0.9)\r\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers==4.22.1->detoxify) (3.3)\r\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers==4.22.1->detoxify) (1.26.11)\r\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers==4.22.1->detoxify) (2.0.4)\r\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers==4.22.1->detoxify) (2022.9.24)\r\n",
"Requirement already satisfied: tqdm in /Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages (4.64.1)\r\n"
]
}
],
"source": [
"# Install any dependencies\n",
"!pip install pandas\n",
"!pip install praw\n",
"!pip install python-dotenv\n",
"!pip install pyarrow\n",
"!pip install detoxify\n",
"!pip install tqdm"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import praw\n",
"import os\n",
"from os.path import join, dirname\n",
"from dotenv import main\n",
"\n",
"# Make sure you create a .env file and fill in all the necessary information in the same folder as this script!\n",
"main.load_dotenv(join(dirname(os.path.realpath('__file__')), '.env'))\n",
"\n",
"reddit = praw.Reddit(\n",
" client_id=os.environ.get(\"CLIENT_ID\"),\n",
" client_secret=os.environ.get(\"CLIENT_SECRET\"),\n",
" user_agent=\"CMV_Scraper\",\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"outputs": [],
"source": [
"# load the data\n",
"import tarfile\n",
"import os.path\n",
"import json\n",
"import re\n",
"from bz2 import BZ2File\n",
"from urllib import request\n",
"from io import BytesIO\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
"fname = \"cmv.tar.bz2\"\n",
"url = \"https://chenhaot.com/data/cmv/\" + fname\n",
"\n",
"# download if not exists\n",
"if not os.path.isfile(fname):\n",
" f = BytesIO()\n",
" with request.urlopen(url) as resp, open(fname, 'wb') as f_disk:\n",
" data = resp.read()\n",
" f_disk.write(data) # save to disk too\n",
" f.write(data)\n",
" f.seek(0)\n",
"else:\n",
" f = open(fname, 'rb')\n",
"\n",
"\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 69,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/kayjaymac/opt/anaconda3/lib/python3.9/bz2.py:124: ResourceWarning: unclosed file <_io.BufferedReader name='cmv.tar.bz2'>\n",
" self._buffer = None\n",
"ResourceWarning: Enable tracemalloc to get the object allocation traceback\n"
]
}
],
"source": [
"#tar = tarfile.open(fileobj=f, mode=\"r:bz2\")\n",
"tar = tarfile.open(fileobj=f, mode=\"r\")\n",
"\n",
"# Extract the file we are interested in\n",
"\n",
"train_fname = \"op_task/train_op_data.jsonlist.bz2\"\n",
"test_fname = \"op_task/heldout_op_data.jsonlist.bz2\"\n",
"\n",
"train_bzlist = tar.extractfile(train_fname)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 70,
"outputs": [],
"source": [
"# Deserialize the JSON list\n",
"original_posts_train = [\n",
" json.loads(line.decode('utf-8'))\n",
" for line in BZ2File(train_bzlist)\n",
"]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 71,
"outputs": [
{
"data": {
"text/plain": "[{'title': \"CMV: I shouldn't get a job in this economic climate because it'll be automated anyway; I should just wait for a post-scarcity utopia.\",\n 'delta_label': False,\n 'name': 't3_2rpsl8',\n 'selftext': \"I think the world is automating fast enough that a utopia will arise where no one will have to work anymore. Within the next 2 decades or so, having a job won't mean much, and most people will be artists and scientists. \\n\\nMy parents let me live with them, so I can just wait until the utopia happens.\\n\\nCMV.\"}]"
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"original_posts_train[:1]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 72,
"outputs": [],
"source": [
"# Load the jsonlist file into a dataframe\n",
"#df = pd.read_json(original_posts_train, orient='list', lines=True)\n",
"df = pd.DataFrame(original_posts_train)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 73,
"outputs": [],
"source": [
"# Function to check if the posts still exists on reddit\n",
"def try_get_post(post_id):\n",
" try:\n",
" submission = reddit.submission(id=post_id)\n",
" submission.name\n",
" return True\n",
" except Exception as e:\n",
" return False"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 74,
"outputs": [],
"source": [
"# Set up the detoxifier model:\n",
"from detoxify import Detoxify"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"# Removes > sign and the template message at the end of a message\n",
"def cleanup_body_text(cmv_post):\n",
" lines = [line for line in cmv_post.splitlines()\n",
" if not line.lstrip().startswith(\"&gt;\")\n",
" and not line.lstrip().startswith(\"____\")\n",
" and not line.lstrip().startswith(\"So go forth and CMV, noble redditors!\")\n",
" and \"edit\" not in \" \".join(line.lower().split()[:2])\n",
" ]\n",
" return \"\\n\".join(lines)\n",
"\n",
"\n",
"\n",
"\n",
"# Create the function that will be handling all the data gathering\n",
"def get_top_comment_and_clean_data(post_id):\n",
" #print(post_id.lstrip(\"t3_\"))\n",
" last_author = \"\"\n",
" # Grab the post\n",
" submission = reddit.submission(id=post_id.lstrip(\"t3_\"))\n",
" #print(submission.title)\n",
"\n",
" # Grab the highest rated comment on root layer\n",
" submission.submission_type = 'best'\n",
" submission.comments.replace_more(limit=0)\n",
" replies = list(submission.comments)[0].replies.list()\n",
"\n",
" # Just some variables\n",
" pros = []\n",
"\n",
" # If the post author doesn't exist this submission was deleted (submission.deleted doesn't work)\n",
" if type(submission.author) == type(None):\n",
" last_author = \"[deleted]\"\n",
" else:\n",
" last_author = submission.author.name\n",
"\n",
" is_pro_argument = False\n",
"\n",
" for comment in replies:\n",
"\n",
" # If redditor object doesn't exist, the account is invalid/deleted\n",
" if type(comment.author) != type(None):\n",
" author = comment.author.name\n",
" else:\n",
" author = \"[deleted]\"\n",
"\n",
" # Assume that whenever the user changes, they are countering the previous person\n",
" if author != last_author:\n",
" is_pro_argument = !is_pro_argument\n",
"\n",
" if author == \"[deleted]\" or author==\"DeltaBot\":\n",
" #print(\"Skipping comment...\")\n",
" continue\n",
"\n",
" # Remove meta and duplicate comments\n",
" comment.body = \" \".join([line for line in comment.body.splitlines()\n",
" if not re.search(r\"(?i)(Change\\smy\\sview|CMV)\", line)\n",
" and line not in pros # Why doesn't this line work\n",
" ])\n",
"\n",
" # Sometimes for some reason duplicate entries exist\n",
" # Also remove automated message with \"Δ\" in it\n",
"\n",
" if comment.body in pros:\n",
" #print(\"Skipping duplicate entry\")\n",
" continue\n",
"\n",
" #print(\"\\t\\t>>\\t\",comment.body)\n",
"\n",
" # Remove toxic comments\n",
" if Detoxify(\"multilingual\").predict(comment.body)[\"toxicity\"] > TOXIC_THRESHOLD:\n",
" #print(\"Identified toxic comment, ignoring...\")\n",
" comment.body = \"\"\n",
"\n",
" # Add to the respective argument type \n",
" if is_pro_argument:\n",
" pros.append(comment.body)\n",
" \n",
" last_author = comment.author.name\n",
" \n",
" # Pros = arguments for the Title of this post\n",
" # Cons = arguments against the title of this post\n",
"\n",
" pros.append(comment.body)\n",
" return pros"
]
},
{
"cell_type": "code",
"execution_count": 76,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading in 10 posts\n"
]
}
],
"source": [
"print(f\"Loading in {ENTRIES_COUNT} posts\")\n",
"dataset = df.head(ENTRIES_COUNT)\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 77,
"outputs": [],
"source": [
"# the name column does some weird sh** because dataframes already have a name property, so migrate to a different column name\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"dataset[\"post_id\"] = dataset[\"name\"]\n",
"warnings.filterwarnings('default')"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading in data... This will take a while.\n"
]
},
{
"data": {
"text/plain": " 0%| | 0/10 [00:00<?, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "203fd74c8a5146b68b8af961bb3874c8"
}
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<timed exec>:29: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"/Users/kayjaymac/opt/anaconda3/lib/python3.9/site-packages/torch/serialization.py:997: ResourceWarning: unclosed file <_io.BufferedReader name='cmv.tar.bz2'>\n",
" storage = zip_file.get_storage_from_record(name, numel, torch._UntypedStorage).storage()._untyped()\n",
"ResourceWarning: Enable tracemalloc to get the object allocation traceback\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7min 49s, sys: 2min 29s, total: 10min 19s\n",
"Wall time: 8min 45s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"from tqdm.auto import tqdm\n",
"# Reset variables for if we run this multiple times\n",
"all_pros = []\n",
"all_names = []\n",
"all_titles = []\n",
"all_sources = []\n",
"\n",
"print(\"Loading in data... This will take a while.\")\n",
"\n",
"for i in tqdm(range(dataset.shape[0])):\n",
"\n",
" post = dataset.iloc[i]\n",
" modified_title = post.title.replace('CMV', \"Change my mind\")\n",
" #print(f\"\\n Loading entry {i+1}/{dataset.shape[0]}:\\n\\t\\\"{modified_title}\\\"\")\n",
"\n",
" if type(post) == type(None):\n",
" continue\n",
"\n",
" assert(post.post_id != i)\n",
"\n",
" pros = get_top_comment_and_clean_data(post.post_id)\n",
"\n",
" if post.title == \"[deleted]\":\n",
" continue\n",
"\n",
" pros = \" \".join([*set(pros)])\n",
" pros = pros.replace(\"[deleted]\",\"\")\n",
"\n",
" post.selftext = cleanup_body_text(post.selftext)\n",
" all_titles.append(modified_title + \" \" + post.selftext)\n",
" all_pros.append(pros)\n",
" all_names.append(post.name)\n",
" all_sources.append(f\"https://reddit.com/r/changemyview/comments/{post.post_id}\")\n",
" #print(post.title)\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 83,
"outputs": [
{
"data": {
"text/plain": "'it\\'s already been signed. They even claim to be adhering to it, though they\\'ve been found to be violating it before. There is no such thing as \"de facto acceptance of Israel\\'s nuclear program.\" the Non-Proliferation Treaty is only binding for signatory states. Israel is not a signatory. Article 10 of the NPT allows them to withdraw if they so choose. they have not done so. a whole new country which explicitly has a right to withdraw from the NPT and has not chosen to do so. It\\'s more accurate, I think, to say that the problem with Iran here from a legal standpoint is that they aren\\'t honoring their own commitments, rather than that they\\'re building weapons. They could pull out of the NPT at any time, and the ball would be essentially in America\\'s court, because their nuclear program would no longer be illegal by international legal standards. However, Iran insists both on developing nukes *and* remaining an NPT signatory non-nuclear state, and that\\'s what makes their program illegal. I\\'d also like to clarify that I\\'m not making an ethical argument here, this is just how international law currently works. because international law doesn\\'t require states to sign treaties, it only requires them to adhere to treaties they\\'ve already signed. Israel isn\\'t defying the UN, at least not in this particular case. Think of the NPT less like a standard law within a state and more like a contract. Once you\\'ve signed, you\\'re bound by the contract, but if you never sign it then you haven\\'t broken a law, you\\'ve just decided not to agree to the terms you were offered. > Because Iran did sign the treaty, and thus are bound by it. They signed on July 1, 1968. Hmm. So is the argument here that it\\'s not \"ok\" for Iran to have a nuke, since they signed treaty not to do so. But it\\'s \"ok\" for Israel to have one because they never signed such thing? Can\\'t quite put my finger on it, but doesn\\'t seem quite right this one.'"
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_pros[1]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 80,
"outputs": [],
"source": [
"# Place it all into a Pandas Dataframe\n",
"clean_df = pd.DataFrame({\n",
" \"INSTRUCTION\": all_titles,\n",
" \"RESPONSE\": all_pros,\n",
" \"SOURCE\": all_sources\n",
"}, index=all_names\n",
")"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"# Create Apache Paquete file\n",
"\n",
"import pyarrow as pa\n",
"import pyarrow.parquet as pq\n",
"\n",
"table = pa.Table.from_pandas(clean_df)\n",
"pq.write_table(table,\"output.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 82,
"outputs": [
{
"data": {
"text/plain": " INSTRUCTION \\\n0 Change my mind: I shouldn't get a job in this ... \n1 Change my mind: Iran has the right to develop ... \n2 Change my mind: The events in Paris suck...but... \n3 Change my mind: It is ok to hate a religion so... \n4 Change my mind: There is no productive reason ... \n5 Change my mind: Diet soda is perfectly healthy... \n6 Change my mind:Essential Oils are bullshit My ... \n7 Change my mind: I think the Paris shooting mak... \n8 Change my mind: Printing an image of the Musli... \n9 Change my mind: Philosophy has no tangible val... \n\n RESPONSE \\\n0 That is what someone in the 1500s would have s... \n1 it's already been signed. They even claim to b... \n2 Hm I guess I made the OP incorrectly. The mai... \n3 I don't understand your analogy. Promoting a ... \n4 ∆ I hadn't thought it from a \"let's trick peop... \n5 Thanks for a fresh argument! I hadn't conside... \n6 Most do. Some smell kinda funky. \n7 I already said in different comments that thi... \n8 The first bacon sandwich came about because 9... \n9 >Why restrict it to 50 years? I can name all s... \n\n SOURCE \n0 https://reddit.com/r/changemyview/comments/t3_... \n1 https://reddit.com/r/changemyview/comments/t3_... \n2 https://reddit.com/r/changemyview/comments/t3_... \n3 https://reddit.com/r/changemyview/comments/t3_... \n4 https://reddit.com/r/changemyview/comments/t3_... \n5 https://reddit.com/r/changemyview/comments/t3_... \n6 https://reddit.com/r/changemyview/comments/t3_... \n7 https://reddit.com/r/changemyview/comments/t3_... \n8 https://reddit.com/r/changemyview/comments/t3_... \n9 https://reddit.com/r/changemyview/comments/t3_... ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>INSTRUCTION</th>\n <th>RESPONSE</th>\n <th>SOURCE</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Change my mind: I shouldn't get a job in this ...</td>\n <td>That is what someone in the 1500s would have s...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Change my mind: Iran has the right to develop ...</td>\n <td>it's already been signed. They even claim to b...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Change my mind: The events in Paris suck...but...</td>\n <td>Hm I guess I made the OP incorrectly. The mai...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Change my mind: It is ok to hate a religion so...</td>\n <td>I don't understand your analogy. Promoting a ...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Change my mind: There is no productive reason ...</td>\n <td>∆ I hadn't thought it from a \"let's trick peop...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Change my mind: Diet soda is perfectly healthy...</td>\n <td>Thanks for a fresh argument! I hadn't conside...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Change my mind:Essential Oils are bullshit My ...</td>\n <td>Most do. Some smell kinda funky.</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Change my mind: I think the Paris shooting mak...</td>\n <td>I already said in different comments that thi...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Change my mind: Printing an image of the Musli...</td>\n <td>The first bacon sandwich came about because 9...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Change my mind: Philosophy has no tangible val...</td>\n <td>&gt;Why restrict it to 50 years? I can name all s...</td>\n <td>https://reddit.com/r/changemyview/comments/t3_...</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Test to see if it was sucessful\n",
"table = pq.read_table(\"output.parquet\")\n",
"table.to_pandas()"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
@@ -38,6 +38,8 @@ class OasstErrorCode(IntEnum):
TASK_REQUESTED_TYPE_NOT_AVAILABLE = 1006
TASK_AVAILABILITY_QUERY_FAILED = 1007
TASK_MESSAGE_TOO_LONG = 1008
TASK_MESSAGE_DUPLICATED = 1009
TASK_MESSAGE_TEXT_EMPTY = 1010
# 2000-3000: prompt_repository
INVALID_FRONTEND_MESSAGE_ID = 2000
@@ -76,6 +78,8 @@ class OasstErrorCode(IntEnum):
USER_DISABLED = 4001
USER_NOT_FOUND = 4002
EMOJI_OP_UNSUPPORTED = 5000
class OasstError(Exception):
"""Base class for Open-Assistant exceptions."""
@@ -13,7 +13,11 @@ class WorkRequest(pydantic.BaseModel):
conversation: protocol.Conversation = pydantic.Field(..., repr=False)
model_name: str = "distilgpt2"
max_new_tokens: int = 100
seed: int = pydantic.Field(default_factory=lambda: random.randint(0, 2**32 - 1))
seed: int = pydantic.Field(default_factory=lambda: random.randint(-(2**31), 2**31 - 1))
do_sample: bool = True
top_k: int = 50
top_p: float = 0.9
temperature: float = 1.0
class WorkResponsePacket(pydantic.BaseModel):
+83 -29
View File
@@ -57,6 +57,8 @@ class ConversationMessage(BaseModel):
text: str
lang: Optional[str] # BCP 47
is_assistant: bool
emojis: Optional[dict[str, int]] = None
user_emojis: Optional[list[str]] = None
class Conversation(BaseModel):
@@ -222,27 +224,43 @@ class LabelTaskMode(str, enum.Enum):
full = "full"
class LabelInitialPromptTask(Task):
"""A task to label an initial prompt."""
class LabelTaskDisposition(str, enum.Enum):
"""Reason why the task was issued."""
type: Literal["label_initial_prompt"] = "label_initial_prompt"
quality = "quality"
spam = "spam"
class LabelDescription(BaseModel):
name: str
widget: str
display_text: str
help_text: Optional[str]
class AbstractLabelTask(Task):
message_id: UUID
prompt: str
valid_labels: list[str]
mandatory_labels: Optional[list[str]]
mode: Optional[LabelTaskMode]
disposition: Optional[LabelTaskDisposition]
labels: Optional[list[LabelDescription]]
class LabelConversationReplyTask(Task):
class LabelInitialPromptTask(AbstractLabelTask):
"""A task to label an initial prompt."""
type: Literal["label_initial_prompt"] = "label_initial_prompt"
prompt: str
class LabelConversationReplyTask(AbstractLabelTask):
"""A task to label a reply to a conversation."""
type: Literal["label_conversation_reply"] = "label_conversation_reply"
conversation: Conversation # the conversation so far
message_id: UUID
reply_message: Optional[ConversationMessage]
reply: str
valid_labels: list[str]
mandatory_labels: Optional[list[str]]
mode: Optional[LabelTaskMode]
class LabelPrompterReplyTask(LabelConversationReplyTask):
@@ -315,39 +333,48 @@ class MessageRanking(Interaction):
ranking: conlist(item_type=int, min_items=1)
class LabelWidget(str, enum.Enum):
yes_no = "yes_no"
flag = "flag"
likert = "likert"
class TextLabel(str, enum.Enum):
"""A label for a piece of text."""
def __new__(cls, label: str, display_text: str = "", help_text: str = None):
def __new__(cls, label: str, widget: LabelWidget, display_text: str = "", help_text: str = None):
obj = str.__new__(cls, label)
obj._value_ = label
obj.widget = widget
obj.display_text = display_text
obj.help_text = help_text
return obj
spam = "spam", "Seems to be intentionally low-quality or irrelevant"
fails_task = "fails_task", "Fails to follow the correct instruction / task"
not_appropriate = "not_appropriate", "Inappropriate for customer assistant"
violence = "violence", "Encourages or fails to discourage violence/abuse/terrorism/self-harm"
excessive_harm = (
"excessive_harm",
"Content likely to cause excessive harm not justifiable in the context",
"Harm refers to physical or mental damage or injury to someone or something. Excessive refers to a reasonable threshold of harm in the context, for instance damaging skin is not excessive in the context of surgery.",
)
sexual_content = "sexual_content", "Contains sexual content"
toxicity = "toxicity", "Contains rude, abusive, profane or insulting content"
moral_judgement = "moral_judgement", "Expresses moral judgement"
political_content = "political_content", "Expresses political views"
humor = "humor", "Contains humorous content including sarcasm"
# yes/no questions
spam = "spam", LabelWidget.yes_no, "Seems to be intentionally low-quality or irrelevant"
fails_task = "fails_task", LabelWidget.yes_no, "Fails to follow the correct instruction / task"
# flags
pii = "pii", LabelWidget.flag, "Contains personal identifiable information (PII)"
not_appropriate = "not_appropriate", LabelWidget.flag, "Inappropriate"
hate_speech = (
"hate_speech",
LabelWidget.flag,
"Content is abusive or threatening and expresses prejudice against a protected characteristic",
"Prejudice refers to preconceived views not based on reason. Protected characteristics include gender, ethnicity, religion, sexual orientation, and similar characteristics.",
"Prejudice refers to preconceived views not based on reason. Protected characteristics "
"include gender, ethnicity, religion, sexual orientation, and similar characteristics.",
)
threat = "threat", "Contains a threat against a person or persons"
misleading = "misleading", "Contains text which is incorrect or misleading"
helpful = "helpful", "Completes the task to a high standard"
creative = "creative", "Expresses creativity in responding to the task"
sexual_content = "sexual_content", LabelWidget.flag, "Contains sexual content"
moral_judgement = "moral_judgement", LabelWidget.flag, "Expresses moral judgement"
political_content = "political_content", LabelWidget.flag, "Expresses political views"
# likert
quality = "quality", LabelWidget.likert, "Overall subjective quality rating of the message"
toxicity = "toxicity", LabelWidget.likert, "Rude, abusive, profane or insulting content"
humor = "humor", LabelWidget.likert, "Humorous content including sarcasm"
helpfulness = "helpfulness", LabelWidget.likert, "Helpfulness of the message"
creativity = "creativity", LabelWidget.likert, "Creativity"
violence = "violence", LabelWidget.likert, "Violence/abuse/terrorism/self-harm"
class TextLabels(Interaction):
@@ -358,6 +385,7 @@ class TextLabels(Interaction):
labels: dict[TextLabel, float]
message_id: UUID
task_id: Optional[UUID]
is_report: Optional[bool]
@property
def has_message_id(self) -> bool:
@@ -432,3 +460,29 @@ class OasstErrorResponse(BaseModel):
error_code: OasstErrorCode
message: str
class EmojiCode(str, enum.Enum):
thumbs_up = "+1" # 👍
thumbs_down = "-1" # 👎
red_flag = "red_flag" # 🚩
hundred = "100" # 💯
rofl = "rofl" # 🤣
clap = "clap" # 👏
diamond = "diamond" # 💎
heart_eyes = "heart_eyes" # 😍
disappointed = "disappointed" # 😞
poop = "poop" # 💩
skull = "skull" # 💀
class EmojiOp(str, enum.Enum):
togggle = "toggle"
add = "add"
remove = "remove"
class MessageEmojiRequest(BaseModel):
user: User
op: EmojiOp = EmojiOp.togggle
emoji: EmojiCode
@@ -0,0 +1,101 @@
---
annotations_creators:
- no-annotation
language:
- en
language_creators:
- machine-generated
license:
- mit
multilinguality:
- monolingual
pretty_name: "MT Note Generation"
size_categories:
- <500
source_datasets:
- mt_samples
tags:
- open-assistant
task_categories:
- conversational
task_ids:
- dialogue-generation
dataset_info:
features:
- name: conversation
dtype: string
splits:
- name: train
num_bytes: 1060800
num_examples: 270
- name: test
num_bytes: 339323
num_examples: 90
- name: validation
num_bytes: 337018
num_examples: 90
---
# Dataset Card for MT Samples Clinical Note Generation
## Dataset Description
- **Repository:**
[Code](https://github.com/LAION-AI/Open-Assistant/openassistant/datasets/mt_note_generation/)
## Dataset Summary
MT Note Generation is a set of synthetic dialogues between Assistant and User
where the user asks the assistant to generate a clinical note for a patient
persona.
This data was created by synthesizing the dialogues in
[MT Samples](https://mtsamples.com/) for clinical note generation.
Example:
```
User: Write a clinical note about a patient with the following chief complaint: Stomach pain for 2 weeks.
ASSISTANT:
CHIEF COMPLAINT: Stomach pain for 2 weeks. HISTORY OF PRESENT ILLNESS: The patient is a 45yo Mexican man without significant past medical history who presents to the emergency room with complaints of mid-epigastric and right upper quadrant abdominal pain for the last 14 days. The pain was initially crampy and burning in character and was relieved with food intake. He also reports that it initially was associated with a sour taste in his mouth. He went to his primary care physician who prescribed cimetidine 400mg qhs x 5 days; however, this did not relieve his symptoms. In fact, the pain has worsened such that the pain now radiates to the back but is waxing and waning in duration. It is relieved with standing and ambulation and exacerbated when lying in a supine position. He reports a decrease in appetite associated with a 4 lb. wt loss over the last 2 wks. He does have nausea with only one episode of non-bilious, non-bloody emesis on day of admission. He reports a 2 wk history of subjective fever and diaphoresis. He denies any diarrhea, constipation, dysuria, melena, or hematochezia. His last bowel movement was during the morning of admission and was normal. He denies any travel in the last 9 years and sick contacts.PAST MEDICAL HISTORY: Right inguinal groin cyst removal 15 years ago. Unknown etiology. No recurrence.
PAST SURGICAL HISTORY: Left femoral neck fracture with prosthesis secondary to a fall 4 years ago.
FAMILY HISTORY: Mother with diabetes. No history of liver disease. No malignancies.
SOCIAL HISTORY: The patient was born in central Mexico but moved to the United States 9 years ago. He is on disability due to his prior femoral fracture. He denies any tobacco or illicit drug use. He only drinks alcohol socially, no more than 1 drink every few weeks. He is married and has 3 healthy children. He denies any tattoos or risky sexual behavior.
ALLERGIES: NKDA.
MEDICATIONS: Tylenol prn (1-2 tabs every other day for the last 2 wks), Cimetidine 400mg po qhs x 5 days.
REVIEW OF SYSTEMS: No headache, vision changes. No shortness of breath. No chest pain or palpitations.
PHYSICAL EXAMINATION: Vitals: T 100.9-102.7 BP 136/86 Pulse 117 RR 12 98% sat on room air,Gen: Well-developed, well-nourished, no apparent distress.HEENT: Pupils equal, round and reactive to light. Anicteric. Oropharynx clear and moist.Neck: Supple. No lymphadenopathy or carotid bruits. No thyromegaly or masses.CHEST: Clear to auscultation bilaterally.CV: Tachycardic but regular rhythm, normal S1/S2, no murmurs/rubs/gallops.Abd: Soft, active bowel sounds. Tender in the epigastrium and right upper quadrant with palpation associated with slight guarding. No rebound tenderness. No hepatomegaly. No splenomegaly.Rectal: Stool was brown and guaiac negative.Ext: No cyanosis/clubbing/edema.Neurological: He was alert and oriented x3. CN II-XII intact. Normal 2+ DTRs. No focal neurological deficit.Skin: No jaundice. No skin rashes or lesions.
IMAGING DATA:CT Abdomen with contrast ( 11/29/03 ): There is a 6x6 cm multilobular hypodense mass seen at the level of the hepatic hilum and caudate lobe which is resulting in mass effect with dilatation of the intrahepatic radicals of the left lobe of the liver. The rest of the liver parenchyma is homogeneous. The gallbladder, pancreas, spleen, adrenal glands and kidneys are within normal limits. The retroperitoneal vascular structures are within normal limits. There is no evidence of lymphadenopathy, free fluid or fluid collections.HOSPITAL COURSE: The patient was admitted to the hospital for further evaluation. A diagnostic procedure was performed.
```
## Usage
The dataset contains one configuration, `dialogue_modeling`, which has a single
text `conversation` feature.
## Source data
The script modifies data from mtsamples.csv which is hosted in Kaggle:
https://www.kaggle.com/datasets/tboyle10/medicaltranscriptions
## Citation
Please cite our work if you find the resources in this repository useful:
```
@article{pruks2023mtsamplesnotegen,
author = {Yada Pruksachatkun},
title = {MT Samples Note Generation},
year = {2023}
}
```
@@ -0,0 +1,21 @@
from dataclasses import dataclass
import datasets
@dataclass
class OpenAssistantConfig(datasets.BuilderConfig):
"""BuilderConfig for OpenAssistant datasets."""
name: str = None
version: datasets.Version = None
description: str = None
schema: str = None
subset_id: str = None
features = datasets.Features(
{
"conversation": datasets.Value("string"),
}
)
@@ -0,0 +1,123 @@
# Copyright 2023 The OpenAssistant Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MT Note Generation is a set of synthetic dialogues between Assistant and
User where the user asks the assistant to generate a clinical note for a patient persona.
"""
import json
from typing import Dict, List, Tuple
import datasets
from .hub import OpenAssistantConfig, features
_CITATION = """\
@misc{transcribed medical transcription sample reports and examples, title={Welcome to MTSamples},
url={https://mtsamples.com/},
journal={Transcribed Medical Transcription Sample Reports and Examples}}
"""
_DATASETNAME = "mt_note_generation"
_DISPLAYNAME = "MT Samples Note Generation"
_DESCRIPTION = """\
A dataset of instructions for generating clinical notes from MT samples.
"""
_HOMEPAGE = ""
_LICENSE = "mit"
_URLS = {
_DATASETNAME: {
"train": "./data/mt_note_generation_train.jsonl",
"test": "./data/mt_note_generation_test.jsonl",
"validation": "./data/mt_note_generation_validation.jsonl",
}
}
_SUPPORTED_TASKS = ["dialogue-modeling"]
_VERSION = "1.0.0"
class MTNoteGenerationDataset(datasets.GeneratorBasedBuilder):
"""A set of dialogues synthesized from the MT Samples dataset."""
VERSION = datasets.Version(_VERSION)
BUILDER_CONFIGS = [
OpenAssistantConfig(
name=f"{_DATASETNAME}_dialogue_modeling",
version=VERSION,
description=f"OpenAssistant dataset config for {_DATASETNAME}",
schema="dialogue_modeling",
subset_id=_DATASETNAME,
)
]
DEFAULT_CONFIG_NAME = f"{_DATASETNAME}_dialogue_modeling"
def _info(self) -> datasets.DatasetInfo:
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager) -> List[datasets.SplitGenerator]:
urls = _URLS[_DATASETNAME]
data_dir = dl_manager.download_and_extract(urls)
return [
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
# Whatever you put in gen_kwargs will be passed to _generate_examples
gen_kwargs={
"filepath": data_dir,
"split": "train",
},
),
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": data_dir,
"split": "test",
},
),
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
gen_kwargs={
"filepath": data_dir,
"split": "validation",
},
),
]
def _generate_examples(self, filepath, split: str) -> Tuple[int, Dict]:
"""Yields examples as (key, example) tuples."""
if self.config.schema == "dialogue_modeling":
key = 0
with open(filepath[split], "r", encoding="utf8") as data:
while True:
line = data.readline()
if not line:
return
yield key, json.loads(line)
key += 1
@@ -0,0 +1,84 @@
import json
import math
import os
import random
import re
import sys
from string import punctuation
import kaggle
import pandas as pd
CLINICAL_NOTE_GENERATION_TEMPLATE = """User: Write a clinical note about a patient with the following {section}: {section_information}.
Rosey: {note}"""
def preprocess(mt_dataset):
def filter_for_notes(row):
normalized_transcript = row["transcription"].lower()
if "chief complaint:" in normalized_transcript:
return True
return False
mt_dataset = mt_dataset.dropna(subset=["description", "transcription"])
mt_note_subset = mt_dataset[mt_dataset.apply(filter_for_notes, axis=1)]
return mt_note_subset
def is_chief_complaint(section):
return "chief complaint" in section.lower()
def get_conversations(dataset):
def normalize_transcript(x):
x = re.sub(r"\.+", ".", x)
x = re.sub(r"\,+", ",", x)
x = re.sub(r":\s+", ": ", x)
x = re.sub(r"\.\s+", ". ", x)
x = re.sub(r":(\s)*\,+", ": ", x)
x = re.sub(r"\.\,+", ". ", x)
return x
conversations = []
for idx in range(len(dataset)):
transcript = normalize_transcript(dataset.iloc[idx]["transcription"])
sections = re.findall(r"\b[A-Z]+(?: [A-Z]+)*:", transcript)
if len(sections) >= 2:
note_prompt = transcript.split(sections[0])[1].split(sections[1])[0]
else:
continue
section_name = sections[0].lower().strip(punctuation)
if len(note_prompt.split(" ")) > 30 and is_chief_complaint(section_name):
# There are some chief complaints that seem to be HPI
section_name = "history of present illness"
conversations.append(
CLINICAL_NOTE_GENERATION_TEMPLATE.format(
section=section_name, section_information=note_prompt, note=transcript
)
)
return conversations
def main(output_dir: str = "data"):
"""Download and prepare the dataset for use."""
os.makedirs(output_dir, exist_ok=True)
kaggle.api.dataset_download_files("tboyle10/medicaltranscriptions", "data", unzip=True)
mt_samples = preprocess(pd.read_csv("mtsamples.csv"))
conversations = get_conversations(mt_samples)
random.shuffle(conversations)
train_limit = math.ceil(len(conversations) * 0.6)
dev_limit = math.ceil(len(conversations) * 0.8)
train, validation, test = (
conversations[:train_limit],
conversations[train_limit:dev_limit],
conversations[dev_limit:],
)
splits = {"train": train, "validation": validation, "test": test}
for split in ["train", "validation", "test"]:
with open(f"{output_dir}/mt_note_generation_{split}.jsonl", "w") as f:
for conversation in splits[split]:
f.write(f"{json.dumps({'conversation': conversation})}\n")
if __name__ == "__main__":
sys.exit(main())
+1
View File
@@ -7,6 +7,7 @@ pushd "$parent_path/../../backend"
export DEBUG_USE_SEED_DATA=True
export DEBUG_SKIP_TOXICITY_CALCULATION=True
export DEBUG_ALLOW_SELF_LABELING=True
export DEBUG_ALLOW_DUPLICATE_TASKS=True
export DEBUG_SKIP_EMBEDDING_COMPUTATION=True
uvicorn main:app --reload --port 8080 --host 0.0.0.0
+3
View File
@@ -0,0 +1,3 @@
.eslintrc.json
tailwind.config.js
.storybook/*
+6
View File
@@ -10,6 +10,7 @@ module.exports = {
"@storybook/addon-essentials",
"@storybook/addon-interactions",
"@chakra-ui/storybook-addon",
"storybook-addon-next-router",
],
framework: "@storybook/react",
core: {
@@ -23,7 +24,12 @@ module.exports = {
config.resolve.alias = {
...config.resolve.alias,
src: path.resolve(__dirname, "../src"),
styles: path.resolve(__dirname, "../styles"),
};
config.resolve.fallback = {
fs: false,
path: require.resolve('path-browserify'),
}
return config;
},
features: {
+4
View File
@@ -1,4 +1,5 @@
import "!style-loader!css-loader!postcss-loader!tailwindcss/tailwind.css";
import { RouterContext } from "next/dist/shared/lib/router-context";
export const parameters = {
actions: { argTypesRegex: "^on[A-Z].*" },
@@ -8,6 +9,9 @@ export const parameters = {
date: /Date$/,
},
},
nextRouter: {
Provider: RouterContext.Provider,
},
};
// Hacky solution to get Images in next to work
+67 -100
View File
@@ -2,8 +2,7 @@
## Purpose
This provides a comprehensive webapp interface for LAION's Open Assistant
project. Initially it will support:
This provides a comprehensive webapp interface for LAION's Open Assistant project. Initially it will support:
1. User registration using either Discord or Email.
1. Adding responses to incomplete Open Assistant tasks.
@@ -11,8 +10,7 @@ project. Initially it will support:
1. Viewing an activity leaderboard.
1. Tracking community wide updates.
This interface compliments the Discord bot and will give access to the same
underlying tasks.
This interface compliments the Discord bot and will give access to the same underlying tasks.
## Contributing
@@ -22,67 +20,54 @@ This website is built using:
1. [npm](https://www.npmjs.com/): The node package manager for building.
1. [React](https://reactjs.org/): The core frontend framework.
1. [Next.js](https://nextjs.org/): A React scaffolding framework to streamline
development.
1. [Prisma](https://www.prisma.io/): An ORM to interact with a web specific
[Postgres](https://www.postgresql.org/) database.
1. [NextAuth.js](https://next-auth.js.org/): A user authentication framework to
ensure we handle accounts with best practices.
1. [TailwindCSS](https://tailwindcss.com/): A general purpose framework for
styling any component.
1. [Chakra-UI](https://chakra-ui.com/): A wide collection of pre-built UI
components that generally look pretty good.
1. [Next.js](https://nextjs.org/): A React scaffolding framework to streamline development.
1. [Prisma](https://www.prisma.io/): An ORM to interact with a web specific [Postgres](https://www.postgresql.org/)
database.
1. [NextAuth.js](https://next-auth.js.org/): A user authentication framework to ensure we handle accounts with best
practices.
1. [TailwindCSS](https://tailwindcss.com/): A general purpose framework for styling any component.
1. [Chakra-UI](https://chakra-ui.com/): A wide collection of pre-built UI components that generally look pretty good.
### Set up your environment
To contribute to the website, make sure you have the following setup and
installed:
To contribute to the website, make sure you have the following setup and installed:
1. [NVM](https://github.com/nvm-sh/nvm): The Node Version Manager makes it easy
to ensure you have the right NodeJS version installed. Once installed, run
`nvm use 16` to use Node 16.x. The website is known to be stable with NodeJS
1. [NVM](https://github.com/nvm-sh/nvm): The Node Version Manager makes it easy to ensure you have the right NodeJS
version installed. Once installed, run `nvm use 16` to use Node 16.x. The website is known to be stable with NodeJS
version 16.x. This will install both Node and NPM.
1. [Docker](https://www.docker.com/): We use docker to simplify running
dependent services.
1. [Docker](https://www.docker.com/): We use docker to simplify running dependent services.
### Getting everything up and running
If you're doing active development we suggest the following workflow:
1. In one tab, navigate to the project root.
1. Run `docker compose up frontend-dev --build --attach-dependencies`. You can
optionally include `-d` to detach and later track the logs if desired.
1. Run `docker compose up frontend-dev --build --attach-dependencies`. You can optionally include `-d` to detach and
later track the logs if desired.
1. In another tab navigate to `${OPEN_ASSISTANT_ROOT/website`.
1. Run `npm ci`
1. Run `npx prisma db push` (This is also needed when you restart the docker
stack from scratch).
1. Run `npm run dev`. Now the website is up and running locally at
`http://localhost:3000`.
1. To create an account, login via the user using email authentication and
navigate to `http://localhost:1080`. Check the email listed and click the
log in link. You're now logged in and authenticated.
1. Run `npx prisma db push` (This is also needed when you restart the docker stack from scratch).
1. Run `npm run dev`. Now the website is up and running locally at `http://localhost:3000`.
1. To create an account, login via the user using email authentication and navigate to `http://localhost:1080`. Check
the email listed and click the log in link. You're now logged in and authenticated.
### Using debug user credentials
You can use the debug credentials provider to log in without fancy emails or
OAuth.
You can use the debug credentials provider to log in without fancy emails or OAuth.
1. This feature is automatically on in development mode, i.e. when you run
`npm run dev`. In case you want to do the same with a production build (for
example, the docker image), then run the website with environment variable
1. This feature is automatically on in development mode, i.e. when you run `npm run dev`. In case you want to do the
same with a production build (for example, the docker image), then run the website with environment variable
`DEBUG_LOGIN=true`.
1. Use the `Login` button in the top right to go to the login page.
1. You should see a section for debug credentials. Enter any username you wish,
you will be logged in as that user.
1. You should see a section for debug credentials. Enter any username you wish, you will be logged in as that user.
### Using Storybook
To develop components using [Storybook](https://storybook.js.org/) run
`npm run storybook`. Then navigate to in your browser to
`http://localhost:6006`.
To develop components using [Storybook](https://storybook.js.org/) run `npm run storybook`. Then navigate to in your
browser to `http://localhost:6006`.
To create a new story create a file named `[componentName].stories.js`. An
example how such a story could look like, see `Header.stories.jsx`.
To create a new story create a file named `[componentName].stories.js`. An example how such a story could look like, see
`Header.stories.jsx`.
## Code Layout
@@ -90,12 +75,10 @@ example how such a story could look like, see `Header.stories.jsx`.
All react code is under `src/` with a few sub directories:
1. `pages/`: All pages a user could navigate too and API URLs which are under
`pages/api/`.
1. `components/`: All re-usable React components. If something gets used twice
we should create a component and put it here.
1. `lib/`: A generic place to store library files that are used anywhere. This
doesn't have much structure yet.
1. `pages/`: All pages a user could navigate too and API URLs which are under `pages/api/`.
1. `components/`: All re-usable React components. If something gets used twice we should create a component and put it
here.
1. `lib/`: A generic place to store library files that are used anywhere. This doesn't have much structure yet.
NOTE: `styles/` can be ignored for now.
@@ -113,25 +96,20 @@ We're not really using CSS styles. `styles/` can be ignored.
## Testing the UI
Cypress is used for end-to-end (e2e) and component testing and is configured in
`./cypress.config.ts`. The `./cypress` folder is used for supporting
configuration files etc.
Cypress is used for end-to-end (e2e) and component testing and is configured in `./cypress.config.ts`. The `./cypress`
folder is used for supporting configuration files etc.
- Store e2e tests in the `./cypress/e2e` folder.
- Store component tests adjacent to the component being tested. If you want to
wriite a test for `./src/components/Layout.tsx` then store the test file at
`./src/components/Layout.cy.tsx`.
- Store component tests adjacent to the component being tested. If you want to wriite a test for
`./src/components/Layout.tsx` then store the test file at `./src/components/Layout.cy.tsx`.
A few npm scripts are available for convenience:
- `npm run cypress`: Useful for development, it opens Cypress and allows you to
explore, run and debug tests. It assumes you have the NextJS site running at
`localhost:3000`.
- `npm run cypress:run`: Runs all tests. Useful for a quick sanity check before
sending a PR or to run in CI pipelines.
- `npm run cypress:image-baseline`: If you have tests failing because of visual
changes that was expected, this command will update the baseline images stored
in `./cypress-visual-screenshots/baseline` with those from the adjacent
- `npm run cypress`: Useful for development, it opens Cypress and allows you to explore, run and debug tests. It assumes
you have the NextJS site running at `localhost:3000`.
- `npm run cypress:run`: Runs all tests. Useful for a quick sanity check before sending a PR or to run in CI pipelines.
- `npm run cypress:image-baseline`: If you have tests failing because of visual changes that was expected, this command
will update the baseline images stored in `./cypress-visual-screenshots/baseline` with those from the adjacent
comparison folder. More can be found in the
[docs of `uktrade/cypress-image-diff`](https://github.com/uktrade/cypress-image-diff/blob/main/docs/CLI.md#update-all-baseline-images-for-failing-tests).
@@ -141,10 +119,9 @@ Read more in the [./cypress README](cypress/).
Jest and React Testing Library are used for unit testing JS/TS/TSX code.
- Store unit test files adjacent to the file being tested and have the filename
end with `.test.ts` for non-React code or `.test.tsx` for React code.
- `npm run jest`: automatically runs tests and watches for any relevant changes
to rerun tests.
- Store unit test files adjacent to the file being tested and have the filename end with `.test.ts` for non-React code
or `.test.tsx` for React code.
- `npm run jest`: automatically runs tests and watches for any relevant changes to rerun tests.
Read more in the [./src/README.md](src/README.md).
@@ -152,30 +129,25 @@ Read more in the [./src/README.md](src/README.md).
When writing code for the website, we have a few best practices:
1. When importing packages import external dependencies first then local
dependencies. Order them alphabetically according to the package name.
1. When trying to implement something new, check if
[Chakra-UI](https://chakra-ui.com/) has components that are close enough to
your need. For example Sliders, Radio Buttons, Progress indicators, etc.
They have a lot and we can save time by re-using what they have and tweaking
the style as needed.
1. Format everything with [Prettier](https://prettier.io/). This is done by
default with pre-submits. We currently don't have any custom settings.
1. Define functional React components (with types for all properties when
feasible).
1. When importing packages import external dependencies first then local dependencies. Order them alphabetically
according to the package name.
1. When trying to implement something new, check if [Chakra-UI](https://chakra-ui.com/) has components that are close
enough to your need. For example Sliders, Radio Buttons, Progress indicators, etc. They have a lot and we can save
time by re-using what they have and tweaking the style as needed.
1. Format everything with [Prettier](https://prettier.io/). This is done by default with pre-submits. We currently
don't have any custom settings.
1. Define functional React components (with types for all properties when feasible).
### Developing New Features
When working on new features or making significant changes that can't be done
within a single Pull Request, we ask that you make use of Feature Flags.
When working on new features or making significant changes that can't be done within a single Pull Request, we ask that
you make use of Feature Flags.
We've set up
[`react-feature-flags`](https://www.npmjs.com/package/react-feature-flags) to
make this easier. To get started:
We've set up [`react-feature-flags`](https://www.npmjs.com/package/react-feature-flags) to make this easier. To get
started:
1. Add a new flag entry to `website/src/flags.ts`. We have an example flag you
can copy as an example. Be sure to `isActive` to true when testing your
features but false when submitting your PR.
1. Add a new flag entry to `website/src/flags.ts`. We have an example flag you can copy as an example. Be sure to
`isActive` to true when testing your features but false when submitting your PR.
1. Use your flag wherever you add a new UI element. This can be done with:
```js
@@ -188,29 +160,24 @@ import { Flags } from "react-feature-flags";
You can see an example of how this works by checking `website/src/components/Header/Headers.tsx` where we use `flagTest`.
1. Once you've finished building out the feature and it is ready for everyone
to use, it's safe to remove the `Flag` wrappers around your component and
the entry in `flags.ts`.
1. Once you've finished building out the feature and it is ready for everyone to use, it's safe to remove the `Flag`
wrappers around your component and the entry in `flags.ts`.
### URL Paths
To use stable and consistent URL paths, we recommend the following strategy for
new tasks:
To use stable and consistent URL paths, we recommend the following strategy for new tasks:
1. For any task that involves writing a free-form response, put the page under
`website/src/pages/create` with a page name matching the task type, such as
`initial_prompt.tsx`.
1. For any task that evaluates, rates, or ranks content, put the page under
`website/src/pages/evaluate` with a page name matching the task type such as
`rank_initial_prompts.tsx`.
1. For any task that involves writing a free-form response, put the page under `website/src/pages/create` with a page
name matching the task type, such as `initial_prompt.tsx`.
1. For any task that evaluates, rates, or ranks content, put the page under `website/src/pages/evaluate` with a page
name matching the task type such as `rank_initial_prompts.tsx`.
With this we'll be able to ensure these contribution pages are hidden from
logged out users but accessible to logged in users.
With this we'll be able to ensure these contribution pages are hidden from logged out users but accessible to logged in
users.
## Learn More
To learn more about Next.js, take a look at the following resources:
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js
features and API.
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+40 -58
View File
@@ -1,24 +1,19 @@
# Component and e2e testing with Cypress
[Cypress](https://www.cypress.io/) is used for both component- and end-to-end
testing. Below there's a few examples for the context of this site. To learn
more, the
[Cypress documentation](https://docs.cypress.io/guides/getting-started/opening-the-app)
has it all.
[Cypress](https://www.cypress.io/) is used for both component- and end-to-end testing. Below there's a few examples for
the context of this site. To learn more, the
[Cypress documentation](https://docs.cypress.io/guides/getting-started/opening-the-app) has it all.
Don't get scared by the commercial offerings they offer. Their core is open
source, the cloud offering is not necesarry at all and can be replaced by CI
tooling and [community efforts](https://sorry-cypress.dev/).
Don't get scared by the commercial offerings they offer. Their core is open source, the cloud offering is not necesarry
at all and can be replaced by CI tooling and [community efforts](https://sorry-cypress.dev/).
# Component testing
To write a new component test, you either create a new `.tsx` adjacent to the
component you want to test or you can use the guide presented yo you when
running `npm run cypress` which allows you to easily create the skeleton test
for an existing component.
To write a new component test, you either create a new `.tsx` adjacent to the component you want to test or you can use
the guide presented yo you when running `npm run cypress` which allows you to easily create the skeleton test for an
existing component.
If you have a `Button.tsx` component, create a file next to it called
`Button.cy.tsx` which could look like this:
If you have a `Button.tsx` component, create a file next to it called `Button.cy.tsx` which could look like this:
```typescript
import React from "react";
@@ -35,28 +30,24 @@ describe("<Button />", () => {
## What's happening here?
First we use `cy.mount` to mount our component under test. Notive how we specify
`className` and inner text - this is where we arrange our component with fake
data that we could assert on later.
First we use `cy.mount` to mount our component under test. Notive how we specify `className` and inner text - this is
where we arrange our component with fake data that we could assert on later.
In the example above, we also use `cy.get` to select the rendered `button`
element. Cypress has multiple ways to
[select elements](https://docs.cypress.io/guides/references/best-practices),
`get` is just one of them (and often not recommended).
In the example above, we also use `cy.get` to select the rendered `button` element. Cypress has multiple ways to
[select elements](https://docs.cypress.io/guides/references/best-practices), `get` is just one of them (and often not
recommended).
At last, we use `captureSnapshot` which is a plugin that snaps a photo of the
`button` element and compares it to a baseline located in the
`./cypress-visual-screenshots/baseline/` folder. If there's too many unidentical
pixels between the two, it will fail the test.
At last, we use `captureSnapshot` which is a plugin that snaps a photo of the `button` element and compares it to a
baseline located in the `./cypress-visual-screenshots/baseline/` folder. If there's too many unidentical pixels between
the two, it will fail the test.
# End-to-end (e2e) testing
e2e tests are stored in the `./cypress/e2e` folder and should be named
`{page}.cy.ts` and located in a relative folder structure that mirrors the page
under test.
e2e tests are stored in the `./cypress/e2e` folder and should be named `{page}.cy.ts` and located in a relative folder
structure that mirrors the page under test.
When running `npm run cypress` and selecting e2e testing, we assume you have the
NextJS site running at `localhost:3000`.
When running `npm run cypress` and selecting e2e testing, we assume you have the NextJS site running at
`localhost:3000`.
An example test could look as follows:
@@ -74,39 +65,33 @@ export {};
## What's happening here?
First we use [`cy.visit`](https://docs.cypress.io/api/commands/visit) to point
the browser at the desired page. It appends relative paths to the configured
`baseUrl` (found in `./cypress.config.ts`).
First we use [`cy.visit`](https://docs.cypress.io/api/commands/visit) to point the browser at the desired page. It
appends relative paths to the configured `baseUrl` (found in `./cypress.config.ts`).
Cypress will
[automatically await](https://docs.cypress.io/guides/core-concepts/introduction-to-cypress#Timeouts)
almost anything you do, but fail if the default timeout is reached.
Cypress will [automatically await](https://docs.cypress.io/guides/core-concepts/introduction-to-cypress#Timeouts) almost
anything you do, but fail if the default timeout is reached.
Then we get the email input field and type our email address. We find the input
field using the data-cy attribute that we added in the source code of the
element on the page.
Then we get the email input field and type our email address. We find the input field using the data-cy attribute that
we added in the source code of the element on the page.
```jsx
<Input data-cy="email-address" placeholder="Email Address" />
```
Using `data-cy` is how we ensure that selecting the element is robust to changes
in page design or function and is one of the
Using `data-cy` is how we ensure that selecting the element is robust to changes in page design or function and is one
of the
[best practices recommended by Cypress](https://docs.cypress.io/guides/references/best-practices#Selecting-Elements).
Next we call `type()` to use the keyboard, cypress will automatically focus the
element and send the keypress events. Notice the `{enter}` keyword, this will
cause Cypress to hit the return key which we expect to submit the form.
Next we call `type()` to use the keyboard, cypress will automatically focus the element and send the keypress events.
Notice the `{enter}` keyword, this will cause Cypress to hit the return key which we expect to submit the form.
We then assert that the URL should contain `/auth/verify`. Again the timeout
will make sure we are not waiting forever, and the test will fail if we do not
manage to get there in a reasonable time.
We then assert that the URL should contain `/auth/verify`. Again the timeout will make sure we are not waiting forever,
and the test will fail if we do not manage to get there in a reasonable time.
## Authenticating in e2e tests
For end-to-end tests almost every test will need to first sign in to the
website. To make this easier we have a custom command for Cypress that makes
logging in with an email address a single command, `cy.signInWithEmail()`.
For end-to-end tests almost every test will need to first sign in to the website. To make this easier we have a custom
command for Cypress that makes logging in with an email address a single command, `cy.signInWithEmail()`.
```typescript
describe("replying as the assistant", () => {
@@ -115,16 +100,13 @@ describe("replying as the assistant", () => {
cy.visit("/create/assistant_reply");
cy.get('[data-cy="reply"').type(
"You need to run pre-commit to make the reviewer happy."
);
cy.get('[data-cy="reply"').type("You need to run pre-commit to make the reviewer happy.");
cy.get('[data-cy="submit"]').click();
});
});
```
In this example we sign in as `cypress@example.com` before visiting the
`/create/assistant_reply` page that is only available when authenticated. We can
then continue on with our test as normal. Note: using `cy.signInWithEmail()`
requires that the maildev is running, which should have been started as part of
the `docker compose up` command that is required to do any end-to-end testing.
In this example we sign in as `cypress@example.com` before visiting the `/create/assistant_reply` page that is only
available when authenticated. We can then continue on with our test as normal. Note: using `cy.signInWithEmail()`
requires that the maildev is running, which should have been started as part of the `docker compose up` command that is
required to do any end-to-end testing.
+1 -4
View File
@@ -7,9 +7,6 @@ describe("<Container />", () => {
const className = "my-class";
const text = "test_container";
cy.mount(<Container className={className}>{text}</Container>);
cy.get(`div.${className}`)
.should("have.class", className)
.should("be.visible")
.should("contain", text);
cy.get(`div.${className}`).should("have.class", className).should("be.visible").should("contain", text);
});
});
@@ -12,25 +12,18 @@ describe("Contract test for Oasst API", function () {
} as BackendUserCore;
it("can fetch a task", async () => {
expect(await oasstApiClient.fetchTask("random", testUser)).to.be.not.null;
expect(await oasstApiClient.fetchTask("random", testUser, "en")).to.be.not.null;
});
it("can ack a task", async () => {
const task = await oasstApiClient.fetchTask("random", testUser);
const task = await oasstApiClient.fetchTask("random", testUser, "en");
expect(await oasstApiClient.ackTask(task.id, "321")).to.be.null;
});
it("can record a taskInteraction", async () => {
const task = await oasstApiClient.fetchTask("random", testUser);
const task = await oasstApiClient.fetchTask("random", testUser, "en");
expect(
await oasstApiClient.interactTask(
"text_reply_to_message",
task.id,
"321",
"1",
{ text: "Test" },
testUser
)
await oasstApiClient.interactTask("text_reply_to_message", task.id, "321", "1", { text: "Test" }, testUser, "en")
).to.be.not.null;
});
@@ -0,0 +1,26 @@
describe("labeling assistant replies", () => {
it("completes the current task on submit and on request shows a new task", () => {
cy.signInWithEmail("cypress@example.com");
cy.visit("/label/label_assistant_reply");
cy.get('[data-cy="task"]')
.invoke("attr", "data-task-type")
.then((type) => {
cy.log("Task type", type);
// For specific task pages the no task available result is normal.
if (type === undefined) return;
cy.get('[data-cy="label-options"]').each((label) => {
// Click the 4th option
cy.wrap(label).find('[data-cy="radio-option"]').eq(3).click();
});
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="submit"]').click();
});
});
});
export {};
@@ -0,0 +1,26 @@
describe("labeling initial prompts", () => {
it("completes the current task on submit and on request shows a new task", () => {
cy.signInWithEmail("cypress@example.com");
cy.visit("/label/label_initial_prompt");
cy.get('[data-cy="task"]')
.invoke("attr", "data-task-type")
.then((type) => {
cy.log("Task type", type);
// For specific task pages the no task available result is normal.
if (type === undefined) return;
cy.get('[data-cy="label-options"]').each((label) => {
// Click the 4th option
cy.wrap(label).find('[data-cy="radio-option"]').eq(3).click();
});
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="submit"]').click();
});
});
});
export {};
@@ -0,0 +1,26 @@
describe("labeling prompter replies", () => {
it("completes the current task on submit and on request shows a new task", () => {
cy.signInWithEmail("cypress@example.com");
cy.visit("/label/label_prompter_reply");
cy.get('[data-cy="task"]')
.invoke("attr", "data-task-type")
.then((type) => {
cy.log("Task type", type);
// For specific task pages the no task available result is normal.
if (type === undefined) return;
cy.get('[data-cy="label-options"]').each((label) => {
// Click the 4th option
cy.wrap(label).find('[data-cy="radio-option"]').eq(3).click();
});
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="submit"]').click();
});
});
});
export {};
@@ -0,0 +1,23 @@
describe("no tasks available", () => {
it("displays an empty state when no tasks are available", () => {
cy.signInWithEmail("cypress@example.com");
cy.intercept(
{
method: "GET",
url: "/api/new_task/prompter_reply",
},
{
statusCode: 500,
body: {
message: "No tasks of type 'label_prompter_reply' are currently available.",
errorCode: 1006,
httpStatusCode: 503,
},
}
).as("newTaskPrompterReply");
cy.visit("/create/user_reply");
cy.wait("@newTaskPrompterReply").then(() => {
cy.get('[data-cy="cy-no-tasks"]').should("exist");
});
});
});
+18 -36
View File
@@ -44,47 +44,29 @@ describe("handles random tasks", () => {
break;
}
case "label-task": {
cy.get('[data-cy="label-group-item"]')
.first()
.invoke("attr", "data-label-type")
.then((label_type) => {
const parent = cy
.get('[data-cy="label-group-item"]')
.first();
cy.log("Label type", label_type);
cy.get('[data-cy="label-options"]').each((label) => {
// Click the 4th option
cy.wrap(label).find('[data-cy="radio-option"]').eq(3).click();
});
switch (label_type) {
case "slider": {
// Clicking on the slider will set the value to about the middle where it clicks
parent
.get('[aria-roledescription="slider"]')
.first()
.click();
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="submit"]').click();
break;
}
case "radio": {
// Clicking on the slider will set the value to about the middle where it clicks
parent
.get('[aria-roledescription="radio-button"]')
.last()
.click();
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="submit"]').click();
break;
}
}
});
cy.get('[data-cy="submit"]').click();
break;
}
case "spam-task": {
cy.get('[data-cy="not-spam-button"]').click();
cy.get('[data-cy="review"]').click();
cy.get('[data-cy="submit"]').click();
break;
}
case undefined: {
throw new Error("No tasks available, but at least create initial prompt expected");
}
default:
throw new Error(`Unexpected task type: ${type}`);
}
+5 -10
View File
@@ -37,19 +37,14 @@
// }
Cypress.Commands.add("signInUsingEmailedLink", (emailAddress) => {
const mailDevApi = `${Cypress.env("MAILDEV_PROTOCOL")}://${Cypress.env(
"MAILDEV_HOST"
)}:${Cypress.env("MAILDEV_API_PORT")}`;
cy.request(
"GET",
`${mailDevApi}/email?headers.to=${emailAddress.toLowerCase()}`
).then((response) => {
const mailDevApi = `${Cypress.env("MAILDEV_PROTOCOL")}://${Cypress.env("MAILDEV_HOST")}:${Cypress.env(
"MAILDEV_API_PORT"
)}`;
cy.request("GET", `${mailDevApi}/email?headers.to=${emailAddress.toLowerCase()}`).then((response) => {
const emails = response.body;
// Find and use login link
const loginLink = emails
.pop()
.html.match(/href="[^"]+(\/api\/auth\/callback\/[^"]+?)"/)[1];
const loginLink = emails.pop().html.match(/href="[^"]+(\/api\/auth\/callback\/[^"]+?)"/)[1];
cy.visit(loginLink);
});
});
+1 -1
View File
@@ -1,6 +1,6 @@
module.exports = {
i18n: {
defaultLocale: "en",
locales: ["en"],
locales: ["bn", "de", "en", "es", "fr", "ja", "pt-BR", "ru", "zh"],
},
};
+174 -149
View File
@@ -21,6 +21,8 @@
"@next/font": "^13.1.0",
"@prisma/client": "^4.7.1",
"@tailwindcss/forms": "^0.5.3",
"@tanstack/react-table": "^8.7.6",
"accept-language-parser": "^1.5.0",
"autoprefixer": "^10.4.13",
"axios": "^1.2.1",
"boolean": "^3.2.0",
@@ -31,6 +33,7 @@
"focus-visible": "^5.2.0",
"framer-motion": "^6.5.1",
"install": "^0.13.0",
"lucide-react": "^0.105.0",
"next": "13.0.6",
"next-auth": "^4.18.6",
"next-i18next": "^13.0.3",
@@ -38,13 +41,13 @@
"npm": "^9.2.0",
"postcss-focus-visible": "^7.1.0",
"react": "18.2.0",
"react-cookies": "^0.1.1",
"react-dom": "18.2.0",
"react-feature-flags": "^1.0.0",
"react-hook-form": "^7.42.1",
"react-i18next": "^12.1.4",
"react-icons": "^4.7.1",
"react-table": "^7.8.0",
"sharp": "^0.31.3",
"storybook-addon-next-router": "^4.0.2",
"swr": "^2.0.0",
"tailwindcss": "^3.2.4",
"unique-username-generator": "^1.1.3",
@@ -6199,7 +6202,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/addon-actions/-/addon-actions-6.5.15.tgz",
"integrity": "sha512-cnLzVK1S+EydFDSuvxMmMAxVqNXijBGdV9QTgsu6ys5sOkoiXRETKZmxuN8/ZRbkfc4N+1KAylSCZOOHzBQTBQ==",
"dev": true,
"dependencies": {
"@storybook/addons": "6.5.15",
"@storybook/api": "6.5.15",
@@ -6242,7 +6244,6 @@
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/react-inspector/-/react-inspector-5.1.1.tgz",
"integrity": "sha512-GURDaYzoLbW8pMGXwYPDBIv6nqei4kK7LPRZ9q9HCZF54wqXz/dnylBp/kfE9XmekBhHvLDdcYeyIwSrvtOiWg==",
"dev": true,
"dependencies": {
"@babel/runtime": "^7.0.0",
"is-dom": "^1.0.0",
@@ -6737,7 +6738,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/addons/-/addons-6.5.15.tgz",
"integrity": "sha512-xT31SuSX+kYGyxCNK2nqL7WTxucs3rSmhiCLovJcUjYk+QquV3c2c53Ki7lwwdDbzfXFcNAe0HJ4hoTN4jhn0Q==",
"dev": true,
"dependencies": {
"@storybook/api": "6.5.15",
"@storybook/channels": "6.5.15",
@@ -6764,7 +6764,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/api/-/api-6.5.15.tgz",
"integrity": "sha512-BBE0KXKvj1/3jTghbIoWfrcDM0t+xO7EYtWWAXD6XlhGsZVD2Dy82Z52ONyLulMDRpMWl0OYy3h6A1YnFUH25w==",
"dev": true,
"dependencies": {
"@storybook/channels": "6.5.15",
"@storybook/client-logger": "6.5.15",
@@ -8234,7 +8233,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/channel-postmessage/-/channel-postmessage-6.5.15.tgz",
"integrity": "sha512-gMpA8LWT8lC4z5KWnaMh03aazEwtDO7GtY5kZVru+EEMgExGmaR82qgekwmLmgLj2nRJEv0o138o9IqYUcou8w==",
"dev": true,
"dependencies": {
"@storybook/channels": "6.5.15",
"@storybook/client-logger": "6.5.15",
@@ -8270,7 +8268,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/channels/-/channels-6.5.15.tgz",
"integrity": "sha512-gPpsBgirv2NCXbH4WbYqdkI0JLE96aiVuu7UEWfn9yu071pQ9CLHbhXGD9fSFNrfOkyBBY10ppSE7uCXw3Wexg==",
"dev": true,
"dependencies": {
"core-js": "^3.8.2",
"ts-dedent": "^2.0.0",
@@ -8285,7 +8282,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/client-api/-/client-api-6.5.15.tgz",
"integrity": "sha512-0ZGpRgVz7rdbCguBqBpwObXbsVY5qlSTWDzzIBpmz8EkxW/MtK5wEyeq+0L0O+DTn41FwvH5yCGLAENpzWD8BQ==",
"dev": true,
"dependencies": {
"@storybook/addons": "6.5.15",
"@storybook/channel-postmessage": "6.5.15",
@@ -8321,7 +8317,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/client-logger/-/client-logger-6.5.15.tgz",
"integrity": "sha512-0uyxKvodq+FycGv6aUwC1wUR6suXf2+7ywMFAOlYolI4UvNj8NyU/5AfgKT5XnxYAgPmoCiAjOE700TrfHrosw==",
"dev": true,
"dependencies": {
"core-js": "^3.8.2",
"global": "^4.4.0"
@@ -8335,7 +8330,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/components/-/components-6.5.15.tgz",
"integrity": "sha512-bHTT0Oa3s4g+MBMaLBbX9ofMtb1AW59AzIUNGrfqW1XqJMGuUHMiJ7TSo+i5dRSFpbFygnwMEG9LfHxpR2Z0Dw==",
"dev": true,
"dependencies": {
"@storybook/client-logger": "6.5.15",
"@storybook/csf": "0.0.2--canary.4566f4d.1",
@@ -9234,7 +9228,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/core-events/-/core-events-6.5.15.tgz",
"integrity": "sha512-B1Ba6l5W7MeNclclqMMTMHgYgfdpB5SIhNCQFnzIz8blynzRhNFMdxvbAl6Je5G0S4xydYYd7Lno2kXQebs7HA==",
"dev": true,
"dependencies": {
"core-js": "^3.8.2"
},
@@ -10019,7 +10012,6 @@
"version": "0.0.2--canary.4566f4d.1",
"resolved": "https://registry.npmjs.org/@storybook/csf/-/csf-0.0.2--canary.4566f4d.1.tgz",
"integrity": "sha512-9OVvMVh3t9znYZwb0Svf/YQoxX2gVOeQTGe2bses2yj+a3+OJnCrUF3/hGv6Em7KujtOdL2LL+JnG49oMVGFgQ==",
"dev": true,
"dependencies": {
"lodash": "^4.17.15"
}
@@ -11967,7 +11959,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/router/-/router-6.5.15.tgz",
"integrity": "sha512-9t8rI8t7/Krolau29gsdjdbRQ66orONIyP0efp0EukVgv6reNFzb/U14ARrl0uHys6Tl5Xyece9FoakQUdn8Kg==",
"dev": true,
"dependencies": {
"@storybook/client-logger": "6.5.15",
"core-js": "^3.8.2",
@@ -11988,7 +11979,6 @@
"version": "7.3.2",
"resolved": "https://registry.npmjs.org/@storybook/semver/-/semver-7.3.2.tgz",
"integrity": "sha512-SWeszlsiPsMI0Ps0jVNtH64cI5c0UF3f7KgjVKJoNP30crQ6wUSddY2hsdeczZXEKVJGEn50Q60flcGsQGIcrg==",
"dev": true,
"dependencies": {
"core-js": "^3.6.5",
"find-up": "^4.1.0"
@@ -12004,7 +11994,6 @@
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
"integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
"dev": true,
"dependencies": {
"locate-path": "^5.0.0",
"path-exists": "^4.0.0"
@@ -12017,7 +12006,6 @@
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
"integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
"dev": true,
"dependencies": {
"p-locate": "^4.1.0"
},
@@ -12029,7 +12017,6 @@
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
"integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
"dev": true,
"dependencies": {
"p-try": "^2.0.0"
},
@@ -12044,7 +12031,6 @@
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
"integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
"dev": true,
"dependencies": {
"p-limit": "^2.2.0"
},
@@ -12094,7 +12080,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/store/-/store-6.5.15.tgz",
"integrity": "sha512-r6cYTf6GtbqgdI4ZG3xuWdJAAu5fJ3xAWMiDkHyoK2u+R2TeYXIsJvgn0BPrW87sZhELIkg4ckdFECmATs3kpQ==",
"dev": true,
"dependencies": {
"@storybook/addons": "6.5.15",
"@storybook/client-logger": "6.5.15",
@@ -12232,7 +12217,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/theming/-/theming-6.5.15.tgz",
"integrity": "sha512-pgdW0lVZKKXQ4VhIfLHycMmwFSVOY7vLTKnytag4Y8Yz+aXm0bwDN/QxPntFzDH47F1Rcy2ywNnvty8ooDTvuA==",
"dev": true,
"dependencies": {
"@storybook/client-logger": "6.5.15",
"core-js": "^3.8.2",
@@ -12297,6 +12281,37 @@
"tailwindcss": ">=3.0.0 || >= 3.0.0-alpha.1"
}
},
"node_modules/@tanstack/react-table": {
"version": "8.7.6",
"resolved": "https://registry.npmjs.org/@tanstack/react-table/-/react-table-8.7.6.tgz",
"integrity": "sha512-/QijmMFeP7wDLBnr0MQ/5MlbXePbIL/1nOtkxBC9zvmBu4gDKJEDBqipUyM7Wc/iBpSd0IFyqBlvZvTPD9FYDA==",
"dependencies": {
"@tanstack/table-core": "8.7.6"
},
"engines": {
"node": ">=12"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/tannerlinsley"
},
"peerDependencies": {
"react": ">=16",
"react-dom": ">=16"
}
},
"node_modules/@tanstack/table-core": {
"version": "8.7.6",
"resolved": "https://registry.npmjs.org/@tanstack/table-core/-/table-core-8.7.6.tgz",
"integrity": "sha512-sqiNTMzB6cpyL8DFH6/VqW48SwiflLqxQqYpo2wNock7rdVGvlm0BLNI8vZUJbr1+fmmWmHwBvi5OMgZw8n1DA==",
"engines": {
"node": ">=12"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/tannerlinsley"
}
},
"node_modules/@testing-library/dom": {
"version": "8.19.1",
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-8.19.1.tgz",
@@ -12674,8 +12689,7 @@
"node_modules/@types/is-function": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@types/is-function/-/is-function-1.0.1.tgz",
"integrity": "sha512-A79HEEiwXTFtfY+Bcbo58M2GRYzCr9itHWzbzHVFNEYCcoU/MMGwYYf721gBrnhpj1s6RGVVha/IgNFnR0Iw/Q==",
"dev": true
"integrity": "sha512-A79HEEiwXTFtfY+Bcbo58M2GRYzCr9itHWzbzHVFNEYCcoU/MMGwYYf721gBrnhpj1s6RGVVha/IgNFnR0Iw/Q=="
},
"node_modules/@types/istanbul-lib-coverage": {
"version": "2.0.4",
@@ -12876,8 +12890,7 @@
"node_modules/@types/qs": {
"version": "6.9.7",
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.7.tgz",
"integrity": "sha512-FGa1F62FT09qcrueBA6qYTrJPVDzah9a+493+o2PCXsesWHIn27G98TsSMs3WPNbZIEj4+VJf6saSFpvD+3Zsw==",
"dev": true
"integrity": "sha512-FGa1F62FT09qcrueBA6qYTrJPVDzah9a+493+o2PCXsesWHIn27G98TsSMs3WPNbZIEj4+VJf6saSFpvD+3Zsw=="
},
"node_modules/@types/react": {
"version": "18.0.26",
@@ -12995,8 +13008,7 @@
"node_modules/@types/webpack-env": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/@types/webpack-env/-/webpack-env-1.18.0.tgz",
"integrity": "sha512-56/MAlX5WMsPVbOg7tAxnYvNYMMWr/QJiIp6BxVSW3JJXUVzzOn64qW8TzQyMSqSUFM2+PVI4aUHcHOzIz/1tg==",
"dev": true
"integrity": "sha512-56/MAlX5WMsPVbOg7tAxnYvNYMMWr/QJiIp6BxVSW3JJXUVzzOn64qW8TzQyMSqSUFM2+PVI4aUHcHOzIz/1tg=="
},
"node_modules/@types/webpack-sources": {
"version": "3.2.0",
@@ -13616,6 +13628,11 @@
"integrity": "sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==",
"dev": true
},
"node_modules/accept-language-parser": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/accept-language-parser/-/accept-language-parser-1.5.0.tgz",
"integrity": "sha512-QhyTbMLYo0BBGg1aWbeMG4ekWtds/31BrEU+DONOg/7ax23vxpL03Pb7/zBmha2v7vdD3AyzZVWBVGEZxKOXWw=="
},
"node_modules/accepts": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
@@ -17984,8 +18001,7 @@
"node_modules/dom-walk": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/dom-walk/-/dom-walk-0.1.2.tgz",
"integrity": "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w==",
"dev": true
"integrity": "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w=="
},
"node_modules/domain-browser": {
"version": "1.2.0",
@@ -20786,7 +20802,6 @@
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/global/-/global-4.4.0.tgz",
"integrity": "sha512-wv/LAoHdRE3BeTGz53FAamhGlPLhlssK45usmGFThIi4XqnBmjKQ16u+RNbP7WvigRZDxUsM0J3gcQ5yicaL0w==",
"dev": true,
"dependencies": {
"min-document": "^2.19.0",
"process": "^0.11.10"
@@ -22018,7 +22033,6 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/is-dom/-/is-dom-1.1.0.tgz",
"integrity": "sha512-u82f6mvhYxRPKpw8V1N0W8ce1xXwOrQtgGcxl6UCL5zBmZu3is/18K0rR7uFCnMDuAsS/3W54mGL4vsaFUQlEQ==",
"dev": true,
"dependencies": {
"is-object": "^1.0.1",
"is-window": "^1.0.2"
@@ -22069,8 +22083,7 @@
"node_modules/is-function": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/is-function/-/is-function-1.0.2.tgz",
"integrity": "sha512-lw7DUp0aWXYg+CBCN+JKkcE0Q2RayZnSvnZBlwgxHBQhqt5pZNVy4Ri7H9GmmXkdu7LUthszM+Tor1u/2iBcpQ==",
"dev": true
"integrity": "sha512-lw7DUp0aWXYg+CBCN+JKkcE0Q2RayZnSvnZBlwgxHBQhqt5pZNVy4Ri7H9GmmXkdu7LUthszM+Tor1u/2iBcpQ=="
},
"node_modules/is-generator-fn": {
"version": "2.1.0",
@@ -22164,7 +22177,6 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/is-object/-/is-object-1.0.2.tgz",
"integrity": "sha512-2rRIahhZr2UWb45fIOuvZGpFtz0TyOZLf32KxBbSoUCeZR495zCKlWUKKUByk3geS2eAs7ZAABt0Y/Rx0GiQGA==",
"dev": true,
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
@@ -22369,8 +22381,7 @@
"node_modules/is-window": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/is-window/-/is-window-1.0.2.tgz",
"integrity": "sha512-uj00kdXyZb9t9RcAUAwMZAnkBUwdYGhYlt7djMXhfyhUCzwNba50tIiBKR7q0l7tdoBtFVw/3JmLY6fI3rmZmg==",
"dev": true
"integrity": "sha512-uj00kdXyZb9t9RcAUAwMZAnkBUwdYGhYlt7djMXhfyhUCzwNba50tIiBKR7q0l7tdoBtFVw/3JmLY6fI3rmZmg=="
},
"node_modules/is-windows": {
"version": "1.0.2",
@@ -26446,8 +26457,7 @@
"node_modules/lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
"dev": true
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"node_modules/lodash.debounce": {
"version": "4.0.8",
@@ -26687,6 +26697,14 @@
"yallist": "^3.0.2"
}
},
"node_modules/lucide-react": {
"version": "0.105.0",
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.105.0.tgz",
"integrity": "sha512-iHaIkd4Wq6aNIVrFMXt3If8E/+2lnJd4WlCyntoJNIzZ8nWhdSSHWpsw7XM4rlw2319LZ2t4WLdnM8Z0ECDTOQ==",
"peerDependencies": {
"react": "^16.5.1 || ^17.0.0 || ^18.0.0"
}
},
"node_modules/lz-string": {
"version": "1.4.4",
"resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.4.4.tgz",
@@ -26776,8 +26794,7 @@
"node_modules/map-or-similar": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/map-or-similar/-/map-or-similar-1.5.0.tgz",
"integrity": "sha512-0aF7ZmVon1igznGI4VS30yugpduQW3y3GkcgGJOp7d8x8QrizhigUxjI/m2UojsXXto+jLAH3KSz+xOJTiORjg==",
"dev": true
"integrity": "sha512-0aF7ZmVon1igznGI4VS30yugpduQW3y3GkcgGJOp7d8x8QrizhigUxjI/m2UojsXXto+jLAH3KSz+xOJTiORjg=="
},
"node_modules/map-visit": {
"version": "1.0.0",
@@ -26924,7 +26941,6 @@
"version": "1.11.3",
"resolved": "https://registry.npmjs.org/memoizerific/-/memoizerific-1.11.3.tgz",
"integrity": "sha512-/EuHYwAPdLtXwAwSZkh/Gutery6pD2KYd44oQLhAvQp/50mpyduZh8Q7PYHXTCJ+wuXxt7oij2LXyIJOOYFPog==",
"dev": true,
"dependencies": {
"map-or-similar": "^1.5.0"
}
@@ -27183,7 +27199,6 @@
"version": "2.19.0",
"resolved": "https://registry.npmjs.org/min-document/-/min-document-2.19.0.tgz",
"integrity": "sha512-9Wy1B3m3f66bPPmU5hdA4DR4PB2OfDU/+GS3yAB7IQozE3tqXaVv2zOjgla7MEGSRv95+ILmOuvhLkOK6wJtCQ==",
"dev": true,
"dependencies": {
"dom-walk": "^0.1.0"
}
@@ -31128,7 +31143,6 @@
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
"integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==",
"dev": true,
"engines": {
"node": ">=6"
}
@@ -31480,7 +31494,6 @@
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/polished/-/polished-4.2.2.tgz",
"integrity": "sha512-Sz2Lkdxz6F2Pgnpi9U5Ng/WdWAUZxmHrNPoVlm3aAemxoy2Qy7LGjQg4uf8qKelDAUW94F4np3iH2YPf2qefcQ==",
"dev": true,
"dependencies": {
"@babel/runtime": "^7.17.8"
},
@@ -32087,7 +32100,6 @@
"version": "0.11.10",
"resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz",
"integrity": "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==",
"dev": true,
"engines": {
"node": ">= 0.6.0"
}
@@ -32269,7 +32281,6 @@
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
"integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
"dev": true,
"dependencies": {
"side-channel": "^1.0.4"
},
@@ -32466,6 +32477,23 @@
"react": "^15.3.0 || ^16.0.0 || ^17.0.0 || ^18.0.0"
}
},
"node_modules/react-cookies": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/react-cookies/-/react-cookies-0.1.1.tgz",
"integrity": "sha512-PP75kJ4vtoHuuTdq0TAD3RmlAv7vuDQh9fkC4oDlhntgs9vX1DmREomO0Y1mcQKR9nMZ6/zxoflaMJ3MAmF5KQ==",
"dependencies": {
"cookie": "^0.3.1",
"object-assign": "^4.1.1"
}
},
"node_modules/react-cookies/node_modules/cookie": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz",
"integrity": "sha512-+IJOX0OqlHCszo2mBUq+SrEbCj6w7Kpffqx60zYbPTFaO4+yYgRjHwcZNpWvaTylDHaV7PPmBHzSecZiMhtPgw==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/react-docgen": {
"version": "5.4.3",
"resolved": "https://registry.npmjs.org/react-docgen/-/react-docgen-5.4.3.tgz",
@@ -32601,14 +32629,6 @@
}
}
},
"node_modules/react-icons": {
"version": "4.7.1",
"resolved": "https://registry.npmjs.org/react-icons/-/react-icons-4.7.1.tgz",
"integrity": "sha512-yHd3oKGMgm7zxo3EA7H2n7vxSoiGmHk5t6Ou4bXsfcgWyhfDKMpyKfhHR6Bjnn63c+YXBLBPUql9H4wPJM6sXw==",
"peerDependencies": {
"react": "*"
}
},
"node_modules/react-is": {
"version": "16.13.1",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
@@ -32700,18 +32720,6 @@
}
}
},
"node_modules/react-table": {
"version": "7.8.0",
"resolved": "https://registry.npmjs.org/react-table/-/react-table-7.8.0.tgz",
"integrity": "sha512-hNaz4ygkZO4bESeFfnfOft73iBUj8K5oKi1EcSHPAibEydfsX2MyU6Z8KCr3mv3C9Kqqh71U+DhZkFvibbnPbA==",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/tannerlinsley"
},
"peerDependencies": {
"react": "^16.8.3 || ^17.0.0-0 || ^18.0.0"
}
},
"node_modules/read-cache": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
@@ -34576,8 +34584,7 @@
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/stable/-/stable-0.1.8.tgz",
"integrity": "sha512-ji9qxRnOVfcuLDySj9qzhGSEFVobyt1kIOSkj1qZzYLzq7Tos/oUUWvotUPQLlrsidqsK6tBH89Bc9kL5zHA6w==",
"deprecated": "Modern JS already guarantees Array#sort() is a stable sort, so this library is deprecated. See the compatibility table on MDN: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort#browser_compatibility",
"dev": true
"deprecated": "Modern JS already guarantees Array#sort() is a stable sort, so this library is deprecated. See the compatibility table on MDN: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort#browser_compatibility"
},
"node_modules/stack-utils": {
"version": "2.0.6",
@@ -34730,8 +34737,31 @@
"node_modules/store2": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/store2/-/store2-2.14.2.tgz",
"integrity": "sha512-siT1RiqlfQnGqgT/YzXVUNsom9S0H1OX+dpdGN1xkyYATo4I6sep5NmsRD/40s3IIOvlCq6akxkqG82urIZW1w==",
"dev": true
"integrity": "sha512-siT1RiqlfQnGqgT/YzXVUNsom9S0H1OX+dpdGN1xkyYATo4I6sep5NmsRD/40s3IIOvlCq6akxkqG82urIZW1w=="
},
"node_modules/storybook-addon-next-router": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/storybook-addon-next-router/-/storybook-addon-next-router-4.0.2.tgz",
"integrity": "sha512-0rjGAl7HziW4ecDq+VU03H1dwkw5f6phqA+PMquPzdowNVl29ejVwVadLMGlovG6x2snaxMGxtySR2c5bwegSw==",
"dependencies": {
"tslib": "2.4.0"
},
"engines": {
"node": ">=10"
},
"peerDependencies": {
"@storybook/addon-actions": "^6.0.0",
"@storybook/addons": "^6.0.0",
"@storybook/client-api": "^6.0.0",
"next": "^9.0.0 || ^10.0.0 || ^11.0.0 || ^12.0.0 || ^13.0.0",
"react": "^16.8.0 || ^17.0.0 || ^18.0.0",
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0"
}
},
"node_modules/storybook-addon-next-router/node_modules/tslib": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.0.tgz",
"integrity": "sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ=="
},
"node_modules/stream-browserify": {
"version": "2.0.2",
@@ -35176,8 +35206,7 @@
"node_modules/synchronous-promise": {
"version": "2.0.16",
"resolved": "https://registry.npmjs.org/synchronous-promise/-/synchronous-promise-2.0.16.tgz",
"integrity": "sha512-qImOD23aDfnIDNqlG1NOehdB9IYsn1V9oByPjKY1nakv2MQYCEMyX033/q+aEtYCpmYK1cv2+NTmlH+ra6GA5A==",
"dev": true
"integrity": "sha512-qImOD23aDfnIDNqlG1NOehdB9IYsn1V9oByPjKY1nakv2MQYCEMyX033/q+aEtYCpmYK1cv2+NTmlH+ra6GA5A=="
},
"node_modules/synckit": {
"version": "0.8.4",
@@ -35334,7 +35363,6 @@
"version": "6.0.8",
"resolved": "https://registry.npmjs.org/telejson/-/telejson-6.0.8.tgz",
"integrity": "sha512-nerNXi+j8NK1QEfBHtZUN/aLdDcyupA//9kAboYLrtzZlPLpUfqbVGWb9zz91f/mIjRbAYhbgtnJHY8I1b5MBg==",
"dev": true,
"dependencies": {
"@types/is-function": "^1.0.0",
"global": "^4.4.0",
@@ -35350,7 +35378,6 @@
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/isobject/-/isobject-4.0.0.tgz",
"integrity": "sha512-S/2fF5wH8SJA/kmwr6HYhK/RI/OkhD84k8ntalo0iJjZikgq1XFvR5M8NPT1x5F7fBwCG3qHfnzeP/Vh/ZxCUA==",
"dev": true,
"engines": {
"node": ">=0.10.0"
}
@@ -35721,7 +35748,6 @@
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/ts-dedent/-/ts-dedent-2.2.0.tgz",
"integrity": "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==",
"dev": true,
"engines": {
"node": ">=6.10"
}
@@ -36514,8 +36540,7 @@
"node_modules/uuid-browser": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/uuid-browser/-/uuid-browser-3.1.0.tgz",
"integrity": "sha512-dsNgbLaTrd6l3MMxTtouOCFw4CBFc/3a+GgYA2YyrJvyQ1u6q4pcu3ktLoUZ/VN/Aw9WsauazbgsgdfVWgAKQg==",
"dev": true
"integrity": "sha512-dsNgbLaTrd6l3MMxTtouOCFw4CBFc/3a+GgYA2YyrJvyQ1u6q4pcu3ktLoUZ/VN/Aw9WsauazbgsgdfVWgAKQg=="
},
"node_modules/v8-compile-cache-lib": {
"version": "3.0.1",
@@ -42022,7 +42047,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/addon-actions/-/addon-actions-6.5.15.tgz",
"integrity": "sha512-cnLzVK1S+EydFDSuvxMmMAxVqNXijBGdV9QTgsu6ys5sOkoiXRETKZmxuN8/ZRbkfc4N+1KAylSCZOOHzBQTBQ==",
"dev": true,
"requires": {
"@storybook/addons": "6.5.15",
"@storybook/api": "6.5.15",
@@ -42049,7 +42073,6 @@
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/react-inspector/-/react-inspector-5.1.1.tgz",
"integrity": "sha512-GURDaYzoLbW8pMGXwYPDBIv6nqei4kK7LPRZ9q9HCZF54wqXz/dnylBp/kfE9XmekBhHvLDdcYeyIwSrvtOiWg==",
"dev": true,
"requires": {
"@babel/runtime": "^7.0.0",
"is-dom": "^1.0.0",
@@ -42319,7 +42342,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/addons/-/addons-6.5.15.tgz",
"integrity": "sha512-xT31SuSX+kYGyxCNK2nqL7WTxucs3rSmhiCLovJcUjYk+QquV3c2c53Ki7lwwdDbzfXFcNAe0HJ4hoTN4jhn0Q==",
"dev": true,
"requires": {
"@storybook/api": "6.5.15",
"@storybook/channels": "6.5.15",
@@ -42338,7 +42360,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/api/-/api-6.5.15.tgz",
"integrity": "sha512-BBE0KXKvj1/3jTghbIoWfrcDM0t+xO7EYtWWAXD6XlhGsZVD2Dy82Z52ONyLulMDRpMWl0OYy3h6A1YnFUH25w==",
"dev": true,
"requires": {
"@storybook/channels": "6.5.15",
"@storybook/client-logger": "6.5.15",
@@ -43496,7 +43517,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/channel-postmessage/-/channel-postmessage-6.5.15.tgz",
"integrity": "sha512-gMpA8LWT8lC4z5KWnaMh03aazEwtDO7GtY5kZVru+EEMgExGmaR82qgekwmLmgLj2nRJEv0o138o9IqYUcou8w==",
"dev": true,
"requires": {
"@storybook/channels": "6.5.15",
"@storybook/client-logger": "6.5.15",
@@ -43524,7 +43544,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/channels/-/channels-6.5.15.tgz",
"integrity": "sha512-gPpsBgirv2NCXbH4WbYqdkI0JLE96aiVuu7UEWfn9yu071pQ9CLHbhXGD9fSFNrfOkyBBY10ppSE7uCXw3Wexg==",
"dev": true,
"requires": {
"core-js": "^3.8.2",
"ts-dedent": "^2.0.0",
@@ -43535,7 +43554,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/client-api/-/client-api-6.5.15.tgz",
"integrity": "sha512-0ZGpRgVz7rdbCguBqBpwObXbsVY5qlSTWDzzIBpmz8EkxW/MtK5wEyeq+0L0O+DTn41FwvH5yCGLAENpzWD8BQ==",
"dev": true,
"requires": {
"@storybook/addons": "6.5.15",
"@storybook/channel-postmessage": "6.5.15",
@@ -43563,7 +43581,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/client-logger/-/client-logger-6.5.15.tgz",
"integrity": "sha512-0uyxKvodq+FycGv6aUwC1wUR6suXf2+7ywMFAOlYolI4UvNj8NyU/5AfgKT5XnxYAgPmoCiAjOE700TrfHrosw==",
"dev": true,
"requires": {
"core-js": "^3.8.2",
"global": "^4.4.0"
@@ -43573,7 +43590,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/components/-/components-6.5.15.tgz",
"integrity": "sha512-bHTT0Oa3s4g+MBMaLBbX9ofMtb1AW59AzIUNGrfqW1XqJMGuUHMiJ7TSo+i5dRSFpbFygnwMEG9LfHxpR2Z0Dw==",
"dev": true,
"requires": {
"@storybook/client-logger": "6.5.15",
"@storybook/csf": "0.0.2--canary.4566f4d.1",
@@ -44298,7 +44314,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/core-events/-/core-events-6.5.15.tgz",
"integrity": "sha512-B1Ba6l5W7MeNclclqMMTMHgYgfdpB5SIhNCQFnzIz8blynzRhNFMdxvbAl6Je5G0S4xydYYd7Lno2kXQebs7HA==",
"dev": true,
"requires": {
"core-js": "^3.8.2"
}
@@ -44949,7 +44964,6 @@
"version": "0.0.2--canary.4566f4d.1",
"resolved": "https://registry.npmjs.org/@storybook/csf/-/csf-0.0.2--canary.4566f4d.1.tgz",
"integrity": "sha512-9OVvMVh3t9znYZwb0Svf/YQoxX2gVOeQTGe2bses2yj+a3+OJnCrUF3/hGv6Em7KujtOdL2LL+JnG49oMVGFgQ==",
"dev": true,
"requires": {
"lodash": "^4.17.15"
}
@@ -46441,7 +46455,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/router/-/router-6.5.15.tgz",
"integrity": "sha512-9t8rI8t7/Krolau29gsdjdbRQ66orONIyP0efp0EukVgv6reNFzb/U14ARrl0uHys6Tl5Xyece9FoakQUdn8Kg==",
"dev": true,
"requires": {
"@storybook/client-logger": "6.5.15",
"core-js": "^3.8.2",
@@ -46454,7 +46467,6 @@
"version": "7.3.2",
"resolved": "https://registry.npmjs.org/@storybook/semver/-/semver-7.3.2.tgz",
"integrity": "sha512-SWeszlsiPsMI0Ps0jVNtH64cI5c0UF3f7KgjVKJoNP30crQ6wUSddY2hsdeczZXEKVJGEn50Q60flcGsQGIcrg==",
"dev": true,
"requires": {
"core-js": "^3.6.5",
"find-up": "^4.1.0"
@@ -46464,7 +46476,6 @@
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
"integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
"dev": true,
"requires": {
"locate-path": "^5.0.0",
"path-exists": "^4.0.0"
@@ -46474,7 +46485,6 @@
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
"integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
"dev": true,
"requires": {
"p-locate": "^4.1.0"
}
@@ -46483,7 +46493,6 @@
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
"integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
"dev": true,
"requires": {
"p-try": "^2.0.0"
}
@@ -46492,7 +46501,6 @@
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
"integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
"dev": true,
"requires": {
"p-limit": "^2.2.0"
}
@@ -46529,7 +46537,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/store/-/store-6.5.15.tgz",
"integrity": "sha512-r6cYTf6GtbqgdI4ZG3xuWdJAAu5fJ3xAWMiDkHyoK2u+R2TeYXIsJvgn0BPrW87sZhELIkg4ckdFECmATs3kpQ==",
"dev": true,
"requires": {
"@storybook/addons": "6.5.15",
"@storybook/client-logger": "6.5.15",
@@ -46636,7 +46643,6 @@
"version": "6.5.15",
"resolved": "https://registry.npmjs.org/@storybook/theming/-/theming-6.5.15.tgz",
"integrity": "sha512-pgdW0lVZKKXQ4VhIfLHycMmwFSVOY7vLTKnytag4Y8Yz+aXm0bwDN/QxPntFzDH47F1Rcy2ywNnvty8ooDTvuA==",
"dev": true,
"requires": {
"@storybook/client-logger": "6.5.15",
"core-js": "^3.8.2",
@@ -46682,6 +46688,19 @@
"mini-svg-data-uri": "^1.2.3"
}
},
"@tanstack/react-table": {
"version": "8.7.6",
"resolved": "https://registry.npmjs.org/@tanstack/react-table/-/react-table-8.7.6.tgz",
"integrity": "sha512-/QijmMFeP7wDLBnr0MQ/5MlbXePbIL/1nOtkxBC9zvmBu4gDKJEDBqipUyM7Wc/iBpSd0IFyqBlvZvTPD9FYDA==",
"requires": {
"@tanstack/table-core": "8.7.6"
}
},
"@tanstack/table-core": {
"version": "8.7.6",
"resolved": "https://registry.npmjs.org/@tanstack/table-core/-/table-core-8.7.6.tgz",
"integrity": "sha512-sqiNTMzB6cpyL8DFH6/VqW48SwiflLqxQqYpo2wNock7rdVGvlm0BLNI8vZUJbr1+fmmWmHwBvi5OMgZw8n1DA=="
},
"@testing-library/dom": {
"version": "8.19.1",
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-8.19.1.tgz",
@@ -46996,8 +47015,7 @@
"@types/is-function": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@types/is-function/-/is-function-1.0.1.tgz",
"integrity": "sha512-A79HEEiwXTFtfY+Bcbo58M2GRYzCr9itHWzbzHVFNEYCcoU/MMGwYYf721gBrnhpj1s6RGVVha/IgNFnR0Iw/Q==",
"dev": true
"integrity": "sha512-A79HEEiwXTFtfY+Bcbo58M2GRYzCr9itHWzbzHVFNEYCcoU/MMGwYYf721gBrnhpj1s6RGVVha/IgNFnR0Iw/Q=="
},
"@types/istanbul-lib-coverage": {
"version": "2.0.4",
@@ -47184,8 +47202,7 @@
"@types/qs": {
"version": "6.9.7",
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.7.tgz",
"integrity": "sha512-FGa1F62FT09qcrueBA6qYTrJPVDzah9a+493+o2PCXsesWHIn27G98TsSMs3WPNbZIEj4+VJf6saSFpvD+3Zsw==",
"dev": true
"integrity": "sha512-FGa1F62FT09qcrueBA6qYTrJPVDzah9a+493+o2PCXsesWHIn27G98TsSMs3WPNbZIEj4+VJf6saSFpvD+3Zsw=="
},
"@types/react": {
"version": "18.0.26",
@@ -47310,8 +47327,7 @@
"@types/webpack-env": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/@types/webpack-env/-/webpack-env-1.18.0.tgz",
"integrity": "sha512-56/MAlX5WMsPVbOg7tAxnYvNYMMWr/QJiIp6BxVSW3JJXUVzzOn64qW8TzQyMSqSUFM2+PVI4aUHcHOzIz/1tg==",
"dev": true
"integrity": "sha512-56/MAlX5WMsPVbOg7tAxnYvNYMMWr/QJiIp6BxVSW3JJXUVzzOn64qW8TzQyMSqSUFM2+PVI4aUHcHOzIz/1tg=="
},
"@types/webpack-sources": {
"version": "3.2.0",
@@ -47817,6 +47833,11 @@
"integrity": "sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==",
"dev": true
},
"accept-language-parser": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/accept-language-parser/-/accept-language-parser-1.5.0.tgz",
"integrity": "sha512-QhyTbMLYo0BBGg1aWbeMG4ekWtds/31BrEU+DONOg/7ax23vxpL03Pb7/zBmha2v7vdD3AyzZVWBVGEZxKOXWw=="
},
"accepts": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
@@ -51205,8 +51226,7 @@
"dom-walk": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/dom-walk/-/dom-walk-0.1.2.tgz",
"integrity": "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w==",
"dev": true
"integrity": "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w=="
},
"domain-browser": {
"version": "1.2.0",
@@ -53400,7 +53420,6 @@
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/global/-/global-4.4.0.tgz",
"integrity": "sha512-wv/LAoHdRE3BeTGz53FAamhGlPLhlssK45usmGFThIi4XqnBmjKQ16u+RNbP7WvigRZDxUsM0J3gcQ5yicaL0w==",
"dev": true,
"requires": {
"min-document": "^2.19.0",
"process": "^0.11.10"
@@ -54282,7 +54301,6 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/is-dom/-/is-dom-1.1.0.tgz",
"integrity": "sha512-u82f6mvhYxRPKpw8V1N0W8ce1xXwOrQtgGcxl6UCL5zBmZu3is/18K0rR7uFCnMDuAsS/3W54mGL4vsaFUQlEQ==",
"dev": true,
"requires": {
"is-object": "^1.0.1",
"is-window": "^1.0.2"
@@ -54318,8 +54336,7 @@
"is-function": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/is-function/-/is-function-1.0.2.tgz",
"integrity": "sha512-lw7DUp0aWXYg+CBCN+JKkcE0Q2RayZnSvnZBlwgxHBQhqt5pZNVy4Ri7H9GmmXkdu7LUthszM+Tor1u/2iBcpQ==",
"dev": true
"integrity": "sha512-lw7DUp0aWXYg+CBCN+JKkcE0Q2RayZnSvnZBlwgxHBQhqt5pZNVy4Ri7H9GmmXkdu7LUthszM+Tor1u/2iBcpQ=="
},
"is-generator-fn": {
"version": "2.1.0",
@@ -54378,8 +54395,7 @@
"is-object": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/is-object/-/is-object-1.0.2.tgz",
"integrity": "sha512-2rRIahhZr2UWb45fIOuvZGpFtz0TyOZLf32KxBbSoUCeZR495zCKlWUKKUByk3geS2eAs7ZAABt0Y/Rx0GiQGA==",
"dev": true
"integrity": "sha512-2rRIahhZr2UWb45fIOuvZGpFtz0TyOZLf32KxBbSoUCeZR495zCKlWUKKUByk3geS2eAs7ZAABt0Y/Rx0GiQGA=="
},
"is-path-inside": {
"version": "3.0.3",
@@ -54517,8 +54533,7 @@
"is-window": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/is-window/-/is-window-1.0.2.tgz",
"integrity": "sha512-uj00kdXyZb9t9RcAUAwMZAnkBUwdYGhYlt7djMXhfyhUCzwNba50tIiBKR7q0l7tdoBtFVw/3JmLY6fI3rmZmg==",
"dev": true
"integrity": "sha512-uj00kdXyZb9t9RcAUAwMZAnkBUwdYGhYlt7djMXhfyhUCzwNba50tIiBKR7q0l7tdoBtFVw/3JmLY6fI3rmZmg=="
},
"is-windows": {
"version": "1.0.2",
@@ -57652,8 +57667,7 @@
"lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
"dev": true
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"lodash.debounce": {
"version": "4.0.8",
@@ -57840,6 +57854,12 @@
"yallist": "^3.0.2"
}
},
"lucide-react": {
"version": "0.105.0",
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.105.0.tgz",
"integrity": "sha512-iHaIkd4Wq6aNIVrFMXt3If8E/+2lnJd4WlCyntoJNIzZ8nWhdSSHWpsw7XM4rlw2319LZ2t4WLdnM8Z0ECDTOQ==",
"requires": {}
},
"lz-string": {
"version": "1.4.4",
"resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.4.4.tgz",
@@ -57910,8 +57930,7 @@
"map-or-similar": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/map-or-similar/-/map-or-similar-1.5.0.tgz",
"integrity": "sha512-0aF7ZmVon1igznGI4VS30yugpduQW3y3GkcgGJOp7d8x8QrizhigUxjI/m2UojsXXto+jLAH3KSz+xOJTiORjg==",
"dev": true
"integrity": "sha512-0aF7ZmVon1igznGI4VS30yugpduQW3y3GkcgGJOp7d8x8QrizhigUxjI/m2UojsXXto+jLAH3KSz+xOJTiORjg=="
},
"map-visit": {
"version": "1.0.0",
@@ -58022,7 +58041,6 @@
"version": "1.11.3",
"resolved": "https://registry.npmjs.org/memoizerific/-/memoizerific-1.11.3.tgz",
"integrity": "sha512-/EuHYwAPdLtXwAwSZkh/Gutery6pD2KYd44oQLhAvQp/50mpyduZh8Q7PYHXTCJ+wuXxt7oij2LXyIJOOYFPog==",
"dev": true,
"requires": {
"map-or-similar": "^1.5.0"
}
@@ -58236,7 +58254,6 @@
"version": "2.19.0",
"resolved": "https://registry.npmjs.org/min-document/-/min-document-2.19.0.tgz",
"integrity": "sha512-9Wy1B3m3f66bPPmU5hdA4DR4PB2OfDU/+GS3yAB7IQozE3tqXaVv2zOjgla7MEGSRv95+ILmOuvhLkOK6wJtCQ==",
"dev": true,
"requires": {
"dom-walk": "^0.1.0"
}
@@ -60966,8 +60983,7 @@
"p-try": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
"integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==",
"dev": true
"integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ=="
},
"pako": {
"version": "1.0.11",
@@ -61253,7 +61269,6 @@
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/polished/-/polished-4.2.2.tgz",
"integrity": "sha512-Sz2Lkdxz6F2Pgnpi9U5Ng/WdWAUZxmHrNPoVlm3aAemxoy2Qy7LGjQg4uf8qKelDAUW94F4np3iH2YPf2qefcQ==",
"dev": true,
"requires": {
"@babel/runtime": "^7.17.8"
}
@@ -61670,8 +61685,7 @@
"process": {
"version": "0.11.10",
"resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz",
"integrity": "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==",
"dev": true
"integrity": "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A=="
},
"process-nextick-args": {
"version": "2.0.1",
@@ -61829,7 +61843,6 @@
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
"integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
"dev": true,
"requires": {
"side-channel": "^1.0.4"
}
@@ -61962,6 +61975,22 @@
"@babel/runtime": "^7.12.13"
}
},
"react-cookies": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/react-cookies/-/react-cookies-0.1.1.tgz",
"integrity": "sha512-PP75kJ4vtoHuuTdq0TAD3RmlAv7vuDQh9fkC4oDlhntgs9vX1DmREomO0Y1mcQKR9nMZ6/zxoflaMJ3MAmF5KQ==",
"requires": {
"cookie": "^0.3.1",
"object-assign": "^4.1.1"
},
"dependencies": {
"cookie": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz",
"integrity": "sha512-+IJOX0OqlHCszo2mBUq+SrEbCj6w7Kpffqx60zYbPTFaO4+yYgRjHwcZNpWvaTylDHaV7PPmBHzSecZiMhtPgw=="
}
}
},
"react-docgen": {
"version": "5.4.3",
"resolved": "https://registry.npmjs.org/react-docgen/-/react-docgen-5.4.3.tgz",
@@ -62053,12 +62082,6 @@
"html-parse-stringify": "^3.0.1"
}
},
"react-icons": {
"version": "4.7.1",
"resolved": "https://registry.npmjs.org/react-icons/-/react-icons-4.7.1.tgz",
"integrity": "sha512-yHd3oKGMgm7zxo3EA7H2n7vxSoiGmHk5t6Ou4bXsfcgWyhfDKMpyKfhHR6Bjnn63c+YXBLBPUql9H4wPJM6sXw==",
"requires": {}
},
"react-is": {
"version": "16.13.1",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
@@ -62107,12 +62130,6 @@
"tslib": "^2.0.0"
}
},
"react-table": {
"version": "7.8.0",
"resolved": "https://registry.npmjs.org/react-table/-/react-table-7.8.0.tgz",
"integrity": "sha512-hNaz4ygkZO4bESeFfnfOft73iBUj8K5oKi1EcSHPAibEydfsX2MyU6Z8KCr3mv3C9Kqqh71U+DhZkFvibbnPbA==",
"requires": {}
},
"read-cache": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
@@ -63591,8 +63608,7 @@
"stable": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/stable/-/stable-0.1.8.tgz",
"integrity": "sha512-ji9qxRnOVfcuLDySj9qzhGSEFVobyt1kIOSkj1qZzYLzq7Tos/oUUWvotUPQLlrsidqsK6tBH89Bc9kL5zHA6w==",
"dev": true
"integrity": "sha512-ji9qxRnOVfcuLDySj9qzhGSEFVobyt1kIOSkj1qZzYLzq7Tos/oUUWvotUPQLlrsidqsK6tBH89Bc9kL5zHA6w=="
},
"stack-utils": {
"version": "2.0.6",
@@ -63716,8 +63732,22 @@
"store2": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/store2/-/store2-2.14.2.tgz",
"integrity": "sha512-siT1RiqlfQnGqgT/YzXVUNsom9S0H1OX+dpdGN1xkyYATo4I6sep5NmsRD/40s3IIOvlCq6akxkqG82urIZW1w==",
"dev": true
"integrity": "sha512-siT1RiqlfQnGqgT/YzXVUNsom9S0H1OX+dpdGN1xkyYATo4I6sep5NmsRD/40s3IIOvlCq6akxkqG82urIZW1w=="
},
"storybook-addon-next-router": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/storybook-addon-next-router/-/storybook-addon-next-router-4.0.2.tgz",
"integrity": "sha512-0rjGAl7HziW4ecDq+VU03H1dwkw5f6phqA+PMquPzdowNVl29ejVwVadLMGlovG6x2snaxMGxtySR2c5bwegSw==",
"requires": {
"tslib": "2.4.0"
},
"dependencies": {
"tslib": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.0.tgz",
"integrity": "sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ=="
}
}
},
"stream-browserify": {
"version": "2.0.2",
@@ -64058,8 +64088,7 @@
"synchronous-promise": {
"version": "2.0.16",
"resolved": "https://registry.npmjs.org/synchronous-promise/-/synchronous-promise-2.0.16.tgz",
"integrity": "sha512-qImOD23aDfnIDNqlG1NOehdB9IYsn1V9oByPjKY1nakv2MQYCEMyX033/q+aEtYCpmYK1cv2+NTmlH+ra6GA5A==",
"dev": true
"integrity": "sha512-qImOD23aDfnIDNqlG1NOehdB9IYsn1V9oByPjKY1nakv2MQYCEMyX033/q+aEtYCpmYK1cv2+NTmlH+ra6GA5A=="
},
"synckit": {
"version": "0.8.4",
@@ -64186,7 +64215,6 @@
"version": "6.0.8",
"resolved": "https://registry.npmjs.org/telejson/-/telejson-6.0.8.tgz",
"integrity": "sha512-nerNXi+j8NK1QEfBHtZUN/aLdDcyupA//9kAboYLrtzZlPLpUfqbVGWb9zz91f/mIjRbAYhbgtnJHY8I1b5MBg==",
"dev": true,
"requires": {
"@types/is-function": "^1.0.0",
"global": "^4.4.0",
@@ -64201,8 +64229,7 @@
"isobject": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/isobject/-/isobject-4.0.0.tgz",
"integrity": "sha512-S/2fF5wH8SJA/kmwr6HYhK/RI/OkhD84k8ntalo0iJjZikgq1XFvR5M8NPT1x5F7fBwCG3qHfnzeP/Vh/ZxCUA==",
"dev": true
"integrity": "sha512-S/2fF5wH8SJA/kmwr6HYhK/RI/OkhD84k8ntalo0iJjZikgq1XFvR5M8NPT1x5F7fBwCG3qHfnzeP/Vh/ZxCUA=="
}
}
},
@@ -64493,8 +64520,7 @@
"ts-dedent": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/ts-dedent/-/ts-dedent-2.2.0.tgz",
"integrity": "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==",
"dev": true
"integrity": "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ=="
},
"ts-node": {
"version": "10.9.1",
@@ -65058,8 +65084,7 @@
"uuid-browser": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/uuid-browser/-/uuid-browser-3.1.0.tgz",
"integrity": "sha512-dsNgbLaTrd6l3MMxTtouOCFw4CBFc/3a+GgYA2YyrJvyQ1u6q4pcu3ktLoUZ/VN/Aw9WsauazbgsgdfVWgAKQg==",
"dev": true
"integrity": "sha512-dsNgbLaTrd6l3MMxTtouOCFw4CBFc/3a+GgYA2YyrJvyQ1u6q4pcu3ktLoUZ/VN/Aw9WsauazbgsgdfVWgAKQg=="
},
"v8-compile-cache-lib": {
"version": "3.0.1",
+5 -2
View File
@@ -38,6 +38,8 @@
"@next/font": "^13.1.0",
"@prisma/client": "^4.7.1",
"@tailwindcss/forms": "^0.5.3",
"@tanstack/react-table": "^8.7.6",
"accept-language-parser": "^1.5.0",
"autoprefixer": "^10.4.13",
"axios": "^1.2.1",
"boolean": "^3.2.0",
@@ -48,6 +50,7 @@
"focus-visible": "^5.2.0",
"framer-motion": "^6.5.1",
"install": "^0.13.0",
"lucide-react": "^0.105.0",
"next": "13.0.6",
"next-auth": "^4.18.6",
"next-i18next": "^13.0.3",
@@ -55,13 +58,13 @@
"npm": "^9.2.0",
"postcss-focus-visible": "^7.1.0",
"react": "18.2.0",
"react-cookies": "^0.1.1",
"react-dom": "18.2.0",
"react-feature-flags": "^1.0.0",
"react-hook-form": "^7.42.1",
"react-i18next": "^12.1.4",
"react-icons": "^4.7.1",
"react-table": "^7.8.0",
"sharp": "^0.31.3",
"storybook-addon-next-router": "^4.0.2",
"swr": "^2.0.0",
"tailwindcss": "^3.2.4",
"unique-username-generator": "^1.1.3",
+3
View File
@@ -1,6 +1,7 @@
{
"about": "About",
"account_settings": "Account",
"admin_dashboard": "Admin Dashboard",
"connect": "Connect",
"conversational": "Conversational AI for everyone.",
"dashboard": "Dashboard",
@@ -8,6 +9,8 @@
"docs": "Docs",
"github": "GitHub",
"legal": "Legal",
"loading": "Loading...",
"more_information": "More Information",
"privacy_policy": "Privacy Policy",
"report_a_bug": "Report a Bug",
"sign_in": "Sign In",
+8
View File
@@ -0,0 +1,8 @@
{
"grab_a_task": "Grab a task!",
"create": "Create",
"evaluate": "Evaluate",
"label": "Label",
"dashboard": "Dashboard",
"go": "Go"
}
+4 -1
View File
@@ -7,5 +7,8 @@
"rank": "Rank",
"score": "Score",
"user": "User",
"weekly": "Weekly"
"weekly": "Weekly",
"prompt": "Prompts",
"reply": "Replies",
"label": "Labels"
}
+11
View File
@@ -0,0 +1,11 @@
{
"reactions": "Reactions",
"label_action": "Label",
"label_title": "Label",
"submit_labels": "Submit",
"open_new_tab_action": "Open in new tab",
"report_title": "Report",
"report_action": "Report",
"report_placeholder": "Why should this message be reviewed?",
"send_report": "Send"
}
+79
View File
@@ -0,0 +1,79 @@
{
"write_initial_prompt": "Write your prompt here...",
"default": {
"unchanged_title": "No changes",
"unchanged_message": "Are you sure you would like to continue?"
},
"random": {
"label": "I'm feeling lucky",
"desc": "Help us improve Open Assistant by starting a random task."
},
"create_initial_prompt": {
"label": "Create Initial Prompts",
"desc": "Write initial prompts to help Open Assistant to try replying to diverse messages.",
"overview": "Create an initial message to send to the assistant",
"instruction": "Provide the initial prompts"
},
"reply_as_user": {
"label": "Reply as User",
"desc": "Chat with Open Assistant and help improve it's responses as you interact with it.",
"overview": "Given the following conversation, provide an adequate reply",
"instruction": "Provide the user's reply"
},
"reply_as_assistant": {
"label": "Reply as Assistant",
"desc": "Help Open Assistant improve its responses to conversations with other users.",
"overview": "Given the following conversation, provide an adequate reply"
},
"rank_user_replies": {
"label": "Rank User Replies",
"desc": "Help Open Assistant improve its responses to conversations with other users.",
"overview": "Given the following User replies, sort them from best to worst, best being first, worst being last.",
"unchanged_title": "Order Unchanged",
"unchanged_message": "You have not changed the order of the prompts. Are you sure you would like to continue?"
},
"rank_assistant_replies": {
"label": "Rank Assistant Replies",
"desc": "Score prompts given by Open Assistant based on their accuracy and readability.",
"overview": "Given the following Assistant replies, sort them from best to worst, best being first, worst being last.",
"unchanged_title": "Order Unchanged",
"unchanged_message": "You have not changed the order of the prompts. Are you sure you would like to continue?"
},
"rank_initial_prompts": {
"label": "Rank Initial Prompts",
"desc": "Score prompts given by Open Assistant based on their accuracy and readability.",
"overview": "Given the following initial prompts, sort them from best to worst, best being first, worst being last.",
"unchanged_title": "Order Unchanged",
"unchanged_message": "You have not changed the order of the prompts. Are you sure you would like to continue?"
},
"label_initial_prompt": {
"label": "Label Initial Prompt",
"desc": "Provide labels for a prompt.",
"overview": "Provide labels for the following prompt"
},
"label_prompter_reply": {
"label": "Label Prompter Reply",
"desc": "Provide labels for a prompt.",
"overview": "Given the following discussion, provide labels for the final prompt."
},
"label_assistant_reply": {
"label": "Label Assistant Reply",
"desc": "Provide labels for a prompt.",
"overview": "Given the following discussion, provide labels for the final prompt."
},
"classify_initial_prompt": {
"label": "Classify Initial Prompt",
"desc": "Provide labels for a prompt.",
"overview": "Read the following prompt and then answer the question about it."
},
"classify_prompter_reply": {
"label": "Classify Prompter Reply",
"desc": "Provide labels for a prompt.",
"overview": "Read the following conversation and then answer the question about the last reply in the discussion."
},
"classify_assistant_reply": {
"label": "Classify Assistant Reply",
"desc": "Provide labels for a prompt.",
"overview": "Read the following conversation and then answer the question about the last reply in the discussion."
}
}
@@ -0,0 +1,31 @@
import { Radio, RadioGroup } from "@chakra-ui/react";
import { PropsWithChildren } from "react";
export const LikertButtons = ({
isDisabled,
count,
onChange,
"data-cy": dataCy,
}: PropsWithChildren<{
isDisabled: boolean;
count: number;
onChange: (value: number) => void;
"data-cy"?: string;
}>) => {
const valueMap = Object.fromEntries(Array.from({ length: count }, (_, idx) => [`${idx}`, idx / (count - 1)]));
return (
<RadioGroup
data-cy={dataCy}
isDisabled={isDisabled}
onChange={(value) => {
onChange(valueMap[value]);
}}
style={{ display: "flex", justifyContent: "space-between" }}
>
{Object.keys(valueMap).map((value) => {
return <Radio key={value} value={value} data-cy="radio-option" size="md" padding="0.6em" />;
})}
</RadioGroup>
);
};
+4 -3
View File
@@ -1,9 +1,10 @@
import { Box, Link, Text, useColorMode } from "@chakra-ui/react";
import { Github } from "lucide-react";
import { useTranslation } from "next-i18next";
import { useId } from "react";
import { FaDiscord, FaGithub } from "react-icons/fa";
import { Container } from "./Container";
import { Discord } from "./Icons/Discord";
const CIRCLE_HEIGHT = 558;
const CIRCLE_WIDTH = 558;
@@ -70,7 +71,7 @@ export function CallToAction() {
type="button"
className="mb-2 ml-6 flex items-center rounded-md border border-transparent bg-blue-600 px-6 py-3 text-base font-medium text-white shadow-sm hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
>
<FaDiscord size={25} />
<Discord size={25} />
<Text as="span" className="text-lg ml-3">
{t("discord")}
</Text>
@@ -81,7 +82,7 @@ export function CallToAction() {
type="button"
className="mb-2 ml-6 flex items-center rounded-md border border-transparent bg-blue-600 px-6 py-3 text-base font-medium text-white shadow-sm hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
>
<FaGithub size={25} />
<Github size={25} />
<Text as="span" className="text-lg ml-3">
{t("github")}
</Text>
@@ -1,11 +1,9 @@
import { Box, Link, Text, useColorModeValue } from "@chakra-ui/react";
import { Card, CardBody, Link, Text } from "@chakra-ui/react";
import NextLink from "next/link";
import { LeaderboardGridCell } from "src/components/LeaderboardGridCell";
import { LeaderboardTable } from "src/components/LeaderboardTable";
import { LeaderboardTimeFrame } from "src/types/Leaderboard";
export function LeaderboardTable() {
const backgroundColor = useColorModeValue("white", "gray.700");
const accentColor = useColorModeValue("gray.200", "gray.900");
export function LeaderboardWidget() {
return (
<main className="h-fit col-span-3">
<div className="flex flex-col gap-4">
@@ -17,15 +15,11 @@ export function LeaderboardTable() {
</Text>
</Link>
</div>
<Box
backgroundColor={backgroundColor}
boxShadow="base"
dropShadow={accentColor}
borderRadius="xl"
className="p-6 shadow-sm"
>
<LeaderboardGridCell timeFrame={LeaderboardTimeFrame.day} />
</Box>
<Card>
<CardBody>
<LeaderboardTable timeFrame={LeaderboardTimeFrame.day} limit={5} />
</CardBody>
</Card>
</div>
</main>
);
+79 -34
View File
@@ -1,51 +1,96 @@
import { Box, Flex, GridItem, Heading, SimpleGrid, Text, useColorModeValue } from "@chakra-ui/react";
import {
Box,
Flex,
GridItem,
Heading,
IconButton,
Link as ExternalLink,
SimpleGrid,
Spacer,
Text,
useColorModeValue,
} from "@chakra-ui/react";
import { HelpCircle } from "lucide-react";
import Link from "next/link";
import { useTranslation } from "next-i18next";
import { useMemo } from "react";
import { getTypeSafei18nKey } from "src/lib/i18n";
import { TaskType } from "src/types/Task";
import { TaskCategory, TaskCategoryLabels, TaskTypes } from "../Tasks/TaskTypes";
import { TaskCategory, TaskCategoryLabels, TaskInfo, TaskInfos } from "../Tasks/TaskTypes";
export const TaskOption = ({ displayTaskCategories }: { displayTaskCategories: TaskCategory[] }) => {
export interface TasksOptionProps {
content: Partial<Record<TaskCategory, TaskType[]>>;
}
export const TaskOption = ({ content }: TasksOptionProps) => {
const { t } = useTranslation(["dashboard", "tasks"]);
const backgroundColor = useColorModeValue("white", "gray.700");
const taskInfoMap = useMemo(
() =>
Object.values(content)
.flat()
.reduce((obj, taskType) => {
obj[taskType] = TaskInfos.filter((t) => t.type === taskType).pop();
return obj;
}, {} as Record<TaskType, TaskInfo>),
[content]
);
return (
<Box className="flex flex-col gap-14">
{displayTaskCategories.map((category) => (
{Object.entries(content).map(([category, taskTypes]) => (
<div key={category}>
<Text className="text-2xl font-bold pb-4">{TaskCategoryLabels[category]}</Text>
<Flex>
<Heading size="lg" className="pb-4">
{t(TaskCategoryLabels[category])}
</Heading>
<Spacer />
<ExternalLink href="https://projects.laion.ai/Open-Assistant/" isExternal>
<IconButton variant="ghost" aria-label="More Information" icon={<HelpCircle size="2em" />} />
</ExternalLink>
</Flex>
<SimpleGrid columns={[1, 1, 2, 2, 3, 4]} gap={4}>
{TaskTypes.filter((task) => task.category === category).map((item) => (
<Link key={category + item.label} href={item.pathname}>
<GridItem
bg={backgroundColor}
borderRadius="xl"
boxShadow="base"
className="flex flex-col justify-between h-full"
>
<Box className="p-6 pb-10">
<Flex flexDir="column" gap="3">
<Heading size="md" fontFamily="inter">
{item.label}
</Heading>
<Text size="sm" opacity="80%">
{item.desc}
</Text>
</Flex>
</Box>
<Box
bg="blue.500"
borderBottomRadius="xl"
className="px-6 py-2 transition-colors duration-300"
_hover={{ backgroundColor: "blue.600" }}
{taskTypes
.map((taskType) => taskInfoMap[taskType])
.map((item) => (
<Link key={category + item.id} href={item.pathname}>
<GridItem
bg={backgroundColor}
borderRadius="xl"
boxShadow="base"
className="flex flex-col justify-between h-full"
>
<Text fontWeight="bold" color="white">
Go -&gt;
<Flex className="p-6 pb-10" flexDir="column" gap="3">
<Heading size="md">{t(getTypeSafei18nKey(`tasks:${item.id}.label`))}</Heading>
<Text size="sm">{t(getTypeSafei18nKey(`tasks:${item.id}.desc`))}</Text>
</Flex>
<Text
fontWeight="bold"
color="white"
borderBottomRadius="xl"
className="px-6 py-2 transition-colors duration-300 bg-blue-500 hover:bg-blue-600"
>
{t("go")} -&gt;
</Text>
</Box>
</GridItem>
</Link>
))}
</GridItem>
</Link>
))}
</SimpleGrid>
</div>
))}
</Box>
);
};
export const allTaskOptions: TasksOptionProps["content"] = {
[TaskCategory.Random]: [TaskType.random],
[TaskCategory.Create]: [TaskType.initial_prompt, TaskType.prompter_reply, TaskType.assistant_reply],
[TaskCategory.Evaluate]: [
TaskType.rank_initial_prompts,
TaskType.rank_prompter_replies,
TaskType.rank_assistant_replies,
],
[TaskCategory.Label]: [TaskType.label_initial_prompt, TaskType.label_prompter_reply, TaskType.label_assistant_reply],
};
+1 -1
View File
@@ -1,3 +1,3 @@
export { LeaderboardTable } from "./LeaderboardTable";
export { LeaderboardWidget } from "./LeaderboardWidget";
export { TaskOption } from "./TaskOption";
export { WelcomeCard } from "./WelcomeCard";
+166
View File
@@ -0,0 +1,166 @@
import {
Box,
Button,
Flex,
FormControl,
FormLabel,
Input,
Popover,
PopoverArrow,
PopoverBody,
PopoverCloseButton,
PopoverContent,
PopoverTrigger,
Spacer,
Table,
TableCaption,
TableContainer,
Tbody,
Td,
Th,
Thead,
Tr,
useDisclosure,
} from "@chakra-ui/react";
import { ColumnDef, flexRender, getCoreRowModel, useReactTable } from "@tanstack/react-table";
import { Filter } from "lucide-react";
import { ChangeEvent, ReactNode } from "react";
import { useDebouncedCallback } from "use-debounce";
export type DataTableColumnDef<T> = ColumnDef<T> & {
filterable?: boolean;
};
// TODO: stricter type
export type FilterItem = {
id: string;
value: string;
};
export type DataTableProps<T> = {
data: T[];
columns: DataTableColumnDef<T>[];
caption?: string;
filterValues?: FilterItem[];
onNextClick?: () => void;
onPreviousClick?: () => void;
onFilterChange?: (items: FilterItem[]) => void;
disableNext?: boolean;
disablePrevious?: boolean;
disablePagination?: boolean;
};
export const DataTable = <T,>({
data,
columns,
caption,
filterValues = [],
onNextClick,
onPreviousClick,
onFilterChange,
disableNext,
disablePrevious,
disablePagination,
}: DataTableProps<T>) => {
const { getHeaderGroups, getRowModel } = useReactTable<T>({
data,
columns,
getCoreRowModel: getCoreRowModel(),
});
const handleFilterChange = (value: FilterItem) => {
const idx = filterValues.findIndex((oldValue) => oldValue.id === value.id);
let newValues: FilterItem[] = [];
if (idx === -1) {
newValues = [...filterValues, value];
} else {
newValues = filterValues.map((oldValue) => (oldValue.id === value.id ? value : oldValue));
}
onFilterChange(newValues);
};
return (
<>
{!disablePagination && (
<Flex mb="2">
<Button onClick={onPreviousClick} disabled={disablePrevious}>
Previous
</Button>
<Spacer />
<Button onClick={onNextClick} disabled={disableNext}>
Next
</Button>
</Flex>
)}
<TableContainer>
<Table variant="simple">
<TableCaption>{caption}</TableCaption>
<Thead>
{getHeaderGroups().map((headerGroup) => (
<Tr key={headerGroup.id}>
{headerGroup.headers.map((header) => (
<Th key={header.id}>
<Box display="flex" alignItems="center">
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
{(header.column.columnDef as DataTableColumnDef<T>).filterable && (
<FilterModal
value={filterValues.find((value) => value.id === header.id)?.value ?? ""}
onChange={(value) => handleFilterChange({ id: header.id, value })}
label={flexRender(header.column.columnDef.header, header.getContext())}
></FilterModal>
)}
</Box>
</Th>
))}
</Tr>
))}
</Thead>
<Tbody>
{getRowModel().rows.map((row) => (
<Tr key={row.id}>
{row.getVisibleCells().map((cell) => (
<Td key={cell.id}>{flexRender(cell.column.columnDef.cell, cell.getContext())}</Td>
))}
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
</>
);
};
const FilterModal = ({
label,
onChange,
value,
}: {
label: ReactNode;
onChange: (val: string) => void;
value: string;
}) => {
const { isOpen, onOpen, onClose } = useDisclosure();
const handleInputChange = useDebouncedCallback((e: ChangeEvent<HTMLInputElement>) => {
onChange(e.target.value);
}, 500);
return (
<Popover isOpen={isOpen} onOpen={onOpen} onClose={onClose}>
<PopoverTrigger>
<Button variant={"unstyled"} ml="2">
<Filter size="1em"></Filter>
</Button>
</PopoverTrigger>
<PopoverContent w="fit-content">
<PopoverArrow />
<PopoverCloseButton />
<PopoverBody mt="4">
<FormControl>
<FormLabel>{label}</FormLabel>
<Input onChange={handleInputChange} defaultValue={value}></Input>
</FormControl>
</PopoverBody>
</PopoverContent>
</Popover>
);
};
+11 -12
View File
@@ -1,30 +1,29 @@
import { Box, Link, Text, useColorModeValue } from "@chakra-ui/react";
import { useRouter } from "next/router";
import { FiAlertTriangle } from "react-icons/fi";
import { IconType } from "react-icons/lib";
import { Box, Text, useColorModeValue } from "@chakra-ui/react";
import { AlertTriangle, LucideIcon } from "lucide-react";
import NextLink from "next/link";
type EmptyStateProps = {
text: string;
icon: IconType;
icon: LucideIcon;
"data-cy"?: string;
};
export const EmptyState = (props: EmptyStateProps) => {
const backgroundColor = useColorModeValue("white", "gray.800");
const router = useRouter();
return (
<Box bg={backgroundColor} p="10" borderRadius="xl" shadow="base">
<Box data-cy={props["data-cy"]} bg={backgroundColor} p="10" borderRadius="xl" shadow="base">
<Box display="flex" flexDirection="column" alignItems="center" gap="8" fontSize="lg">
<props.icon size="30" color="DarkOrange" />
<Text>{props.text}</Text>
<Link onClick={() => router.back()} color="blue.500" textUnderlineOffset="3px">
<Text>Click here to go back</Text>
</Link>
<Text data-cy="cy-no-tasks">{props.text}</Text>
<NextLink href="/dashboard">
<Text color="blue.500">Go back to the dashboard</Text>
</NextLink>
</Box>
</Box>
);
};
export const TaskEmptyState = () => {
return <EmptyState text="Looks like no tasks were found." icon={FiAlertTriangle} />;
return <EmptyState text="Looks like no tasks were found." icon={AlertTriangle} data-cy="task" />;
};
+39
View File
@@ -0,0 +1,39 @@
import {
IconButton,
Popover,
PopoverArrow,
PopoverBody,
PopoverCloseButton,
PopoverContent,
PopoverTrigger,
Text,
} from "@chakra-ui/react";
import { InformationCircleIcon } from "@heroicons/react/20/solid";
interface ExplainProps {
explanation: string[];
}
export const Explain = ({ explanation }: ExplainProps) => {
return (
<Popover>
<PopoverTrigger>
<IconButton
aria-label="explanation"
variant="link"
size="xs"
icon={<InformationCircleIcon className="h-4 w-4" />}
></IconButton>
</PopoverTrigger>
<PopoverContent>
<PopoverArrow />
<PopoverCloseButton />
<PopoverBody>
{explanation.map((paragraph, idx) => (
<Text key={idx}>{paragraph}</Text>
))}
</PopoverBody>
</PopoverContent>
</Popover>
);
};
+2 -2
View File
@@ -27,10 +27,10 @@ export function Faq() {
return (
<ListItem className="space-y-10" key={`question_${index}`}>
<Text as="h3" className={`text-lg font-semibold leading-6 ${headingColorClass}`}>
{t(`faq_items.q${index}`)}
{t(`faq_items.q${index as 0}`)}
</Text>
<Text as="p" className={`mt-4 text-sm ${textColorClass}`}>
{t(`faq_items.a${index}`)}
{t(`faq_items.a${index as 0}`)}
</Text>
</ListItem>
);
+48 -205
View File
@@ -1,127 +1,69 @@
import {
Box,
Button,
Checkbox,
Flex,
Modal,
ModalBody,
ModalCloseButton,
ModalContent,
ModalFooter,
ModalHeader,
ModalOverlay,
Popover,
PopoverAnchor,
PopoverArrow,
PopoverBody,
PopoverCloseButton,
PopoverContent,
PopoverTrigger,
Slider,
SliderFilledTrack,
SliderThumb,
SliderTrack,
Tooltip,
useBoolean,
useColorMode,
useColorModeValue,
useId,
useDisclosure,
} from "@chakra-ui/react";
import { QuestionMarkCircleIcon } from "@heroicons/react/20/solid";
import clsx from "clsx";
import { useEffect, useReducer } from "react";
import { FiAlertCircle } from "react-icons/fi";
import { AlertCircle } from "lucide-react";
import { useState } from "react";
import { get, post } from "src/lib/api";
import { colors } from "src/styles/Theme/colors";
import { Message } from "src/types/Conversation";
import useSWR from "swr";
import useSWRImmutable from "swr/immutable";
import useSWRMutation from "swr/mutation";
import { LabelInputGroup } from "./Survey/LabelInputGroup";
interface Label {
name: string;
display_text: string;
help_text: string;
}
interface LoadLabelsAction {
type: "load_labels";
labels: Label[];
}
interface UpdateValueAction {
type: "update_value";
label_index: number;
value: number;
}
interface ToggleLabelAction {
type: "toggle_label";
label_index: number;
check: boolean;
}
interface LabelValue {
label: Label;
checked: boolean;
value: number;
}
interface FlagReportState {
label_values: LabelValue[];
submittable: boolean;
}
interface FlaggableElementProps {
children: React.ReactNode;
message: Message;
}
interface ValidLabelsResponse {
valid_labels: Label[];
}
export const FlaggableElement = (props: FlaggableElementProps) => {
const [report, updateReport] = useReducer(
(state: FlagReportState, action: LoadLabelsAction | UpdateValueAction | ToggleLabelAction): FlagReportState => {
const makeState = (label_values: LabelValue[]): FlagReportState => {
const submittable = label_values.map(({ checked }) => checked).some(Boolean);
return { label_values, submittable };
};
const { data: response } = useSWRImmutable<ValidLabelsResponse>("/api/valid_labels", get);
const { isOpen, onOpen, onClose } = useDisclosure();
const { valid_labels } = response || { valid_labels: [] };
const [values, setValues] = useState<number[]>([]);
switch (action.type) {
case "load_labels":
return makeState(
action.labels.map((label) => {
return { label, checked: false, value: 1 };
})
);
case "toggle_label": {
const values_copy = state.label_values.slice();
values_copy[action.label_index].checked = action.check;
return makeState(values_copy);
}
case "update_value": {
const values_copy = state.label_values.slice();
values_copy[action.label_index].value = action.value;
return makeState(values_copy);
}
}
},
{ label_values: [], submittable: false }
);
const [isEditing, setIsEditing] = useBoolean();
const { data, isLoading } = useSWR("/api/valid_labels", get);
useEffect(() => {
if (isLoading) {
return;
}
if (!data) {
updateReport({ type: "load_labels", labels: [] });
return;
}
const { valid_labels } = data;
updateReport({ type: "load_labels", labels: valid_labels });
}, [data, isLoading]);
const submittable =
values.some((value) => {
return value !== null;
}) &&
values.length === valid_labels.length &&
valid_labels.length > 0;
const { trigger } = useSWRMutation("/api/set_label", post, {
onSuccess: setIsEditing.off,
onSuccess: onClose,
onError: onClose,
});
const submitResponse = () => {
const label_map: Map<string, number> = new Map();
report.label_values.forEach(({ label, checked, value }) => {
if (checked) {
label_map.set(label.name, value);
console.assert(valid_labels.length === values.length);
values.forEach((value, idx) => {
if (value !== null) {
label_map.set(valid_labels[idx].name, value);
}
});
trigger({
@@ -131,22 +73,8 @@ export const FlaggableElement = (props: FlaggableElementProps) => {
});
};
const handleCheckboxState = (checked, label_index) => {
updateReport({ type: "toggle_label", label_index, check: checked });
};
const handleSliderState = (value, label_index) => {
updateReport({ type: "update_value", label_index, value });
};
return (
<Popover
isOpen={isEditing}
onOpen={setIsEditing.on}
onClose={setIsEditing.off}
closeOnBlur={false}
isLazy
lazyBehavior="keepMounted"
>
<Popover isOpen={isOpen} onOpen={onOpen} onClose={onClose} closeOnBlur={false} isLazy lazyBehavior="keepMounted">
<Box display="flex" alignItems="center" flexDirection={["column", "row"]} gap="2">
<PopoverAnchor>{props.children}</PopoverAnchor>
@@ -154,33 +82,24 @@ export const FlaggableElement = (props: FlaggableElementProps) => {
<Box>
<PopoverTrigger>
<Box as="button" display="flex" alignItems="center" justifyContent="center" borderRadius="full" p="1">
<FiAlertCircle size="20" className="text-red-400" aria-hidden="true" />
<AlertCircle size="20" className="text-red-400" aria-hidden="true" />
</Box>
</PopoverTrigger>
</Box>
</Tooltip>
</Box>
<PopoverContent width="auto" p="3" m="4" maxWidth="calc(100vw - 2rem)">
<PopoverArrow />
<Box className="relative h-4">
<PopoverCloseButton />
</Box>
<PopoverBody>
{report.label_values.map(({ label, checked, value }, i) => (
<FlagCheckbox
label={label}
key={i}
idx={i}
checked={checked}
sliderValue={value}
checkboxHandler={handleCheckboxState}
sliderHandler={handleSliderState}
/>
))}
<Flex justify="center">
<Modal isOpen={isOpen} onClose={onClose}>
<ModalOverlay />
<ModalContent>
<ModalHeader>Select one or more labels that apply.</ModalHeader>
<ModalCloseButton />
<ModalBody>
<LabelInputGroup labelIDs={valid_labels.map(({ name }) => name)} onChange={setValues} />
</ModalBody>
<ModalFooter>
<Button
isDisabled={!report.submittable}
isDisabled={!submittable}
onClick={submitResponse}
className={`bg-indigo-600 text-${useColorModeValue(
colors.light.text,
@@ -189,85 +108,9 @@ export const FlaggableElement = (props: FlaggableElementProps) => {
>
Report
</Button>
</Flex>
</PopoverBody>
</PopoverContent>
</ModalFooter>
</ModalContent>
</Modal>
</Popover>
);
};
interface FlagCheckboxProps {
label: Label;
idx: number;
checked: boolean;
sliderValue: number;
checkboxHandler: (newVal: boolean, idx: number) => void;
sliderHandler: (newVal: number, idx: number) => void;
}
export function FlagCheckbox(props: FlagCheckboxProps): JSX.Element {
let AdditionalExplanation = null;
if (props.label.help_text) {
AdditionalExplanation = (
<a href="#" className="text-sm inline group leading-4">
<QuestionMarkCircleIcon
className="h-5 w-5 ml-1 text-gray-400 group-hover:text-gray-500 inline"
aria-hidden="true"
/>
</a>
);
}
const id = useId();
const { colorMode } = useColorMode();
const labelTextClass =
colorMode === "light"
? `text-${colors.light.text} hover:text-blue-700`
: `text-${colors.dark.text} hover:text-blue-400`;
return (
<Flex gap="4" justifyContent="space-between" className="my-2">
<div className="flex items-start align-middle">
<Checkbox
id={id}
isChecked={props.checked}
onChange={(e) => {
props.checkboxHandler(e.target.checked, props.idx);
}}
/>
<label
className={clsx(
"text-sm form-check-label ml-2 break-all inline align-middle first-line:leading-4",
labelTextClass
)}
htmlFor={id}
>
{props.label.display_text}
{AdditionalExplanation}
</label>
</div>
<div
onClick={() => {
if (!props.checked) {
props.checkboxHandler(true, props.idx);
}
}}
>
<Slider
width="100px"
isDisabled={!props.checked}
defaultValue={100}
onChangeEnd={(val) => {
props.sliderHandler(val / 100, props.idx);
}}
>
<SliderTrack>
<SliderFilledTrack />
<SliderThumb />
</SliderTrack>
</Slider>
</div>
</Flex>
);
}
+4 -2
View File
@@ -1,10 +1,11 @@
import { Box, Button, Flex, Text } from "@chakra-ui/react";
import { User } from "lucide-react";
import Image from "next/image";
import Link from "next/link";
import { useSession } from "next-auth/react";
import { useTranslation } from "next-i18next";
import { Flags } from "react-feature-flags";
import { FaUser } from "react-icons/fa";
import { LanguageSelector } from "src/components/LanguageSelector";
import { UserMenu } from "./UserMenu";
@@ -16,7 +17,7 @@ function AccountButton() {
return (
<Link href="/auth/signin" aria-label="Home">
<Flex alignItems="center">
<Button variant="outline" leftIcon={<FaUser />}>
<Button variant="outline" leftIcon={<User size={"20"} />}>
Sign in
</Button>
</Flex>
@@ -45,6 +46,7 @@ export function Header() {
<Flags authorizedFlags={["flagTest"]}>
<Text>FlagTest</Text>
</Flags>
<LanguageSelector />
<AccountButton />
<UserMenu />
</Flex>
+7 -12
View File
@@ -11,16 +11,15 @@ import {
Text,
useColorModeValue,
} from "@chakra-ui/react";
import { AlertTriangle, Layout, LogOut, Settings, Shield } from "lucide-react";
import NextLink from "next/link";
import { signOut, useSession } from "next-auth/react";
import { useTranslation } from "next-i18next";
import React, { ElementType, useCallback } from "react";
import { FiAlertTriangle, FiLayout, FiLogOut, FiSettings, FiShield } from "react-icons/fi";
interface MenuOption {
name: string;
href: string;
desc: string;
icon: ElementType;
isExternal: boolean;
}
@@ -40,22 +39,19 @@ export function UserMenu() {
{
name: t("dashboard"),
href: "/dashboard",
desc: t("dashboard"),
icon: FiLayout,
icon: Layout,
isExternal: false,
},
{
name: t("account_settings"),
href: "/account",
desc: t("account_settings"),
icon: FiSettings,
icon: Settings,
isExternal: false,
},
{
name: t("report_a_bug"),
href: "https://github.com/LAION-AI/Open-Assistant/issues/new/choose",
desc: t("report_a_bug"),
icon: FiAlertTriangle,
icon: AlertTriangle,
isExternal: true,
},
];
@@ -64,8 +60,7 @@ export function UserMenu() {
options.unshift({
name: t("admin_dashboard"),
href: "/admin",
desc: t("admin_dashboard"),
icon: FiShield,
icon: Shield,
isExternal: false,
});
}
@@ -98,7 +93,7 @@ export function UserMenu() {
_hover={{ textDecoration: "none" }}
>
<MenuItem gap="3" borderRadius="md" p="4">
<item.icon className="text-blue-500" aria-hidden="true" />
<item.icon size="1em" className="text-blue-500" aria-hidden="true" />
<Text>{item.name}</Text>
</MenuItem>
</Link>
@@ -106,7 +101,7 @@ export function UserMenu() {
</MenuGroup>
<MenuDivider />
<MenuItem gap="3" borderRadius="md" p="4" onClick={handleSignOut}>
<FiLogOut className="text-blue-500" aria-hidden="true" />
<LogOut size="1em" className="text-blue-500" aria-hidden="true" />
<Text>{t("sign_out")}</Text>
</MenuItem>
</MenuList>
+16
View File
@@ -0,0 +1,16 @@
import { LucideIcon } from "lucide-react";
export const Discord: LucideIcon = ({ size = 24, ...rest }) => {
return (
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 127.14 96.36"
fill="currentColor"
height={size}
width={size}
{...rest}
>
<path d="M107.7 8.07A105.15 105.15 0 0 0 81.47 0a72.06 72.06 0 0 0-3.36 6.83 97.68 97.68 0 0 0-29.11 0A72.37 72.37 0 0 0 45.64 0a105.89 105.89 0 0 0-26.25 8.09C2.79 32.65-1.71 56.6.54 80.21a105.73 105.73 0 0 0 32.17 16.15 77.7 77.7 0 0 0 6.89-11.11 68.42 68.42 0 0 1-10.85-5.18c.91-.66 1.8-1.34 2.66-2a75.57 75.57 0 0 0 64.32 0c.87.71 1.76 1.39 2.66 2a68.68 68.68 0 0 1-10.87 5.19 77 77 0 0 0 6.89 11.1 105.25 105.25 0 0 0 32.19-16.14c2.64-27.38-4.51-51.11-18.9-72.15ZM42.45 65.69C36.18 65.69 31 60 31 53s5-12.74 11.43-12.74S54 46 53.89 53s-5.05 12.69-11.44 12.69Zm42.24 0C78.41 65.69 73.25 60 73.25 53s5-12.74 11.44-12.74S96.23 46 96.12 53s-5.04 12.69-11.43 12.69Z" />
</svg>
);
};
@@ -0,0 +1,41 @@
import { Select } from "@chakra-ui/react";
import { useRouter } from "next/router";
import { useTranslation } from "next-i18next";
import { useCallback, useMemo } from "react";
import cookie from "react-cookies";
const LanguageSelector = () => {
const router = useRouter();
const { i18n } = useTranslation();
// Memo the set of locales and their display names.
const localesAndNames = useMemo(() => {
return router.locales.map((locale) => ({
locale,
name: new Intl.DisplayNames([locale], { type: "language" }).of(locale),
}));
}, [router.locales]);
const languageChanged = useCallback(
async (option) => {
const locale = option.target.value;
cookie.save("NEXT_LOCALE", locale, { path: "/" });
const path = router.asPath;
return router.push(path, path, { locale });
},
[router]
);
const { language: currentLanguage } = i18n;
return (
<Select onChange={languageChanged} defaultValue={currentLanguage}>
{localesAndNames.map(({ locale, name }) => (
<option key={locale} value={locale}>
{name}
</option>
))}
</Select>
);
};
export { LanguageSelector };
@@ -0,0 +1 @@
export * from "./LanguageSelector";

Some files were not shown because too many files have changed in this diff Show More