From c20dfaad5b48e4e176557378983c84f443b6dd2a Mon Sep 17 00:00:00 2001 From: Sotirios Anagnostidis Date: Tue, 3 Jan 2023 22:45:34 +0100 Subject: [PATCH] pre-commits --- model/supervised_finetuning/README.md | 4 ++-- model/supervised_finetuning/configs/config.yaml | 11 +++++++++++ .../supervised_finetuning/custom_datasets/__init__.py | 6 +----- model/supervised_finetuning/utils.py | 7 +++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/model/supervised_finetuning/README.md b/model/supervised_finetuning/README.md index e223e1cd..014afa95 100644 --- a/model/supervised_finetuning/README.md +++ b/model/supervised_finetuning/README.md @@ -33,6 +33,6 @@ Experimental results in wandb ## TODOS - decide on a model -- add special token to declare prompt and reply. Do nto freeze the weights for - these - Merge utils etc with reward model +- Causal Modelling for GPT-JT does not leverage the bidirectional mask for the + prompt? (https://huggingface.co/togethercomputer/GPT-JT-6B-v1) diff --git a/model/supervised_finetuning/configs/config.yaml b/model/supervised_finetuning/configs/config.yaml index f7164002..29086395 100644 --- a/model/supervised_finetuning/configs/config.yaml +++ b/model/supervised_finetuning/configs/config.yaml @@ -32,6 +32,17 @@ galactica-125: per_device_train_batch_size: 4 per_device_eval_batch_size: 4 +gpt-jt: + learning_rate: 2e-6 + model_name: togethercomputer/GPT-JT-6B-v1 + weight_decay: 0.01 + max_length: 1024 + warmup_steps: 600 + gradient_checkpointing: false + gradient_accumulation_steps: 2 + per_device_train_batch_size: 4 + per_device_eval_batch_size: 4 + debug: eval_steps: 20 eval_size: 100 diff --git a/model/supervised_finetuning/custom_datasets/__init__.py b/model/supervised_finetuning/custom_datasets/__init__.py index 907e1a9b..7e3bdc79 100644 --- a/model/supervised_finetuning/custom_datasets/__init__.py +++ b/model/supervised_finetuning/custom_datasets/__init__.py @@ -2,11 +2,7 @@ from datasets import load_dataset from 
sklearn.model_selection import train_test_split from torch.utils.data import Dataset, Subset -QA_SPECIAL_TOKENS = { - 'Question': '', - 'Answer': '' -} - +QA_SPECIAL_TOKENS = {"Question": "", "Answer": ""} class SquadV2Dataset(Dataset): diff --git a/model/supervised_finetuning/utils.py b/model/supervised_finetuning/utils.py index 6aa5d365..a31f74d3 100644 --- a/model/supervised_finetuning/utils.py +++ b/model/supervised_finetuning/utils.py @@ -1,15 +1,14 @@ from pathlib import Path import yaml -from custom_datasets import get_one_dataset +from custom_datasets import QA_SPECIAL_TOKENS, get_one_dataset from custom_datasets.dialogue_collator import DialogueDataCollator from losses import CrossEntropyLoss from sklearn.model_selection import train_test_split from torch.utils.data import ConcatDataset, Subset from transformers import AutoModelForCausalLM, AutoTokenizer -from custom_datasets import QA_SPECIAL_TOKENS -SUPPORTED_MODELS = ["galactica"] +SUPPORTED_MODELS = ["galactica", "GPT-JT"] # deprecated .. def get_tokenizer(conf): @@ -20,7 +19,7 @@ def get_tokenizer(conf): additional_special_tokens = ( [] - if not "additional_special_tokens" in tokenizer.special_tokens_map + if "additional_special_tokens" not in tokenizer.special_tokens_map else tokenizer.special_tokens_map["additional_special_tokens"] ) additional_special_tokens = list(set(additional_special_tokens + list(QA_SPECIAL_TOKENS.values())))