From 3bf0e3a1feb1afef86b99a657929a7c028139a50 Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Sun, 8 Jan 2023 18:55:04 -0500 Subject: [PATCH 1/5] Expanding survey of relevant research --- docs/docs/research/general.md | 71 ++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/docs/docs/research/general.md b/docs/docs/research/general.md index 56f935ac..dbddfefe 100644 --- a/docs/docs/research/general.md +++ b/docs/docs/research/general.md @@ -1,7 +1,63 @@ -# General +# Research This page lists research papers that are relevant to the project. +## Table of Contents + +- Reinforcement Learning from Human Feedback +- Generating Text From Language Models +- Automatically Generating Instruction Data for Training +- Uncertainty Estimation of Language Model Outputs + +## Reinforcement Learning from Human Feedback + +Reinforcement Learning from Human Feedback (RLHF) is a method for fine-tuning a +generative language model based on a reward model that is learned from human +preference data. This method facilitates the learning of instruction-tuned +models, among other things. + +### Learning to summarize from human feedback [[ArXiv](https://arxiv.org/pdf/2009.01325.pdf)], [[Github](https://github.com/openai/summarize-from-feedback)] + +> In this work, we show that it is possible to significantly improve summary +> quality by training a model to optimize for human preferences. We collect a +> large, high-quality dataset of human comparisons between summaries, train a +> model to predict the human-preferred summary, and use that model as a reward +> function to fine-tune a summarization policy using reinforcement learning. 
+ +### Training language models to follow instructions with human feedback [[ArXiv](https://arxiv.org/pdf/2203.02155.pdf)] + +> Starting with a set of labeler-written prompts and prompts submitted through +> the OpenAI API, we collect a dataset of labeler demonstrations of the desired +> model behavior, which we use to fine-tune GPT-3 using supervised learning. We +> then collect a dataset of rankings of model outputs, which we use to further +> fine-tune this supervised model using reinforcement learning from human +> feedback. + +### Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback [[ArXiv](https://arxiv.org/pdf/2204.05862.pdf)] + +> We apply preference modeling and reinforcement learning from human feedback +> (RLHF) to finetune language models to act as helpful and harmless assistants. +> We find this alignment training improves performance on almost all NLP +> evaluations, and is fully compatible with training for specialized skills such +> as python coding and summarization. + +## Generating Text From Language Models + +A language model generates output text token by token, autoregressively. The +large search space of this task requires some method of narrowing down the set +of tokens to be considered in each step. This method, in turn, has a big impact +on the quality of the resulting text. + +### RANKGEN: Improving Text Generation with Large Ranking Models [[ArXiv](https://arxiv.org/pdf/2205.09726.pdf)], [[Github](https://github.com/martiansideofthemoon/rankgen)] + +> Given an input sequence (or prefix), modern language models often assign high +> probabilities to output sequences that are repetitive, incoherent, or +> irrelevant to the prefix; as such, model-generated text also contains such +> artifacts. To address these issues we present RankGen, a 1.2B parameter +> encoder model for English that scores model generations given a prefix. 
+> RankGen can be flexibly incorporated as a scoring function in beam search and +> used to decode from any pretrained language model. + ## Automatically Generating Instruction Data for Training This line of work is about significantly reducing the need for manually @@ -32,3 +88,16 @@ models. > rivals the effectiveness of training on open-source manually-curated datasets, > surpassing the performance of models such as T0++ and Tk-Instruct across > various benchmarks. + +## Uncertainty Estimation of Language Model Outputs + +### Teaching models to express their uncertainty in words [[ArXiv](https://arxiv.org/pdf/2205.14334.pdf)] + +> We show that a GPT-3 model can learn to express uncertainty about its own +> answers in natural language -- without use of model logits. When given a +> question, the model generates both an answer and a level of confidence (e.g. +> "90% confidence" or "high confidence"). These levels map to probabilities that +> are well calibrated. The model also remains moderately calibrated under +> distribution shift, and is sensitive to uncertainty in its own answers, rather +> than imitating human examples. + From cc4c008933cf7ef1629a413f40d3dca78f4f06fe Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Sun, 8 Jan 2023 21:21:44 -0500 Subject: [PATCH 2/5] Fixing EoL --- docs/docs/research/general.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/docs/research/general.md b/docs/docs/research/general.md index dbddfefe..4186ebac 100644 --- a/docs/docs/research/general.md +++ b/docs/docs/research/general.md @@ -100,4 +100,3 @@ models. > are well calibrated. The model also remains moderately calibrated under > distribution shift, and is sensitive to uncertainty in its own answers, rather > than imitating human examples. 
- From 08bdadf222749e59a5ed4386d5ead74e45c19bf9 Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Mon, 9 Jan 2023 22:07:06 -0500 Subject: [PATCH 3/5] Adding BNB 8-bit Adam --- model/supervised_finetuning/configs/config.yaml | 4 ++-- model/supervised_finetuning/requirements.txt | 1 + model/supervised_finetuning/trainer.py | 5 +++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/model/supervised_finetuning/configs/config.yaml b/model/supervised_finetuning/configs/config.yaml index 97e37121..616aa828 100644 --- a/model/supervised_finetuning/configs/config.yaml +++ b/model/supervised_finetuning/configs/config.yaml @@ -22,7 +22,7 @@ defaults: loss_fn: CrossEntropyLoss eval_size: log_dir: "base" - quantization: + quantization: false galactica-125: learning_rate: 5e-5 @@ -62,4 +62,4 @@ debug: gradient_accumulation_steps: 1 per_device_train_batch_size: 1 per_device_eval_batch_size: 1 - quantization: + quantization: false diff --git a/model/supervised_finetuning/requirements.txt b/model/supervised_finetuning/requirements.txt index 7d78f36c..6338614d 100644 --- a/model/supervised_finetuning/requirements.txt +++ b/model/supervised_finetuning/requirements.txt @@ -7,3 +7,4 @@ PyYAML==6.0 scikit_learn==1.2.0 torch==1.13.1 transformers==4.25.1 +bitsandbytes==0.36.0.post2 \ No newline at end of file diff --git a/model/supervised_finetuning/trainer.py b/model/supervised_finetuning/trainer.py index cb55131d..ae7fb3c3 100644 --- a/model/supervised_finetuning/trainer.py +++ b/model/supervised_finetuning/trainer.py @@ -6,6 +6,8 @@ from typing import Any, Dict, List, Optional, Tuple, Union import torch from torch import nn from transformers import PreTrainedModel, Trainer, TrainingArguments +from transformers.training_args import OptimizerNames + from utils import get_dataset, get_loss, get_model, get_tokenizer, read_yamls os.environ["WANDB_PROJECT"] = "supervised-finetuning" @@ -130,12 +132,15 @@ if __name__ == "__main__": train, evals, collate_fn = 
get_dataset(training_conf, tokenizer) + optimizer = OptimizerNames.ADAMW_BNB if training_conf.quantization else None + args = TrainingArguments( output_dir=f"{training_conf.model_name}-{training_conf.log_dir}-finetuned", num_train_epochs=training_conf.num_train_epochs, warmup_steps=training_conf.warmup_steps, learning_rate=float(training_conf.learning_rate), deepspeed="configs/zero_config.json" if training_conf.deepspeed else None, + optim=optimizer, fp16=True, local_rank=training_conf.local_rank, gradient_checkpointing=training_conf.gradient_checkpointing, From 67aeed2cd743506303b746697c1788caf5ef7e79 Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Mon, 9 Jan 2023 23:03:29 -0500 Subject: [PATCH 4/5] Adding override of 32-bit optimization for embedding layer --- model/supervised_finetuning/trainer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/model/supervised_finetuning/trainer.py b/model/supervised_finetuning/trainer.py index ae7fb3c3..450854f1 100644 --- a/model/supervised_finetuning/trainer.py +++ b/model/supervised_finetuning/trainer.py @@ -3,11 +3,11 @@ import os from distutils.util import strtobool from typing import Any, Dict, List, Optional, Tuple, Union +import bitsandbytes import torch from torch import nn from transformers import PreTrainedModel, Trainer, TrainingArguments from transformers.training_args import OptimizerNames - from utils import get_dataset, get_loss, get_model, get_tokenizer, read_yamls os.environ["WANDB_PROJECT"] = "supervised-finetuning" @@ -134,6 +134,13 @@ if __name__ == "__main__": optimizer = OptimizerNames.ADAMW_BNB if training_conf.quantization else None + if training_conf.quantization: + for module in model.modules(): + if isinstance(module, torch.nn.Embedding): + bitsandbytes.optim.GlobalOptimManager.get_instance().register_module_override( + module, "weight", {"optim_bits": 32} + ) + args = TrainingArguments( output_dir=f"{training_conf.model_name}-{training_conf.log_dir}-finetuned", 
num_train_epochs=training_conf.num_train_epochs, From d95c741ea0b109f45471af823782af35a51cf04f Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Tue, 10 Jan 2023 20:16:02 -0500 Subject: [PATCH 5/5] Fixing requirements file --- model/supervised_finetuning/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/supervised_finetuning/requirements.txt b/model/supervised_finetuning/requirements.txt index 6338614d..c47a1218 100644 --- a/model/supervised_finetuning/requirements.txt +++ b/model/supervised_finetuning/requirements.txt @@ -1,4 +1,5 @@ accelerate==0.15.0 +bitsandbytes==0.36.0.post2 datasets==2.8.0 deepspeed==0.7.7 mpi4py==3.1.4 @@ -7,4 +8,3 @@ PyYAML==6.0 scikit_learn==1.2.0 torch==1.13.1 transformers==4.25.1 -bitsandbytes==0.36.0.post2 \ No newline at end of file