diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 00000000..36c60243 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1675697798532 + + + + + + + + + \ No newline at end of file diff --git a/model/supervised_finetuning/efficiency_utils.py b/model/supervised_finetuning/efficiency_utils.py index 272e1b2f..6a27accc 100644 --- a/model/supervised_finetuning/efficiency_utils.py +++ b/model/supervised_finetuning/efficiency_utils.py @@ -1,7 +1,7 @@ import functools import torch -from transformers.activations import QuickGELUActivation, NewGELUActivation, FastGELUActivation, GELUActivation +from transformers.activations import FastGELUActivation, GELUActivation, NewGELUActivation, QuickGELUActivation def rsetattr(obj, attr, val): diff --git a/model/supervised_finetuning/trainer.py b/model/supervised_finetuning/trainer.py index 8edad854..83034d95 100644 --- a/model/supervised_finetuning/trainer.py +++ b/model/supervised_finetuning/trainer.py @@ -5,11 +5,11 @@ from typing import Any, Dict, List, Optional, Tuple, Union import bitsandbytes import torch +from efficiency_utils import fuse_gelu from torch import nn from transformers import PreTrainedModel, Trainer, TrainingArguments from transformers.training_args import OptimizerNames from utils import get_dataset, get_loss, get_metrics, get_model, get_tokenizer, read_yamls -from efficiency_utils import fuse_gelu def compute_metrics(eval_pred, preprocess_fns, metrics):