From 24b07523aafdb5a0633d015eec6f7441b6534abd Mon Sep 17 00:00:00 2001
From: "jack.butler"
Date: Fri, 10 Feb 2023 09:47:20 +0000
Subject: [PATCH] add test for tokenizer matching behaviour

---
 model/supervised_finetuning/tests/test_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/model/supervised_finetuning/tests/test_utils.py b/model/supervised_finetuning/tests/test_utils.py
index 96637e98..c4982024 100644
--- a/model/supervised_finetuning/tests/test_utils.py
+++ b/model/supervised_finetuning/tests/test_utils.py
@@ -11,15 +11,15 @@ def test_tokenizer():
 
 
 def test_tokenizer_successful_match():
-    for config_name, config in TOKENIZER_CONFIGS:
+    for config_name, config in TOKENIZER_CONFIGS.items():
         found_config = match_tokenizer_name(config_name)
         assert found_config == config
 
 
 def test_tokenizer_partial_match():
-    for config_name, config in TOKENIZER_CONFIGS:
-        found_config = match_tokenizer_name(config_name[: len(config_name) - 1])
-        assert found_config == config
+    for config_name in ["facebook/galactica-1.3b", "togethercomputer/GPT-JT-6B-v1", "Salesforce/codegen-2B-multi"]:
+        found_config = match_tokenizer_name(config_name)
+        assert found_config
 
 
 def test_tokenizer_failed_match():