mirror of
https://github.com/wassname/vllm.git
synced 2026-06-27 18:27:02 +08:00
@@ -10,7 +10,8 @@ from vllm.sampling_params import SamplingParams
|
||||
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
@pytest.mark.parametrize("block_size", [16])
|
||||
def test_computed_prefix_blocks(model: str, block_size: int):
|
||||
# This test checks if we are able to run the engine to completion
|
||||
|
||||
@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
|
||||
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
def test_computed_prefix_blocks(model: str):
|
||||
# This test checks if the engine generates completions both with and
|
||||
# without optional detokenization, that detokenization includes text
|
||||
|
||||
@@ -38,7 +38,8 @@ class CustomUniExecutor(UniProcExecutor):
|
||||
CustomUniExecutorAsync = CustomUniExecutor
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
def test_custom_executor_type_checking(model):
|
||||
with pytest.raises(ValueError):
|
||||
engine_args = EngineArgs(model=model,
|
||||
@@ -51,7 +52,8 @@ def test_custom_executor_type_checking(model):
|
||||
AsyncLLMEngine.from_engine_args(engine_args)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
def test_custom_executor(model, tmp_path):
|
||||
cwd = os.path.abspath(".")
|
||||
os.chdir(tmp_path)
|
||||
@@ -75,7 +77,8 @@ def test_custom_executor(model, tmp_path):
|
||||
os.chdir(cwd)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
def test_custom_executor_async(model, tmp_path):
|
||||
cwd = os.path.abspath(".")
|
||||
os.chdir(tmp_path)
|
||||
@@ -103,7 +106,8 @@ def test_custom_executor_async(model, tmp_path):
|
||||
os.chdir(cwd)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
def test_respect_ray(model):
|
||||
# even for TP=1 and PP=1,
|
||||
# if users specify ray, we should use ray.
|
||||
|
||||
@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
|
||||
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
||||
@pytest.mark.parametrize("model",
|
||||
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||
def test_skip_tokenizer_initialization(model: str):
|
||||
# This test checks if the flag skip_tokenizer_init skips the initialization
|
||||
# of tokenizer and detokenizer. The generated output is expected to contain
|
||||
|
||||
Reference in New Issue
Block a user