mirror of
https://github.com/wassname/vllm.git
synced 2026-06-27 17:32:55 +08:00
Deprecate best_of Sampling Parameter in anticipation of vLLM V1 (#13997)
Signed-off-by: vincent-4 <vincentzhongy+githubvincent4@gmail.com>
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -617,7 +617,6 @@ def test_schedule_decode_blocks_to_copy_update():
|
||||
num_gpu_blocks=16)
|
||||
_, seq_group = create_dummy_prompt("1",
|
||||
prompt_length=60,
|
||||
best_of=2,
|
||||
block_size=block_size)
|
||||
curr_loras = None
|
||||
scheduler._allocate_and_set_running(seq_group)
|
||||
@@ -686,7 +685,6 @@ def test_schedule_swapped_cannot_swap_in():
|
||||
for i in range(2):
|
||||
_, seq_group = create_dummy_prompt(str(i),
|
||||
prompt_length=60,
|
||||
best_of=2,
|
||||
block_size=block_size)
|
||||
scheduler._allocate_and_set_running(seq_group)
|
||||
append_new_token_seq_group(60, seq_group, 1)
|
||||
@@ -717,7 +715,6 @@ def test_infeasible_swap():
|
||||
for i in range(2):
|
||||
_, seq_group = create_dummy_prompt(str(i),
|
||||
prompt_length=60,
|
||||
best_of=2,
|
||||
block_size=block_size)
|
||||
scheduler._allocate_and_set_running(seq_group)
|
||||
append_new_token_seq_group(60, seq_group, 1)
|
||||
@@ -747,7 +744,6 @@ def test_schedule_swapped_blocks_to_copy():
|
||||
curr_loras = None
|
||||
_, seq_group = create_dummy_prompt("1",
|
||||
prompt_length=60,
|
||||
best_of=2,
|
||||
block_size=block_size)
|
||||
scheduler._allocate_and_set_running(seq_group)
|
||||
append_new_token_seq_group(60, seq_group, 1)
|
||||
|
||||
+13
-14
@@ -18,7 +18,6 @@ def create_dummy_prompt(
|
||||
prompt_length: int = -1,
|
||||
block_size: Optional[int] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
best_of: int = 1,
|
||||
prompt_tokens: Optional[list[int]] = None,
|
||||
min_tokens: int = 0,
|
||||
max_tokens: int = 16,
|
||||
@@ -32,17 +31,19 @@ def create_dummy_prompt(
|
||||
prompt_tokens = list(range(prompt_length))
|
||||
|
||||
prompt_str = " ".join([str(t) for t in prompt_tokens])
|
||||
prompt = Sequence(int(request_id),
|
||||
inputs=token_inputs(prompt_tokens, prompt=prompt_str),
|
||||
block_size=block_size)
|
||||
seq_group = SequenceGroup(request_id=request_id,
|
||||
seqs=[prompt],
|
||||
arrival_time=time.time(),
|
||||
sampling_params=SamplingParams(
|
||||
best_of=best_of,
|
||||
max_tokens=max_tokens,
|
||||
min_tokens=min_tokens),
|
||||
lora_request=lora_request)
|
||||
prompt = Sequence(
|
||||
int(request_id),
|
||||
inputs=token_inputs(prompt_tokens, prompt=prompt_str),
|
||||
block_size=block_size,
|
||||
)
|
||||
seq_group = SequenceGroup(
|
||||
request_id=request_id,
|
||||
seqs=[prompt],
|
||||
arrival_time=time.time(),
|
||||
sampling_params=SamplingParams(max_tokens=max_tokens,
|
||||
min_tokens=min_tokens),
|
||||
lora_request=lora_request,
|
||||
)
|
||||
|
||||
return prompt, seq_group
|
||||
|
||||
@@ -72,7 +73,6 @@ def create_dummy_prompt_encoder_decoder(
|
||||
encoder_prompt_length: int,
|
||||
block_size: Optional[int] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
best_of: int = 1,
|
||||
) -> tuple[Sequence, Sequence, SequenceGroup]:
|
||||
if not block_size:
|
||||
block_size = decoder_prompt_length
|
||||
@@ -102,7 +102,6 @@ def create_dummy_prompt_encoder_decoder(
|
||||
|
||||
seq_group = SequenceGroup(request_id=request_id,
|
||||
seqs=[decoder_prompt],
|
||||
sampling_params=SamplingParams(best_of=best_of),
|
||||
arrival_time=time.time(),
|
||||
lora_request=lora_request,
|
||||
encoder_seq=encoder_prompt)
|
||||
|
||||
@@ -25,14 +25,6 @@ def test_n_gt_1(model):
|
||||
assert len(outputs[0].outputs) == 3
|
||||
|
||||
|
||||
def test_best_of(model):
|
||||
"""Raise a ValueError since best_of is deprecated."""
|
||||
|
||||
params = SamplingParams(n=2, best_of=3)
|
||||
with pytest.raises(ValueError):
|
||||
_ = model.generate(PROMPT, params)
|
||||
|
||||
|
||||
def test_penalties(model):
|
||||
"""Check that we do not get errors if applied."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user