Implement OLMoE logits logging.
Update: logging items in OLMoE. Update: some changes to the main script. Add: logits post-processing notebook.
This commit is contained in:
@ -11,14 +11,12 @@ from vllm.distributed.parallel_state import destroy_model_parallel
|
||||
from models.register import register_vllm_logit_logging_models
|
||||
from utils import DataLogger as dlog
|
||||
|
||||
# %%
|
||||
# dlog.get_instance(path=f"olmoe_{datetime.now().strftime("%Y%m%d-%H%M%S")}.parquet")
|
||||
|
||||
# %%
|
||||
model_id = "./llms/OLMoE-1B-7B-0924-Instruct"
|
||||
|
||||
try:
|
||||
log_file = Path(f"olmoe_{datetime.now().strftime("%Y%m%d-%H%M%S")}.parquet")
|
||||
log_file = Path(f"logs/olmoe_{datetime.now().strftime("%Y%m%d-%H%M%S")}.parquet")
|
||||
if log_file.exists():
|
||||
log_file.unlink()
|
||||
|
||||
@ -32,6 +30,7 @@ try:
|
||||
# tensor_parallel_size=2,
|
||||
gpu_memory_utilization=0.95,
|
||||
max_model_len=4096,
|
||||
max_num_seqs=1,
|
||||
# compilation_config=CompilationConfig(
|
||||
# level=CompilationLevel.PIECEWISE,
|
||||
# # By default, it goes up to max_num_seqs
|
||||
@ -42,21 +41,21 @@ try:
|
||||
|
||||
sampling_params = SamplingParams(
|
||||
temperature=0.6,
|
||||
top_p=0.95,
|
||||
top_k=20,
|
||||
# top_p=0.95,
|
||||
# top_k=20,
|
||||
top_p=1.0,
|
||||
top_k=1,
|
||||
max_tokens=1024,
|
||||
)
|
||||
|
||||
# Prepare the input to the model
|
||||
prompt = "Give me a very short introduction to large language models."
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": prompt},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the name of the tallest building in Paris? Output the final answer ONLY:",
|
||||
},
|
||||
]
|
||||
# messages = [
|
||||
# {"role": "system", "content": "你是一位人工智能助手。"},
|
||||
# {"role": "user", "content": "请简要地介绍什么是大语言模型。"},
|
||||
# ]
|
||||
|
||||
# Generate outputs
|
||||
outputs = llm.chat(
|
||||
@ -73,12 +72,16 @@ try:
|
||||
# print("=== COMPLETION ===")
|
||||
print(out.outputs[0].text)
|
||||
print("\n---\n")
|
||||
dlog.log({
|
||||
"_time": datetime.now(),
|
||||
"output_text": out.outputs[0].text
|
||||
})
|
||||
dlog.log(
|
||||
{
|
||||
"_time": datetime.now(),
|
||||
"src": "main",
|
||||
"output_text": out.outputs[0].text,
|
||||
"token_ids": out.outputs[0].token_ids,
|
||||
}
|
||||
)
|
||||
|
||||
print("Finish completion")
|
||||
print("\n---\nFinish completion")
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
Reference in New Issue
Block a user