diff --git a/olmoe_inference.ipynb b/olmoe_inference.ipynb deleted file mode 100644 index 275efe7..0000000 --- a/olmoe_inference.ipynb +++ /dev/null @@ -1,91 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "928261ae", - "metadata": {}, - "outputs": [], - "source": [ - "from vllm import LLM, SamplingParams" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a35fc2b4", - "metadata": {}, - "outputs": [], - "source": [ - "model_id = \"./llms/OLMoE-1B-7B-0924-Instruct\"\n", - "\n", - "llm = LLM(\n", - " model=model_id,\n", - " # cpu_offload_gb=4,\n", - " tensor_parallel_size=2,\n", - " # gpu_memory_utilization=0.90,\n", - " max_model_len=4096,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a708f11", - "metadata": {}, - "outputs": [], - "source": [ - "sampling_params = SamplingParams(\n", - " temperature=0.6,\n", - " top_p=0.95,\n", - " top_k=20,\n", - " max_tokens=1024,\n", - ")\n", - "\n", - "# Prepare the input to the model\n", - "prompt = \"Give me a short introduction to large language models.\"\n", - "messages = [\n", - " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", - " {\"role\": \"user\", \"content\": prompt}\n", - "]\n", - "\n", - "# Generate outputs\n", - "outputs = llm.chat(\n", - " messages, \n", - " sampling_params=sampling_params,\n", - " # chat_template_kwargs={\"enable_thinking\": True}, # Set to False to strictly disable thinking\n", - ")\n", - "\n", - "# Print the outputs.\n", - "for out in outputs:\n", - " # out.prompt is the input prompt; out.outputs is a list of completion choices\n", - " print(\"=== PROMPT ===\")\n", - " print(out.prompt)\n", - " print(\"=== COMPLETION ===\")\n", - " print(out.outputs[0].text)\n", - " print(\"\\n---\\n\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "moe-explore", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}