Init commit.
225 .clineignore Normal file
@@ -0,0 +1,225 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# Redis
*.rdb
*.aof
*.pid

# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/

# ActiveMQ
activemq-data/

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml

# Python virtual environments
.venv
venv
.venv/
venv/

# Checkpoints for LLMs
llms/
225 .gitignore vendored Normal file
@@ -0,0 +1,225 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# Redis
*.rdb
*.aof
*.pid

# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/

# ActiveMQ
activemq-data/

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml

# Python virtual environments
.venv
venv
.venv/
venv/

# Checkpoints for LLMs
llms/
1 models/.gitignore vendored Normal file
@@ -0,0 +1 @@
vanilla_vllm/
0 models/__init__.py Normal file
0 models/log_expert/__init__.py Normal file
489 models/log_expert/olmoe.py Normal file
@@ -0,0 +1,489 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Inference-only OLMoE model compatible with HuggingFace weights.

This file originates from vllm/model_executor/models/olmoe.py.
"""
from collections.abc import Iterable
from functools import partial
from typing import Any, Optional, Union

import torch
from torch import nn
from transformers import OlmoeConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, VllmConfig
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size,
                              tensor_model_parallel_all_gather)
from vllm.distributed.utils import split_tensor_along_last_dim
from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.linear import (QKVParallelLinear,
                                               ReplicatedLinear,
                                               RowParallelLinear)
from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.layers.vocab_parallel_embedding import (
    ParallelLMHead, VocabParallelEmbedding)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.sequence import IntermediateTensors

from vllm.model_executor.models.interfaces import SupportsPP
from vllm.model_executor.models.utils import (
    AutoWeightsLoader, is_pp_missing_parameter,
    make_empty_intermediate_tensors_factory, make_layers, maybe_prefix)

from utils import DataLogger

logger = init_logger(__name__)


class OlmoeMoE(nn.Module):
    """A tensor-parallel MoE implementation for Olmoe that shards each expert
    across all ranks.

    Each expert's weights are sharded across all ranks and a fused MoE
    kernel is used for the forward pass, and finally we reduce the outputs
    across ranks.
    """

    def __init__(self,
                 num_experts: int,
                 top_k: int,
                 hidden_size: int,
                 intermediate_size: int,
                 params_dtype: Optional[torch.dtype] = None,
                 quant_config: Optional[QuantizationConfig] = None,
                 tp_size: Optional[int] = None,
                 prefix: str = ""):
        super().__init__()
        self.hidden_size = hidden_size

        # Gate always runs at half / full precision for now.
        self.gate = ReplicatedLinear(hidden_size,
                                     num_experts,
                                     bias=False,
                                     quant_config=None)

        self.experts = FusedMoE(num_experts=num_experts,
                                top_k=top_k,
                                hidden_size=hidden_size,
                                intermediate_size=intermediate_size,
                                reduce_results=True,
                                renormalize=False,
                                quant_config=quant_config,
                                tp_size=tp_size,
                                prefix=f"{prefix}.experts")

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # NOTE: hidden_states can have either 1D or 2D shape.
        orig_shape = hidden_states.shape
        hidden_dim = hidden_states.shape[-1]
        hidden_states = hidden_states.view(-1, hidden_dim)
        # router_logits: (num_tokens, n_experts)
        router_logits, _ = self.gate(hidden_states)
        final_hidden_states = self.experts(hidden_states=hidden_states,
                                           router_logits=router_logits)
        return final_hidden_states.view(orig_shape)


class OlmoeAttention(nn.Module):

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        num_kv_heads: int,
        rope_theta: float = 10000,
        rope_scaling: Optional[dict[str, Any]] = None,
        max_position_embeddings: int = 4096,
        cache_config: Optional[CacheConfig] = None,
        quant_config: Optional[QuantizationConfig] = None,
        prefix: str = "",
    ) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        tp_size = get_tensor_model_parallel_world_size()
        self.total_num_heads = num_heads
        assert self.total_num_heads % tp_size == 0
        self.num_heads = self.total_num_heads // tp_size
        self.total_num_kv_heads = num_kv_heads
        if self.total_num_kv_heads >= tp_size:
            # Number of KV heads is greater than TP size, so we partition
            # the KV heads across multiple tensor parallel GPUs.
            assert self.total_num_kv_heads % tp_size == 0
        else:
            # Number of KV heads is less than TP size, so we replicate
            # the KV heads across multiple tensor parallel GPUs.
            assert tp_size % self.total_num_kv_heads == 0
        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
        self.head_dim = hidden_size // self.total_num_heads
        self.q_size = self.num_heads * self.head_dim
        self.kv_size = self.num_kv_heads * self.head_dim
        self.scaling = self.head_dim**-0.5
        self.rope_theta = rope_theta
        self.max_position_embeddings = max_position_embeddings

        self.qkv_proj = QKVParallelLinear(
            hidden_size,
            self.head_dim,
            self.total_num_heads,
            self.total_num_kv_heads,
            bias=False,
            quant_config=quant_config,
        )
        self.tp_size = tp_size
        self.tp_rank = get_tensor_model_parallel_rank()
        self.q_norm = RMSNorm(self.total_num_heads * self.head_dim, eps=1e-5)
        self.k_norm = RMSNorm(self.total_num_kv_heads * self.head_dim,
                              eps=1e-5)
        self.o_proj = RowParallelLinear(
            self.total_num_heads * self.head_dim,
            hidden_size,
            bias=False,
            quant_config=quant_config,
        )

        self.rotary_emb = get_rope(
            self.head_dim,
            rotary_dim=self.head_dim,
            max_position=max_position_embeddings,
            base=rope_theta,
            rope_scaling=rope_scaling,
            is_neox_style=True,
        )
        self.attn = Attention(self.num_heads,
                              self.head_dim,
                              self.scaling,
                              num_kv_heads=self.num_kv_heads,
                              cache_config=cache_config,
                              quant_config=quant_config,
                              prefix=f"{prefix}.attn")

    def _apply_qk_norm(self, q: torch.Tensor,
                       k: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        if self.tp_size > 1:
            q = tensor_model_parallel_all_gather(q.contiguous())
            k = tensor_model_parallel_all_gather(k.contiguous())
        q = self.q_norm(q)
        k = self.k_norm(k)
        if self.tp_size > 1:
            splitter = partial(split_tensor_along_last_dim,
                               num_partitions=self.tp_size)
            q = splitter(q)[self.tp_rank]
            k = splitter(k)[self.tp_rank]
        return q, k

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
    ) -> torch.Tensor:
        qkv, _ = self.qkv_proj(hidden_states)
        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
        q, k = self._apply_qk_norm(q, k)
        q, k = self.rotary_emb(positions, q, k)
        attn_output = self.attn(q, k, v)
        output, _ = self.o_proj(attn_output)
        return output


class OlmoeDecoderLayer(nn.Module):

    def __init__(
        self,
        config: OlmoeConfig,
        cache_config: Optional[CacheConfig] = None,
        quant_config: Optional[QuantizationConfig] = None,
        prefix: str = "",
    ) -> None:
        super().__init__()
        self.hidden_size = config.hidden_size
        rope_theta = getattr(config, "rope_theta", 10000)
        rope_scaling = getattr(config, "rope_scaling", None)
        max_position_embeddings = getattr(config, "max_position_embeddings",
                                          4096)

        self.self_attn = OlmoeAttention(
            hidden_size=self.hidden_size,
            num_heads=config.num_attention_heads,
            num_kv_heads=config.num_key_value_heads,
            rope_theta=rope_theta,
            rope_scaling=rope_scaling,
            max_position_embeddings=max_position_embeddings,
            cache_config=cache_config,
            quant_config=quant_config,
            prefix=f"{prefix}.self_attn",
        )

        self.mlp = OlmoeMoE(
            num_experts=config.num_experts,
            top_k=config.num_experts_per_tok,
            hidden_size=config.hidden_size,
            intermediate_size=config.intermediate_size,
            quant_config=quant_config,
            prefix=f"{prefix}.mlp",
        )
        self.input_layernorm = RMSNorm(config.hidden_size, eps=1e-5)
        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=1e-5)

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
        residual: Optional[torch.Tensor],
    ) -> tuple[torch.Tensor, torch.Tensor]:
        # Self Attention
        if residual is None:
            residual = hidden_states
            hidden_states = self.input_layernorm(hidden_states)
        else:
            hidden_states, residual = self.input_layernorm(
                hidden_states, residual)

        hidden_states = self.self_attn(
            positions=positions,
            hidden_states=hidden_states,
        )

        # Fully Connected
        hidden_states, residual = self.post_attention_layernorm(
            hidden_states, residual)
        hidden_states = self.mlp(hidden_states)
        return hidden_states, residual


@support_torch_compile
class OlmoeModel(nn.Module):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()

        config = vllm_config.model_config.hf_config
        cache_config = vllm_config.cache_config
        quant_config = vllm_config.quant_config

        self.vocab_size = config.vocab_size
        self.config = config
        self.embed_tokens = VocabParallelEmbedding(
            config.vocab_size,
            config.hidden_size,
        )
        self.start_layer, self.end_layer, self.layers = make_layers(
            config.num_hidden_layers,
            lambda prefix: OlmoeDecoderLayer(
                config, cache_config, quant_config, prefix=prefix),
            prefix=f"{prefix}.layers")
        self.norm = RMSNorm(config.hidden_size, eps=1e-5)

        self.make_empty_intermediate_tensors = (
            make_empty_intermediate_tensors_factory(
                ["hidden_states", "residual"], config.hidden_size))

    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
        return self.embed_tokens(input_ids)

    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        intermediate_tensors: Optional[IntermediateTensors],
        inputs_embeds: Optional[torch.Tensor] = None,
    ) -> Union[torch.Tensor, IntermediateTensors]:
        if get_pp_group().is_first_rank:
            if inputs_embeds is not None:
                hidden_states = inputs_embeds
            else:
                hidden_states = self.get_input_embeddings(input_ids)
            residual = None
        else:
            assert intermediate_tensors is not None
            hidden_states = intermediate_tensors["hidden_states"]
            residual = intermediate_tensors["residual"]

        for layer in self.layers[self.start_layer:self.end_layer]:
            hidden_states, residual = layer(
                positions,
                hidden_states,
                residual,
            )

        if not get_pp_group().is_last_rank:
            return IntermediateTensors({
                "hidden_states": hidden_states,
                "residual": residual
            })

        hidden_states, _ = self.norm(hidden_states, residual)
        return hidden_states

    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
        # Params for weights, fp8 weight scales, fp8 activation scales
        # (param_name, weight_name, expert_id, shard_id)
        return FusedMoE.make_expert_params_mapping(
            ckpt_gate_proj_name="gate_proj",
            ckpt_down_proj_name="down_proj",
            ckpt_up_proj_name="up_proj",
            num_experts=self.config.num_experts)

    def load_weights(self, weights: Iterable[tuple[str,
                                                   torch.Tensor]]) -> set[str]:
        stacked_params_mapping = [
            # (param_name, shard_name, shard_id)
            ("qkv_proj", "q_proj", "q"),
            ("qkv_proj", "k_proj", "k"),
            ("qkv_proj", "v_proj", "v"),
            ("gate_up_proj", "gate_proj", 0),
            ("gate_up_proj", "up_proj", 1),
        ]

        params_dict = dict(self.named_parameters())
        loaded_params: set[str] = set()
        expert_params_mapping = self.get_expert_mapping()
        for name, loaded_weight in weights:
            for (param_name, weight_name, shard_id) in stacked_params_mapping:
                # Skip non-stacked layers and experts (experts handled below).
                if weight_name not in name:
                    continue
                # We have mlp.experts[0].gate_proj in the checkpoint.
                # Since we handle the experts below in expert_params_mapping,
                # we need to skip here BEFORE we update the name, otherwise
                # name will be updated to mlp.experts[0].gate_up_proj, which
                # will then be updated below in expert_params_mapping
                # for mlp.experts[0].gate_gate_up_proj, which breaks load.
                if "mlp.experts" in name:
                    continue
                name = name.replace(weight_name, param_name)
                # Skip loading extra bias for GPTQ models.
                if name.endswith(".bias") and name not in params_dict:
                    continue
                # Skip layers on other devices.
                if is_pp_missing_parameter(name, self):
                    continue
                if name not in params_dict:
                    continue

                param = params_dict[name]
                weight_loader = param.weight_loader
                weight_loader(param, loaded_weight, shard_id)
                break
            else:
                for mapping in expert_params_mapping:
                    param_name, weight_name, expert_id, shard_id = mapping
                    if weight_name not in name:
                        continue
                    name = name.replace(weight_name, param_name)
                    # Skip layers on other devices.
                    if is_pp_missing_parameter(name, self):
                        continue
                    param = params_dict[name]
                    weight_loader = param.weight_loader
                    weight_loader(param,
                                  loaded_weight,
                                  name,
                                  shard_id=shard_id,
                                  expert_id=expert_id)
                    break
                else:
                    # Skip loading extra bias for GPTQ models.
                    if name.endswith(".bias") and name not in params_dict:
                        continue
                    # Skip layers on other devices.
                    if is_pp_missing_parameter(name, self):
                        continue
                    # Remapping the name of FP8 kv-scale.
                    if name.endswith("kv_scale"):
                        remapped_kv_scale_name = name.replace(
                            ".kv_scale", ".attn.kv_scale")
                        if remapped_kv_scale_name not in params_dict:
                            logger.warning_once(
                                "Found kv scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv-scale is not loaded.",  # noqa: E501
                                name,
                                remapped_kv_scale_name,
                            )
                            continue
                        else:
                            name = remapped_kv_scale_name

                    param = params_dict[name]
                    weight_loader = getattr(param, "weight_loader",
                                            default_weight_loader)
                    weight_loader(param, loaded_weight)
            loaded_params.add(name)
        return loaded_params


class OlmoeForCausalLM(nn.Module, SupportsPP):

    packed_modules_mapping = {
        "qkv_proj": [
            "q_proj",
            "k_proj",
            "v_proj",
        ],
        "gate_up_proj": [
            "gate_proj",
            "up_proj",
        ],
    }

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()
        config = vllm_config.model_config.hf_config
        quant_config = vllm_config.quant_config
        self.config = config
        self.quant_config = quant_config
        self.model = OlmoeModel(vllm_config=vllm_config,
                                prefix=maybe_prefix(prefix, "model"))
        self.lm_head = ParallelLMHead(config.vocab_size,
                                      config.hidden_size,
                                      quant_config=quant_config)
        self.logits_processor = LogitsProcessor(config.vocab_size)

        self.make_empty_intermediate_tensors = (
            self.model.make_empty_intermediate_tensors)

    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
        return self.model.get_input_embeddings(input_ids)

    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        intermediate_tensors: Optional[IntermediateTensors] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
    ) -> Union[torch.Tensor, IntermediateTensors]:
        hidden_states = self.model(input_ids, positions, intermediate_tensors,
                                   inputs_embeds)
        return hidden_states

    def compute_logits(self, hidden_states: torch.Tensor,
                       sampling_metadata: SamplingMetadata) -> torch.Tensor:
        logits = self.logits_processor(self.lm_head, hidden_states,
                                       sampling_metadata)
        return logits

    def load_weights(self, weights: Iterable[tuple[str,
                                                   torch.Tensor]]) -> set[str]:
        loader = AutoWeightsLoader(self)
        return loader.load_weights(weights)

    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
        return self.model.get_expert_mapping()
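Note: the patched model above keeps the same `OlmoeForCausalLM` class name and interface as vLLM's built-in implementation, so it can presumably be loaded as an out-of-tree model. A minimal, hypothetical registration sketch (uses vLLM's documented `ModelRegistry` hook; this snippet is not part of the commit):

# Hypothetical sketch: register the patched OLMoE implementation so vLLM
# uses it in place of the built-in vllm/model_executor/models/olmoe.py.
from vllm import ModelRegistry
from models.log_expert.olmoe import OlmoeForCausalLM

# The architecture name must match "architectures" in the model's HF config.
ModelRegistry.register_model("OlmoeForCausalLM", OlmoeForCausalLM)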
91 olmoe_inference.ipynb Normal file
@@ -0,0 +1,91 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "928261ae",
      "metadata": {},
      "outputs": [],
      "source": [
        "from vllm import LLM, SamplingParams"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "a35fc2b4",
      "metadata": {},
      "outputs": [],
      "source": [
        "model_id = \"./llms/OLMoE-1B-7B-0924-Instruct\"\n",
        "\n",
        "llm = LLM(\n",
        "    model=model_id,\n",
        "    # cpu_offload_gb=4,\n",
        "    tensor_parallel_size=2,\n",
        "    # gpu_memory_utilization=0.90,\n",
        "    max_model_len=4096,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "6a708f11",
      "metadata": {},
      "outputs": [],
      "source": [
        "sampling_params = SamplingParams(\n",
        "    temperature=0.6,\n",
        "    top_p=0.95,\n",
        "    top_k=20,\n",
        "    max_tokens=1024,\n",
        ")\n",
        "\n",
        "# Prepare the input to the model\n",
        "prompt = \"Give me a short introduction to large language models.\"\n",
        "messages = [\n",
        "    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
        "    {\"role\": \"user\", \"content\": prompt}\n",
        "]\n",
        "\n",
        "# Generate outputs\n",
        "outputs = llm.chat(\n",
        "    messages,\n",
        "    sampling_params=sampling_params,\n",
        "    # chat_template_kwargs={\"enable_thinking\": True},  # Set to False to strictly disable thinking\n",
        ")\n",
        "\n",
        "# Print the outputs.\n",
        "for out in outputs:\n",
        "    # out.prompt is the input prompt; out.outputs is a list of completion choices\n",
        "    print(\"=== PROMPT ===\")\n",
        "    print(out.prompt)\n",
        "    print(\"=== COMPLETION ===\")\n",
        "    print(out.outputs[0].text)\n",
        "    print(\"\\n---\\n\")"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "moe-explore",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.12.11"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
70 olmoe_inference_vllm.py Normal file
@@ -0,0 +1,70 @@
# %%
import gc

import torch
from vllm import LLM, SamplingParams
from vllm.config import CompilationConfig, CompilationLevel
from vllm.distributed.parallel_state import destroy_model_parallel

# %%
model_id = "./llms/OLMoE-1B-7B-0924-Instruct"

try:
    llm = LLM(
        model=model_id,
        cpu_offload_gb=4,
        # tensor_parallel_size=2,
        gpu_memory_utilization=0.90,
        max_model_len=4096,
        # compilation_config=CompilationConfig(
        #     level=CompilationLevel.PIECEWISE,
        #     # By default, it goes up to max_num_seqs
        #     cudagraph_capture_sizes=[1, 2, 4, 8, 16],
        # ),
        enforce_eager=True,
    )

    sampling_params = SamplingParams(
        temperature=0.6,
        top_p=0.95,
        top_k=20,
        max_tokens=1024,
    )

    # Prepare the input to the model
    prompt = "Give me a short introduction to large language models."
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    # Generate outputs
    outputs = llm.chat(
        messages,
        sampling_params=sampling_params,
        # chat_template_kwargs={"enable_thinking": True},  # Set to False to strictly disable thinking
    )

    # Print the outputs.
    for out in outputs:
        # out.prompt is the input prompt; out.outputs is a list of completion choices
        # print("=== PROMPT ===")
        # print(out.prompt)
        # print("=== COMPLETION ===")
        print(out.outputs[0].text)
        print("\n---\n")

    print("Finish completion")

except Exception as e:
    print(e)

finally:
    # Drop references to the engine before tearing down the distributed
    # state, so the GPU memory can actually be reclaimed.
    if llm := globals().get("llm", None):
        if engine := getattr(llm, "llm_engine", None):
            # llm.llm_engine
            del engine
        del llm
    destroy_model_parallel()
    torch.cuda.empty_cache()
    gc.collect()
9 requirements.txt Normal file
@@ -0,0 +1,9 @@
vllm==0.10.1.1
notebook
ipywidgets
python-dotenv
pandas
datasets
accelerate
kernels
modelscope
1 utils/__init__.py Normal file
@@ -0,0 +1 @@
from .logger import DataLogger, LoggerConfig
493 utils/logger.py Normal file
@@ -0,0 +1,493 @@
"""
Asynchronous, batched, and schema-evolving Parquet logger.

This module provides the `DataLogger`, a high-performance logger for structured
data, designed for applications like machine learning experiments, simulations,
or any scenario requiring efficient serialization of row-based data.

Key Features:
- **Unified Interface**: Log data via a simple `DataLogger.log({"key": "value"})` call.
- **Asynchronous & Batched**: A dedicated background thread handles I/O,
  batching rows to minimize disk writes and reduce application latency.
- **Schema Evolution**: Automatically adapts the Parquet schema if new data fields
  are introduced, rewriting the file to maintain a consistent structure.
- **Singleton Pattern**: A global singleton instance is managed automatically,
  providing a convenient, fire-and-forget logging experience.
- **Type Handling**: Natively handles Python primitives, NumPy arrays, and PyTorch
  tensors, converting them to Parquet-compatible formats.
- **Robust & Thread-Safe**: Designed for use in multi-threaded environments.

Basic Usage:
-------------
.. code-block:: python

    from utils import DataLogger

    # The first call creates and configures the singleton logger.
    # A timestamped filename is generated by default.
    DataLogger.log({"step": 0, "loss": 10.5, "accuracy": 0.5})
    DataLogger.log({"step": 1, "loss": 9.8, "accuracy": 0.55})

    # For the singleton, data is automatically flushed and saved on program exit.
    # No explicit `close()` call is required for this simple case.

Advanced Usage (Instance-Based):
---------------------------------
.. code-block:: python

    from utils import DataLogger, LoggerConfig

    config = LoggerConfig(batch_size=512, flush_interval=5.0)
    with DataLogger("my_experiment.parquet", config=config) as logger:
        for i in range(1000):
            logger.submit({"value": i})
    # The `with` statement ensures flush and close on exit.
"""

from __future__ import annotations

import atexit
import datetime
import os
import queue
import threading
import time
import traceback
import typing as t
from dataclasses import dataclass
from pathlib import Path

# Third-party libraries are imported with runtime checks to provide clear
# error messages if they are not installed.
try:
    import numpy as np
except ImportError:
    np = None  # type: ignore

try:
    import pandas as pd
except ImportError:
    raise ImportError(
        "pandas is required for DataLogger. Install with `pip install pandas`."
    )

try:
    import pyarrow as pa
    import pyarrow.parquet as pq
except ImportError:
    raise ImportError(
        "pyarrow is required for DataLogger. Install with `pip install pyarrow`."
    )

try:
    import torch
except ImportError:
    torch = None  # type: ignore

# Type alias for a single row of data.
Row = t.Dict[str, t.Any]


@dataclass
class LoggerConfig:
    """Configuration for the DataLogger's writer behavior."""

    batch_size: int = 1024
    """Number of rows to accumulate before writing a batch to the Parquet file."""

    flush_interval: float = 1.0
    """Maximum time in seconds to wait before flushing the buffer, even if
    `batch_size` is not reached."""

    parquet_compression: str = "snappy"
    """Compression codec to use for the Parquet file.
    Common options: 'snappy', 'gzip', 'brotli', 'none'."""

    allow_schema_rewrite: bool = True
    """If True, the logger will automatically rewrite the entire Parquet file to
    accommodate new columns. If False, it will raise an error."""


class DataLogger:
    """
    An asynchronous, batched logger that writes data to a Parquet file.

    This class manages a background thread to handle file I/O, allowing the
    calling application to log data with minimal blocking. It supports schema
    evolution, making it robust to changes in data structure over time.
    """

    _singleton: t.Optional["DataLogger"] = None
    _singleton_lock = threading.Lock()

    # --- Public API ---

    @classmethod
    def get_instance(
        cls,
        path: t.Optional[t.Union[str, Path]] = None,
        config: t.Optional[LoggerConfig] = None,
    ) -> "DataLogger":
        """
        Get or create the global singleton instance of the DataLogger.

        The first time this method is called, it creates a new `DataLogger`
        instance and registers a cleanup function via `atexit` to ensure
        `close()` is called automatically upon program termination.

        Subsequent calls will ignore the arguments and return the existing
        instance.

        Args:
            path: The file path for the log file. If None, a timestamped
                filename like 'log_YYYYMMDD-HHMMSS.parquet' is created in the
                current working directory.
            config: A `LoggerConfig` object to configure the writer's behavior.
                If None, default settings are used.

        Returns:
            The singleton `DataLogger` instance.
        """
        if cls._singleton is None:
            with cls._singleton_lock:
                if cls._singleton is None:
                    # Create the singleton instance.
                    instance = cls(path, config)
                    # Register its close method to be called at program exit.
                    # This ensures data is saved even if the user forgets to call close().
                    atexit.register(instance.close)
                    cls._singleton = instance
        return cls._singleton

    @classmethod
    def log(cls, row: Row) -> None:
        """
        Log a data row using the singleton instance.

        This is a convenience method that lazily initializes the singleton on
        its first call. The operation is non-blocking; the data is placed in
        an internal queue to be processed by the background writer thread.

        Args:
            row: A dictionary representing a single row of data, where keys
                are column names and values are the data points.
        """
        instance = cls.get_instance()
        instance.submit(row)

    def __init__(
        self,
        path: t.Optional[t.Union[str, Path]] = None,
        config: t.Optional[LoggerConfig] = None,
    ):
        """
        Initialize a DataLogger instance.

        Args:
            path: The file path for the log file. If None, a timestamped
                filename is automatically generated.
            config: A `LoggerConfig` object. If None, default settings are used.
        """
        self.path = self._resolve_path(path)
        self._config = config or LoggerConfig()

        # Internal state for the writer thread
        self._queue: queue.Queue[t.Optional[Row]] = queue.Queue()
        self._stop_event = threading.Event()
        self._flush_event = threading.Event()
        self._writer_thread: t.Optional[threading.Thread] = None
        self._writer_lock = threading.RLock()  # Protects writer and schema

        # Parquet-specific state, managed exclusively by the writer thread
        self._parquet_writer: t.Optional[pq.ParquetWriter] = None
        self._schema: t.Optional[pa.Schema] = None
        self._buffer: t.List[Row] = []

        self._start_writer_thread()

    def submit(self, row: Row) -> None:
        """
        Submit a data row to be written asynchronously by the logger instance.

        Args:
            row: A dictionary representing a single row of data.

        Raises:
            TypeError: If the provided row is not a dictionary.
            RuntimeError: If the logger has already been closed.
        """
        if self._stop_event.is_set():
            raise RuntimeError("Logger has been closed and cannot accept new data.")
        if not isinstance(row, dict):
            raise TypeError(f"Expected a dict for a row, but got {type(row)}.")

        normalized_row = self._normalize_row(row)
        self._queue.put(normalized_row)

    def flush(self, timeout: float = 10.0) -> None:
        """
        Block until all currently queued and buffered data is written to disk.

        Args:
            timeout: Maximum time in seconds to wait for the flush to complete.
        """
        if self._writer_thread is None or not self._writer_thread.is_alive():
            return

        self._flush_event.clear()
        self._queue.put(None)  # Sentinel to trigger a flush
        self._flush_event.wait(timeout)

    def close(self, timeout: float = 10.0) -> None:
        """
        Flush all remaining data and shut down the background writer thread.

        This method is idempotent and thread-safe. It is designed to be
        called explicitly, via a `with` statement, or automatically at program
        exit.

        Args:
            timeout: Maximum time in seconds to wait for the writer thread
                to finish.
        """
        if self._stop_event.is_set():
            return

        self._stop_event.set()
        self._queue.put(None)  # Wake up the writer thread if it's blocking.

        # Do not join the writer thread from itself, which would cause a deadlock.
        if self._writer_thread and threading.current_thread() != self._writer_thread:
            self._writer_thread.join(timeout)

    def __enter__(self) -> "DataLogger":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Ensures the logger is closed upon exiting a `with` block."""
        self.close()

    def __del__(self):
        """Ensures data is flushed when the logger object is destroyed."""
        self.close()

    # --- Internal Methods ---

    def _resolve_path(self, path: t.Optional[t.Union[str, Path]]) -> Path:
        """Determine the final output path for the log file."""
        if path is None:
            timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            filename = f"log_{timestamp}.parquet"
            return Path.cwd() / filename

        resolved_path = Path(path)
        if resolved_path.suffix == "":
            resolved_path = resolved_path.with_suffix(".parquet")
        return resolved_path

    def _start_writer_thread(self) -> None:
        """Initialize and start the background writer thread."""
        if self._writer_thread is not None:
            return
        thread_name = f"DataLoggerWriter-{self.path.name}"
        self._writer_thread = threading.Thread(
            target=self._writer_loop, name=thread_name, daemon=True
        )
        self._writer_thread.start()

    def _writer_loop(self) -> None:
        """
        The main loop for the background writer thread.

        This loop continuously pulls data from the queue, batches it, and
        writes it to the Parquet file. It handles flush signals, stop events,
        and schema evolution.
        """
        try:
            while not self._stop_event.is_set():
                try:
                    # Block until an item is available or the flush interval times out.
                    item = self._queue.get(timeout=self._config.flush_interval)
                except queue.Empty:
                    # Timeout occurred, treat as a periodic flush signal.
                    item = None

                if item is not None:
                    self._buffer.append(item)

                buffer_size = len(self._buffer)
                is_flush_signal = item is None
                is_batch_full = buffer_size >= self._config.batch_size
                is_shutting_down = self._stop_event.is_set()

                if self._buffer and (
                    is_flush_signal or is_batch_full or is_shutting_down
                ):
                    self._write_batch(self._buffer)
                    self._buffer.clear()

                if is_flush_signal:
                    self._flush_event.set()  # Signal that a flush completed

            # Final drain of the queue and buffer after the stop event is set.
            self._drain_remaining()

        except Exception as e:
            print(f"FATAL: DataLogger writer thread crashed: {e}", flush=True)
            traceback.print_exc()
        finally:
            # This block ensures that the Parquet writer is always closed
            # when the writer thread exits, for any reason.
            with self._writer_lock:
                if self._parquet_writer:
                    try:
                        self._parquet_writer.close()
                    except Exception as e:
                        print(
                            f"ERROR: Exception while closing Parquet writer: {e}",
                            flush=True,
                        )
                    self._parquet_writer = None

    def _drain_remaining(self) -> None:
        """Process all remaining items in the queue and buffer during shutdown."""
        while True:
            try:
                item = self._queue.get_nowait()
                if item is not None:
                    self._buffer.append(item)
            except queue.Empty:
                break
        if self._buffer:
            self._write_batch(self._buffer)
            self._buffer.clear()

    def _write_batch(self, rows: t.List[Row]) -> None:
        """
        Convert a list of rows into a Parquet table and write it to the file.

        This method handles schema creation, validation, and evolution.
        It is always executed within the writer thread.
        """
        if not rows:
            return

        try:
            with self._writer_lock:
                df = pd.DataFrame(rows)
                # Ensure a consistent column order for schema stability.
                df = df.reindex(sorted(df.columns), axis=1)
                new_table = pa.Table.from_pandas(df, preserve_index=False)

                if self.path.exists():
                    # File exists, need to append or evolve schema
                    existing_table = pq.read_table(self.path)
                    existing_schema = existing_table.schema

                    if existing_schema.equals(new_table.schema):
                        # Schema matches, append the data
                        combined_table = pa.concat_tables([existing_table, new_table])
                    else:
                        # Schema evolution needed
                        if not self._config.allow_schema_rewrite:
                            raise RuntimeError(
                                "Schema mismatch detected, and rewriting is disabled. "
                                f"Existing schema: {existing_schema}, New schema: {new_table.schema}"
                            )
                        print(
                            f"INFO: Schema evolution detected. Rewriting {self.path}...",
                            flush=True,
                        )
                        # Combine with schema evolution
                        combined_df = pd.concat(
                            [existing_table.to_pandas(), new_table.to_pandas()],
                            ignore_index=True,
                            sort=False,
                        )
                        combined_df = combined_df.reindex(
                            sorted(combined_df.columns), axis=1
                        )
                        combined_table = pa.Table.from_pandas(
                            combined_df, preserve_index=False
                        )
                else:
                    # New file
                    self.path.parent.mkdir(parents=True, exist_ok=True)
                    combined_table = new_table

                # Write the combined table atomically
                temp_path = self.path.with_suffix(f"{self.path.suffix}.tmp")
                pq.write_table(
                    combined_table,
                    temp_path,
                    compression=self._config.parquet_compression,
                )
                os.replace(temp_path, self.path)

                # Update our schema tracking
                self._schema = combined_table.schema

        except Exception as e:
            print(f"ERROR: Failed to write batch to {self.path}: {e}", flush=True)
            traceback.print_exc()

    def _rewrite_with_new_schema(self, new_table: pa.Table) -> None:
        """
        Rewrite the entire Parquet file to accommodate an evolved schema.

        This is a potentially expensive operation as it reads the entire
        existing file into memory.

        NOTE: `_write_batch` currently performs this rewrite inline; this
        helper is retained for reference.

        Args:
            new_table: The new batch of data with a different schema.
        """
        print(f"INFO: Schema evolution detected. Rewriting {self.path}...", flush=True)

        # Close the current writer before reading the file.
        if self._parquet_writer:
            self._parquet_writer.close()

        # Read existing data, combine with new data, and create a unified table.
        existing_table = pq.read_table(self.path)
        combined_df = pd.concat(
            [existing_table.to_pandas(), new_table.to_pandas()],
            ignore_index=True,
            sort=False,
        )
        # Re-sort columns for the new unified schema.
        combined_df = combined_df.reindex(sorted(combined_df.columns), axis=1)
        final_table = pa.Table.from_pandas(combined_df, preserve_index=False)
        self._schema = final_table.schema

        # Atomically replace the old file with the new one.
        temp_path = self.path.with_suffix(f"{self.path.suffix}.tmp")
        pq.write_table(
            final_table, temp_path, compression=self._config.parquet_compression
        )
        os.replace(temp_path, self.path)

        # Re-initialize the writer with the new schema for subsequent writes.
        self._parquet_writer = pq.ParquetWriter(
            self.path, self._schema, compression=self._config.parquet_compression
        )

    def _normalize_row(self, row: Row) -> Row:
        """Sanitize all values in a row for Parquet compatibility."""
        return {key: self._normalize_value(value) for key, value in row.items()}

    def _normalize_value(self, value: t.Any) -> t.Any:
        """
        Convert a single value to a Parquet-friendly format.
        - NumPy arrays and Torch tensors are converted to nested lists.
        - Other types are passed through for pandas to handle.
        """
        if value is None:
            return None
        if np and isinstance(value, np.ndarray):
            return value.tolist()
        if torch and isinstance(value, torch.Tensor):
            return value.detach().cpu().numpy().tolist()
            # return value.detach().cpu().numpy()
        return value
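Note: the module docstring above covers basic and instance-based usage; as a further illustration of the schema-evolution path in `_write_batch`, here is a minimal sketch (hypothetical file name, assumes the package is importable as `utils`; this snippet is not part of the commit):

# Hypothetical demo of schema evolution: the second row introduces an "lr"
# column, so DataLogger rewrites demo_log.parquet with the union of columns
# (earlier rows get nulls for "lr").
from utils import DataLogger, LoggerConfig

with DataLogger("demo_log.parquet", config=LoggerConfig(batch_size=1)) as log:
    log.submit({"step": 0, "loss": 1.00})
    log.submit({"step": 1, "loss": 0.95, "lr": 3e-4})  # new column triggers file rewrite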