Skip to content

Commit

Permalink
[Frontend] [Core] Tensorizer: support dynamic num_readers, update version (vllm-project#4467)
Browse files Browse the repository at this point in the history
  • Loading branch information
alpayariyak authored Apr 30, 2024
1 parent a494140 commit 715c2d8
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 9 deletions.
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ types-setuptools

# testing
pytest
tensorizer==2.9.0a0
tensorizer==2.9.0
pytest-forked
pytest-asyncio
pytest-rerunfailures
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def _read_requirements(filename: str) -> List[str]:
install_requires=get_requirements(),
ext_modules=ext_modules,
extras_require={
"tensorizer": ["tensorizer==2.9.0a1"],
"tensorizer": ["tensorizer==2.9.0"],
},
cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
package_data=package_data,
Expand Down
17 changes: 10 additions & 7 deletions vllm/model_executor/model_loader/tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class TensorizerConfig:
str, bytes, os.PathLike, int]
vllm_tensorized: bool
verify_hash: Optional[bool] = False
num_readers: Optional[int] = 1
num_readers: Optional[int] = None
encryption_keyfile: Optional[str] = None
s3_access_key_id: Optional[str] = None
s3_secret_access_key: Optional[str] = None
Expand Down Expand Up @@ -104,7 +104,7 @@ class TensorizerArgs:
str, bytes, os.PathLike, int]
vllm_tensorized: bool
verify_hash: Optional[bool] = False
num_readers: Optional[int] = 1
num_readers: Optional[int] = None
encryption_keyfile: Optional[str] = None
s3_access_key_id: Optional[str] = None
s3_secret_access_key: Optional[str] = None
Expand All @@ -125,8 +125,9 @@ class TensorizerArgs:
the hashes stored in the metadata. A `HashMismatchError` will be
raised if any of the hashes do not match.
num_readers: Controls how many threads are allowed to read concurrently
from the source file. Default is 1. This greatly increases
performance.
from the source file. Default is `None`, which will dynamically set
the number of readers based on the number of available
resources and model size. This greatly increases performance.
encryption_keyfile: File path to a binary file containing a
binary key to use for decryption. `None` (the default) means
no decryption. See the example script in
Expand Down Expand Up @@ -199,10 +200,12 @@ def add_cli_args(
"use for decryption. Can be a file path or S3 network URI.")
group.add_argument(
"--num-readers",
default=1,
default=None,
type=int,
help="Controls how many threads are allowed to read concurrently "
"from the source file.")
"from the source file. Default is `None`, which will dynamically "
"set the number of readers based on the available resources "
"and model size. This greatly increases performance.")
group.add_argument(
"--s3-access-key-id",
default=None,
Expand Down Expand Up @@ -337,7 +340,7 @@ def deserialize(self):
per_second = convert_bytes(deserializer.total_tensor_bytes / duration)
after_mem = get_mem_usage()
deserializer.close()
logger.info("Deserialized %s in %0.2fs, %f/s", total_bytes_str,
logger.info("Deserialized %s in %0.2fs, %s/s", total_bytes_str,
end - start, per_second)
logger.info("Memory usage before: %s", before_mem)
logger.info("Memory usage after: %s", after_mem)
Expand Down

0 comments on commit 715c2d8

Please sign in to comment.