diff --git a/cpp/src/benchmark.cpp b/cpp/src/benchmark.cpp
index 7f72ecc..991eae8 100644
--- a/cpp/src/benchmark.cpp
+++ b/cpp/src/benchmark.cpp
@@ -23,7 +23,7 @@ void benchmark(std::string model_name, int n_warmup = 5, int n_iter = 5)
     auto trt_mod = torch::jit::load(model_name, torch::kCUDA);
     trt_mod.eval();
 
-    torch::Tensor input_tensor = torch::rand({1, 3, 512, 512}).cuda();
+    torch::Tensor input_tensor = torch::rand({3, 512, 512}).cuda();
 
     std::cout << "warmup["<< n_warmup << "]" << std::endl;
     while (n_warmup--)
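The benchmark input loses its leading batch dimension here: the exported module is wrapped in a `TracingAdapter` and consumes flattened, unbatched inputs (a single CHW image tensor), which is also why `scripts/benchmark_gpu.py` below prints `inputs[0].shape`. A minimal Python sketch of the equivalent call, assuming a 512×512 export saved as `model.ts` (placeholder path):

```python
# Sketch: call the exported TorchScript module the way the C++ benchmark
# now does -- a single unbatched CHW tensor, not NCHW.
import torch

trt_mod = torch.jit.load("model.ts", map_location="cuda").eval()  # placeholder path
x = torch.rand(3, 512, 512, device="cuda")  # CHW, no batch dimension
with torch.no_grad():
    out = trt_mod(x)
```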
"pyreadline3" version = "3.5.4" @@ -4888,19 +4936,19 @@ reference = "torch-cu124" [[package]] name = "torch-tensorrt" -version = "2.6.0.dev20241028+cu124" +version = "2.6.0.dev20241030+cu124" description = "Torch-TensorRT is a package which allows users to automatically compile PyTorch and TorchScript modules to TensorRT while remaining in PyTorch" optional = false python-versions = ">=3.9" files = [ - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp310-cp310-linux_x86_64.whl", hash = "sha256:417367ff79c45207dafd4e00fac18f228f4eccee3b794df7d1716ace2e486ea1"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp310-cp310-win_amd64.whl", hash = "sha256:bad699a998ab36c62633508a4217757128c4d0a9d436b47c743b318f7dcbf7bb"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp311-cp311-linux_x86_64.whl", hash = "sha256:d021b8fb322a1a18e53f3bb0f8b30bf4f0b3ba1fa6013063087af8b391cf77c8"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp311-cp311-win_amd64.whl", hash = "sha256:fd82aafc9337a73d06777839245fb60d304ec0b5af246a43590b3f9e02e51369"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp312-cp312-linux_x86_64.whl", hash = "sha256:dc2e319cc362830abe86b5aaa85d98256754c1627c84c28060ffd2715270e1a1"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp312-cp312-win_amd64.whl", hash = "sha256:522cf309f322f21ced0e99383be1cc9e0f08e1229ba33e577f744fe5fb272d2e"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp39-cp39-linux_x86_64.whl", hash = "sha256:9322e21bc03fc3e889ab35fdf83b652736d7662098ba6ff610c008248508b5cf"}, - {file = "torch_tensorrt-2.6.0.dev20241028+cu124-cp39-cp39-win_amd64.whl", hash = "sha256:2391ff89e3011e032a5d984c6fc2cbb90bd462cffa63b716552f817fb4d68a14"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp310-cp310-linux_x86_64.whl", hash = "sha256:2b2afc87a6358d2f61ed5f722cfb674b1fb9ce766efc6e87e3df159340af8016"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp310-cp310-win_amd64.whl", hash = "sha256:7995d57316cef1ccf6c0db02fae581f4f102ee9d925b8c993d1b86d301a524e3"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp311-cp311-linux_x86_64.whl", hash = "sha256:75ce10f91c6eed7786bf050f27f168157b0f20a358040e5f380d908da6bb99f7"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp311-cp311-win_amd64.whl", hash = "sha256:79976077898c7a00999b80ab96131df45f8bcedd511f43252b3faa74cd63c9e6"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp312-cp312-linux_x86_64.whl", hash = "sha256:bf15559c2a61901e6711528ade8a3493cfd2aff839d818a2bcecda7b9c053678"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp312-cp312-win_amd64.whl", hash = "sha256:dc6640802b13685df1594c487cea56667bb88e3739a054348f896594f6d9c6ec"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp39-cp39-linux_x86_64.whl", hash = "sha256:b530b3dfd302b1a97cea30488fc46865c230cdd53365d1291466f4a94e619e45"}, + {file = "torch_tensorrt-2.6.0.dev20241030+cu124-cp39-cp39-win_amd64.whl", hash = "sha256:5fb9fc65083867cf11794c75d1071dd2836edac99aabaa942782032af2a4aa89"}, ] [package.dependencies] @@ -5154,20 +5202,19 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake [[package]] name = "urllib3" -version = "2.2.3" +version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false -python-versions = ">=3.8" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, - {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, + {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, + {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "viztracer" @@ -5279,6 +5326,84 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] optional = ["python-socks", "wsaccel"] test = ["websockets"] +[[package]] +name = "websockets" +version = "10.4" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "websockets-10.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d58804e996d7d2307173d56c297cf7bc132c52df27a3efaac5e8d43e36c21c48"}, + {file = "websockets-10.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc0b82d728fe21a0d03e65f81980abbbcb13b5387f733a1a870672c5be26edab"}, + {file = "websockets-10.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ba089c499e1f4155d2a3c2a05d2878a3428cf321c848f2b5a45ce55f0d7d310c"}, + {file = "websockets-10.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33d69ca7612f0ddff3316b0c7b33ca180d464ecac2d115805c044bf0a3b0d032"}, + {file = "websockets-10.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62e627f6b6d4aed919a2052efc408da7a545c606268d5ab5bfab4432734b82b4"}, + {file = "websockets-10.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ea7b82bfcae927eeffc55d2ffa31665dc7fec7b8dc654506b8e5a518eb4d50"}, + {file = "websockets-10.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e0cb5cc6ece6ffa75baccfd5c02cffe776f3f5c8bf486811f9d3ea3453676ce8"}, + {file = "websockets-10.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae5e95cfb53ab1da62185e23b3130e11d64431179debac6dc3c6acf08760e9b1"}, + {file = "websockets-10.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7c584f366f46ba667cfa66020344886cf47088e79c9b9d39c84ce9ea98aaa331"}, + {file = "websockets-10.4-cp310-cp310-win32.whl", hash = "sha256:b029fb2032ae4724d8ae8d4f6b363f2cc39e4c7b12454df8df7f0f563ed3e61a"}, + {file = "websockets-10.4-cp310-cp310-win_amd64.whl", hash = "sha256:8dc96f64ae43dde92530775e9cb169979f414dcf5cff670455d81a6823b42089"}, + {file = "websockets-10.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:47a2964021f2110116cc1125b3e6d87ab5ad16dea161949e7244ec583b905bb4"}, + {file = "websockets-10.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e789376b52c295c4946403bd0efecf27ab98f05319df4583d3c48e43c7342c2f"}, + {file = "websockets-10.4-cp311-cp311-macosx_11_0_arm64.whl", 
hash = "sha256:7d3f0b61c45c3fa9a349cf484962c559a8a1d80dae6977276df8fd1fa5e3cb8c"}, + {file = "websockets-10.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55b5905705725af31ccef50e55391621532cd64fbf0bc6f4bac935f0fccec46"}, + {file = "websockets-10.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00c870522cdb69cd625b93f002961ffb0c095394f06ba8c48f17eef7c1541f96"}, + {file = "websockets-10.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f38706e0b15d3c20ef6259fd4bc1700cd133b06c3c1bb108ffe3f8947be15fa"}, + {file = "websockets-10.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f2c38d588887a609191d30e902df2a32711f708abfd85d318ca9b367258cfd0c"}, + {file = "websockets-10.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106"}, + {file = "websockets-10.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:90fcf8929836d4a0e964d799a58823547df5a5e9afa83081761630553be731f9"}, + {file = "websockets-10.4-cp311-cp311-win32.whl", hash = "sha256:b9968694c5f467bf67ef97ae7ad4d56d14be2751000c1207d31bf3bb8860bae8"}, + {file = "websockets-10.4-cp311-cp311-win_amd64.whl", hash = "sha256:a7a240d7a74bf8d5cb3bfe6be7f21697a28ec4b1a437607bae08ac7acf5b4882"}, + {file = "websockets-10.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:74de2b894b47f1d21cbd0b37a5e2b2392ad95d17ae983e64727e18eb281fe7cb"}, + {file = "websockets-10.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3a686ecb4aa0d64ae60c9c9f1a7d5d46cab9bfb5d91a2d303d00e2cd4c4c5cc"}, + {file = "websockets-10.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0d15c968ea7a65211e084f523151dbf8ae44634de03c801b8bd070b74e85033"}, + {file = "websockets-10.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00213676a2e46b6ebf6045bc11d0f529d9120baa6f58d122b4021ad92adabd41"}, + {file = "websockets-10.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e23173580d740bf8822fd0379e4bf30aa1d5a92a4f252d34e893070c081050df"}, + {file = "websockets-10.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:dd500e0a5e11969cdd3320935ca2ff1e936f2358f9c2e61f100a1660933320ea"}, + {file = "websockets-10.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4239b6027e3d66a89446908ff3027d2737afc1a375f8fd3eea630a4842ec9a0c"}, + {file = "websockets-10.4-cp37-cp37m-win32.whl", hash = "sha256:8a5cc00546e0a701da4639aa0bbcb0ae2bb678c87f46da01ac2d789e1f2d2038"}, + {file = "websockets-10.4-cp37-cp37m-win_amd64.whl", hash = "sha256:a9f9a735deaf9a0cadc2d8c50d1a5bcdbae8b6e539c6e08237bc4082d7c13f28"}, + {file = "websockets-10.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c1289596042fad2cdceb05e1ebf7aadf9995c928e0da2b7a4e99494953b1b94"}, + {file = "websockets-10.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0cff816f51fb33c26d6e2b16b5c7d48eaa31dae5488ace6aae468b361f422b63"}, + {file = "websockets-10.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dd9becd5fe29773d140d68d607d66a38f60e31b86df75332703757ee645b6faf"}, + {file = "websockets-10.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45ec8e75b7dbc9539cbfafa570742fe4f676eb8b0d3694b67dabe2f2ceed8aa6"}, + {file = "websockets-10.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:4f72e5cd0f18f262f5da20efa9e241699e0cf3a766317a17392550c9ad7b37d8"}, + {file = "websockets-10.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:185929b4808b36a79c65b7865783b87b6841e852ef5407a2fb0c03381092fa3b"}, + {file = "websockets-10.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d27a7e34c313b3a7f91adcd05134315002aaf8540d7b4f90336beafaea6217c"}, + {file = "websockets-10.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:884be66c76a444c59f801ac13f40c76f176f1bfa815ef5b8ed44321e74f1600b"}, + {file = "websockets-10.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:931c039af54fc195fe6ad536fde4b0de04da9d5916e78e55405436348cfb0e56"}, + {file = "websockets-10.4-cp38-cp38-win32.whl", hash = "sha256:db3c336f9eda2532ec0fd8ea49fef7a8df8f6c804cdf4f39e5c5c0d4a4ad9a7a"}, + {file = "websockets-10.4-cp38-cp38-win_amd64.whl", hash = "sha256:48c08473563323f9c9debac781ecf66f94ad5a3680a38fe84dee5388cf5acaf6"}, + {file = "websockets-10.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:40e826de3085721dabc7cf9bfd41682dadc02286d8cf149b3ad05bff89311e4f"}, + {file = "websockets-10.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:56029457f219ade1f2fc12a6504ea61e14ee227a815531f9738e41203a429112"}, + {file = "websockets-10.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f5fc088b7a32f244c519a048c170f14cf2251b849ef0e20cbbb0fdf0fdaf556f"}, + {file = "websockets-10.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fc8709c00704194213d45e455adc106ff9e87658297f72d544220e32029cd3d"}, + {file = "websockets-10.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0154f7691e4fe6c2b2bc275b5701e8b158dae92a1ab229e2b940efe11905dff4"}, + {file = "websockets-10.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c6d2264f485f0b53adf22697ac11e261ce84805c232ed5dbe6b1bcb84b00ff0"}, + {file = "websockets-10.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9bc42e8402dc5e9905fb8b9649f57efcb2056693b7e88faa8fb029256ba9c68c"}, + {file = "websockets-10.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:edc344de4dac1d89300a053ac973299e82d3db56330f3494905643bb68801269"}, + {file = "websockets-10.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:84bc2a7d075f32f6ed98652db3a680a17a4edb21ca7f80fe42e38753a58ee02b"}, + {file = "websockets-10.4-cp39-cp39-win32.whl", hash = "sha256:c94ae4faf2d09f7c81847c63843f84fe47bf6253c9d60b20f25edfd30fb12588"}, + {file = "websockets-10.4-cp39-cp39-win_amd64.whl", hash = "sha256:bbccd847aa0c3a69b5f691a84d2341a4f8a629c6922558f2a70611305f902d74"}, + {file = "websockets-10.4-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:82ff5e1cae4e855147fd57a2863376ed7454134c2bf49ec604dfe71e446e2193"}, + {file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d210abe51b5da0ffdbf7b43eed0cfdff8a55a1ab17abbec4301c9ff077dd0342"}, + {file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:942de28af58f352a6f588bc72490ae0f4ccd6dfc2bd3de5945b882a078e4e179"}, + {file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9b27d6c1c6cd53dc93614967e9ce00ae7f864a2d9f99fe5ed86706e1ecbf485"}, + {file = "websockets-10.4-pp37-pypy37_pp73-win_amd64.whl", hash = 
"sha256:3d3cac3e32b2c8414f4f87c1b2ab686fa6284a980ba283617404377cd448f631"}, + {file = "websockets-10.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:da39dd03d130162deb63da51f6e66ed73032ae62e74aaccc4236e30edccddbb0"}, + {file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389f8dbb5c489e305fb113ca1b6bdcdaa130923f77485db5b189de343a179393"}, + {file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09a1814bb15eff7069e51fed0826df0bc0702652b5cb8f87697d469d79c23576"}, + {file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f"}, + {file = "websockets-10.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b343f521b047493dc4022dd338fc6db9d9282658862756b4f6fd0e996c1380e1"}, + {file = "websockets-10.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:932af322458da7e4e35df32f050389e13d3d96b09d274b22a7aa1808f292fee4"}, + {file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a4162139374a49eb18ef5b2f4da1dd95c994588f5033d64e0bbfda4b6b6fcf"}, + {file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c57e4c1349fbe0e446c9fa7b19ed2f8a4417233b6984277cce392819123142d3"}, + {file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b627c266f295de9dea86bd1112ed3d5fafb69a348af30a2422e16590a8ecba13"}, + {file = "websockets-10.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:05a7233089f8bd355e8cbe127c2e8ca0b4ea55467861906b80d2ebc7db4d6b72"}, + {file = "websockets-10.4.tar.gz", hash = "sha256:eef610b23933c54d5d921c92578ae5f89813438fded840c2e9809d378dc765d3"}, +] + [[package]] name = "wheel" version = "0.44.0" @@ -5326,4 +5451,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "3.10" -content-hash = "e569013ff95d0a9b7d56e385914b1ce0072d7098d4330bdaa71f9c92f7dd98bd" +content-hash = "0cbc14aea958585e09f8b0ff9310b2ee816c2fc6bd8a5bf6885054cdee20fe3d" diff --git a/projects/dino_dinov2/modeling/exportable/dino_transformer.py b/projects/dino_dinov2/modeling/exportable/dino_transformer.py index ab21990..345ef2e 100644 --- a/projects/dino_dinov2/modeling/exportable/dino_transformer.py +++ b/projects/dino_dinov2/modeling/exportable/dino_transformer.py @@ -200,6 +200,7 @@ def forward( if reference_points.shape[-1] == 4: reference_points_input = ( reference_points[:, :, None] + # DYNAMO REFACTOR # small refactor to avoid: https://github.com/pytorch/pytorch/issues/129038 # * torch.cat([valid_ratios, valid_ratios], -1)[:, None] * valid_ratios.repeat(*[1] * (valid_ratios.ndim - 1), 2)[:, None] @@ -272,6 +273,7 @@ def __init__( num_feature_levels=4, two_stage_num_proposals=900, learnt_init_query=True, + specialize_with_list: bool = False, ): super(DINOTransformer, self).__init__() self.encoder = encoder @@ -289,6 +291,7 @@ def __init__( self.tgt_embed = nn.Embedding(self.two_stage_num_proposals, self.embed_dim) self.enc_output = nn.Linear(self.embed_dim, self.embed_dim) self.enc_output_norm = nn.LayerNorm(self.embed_dim) + self.specialize_with_list = specialize_with_list self.init_weights() @@ -301,7 +304,7 @@ def init_weights(self): m.init_weights() nn.init.normal_(self.level_embeds) - 
diff --git a/pyproject.toml b/pyproject.toml
index a754056..576e2f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ viztracer = "^0.17.0"
 pandas = "^2.2.3"
 jupyter-book = "^1.0.3"
 livereload = "^2.7.0"
+pyppeteer = "^2.0.0"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/scripts/benchmark_gpu.py b/scripts/benchmark_gpu.py
index c68c687..3a65ad0 100644
--- a/scripts/benchmark_gpu.py
+++ b/scripts/benchmark_gpu.py
@@ -1,74 +1,71 @@
 import torch
 import time
-from typing import Optional
 from functools import partial
 import contextlib
 from src.utils import (
     load_input_fixed,
-    plot_predictions,
     TracingAdapter,
 )
-from src.utils import load_model as _load_model
+from src.utils import load_model
 from statistics import stdev, mean
 import torch_tensorrt
 import logging
-import argparse
 from pathlib import Path
 import detrex
+import hydra
+from omegaconf import DictConfig, OmegaConf
+import importlib
 
-detrex.layers.multi_scale_deform_attn._ENABLE_CUDA_MSDA = False
+logging.basicConfig(level=logging.INFO)
 
-def setup_parser():
-    DEFAULT_IMG = Path("artifacts/idea_raw.jpg")
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", type=Path, required=True)
-    parser.add_argument("--image", type=Path, default=DEFAULT_IMG)
-    parser.add_argument("--n_warmup", type=int, default=10)
-    parser.add_argument("--n_iter", type=int, default=10)
-    parser.add_argument("--output", type=Path, default=None)
-    parser.add_argument(
-        "--amp_dtype", type=str, default=None, choices=["fp16", "bf16", None]
-    )
-    return parser
 
+@hydra.main(
+    version_base=None, config_path="config/benchmark_gpu", config_name="default"
+)
+def main(cfg: DictConfig):
+    OUTPUT_DIR = Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)
+    print(OmegaConf.to_yaml(cfg))
+
+    n_iter = cfg.n_iter  # default 10
+    n_warmup = cfg.n_warmup  # default 10
+    amp_dtype = cfg.amp_dtype  # default None
+    compile_run_path = Path(cfg.compile_run_path)
+    compile_run_cfg = OmegaConf.load(compile_run_path / ".hydra" / "config.yaml")
+    print(OmegaConf.to_yaml(compile_run_cfg))
+
+    # Setting variables
+    for var, val in compile_run_cfg.env.items():
+        logging.info(f"Setting {var} to {val}")
+        module_name, attr_name = var.rsplit(".", 1)
+        module = importlib.import_module(module_name)
+        setattr(module, attr_name, val)
+
+    height, width = compile_run_cfg.image.height, compile_run_cfg.image.width
+
+    base_model = load_model(
+        config_file=compile_run_cfg.model.config,
+        ckpt_path=compile_run_cfg.model.ckpt_path,
+        opts=compile_run_cfg.model.opts,
+    )
 
-logging.basicConfig(level=logging.INFO)
+    _, inputs = load_input_fixed(height=height, width=width, device="cuda")
+    model = TracingAdapter(
+        base_model, inputs=inputs, allow_non_tensor=False, specialize_non_tensor=True
+    )
+    inputs = model.flattened_inputs
+    print(inputs[0].shape)
 
-def load_model(model_path: Path):
-    if model_path.suffix == ".ts":
-        *_, height, width = model_path.stem.split("_")
+    if cfg.load_ts:
+        del base_model, model
+        model_path = compile_run_path / "model.ts"
         model = torch.jit.load(model_path)
-    elif model_path.suffix == ".ep":
-        *_, height, width = model_path.stem.split("_")
-        model = torch.export.load(model_path).module()
-    elif model_path.suffix == ".pth":
-        height, width = 512, 512
-        model = _load_model().cuda()
-        model = TracingAdapter(model, *load_input_fixed(height=height, width=width))
-    else:
-        raise ValueError(f"Unsupported model format: {model_path.suffix}")
-
-    return model, int(height), int(width)
-
-
-def benchmark(
-    model_path: Path,
-    image_path: Path,
-    n_warmup: int,
-    n_iter: int,
-    output_path: Optional[Path],
-    amp_dtype: Optional[str] = None,
-):
-    # track cuda memory history
+
     torch.cuda.memory._record_memory_history()
-    model, height, width = load_model(model_path)
+    model.eval()
     model.cuda()
-    logging.info("Loaded model")
-    img, example_kwargs = load_input_fixed(str(image_path), height, width)
-    input = (example_kwargs["images"].cuda(),)
 
     ctx = contextlib.nullcontext
     if amp_dtype is not None:
@@ -81,7 +78,7 @@
     with torch.no_grad(), ctx():
         logging.info("warmup")
         for _ in range(n_warmup):
-            _ = model(*input)
+            _ = model(*inputs)
 
         torch.cuda.reset_peak_memory_stats()
         logging.info("measuring time")
@@ -89,7 +86,7 @@
         for _ in range(n_iter):
            torch.cuda.synchronize()
            start_time = time.time()
-            _ = model(*input)
+            _ = model(*inputs)
            torch.cuda.synchronize()
            end_time = time.time()
            inference_time = end_time - start_time
@@ -101,24 +98,9 @@
     # get max memory usage
     max_memory = torch.cuda.memory.max_memory_allocated()
-    torch.cuda.memory._dump_snapshot(f"artifacts/{model_path.stem}_mem.pickle")
+    torch.cuda.memory._dump_snapshot(OUTPUT_DIR / "mem.pickle")
     logging.info(f"Max memory usage: {max_memory / 1e6:.4f} MB")
 
-    if output_path is not None:
-        outputs = model(*input)
-        outputs = unflatten_repr(outputs)
-        plot_predictions(outputs, img, output_file=output_path)
-
-
-def main():
-    parser = setup_parser()
-    args = parser.parse_args()
-    logging.info("Loading model")
-    model_path = args.model
-    benchmark(
-        model_path, args.image, args.n_warmup, args.n_iter, args.output, args.amp_dtype
-    )
-
 
 if __name__ == "__main__":
     main()
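The measurement loop above brackets every forward call with `torch.cuda.synchronize()` so the wall-clock interval covers kernel execution, not just kernel launch. The same pattern, distilled into a standalone helper (a sketch; the helper name and signature are ours, not the script's):

```python
# Sketch of the timing pattern above: synchronize before starting the clock
# and again after the call, so asynchronous CUDA work is actually counted.
import time
from statistics import mean, stdev

import torch

def time_cuda(fn, n_warmup: int = 10, n_iter: int = 10) -> tuple[float, float]:
    with torch.no_grad():
        for _ in range(n_warmup):  # warm up kernels, caches, autotuning
            fn()
        times = []
        for _ in range(n_iter):
            torch.cuda.synchronize()  # drain previously queued work
            start = time.time()
            fn()
            torch.cuda.synchronize()  # wait for this call's kernels
            times.append(time.time() - start)
    return mean(times), stdev(times)
```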
diff --git a/scripts/config/benchmark_gpu/default.yaml b/scripts/config/benchmark_gpu/default.yaml
new file mode 100644
index 0000000..94780d1
--- /dev/null
+++ b/scripts/config/benchmark_gpu/default.yaml
@@ -0,0 +1,5 @@
+n_iter: 100
+n_warmup: 10
+amp_dtype: null
+compile_run_path: null
+load_ts: true
\ No newline at end of file
diff --git a/scripts/config/export_tensorrt/dinov2.yaml b/scripts/config/export_tensorrt/dinov2.yaml
index 426ba33..dec8fa9 100644
--- a/scripts/config/export_tensorrt/dinov2.yaml
+++ b/scripts/config/export_tensorrt/dinov2.yaml
@@ -6,6 +6,8 @@ amp_dtype: "fp32"
 trt:
   enabled_precisions:
     - "fp32"
+    - "fp16"
+    - "bf16"
 model:
   config: "projects/dino_dinov2/configs/models/dino_dinov2.py"
   ckpt_path: "artifacts/model_final.pth"
@@ -13,3 +15,8 @@ model:
     - "model.backbone.net.img_size=[512, 512]"
     - "model.backbone.net.dynamic_img_size=False"
     - "model.backbone.net.dynamic_img_pad=False"
+    - "model.transformer.specialize_with_list=True"
+
+env:
+  "torch._subclasses.fake_tensor.CONSTANT_NUMEL_LIMIT": 2000
+  "detrex.layers.multi_scale_deform_attn._ENABLE_CUDA_MSDA": False
\ No newline at end of file
diff --git a/scripts/config/export_tensorrt/vit.yaml b/scripts/config/export_tensorrt/vit.yaml
index c0b6b05..4147d18 100644
--- a/scripts/config/export_tensorrt/vit.yaml
+++ b/scripts/config/export_tensorrt/vit.yaml
@@ -14,4 +14,11 @@ trt:
   enable_experimental_decompositions: True
   min_block_size: 1
   use_fast_partitioner: True  # doesn't make any difference in supported nodes
+  torch_executed_ops:
+    - "torch.ops.aten.sym_size.int"
 amp_dtype: "fp32"
+env:
+  "torch._subclasses.fake_tensor.CONSTANT_NUMEL_LIMIT": 2000
+  "detectron2.modeling.proposal_generator.proposal_utils.SKIP_NMS": True
+  "detectron2.modeling.roi_heads.fast_rcnn.SKIP_NMS": True
+  "detectron2.modeling.roi_heads.fast_rcnn.SKIP_FILTER_CONFIDENCE": True
\ No newline at end of file
diff --git a/scripts/export_tensorrt.py b/scripts/export_tensorrt.py
index 983a8ff..ffc1a02 100644
--- a/scripts/export_tensorrt.py
+++ b/scripts/export_tensorrt.py
@@ -8,12 +8,13 @@
 import torch_tensorrt
 from omegaconf import DictConfig, OmegaConf
+import importlib
 
 import detrex
 from src.utils import TracingAdapter, load_input_fixed, load_model, plot_predictions
 
 logging.basicConfig(level=logging.INFO)
-torch._subclasses.fake_tensor.CONSTANT_NUMEL_LIMIT = 2000
-detrex.layers.multi_scale_deform_attn._ENABLE_CUDA_MSDA = False
+# torch._subclasses.fake_tensor.CONSTANT_NUMEL_LIMIT = 2000
+# detrex.layers.multi_scale_deform_attn._ENABLE_CUDA_MSDA = False
 
 
 def to_dtype(precision: str):
@@ -86,11 +87,18 @@ def compile(
     return trt_gm
 
 
-@hydra.main(version_base=None, config_path="config/export_tensorrt", config_name="vit")
+@hydra.main(version_base=None, config_path="config/export_tensorrt", config_name="dinov2")
 def main(cfg: DictConfig):
     OUTPUT_DIR = Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)
     print(OmegaConf.to_yaml(cfg))
 
+    # Setting variables
+    for var, val in cfg.env.items():
+        logging.info(f"Setting {var} to {val}")
+        module_name, attr_name = var.rsplit(".", 1)
+        module = importlib.import_module(module_name)
+        setattr(module, attr_name, val)
+
     # check that amp_dtype is in enabled_precisions
     if cfg.amp_dtype not in cfg.trt.enabled_precisions:
         raise ValueError(
@@ -116,6 +124,7 @@
     )
     inputs = model.flattened_inputs
     model.eval().cuda()
+    # This forward call is important: it ensures the model works before compilation
     model(*inputs)
     try:
         trt_gm = compile(model, inputs, amp_dtype=cfg.amp_dtype, trt_cfg=cfg.trt)
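The `env:` blocks in the configs replace the module-level monkey-patches that used to be hard-coded at the top of `export_tensorrt.py`: each key is split on its last dot into an importable module path and an attribute name, then applied with `setattr`. Note that `rsplit(".", 1)` assumes the attribute lives directly on an importable module. A self-contained sketch of the mechanism; the dict literal is illustrative:

```python
# Sketch of the env-override loop shared by export_tensorrt.py and
# benchmark_gpu.py: "dotted.module.path.ATTR": value -> setattr on the module.
import importlib

env = {  # illustrative entry, mirroring the YAML `env:` sections
    "torch._subclasses.fake_tensor.CONSTANT_NUMEL_LIMIT": 2000,
}

for var, val in env.items():
    module_name, attr_name = var.rsplit(".", 1)    # split off the attribute
    module = importlib.import_module(module_name)  # path must be importable
    setattr(module, attr_name, val)
```

Because `benchmark_gpu.py` replays the `env` block from the compile run's saved `.hydra/config.yaml`, the benchmark runs under the same patched environment the engine was compiled with.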
diff --git a/src/utils/io.py b/src/utils/io.py
index d5dbca8..f0fbaf6 100644
--- a/src/utils/io.py
+++ b/src/utils/io.py
@@ -34,7 +34,7 @@ def load_input_fixed(
     with torch.no_grad():
         if input_format == "RGB":
             img = img[:, :, ::-1]
-        img = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
+        img = torch.as_tensor(img.astype("float32").transpose(2, 0, 1)).contiguous()
         return original_img, (
             [
                 {
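The trailing `.contiguous()` matters because of the two view operations before it: the channel flip `img[:, :, ::-1]` creates a negative-stride view (which the `astype` copy resolves), and `transpose(2, 0, 1)` leaves a positive-stride but non-contiguous CHW view that `torch.as_tensor` wraps without copying. The explicit copy presumably gives the tracing/TensorRT path the densely packed tensor it expects. A small sketch of the stride behavior; the shape is illustrative:

```python
# Sketch: why the HWC -> CHW transpose needs an explicit .contiguous().
import numpy as np
import torch

hwc = np.zeros((512, 512, 3), dtype=np.float32)  # H, W, C image buffer
chw = torch.as_tensor(hwc.transpose(2, 0, 1))    # zero-copy strided view

print(chw.is_contiguous())               # False: strides still follow HWC memory
print(chw.contiguous().is_contiguous())  # True: densely packed CHW copy
```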