From 9aeae20e953b8c0471c5e0df692c8a69bb82c33c Mon Sep 17 00:00:00 2001 From: sdrobert Date: Mon, 30 Oct 2023 19:54:53 -0400 Subject: [PATCH 1/2] Actually use Cmake library --- pyproject.toml | 2 +- python/BuildStandalone.cmake | 13 ++++--- setup.py | 75 +++++++++--------------------------- 3 files changed, 27 insertions(+), 63 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cb4fab07..e97d000b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools", "wheel", "cmake"] +requires = ["setuptools", "wheel", "cmake>=3.17"] diff --git a/python/BuildStandalone.cmake b/python/BuildStandalone.cmake index 47924a60..f81c2dd2 100644 --- a/python/BuildStandalone.cmake +++ b/python/BuildStandalone.cmake @@ -1,4 +1,6 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.17) +project(kenlm-standalone) +find_package(Python ${PYTHON_VERSION_STRING} REQUIRED COMPONENTS Development) file(GLOB KENLM_PYTHON_STANDALONE_SRCS @@ -11,11 +13,12 @@ file(GLOB list(FILTER KENLM_PYTHON_STANDALONE_SRCS EXCLUDE REGEX ".*main.cc") list(FILTER KENLM_PYTHON_STANDALONE_SRCS EXCLUDE REGEX ".*test.cc") -add_library( + +Python_add_library( kenlm - SHARED - ${KENLM_PYTHON_STANDALONE_SRCS} - ) + MODULE WITH_SOABI + ${KENLM_PYTHON_STANDALONE_SRCS} python/kenlm.cpp +) target_include_directories(kenlm PRIVATE ${PROJECT_SOURCE_DIR}) target_compile_definitions(kenlm PRIVATE KENLM_MAX_ORDER=${KENLM_MAX_ORDER}) diff --git a/setup.py b/setup.py index 7fb9db1f..55279f5a 100644 --- a/setup.py +++ b/setup.py @@ -8,11 +8,7 @@ import re from pathlib import Path -#Does gcc compile with this header and library? -def compile_test(header, library): - dummy_path = os.path.join(os.path.dirname(__file__), "dummy") - command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\"" - return os.system(command) == 0 +VERSION = "0.2.0" # Use an environment variable max_order = os.getenv("MAX_ORDER", "6") @@ -20,43 +16,11 @@ def compile_test(header, library): # Try to get from --config-settings, if present is_max_order = [s for s in sys.argv if "--max_order" in s] for element in is_max_order: - max_order = re.split('[= ]',element)[1] + max_order = re.split("[= ]", element)[1] sys.argv.remove(element) print(f"Will build with KenLM max_order set to {max_order}") -FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc') + glob.glob('python/*.cc') -FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc'))] - -#We don't need -std=c++11 but python seems to be compiled with it now. https://github.com/kpu/kenlm/issues/86 -ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER='+max_order, '-std=c++11'] -INCLUDE_PATHS = [] - -if platform.system() == 'Linux': - LIBS = ['stdc++', 'rt'] - ARGS.append('-DHAVE_CLOCKGETTIME') -elif platform.system() == 'Darwin': - LIBS = ['c++'] -else: - LIBS = [] - -#Attempted fix to https://github.com/kpu/kenlm/issues/186 and https://github.com/kpu/kenlm/issues/197 -if platform.system() == 'Darwin': - ARGS += ["-stdlib=libc++", "-mmacosx-version-min=10.7"] - INCLUDE_PATHS.append("/usr/local/include") - -if compile_test('zlib.h', 'z'): - ARGS.append('-DHAVE_ZLIB') - LIBS.append('z') - -if compile_test('bzlib.h', 'bz2'): - ARGS.append('-DHAVE_BZLIB') - LIBS.append('bz2') - -if compile_test('lzma.h', 'lzma'): - ARGS.append('-DHAVE_XZLIB') - LIBS.append('lzma') - class build_ext(_build_ext): def run(self): @@ -68,14 +32,16 @@ def run(self): + ", ".join(e.name for e in self.extensions) ) - ext_dir = str(Path(self.get_ext_fullpath('libkenlm')).absolute().parent) + ext_dir = str(Path(self.get_ext_fullpath("kenlm")).absolute().parent) source_dir = str(Path(__file__).absolute().parent) cmake_args = [ - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + ext_dir, + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={ext_dir}", "-DBUILD_SHARED_LIBS=ON", "-DBUILD_PYTHON_STANDALONE=ON", f"-DKENLM_MAX_ORDER={max_order}", + f"-DPYTHON_EXECUTABLE={sys.executable}", + f"-DCMAKE_PROJECT_VERSION={VERSION}", ] cfg = "Debug" if self.debug else "Release" build_args = ["--config", cfg] @@ -83,16 +49,14 @@ def run(self): if platform.system() == "Windows": cmake_args += [ "-DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=ON", - "-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir), - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir), - "-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir), + f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{cfg.upper()}={ext_dir}", + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={ext_dir}", + f"-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{cfg.upper()}={ext_dir}", ] if sys.maxsize > 2**32: cmake_args += ["-A", "x64"] - build_args += ["--", "/m"] else: cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] - build_args += ["--", "-j4"] env = os.environ.copy() env["CXXFLAGS"] = '{} -fPIC -DVERSION_INFO=\\"{}\\"'.format( @@ -105,25 +69,22 @@ def run(self): ["cmake", source_dir] + cmake_args, cwd=self.build_temp, env=env ) subprocess.check_call( - ["cmake", "--build", "."] + build_args, cwd=self.build_temp + ["cmake", "--build", ".", "-j", "4"] + build_args, cwd=self.build_temp ) - return _build_ext.run(self) - ext_modules = [ - Extension(name='kenlm', - sources=FILES + ['python/kenlm.cpp'], - language='C++', - include_dirs=['.'] + INCLUDE_PATHS, - depends = ['python/BuildStandalone.cmake'], - libraries=LIBS, - extra_compile_args=ARGS), + Extension( + name="kenlm", + language="C++", + sources=[], + depends=["python/BuildStandalone.cmake"], + ), ] setup( - name='kenlm', - version='0.2.0', + name="kenlm", + version=VERSION, ext_modules=ext_modules, cmdclass={"build_ext": build_ext}, include_package_data=True, From c04bdd45c8e586ec6d33cfc276ba2bec3aa18aec Mon Sep 17 00:00:00 2001 From: Sean Robertson Date: Mon, 30 Oct 2023 20:37:33 -0400 Subject: [PATCH 2/2] msvc fixes --- python/BuildStandalone.cmake | 1 + setup.py | 5 ++--- util/read_compressed.cc | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/BuildStandalone.cmake b/python/BuildStandalone.cmake index f81c2dd2..baf5393b 100644 --- a/python/BuildStandalone.cmake +++ b/python/BuildStandalone.cmake @@ -20,6 +20,7 @@ Python_add_library( ${KENLM_PYTHON_STANDALONE_SRCS} python/kenlm.cpp ) +set_property(TARGET kenlm PROPERTY CXX_STANDARD 11) target_include_directories(kenlm PRIVATE ${PROJECT_SOURCE_DIR}) target_compile_definitions(kenlm PRIVATE KENLM_MAX_ORDER=${KENLM_MAX_ORDER}) diff --git a/setup.py b/setup.py index 55279f5a..c93b66b4 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ from setuptools import setup, Extension from setuptools.command.build_ext import build_ext as _build_ext -import glob import platform import subprocess import os @@ -40,7 +39,6 @@ def run(self): "-DBUILD_SHARED_LIBS=ON", "-DBUILD_PYTHON_STANDALONE=ON", f"-DKENLM_MAX_ORDER={max_order}", - f"-DPYTHON_EXECUTABLE={sys.executable}", f"-DCMAKE_PROJECT_VERSION={VERSION}", ] cfg = "Debug" if self.debug else "Release" @@ -55,8 +53,9 @@ def run(self): ] if sys.maxsize > 2**32: cmake_args += ["-A", "x64"] + # build_args += ["--", "/m"] else: - cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] + cmake_args.append(f"-DCMAKE_BUILD_TYPE={cfg}") env = os.environ.copy() env["CXXFLAGS"] = '{} -fPIC -DVERSION_INFO=\\"{}\\"'.format( diff --git a/util/read_compressed.cc b/util/read_compressed.cc index c70f91a5..26ab4f96 100644 --- a/util/read_compressed.cc +++ b/util/read_compressed.cc @@ -168,11 +168,11 @@ class GZip { void SetOutput(void *to, std::size_t amount) { stream_.next_out = static_cast(to); - stream_.avail_out = std::min(std::numeric_limits::max(), amount); + stream_.avail_out = std::min((std::numeric_limits::max)(), amount); } void SetInput(const void *base, std::size_t amount) { - assert(amount < static_cast(std::numeric_limits::max())); + assert(amount < static_cast((std::numeric_limits::max)())); stream_.next_in = const_cast(static_cast(base)); stream_.avail_in = amount; } @@ -225,7 +225,7 @@ class BZip { void SetOutput(void *base, std::size_t amount) { stream_.next_out = static_cast(base); - stream_.avail_out = std::min(std::numeric_limits::max(), amount); + stream_.avail_out = std::min((std::numeric_limits::max)(), amount); } void SetInput(const void *base, std::size_t amount) {