Skip to content

Commit

Permalink
Python Interface with Nanobind (#99)
Browse files Browse the repository at this point in the history
Summary cherry-picking the squashed messages:

* Nanobind extension

* Basic test and example.

* CI workflow.

* Update extension.
  • Loading branch information
iglesias committed Jun 17, 2024
1 parent 11df3cc commit 04b33b5
Show file tree
Hide file tree
Showing 10 changed files with 295 additions and 97 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/nanobind.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Builds the nanobind Python extension and runs its smoke test.
name: Nanobind

on:
  # TODO probably change/add main later. Perhaps this workflow can be part of linux.yml?
  push:
    branches: [ "nanobind" ]
  # pull_request:
  #   branches: [ ]

jobs:
  build:
    name: "Build Python interface"
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4

    - name: Python setup
      uses: actions/setup-python@v5
      with:
        python-version: '3.11.9'

    - name: Install dependencies
      run: |
        # Refresh the package index first: the one baked into the runner image
        # can be stale, which makes `apt-get install` fail on missing packages.
        sudo apt-get update
        sudo apt-get install -y libarpack2-dev libcxxopts-dev libeigen3-dev libfmt-dev python3.11-dev
        python -m pip install nanobind numpy

    - name: Configure
      run: cmake -B ${{github.workspace}}/build -DBUILD_NANOBIND=ON

    - name: Build
      # TODO there are A LOT of warnings in the logs from nanobind (-pedantic, -Wshadow).
      run: cmake --build ${{github.workspace}}/build

    - name: Test
      working-directory: ${{github.workspace}}
      # PYTHONPATH=. lets the test import the extension relative to the repository root.
      run: PYTHONPATH=. python test/test_nanobind_extension.py
71 changes: 69 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,12 @@ if (MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
endif()

if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
# Default to an optimized (Release) build when no build type was requested and
# CMAKE_CONFIGURATION_TYPES is unset (i.e. not a multi-config generator).
# This is part of nanobind's recommended build-system setup, meant to avoid
# slow binding code and large binaries:
# https://nanobind.readthedocs.io/en/latest/building.html#preliminaries
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
# FORCE is only reached when CMAKE_BUILD_TYPE is empty, so an explicit user choice is never overridden.
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
# Advertise the allowed values for the cache entry.
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

if (CMAKE_BUILD_TYPE MATCHES Debug)
Expand Down Expand Up @@ -164,6 +168,69 @@ if (BUILD_TESTS)
endforeach()
endif()

option(BUILD_NANOBIND "Build nanobind Python extension" OFF)

# https://nanobind.readthedocs.io/en/latest/building.html
# TODO for faster nanobind build, I tried removing the
# add_executable tapkee and that quickly didn't work. It would
# be nice if it could be done without requiring a new option
# BUILD_CLI, or so, maybe if there's a CMake command to
# configure not building tapkee inside the if BUILD_NANOBIND.
if (BUILD_NANOBIND)
    message(STATUS "Detecting and configuring nanobind")
    find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)

    # Detect the installed nanobind package and import it into CMake.
    # stdout and stderr are captured in separate variables: naming the same
    # variable for both merges the streams, so any interpreter warning would
    # end up inside the path. The exit status is checked so that a missing
    # nanobind is reported here instead of as an obscure find_package() error.
    execute_process(
        COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir
        RESULT_VARIABLE NB_DIR_RESULT
        OUTPUT_VARIABLE NB_DIR
        ERROR_VARIABLE NB_DIR_ERROR
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_STRIP_TRAILING_WHITESPACE)
    if (NOT "${NB_DIR_RESULT}" STREQUAL "0")
        message(FATAL_ERROR
            "Could not query nanobind's CMake directory with "
            "'${Python_EXECUTABLE} -m nanobind --cmake_dir' "
            "(result: ${NB_DIR_RESULT}): ${NB_DIR_ERROR}")
    endif()
    list(APPEND CMAKE_PREFIX_PATH "${NB_DIR}")
    find_package(nanobind CONFIG REQUIRED)

    # Build extension (the library that'll be imported from the Python interpreter)
    nanobind_add_module(pytapkee src/python/nanobind_extension.cpp) # TODO fix paths.
    # Scoped to the extension only, so the extra include path does not leak into
    # every other target of this directory.
    # About the TODO in nanobind_extension.cpp including src/utils.hpp.
    target_include_directories(pytapkee PRIVATE "${TAPKEE_SRC_DIR}")
    target_link_libraries(pytapkee PRIVATE "${EIGEN3_LIBRARY_TO_LINK}")
    target_link_libraries(pytapkee PRIVATE arpack) # TODO ARPACK guard; TODO without ARPACK(?)
    target_link_libraries(pytapkee PRIVATE "${FMT_LIBRARY_TO_LINK}")

    # Rename so that it can be imported as tapkee instead of pytapkee.
    # TODO can this go into a separate CMake file?
    # NOTE(review): setting the OUTPUT_NAME property of pytapkee to "tapkee"
    # might make this rename script unnecessary -- to be confirmed.
    #
    # Create the custom script to rename the file. CMAKE_LIBRARY_OUTPUT_DIRECTORY
    # is expanded now (configure time); the escaped \${...} references are
    # evaluated later, when the script runs after the build.
    file(WRITE "${CMAKE_BINARY_DIR}/rename_pytapkee.cmake"
"# Find the file that starts with 'pytapkee' and ends with '.so' in the lib directory
file(GLOB PYLIBRARY_FILE \"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/pytapkee*.so\")
# Ensure exactly one file is found
list(LENGTH PYLIBRARY_FILE FILE_COUNT)
if(NOT FILE_COUNT EQUAL 1)
message(FATAL_ERROR \"Expected exactly one file starting with 'pytapkee' in lib, but found \${FILE_COUNT}\")
endif()
# Get the first (and only) matched file
list(GET PYLIBRARY_FILE 0 SOURCE_FILE)
# Extract the filename from the full path
get_filename_component(FILENAME \${SOURCE_FILE} NAME)
string(REPLACE \"pytapkee\" \"tapkee\" DEST_FILENAME \${FILENAME})
# Construct the full destination path
set(DEST_FILE \"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/\${DEST_FILENAME}\")
file(RENAME \${SOURCE_FILE} \${DEST_FILE})
if(EXISTS \${DEST_FILE} AND NOT EXISTS \${SOURCE_FILE})
message(STATUS \"File renamed from \${SOURCE_FILE} to \${DEST_FILE}\")
else()
message(FATAL_ERROR \"Error renaming file from \${SOURCE_FILE} to \${DEST_FILE}\")
endif()")

    # Run the rename script every time the extension is (re)linked.
    add_custom_command(TARGET pytapkee POST_BUILD
        COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_BINARY_DIR}/rename_pytapkee.cmake"
        COMMENT "Renaming nanobind's extension pytapkee*.so to tapkee*.so"
        VERBATIM)
    message(STATUS "Detecting and configuring nanobind - done")
endif()

export(
TARGETS tapkee_library
NAMESPACE tapkee::
Expand Down
106 changes: 13 additions & 93 deletions examples/go.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,25 @@
import tempfile

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def generate_data(type, N=1000, random_state=None):
    """Generate a synthetic 3D dataset together with one value per point.

    Parameters
    ----------
    type : str
        One of 'swissroll', 'scurve', 'helix', 'twinpeaks' or 'klein'.
    N : int
        Number of points to generate.
    random_state : optional
        Seed forwarded to np.random.RandomState.

    Returns
    -------
    (points, labels) where points has shape (3, N) and labels has shape (N,).

    Raises
    ------
    Exception
        If `type` is not one of the supported dataset names.
    """
    rng = np.random.RandomState(random_state)

    if type == 'swissroll':
        angle = (3*np.pi/2)*(1+2*rng.rand(N))
        height = rng.rand(N)-0.5
        points = np.vstack((angle*np.cos(angle), 10*height, angle*np.sin(angle)))
        labels = angle
    elif type == 'scurve':
        angle = 3*np.pi*(rng.rand(N)-0.5)
        height = rng.rand(N)-0.5
        points = np.vstack((np.sin(angle), 10*height, np.sign(angle)*(np.cos(angle)-1)))
        labels = angle
    elif type == 'helix':
        # Deterministic: evenly spaced angles in (0, 2*pi], no random draws.
        angle = np.linspace(1, N, N).T / N * 2 * np.pi
        radius = 2+np.cos(8*angle)
        points = np.vstack((radius*np.cos(angle), radius*np.sin(angle), np.sin(8*angle)))
        labels = angle
    elif type == 'twinpeaks':
        plane = rng.uniform(-1, 1, size=(N, 2))
        labels = np.sin(np.pi * plane[:, 0]) * np.tanh(plane[:, 1])
        labels = labels + 0.1 * rng.normal(size=labels.shape)
        # The noisy surface value is both the third coordinate and the label.
        points = np.vstack([plane.T, labels])
    elif type == 'klein':
        u = rng.uniform(0, 2 * np.pi, N)
        v = rng.uniform(0, 2 * np.pi, N)
        radial = 2 + np.cos(u / 2) * np.sin(v) - np.sin(u / 2) * np.sin(2 * v)
        coordinates = [
            radial * np.cos(u),
            radial * np.sin(u),
            np.sin(u / 2) * np.sin(v) + np.cos(u / 2) * np.sin(2 * v),
        ]
        noise = 0.01
        # Noise is drawn for x, then y, then z, in that order.
        coordinates = [c + noise * rng.normal(size=c.shape) for c in coordinates]
        points = np.vstack(coordinates)
        labels = u
    else:
        raise Exception('Dataset is not supported')

    return points, labels

from utils import generate_data, plot

# Short method identifiers mapped to their human-readable names.
# NOTE(review): presumably the keys are the values accepted by the tapkee
# command line's -m option -- verify against the CLI.
supported_methods = {
'lle': 'Locally Linear Embedding',
'ltsa': 'Local Tangent Space Alignment',
'isomap': 'Isomap',
'mds': 'Multidimensional Scaling',
'pca': 'Principal Component Analysis',
'kpca': 'Kernel Principal Component Analysis',
't-sne': 't-distributed Stochastic Neighborhood Embedding',
'dm': 'Diffusion Map',
}

def embed(data,method):
input_file = tempfile.NamedTemporaryFile(prefix='tapkee_input')
output_file = tempfile.NamedTemporaryFile(prefix='tapkee_output')
np.savetxt(input_file.name, data.T,delimiter=',')
tapkee_binary = 'bin/tapkee'

runner_string = '%s -i %s -o %s -m %s -k 20 --precompute --debug --verbose --transpose-output --benchmark' % (
tapkee_binary, input_file.name, output_file.name, method
)
Expand All @@ -71,59 +41,9 @@ def embed(data,method):
else:
used_method = ''


embedded_data = np.loadtxt(output_file, delimiter=',')
return embedded_data, used_method

def plot(data, embedded_data, colors='m', method=None):
    """Show the original 3D data next to its 2D embedding.

    Hovering over a point in either scatter plot highlights the point with
    the same index in both plots. The figure is displayed with plt.show().

    Parameters
    ----------
    data : indexable
        data[0], data[1] and data[2] hold the coordinates of the original points.
    embedded_data : indexable
        embedded_data[0] and embedded_data[1] hold the embedded coordinates.
    colors : optional
        Passed as `c` to both scatter plots.
    method : str, optional
        Name of the method, appended to the title of the embedding plot.
    """
    fig = plt.figure()
    fig.set_facecolor('white')

    ax_original = fig.add_subplot(121, projection='3d')
    scatter_original = ax_original.scatter(data[0], data[1], data[2], c=colors, cmap=plt.cm.Spectral, s=5, picker=True)
    plt.axis('tight')
    plt.axis('off')
    plt.title('Original', fontsize=9)

    ax_embedding = fig.add_subplot(122)
    scatter_embedding = ax_embedding.scatter(embedded_data[0], embedded_data[1], c=colors, cmap=plt.cm.Spectral, s=5, picker=True)
    plt.axis('tight')
    plt.axis('off')
    # The conditional must be parenthesized on its own: without that it binds
    # looser than '+', and the whole title becomes '' when no method is given.
    title = 'Embedding' + (' with ' + method if method else '')
    plt.title(title, fontsize=9, wrap=True)

    highlighted_points = []  # Markers currently drawn as highlights

    def highlight(index):
        """Highlight the point at `index` in both plots, replacing any previous highlight."""
        for point in highlighted_points:
            point.remove()
        highlighted_points.clear()

        point1 = ax_original.scatter([data[0][index]], [data[1][index]], [data[2][index]], color='white', s=25, edgecolor='black', zorder=3)
        point2 = ax_embedding.scatter([embedded_data[0][index]], [embedded_data[1][index]], color='white', s=25, edgecolor='black', zorder=3)
        highlighted_points.append(point1)
        highlighted_points.append(point2)
        fig.canvas.draw_idle()

    def on_hover(event):
        """Mouse-motion handler: highlight the first point under the cursor, if any."""
        if event.inaxes == ax_original:
            cont, ind = scatter_original.contains(event)
        elif event.inaxes == ax_embedding:
            cont, ind = scatter_embedding.contains(event)
        else:
            return

        if cont:
            index = ind['ind'][0]
            highlight(index)

    fig.canvas.mpl_connect('motion_notify_event', on_hover)

    plt.show()

if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description='Graphical example of dimension reduction with Tapkee.')
Expand Down
10 changes: 10 additions & 0 deletions examples/nanobind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import lib.tapkee as tapkee
from utils import generate_data, plot

def main():
    """Embed a swiss roll with locally linear embedding through the tapkee extension and plot it."""
    settings = tapkee.ParametersSet()
    lle = tapkee.parse_reduction_method('lle')
    settings.add(tapkee.Parameter.create('dimension reduction method', lle))

    points, point_colors = generate_data('swissroll')
    result = tapkee.withParameters(settings).embedUsing(points)
    # plot() indexes coordinates first, hence the transpose of the embedding.
    plot(points, result.embedding.T, point_colors)


if __name__=='__main__':
    main()
91 changes: 91 additions & 0 deletions examples/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import matplotlib.pyplot as plt
import numpy as np

def generate_data(type, N=1000, random_state=None):
    """Generate a synthetic 3D dataset together with one value per point.

    Parameters
    ----------
    type : str
        One of 'swissroll', 'scurve', 'helix', 'twinpeaks' or 'klein'.
    N : int
        Number of points to generate.
    random_state : optional
        Seed forwarded to np.random.RandomState.

    Returns
    -------
    (points, labels) where points has shape (3, N) and labels has shape (N,).

    Raises
    ------
    Exception
        If `type` is not one of the supported dataset names.
    """
    rng = np.random.RandomState(random_state)

    if type == 'swissroll':
        angle = (3*np.pi/2)*(1+2*rng.rand(N))
        height = rng.rand(N)-0.5
        points = np.vstack((angle*np.cos(angle), 10*height, angle*np.sin(angle)))
        labels = angle
    elif type == 'scurve':
        angle = 3*np.pi*(rng.rand(N)-0.5)
        height = rng.rand(N)-0.5
        points = np.vstack((np.sin(angle), 10*height, np.sign(angle)*(np.cos(angle)-1)))
        labels = angle
    elif type == 'helix':
        # Deterministic: evenly spaced angles in (0, 2*pi], no random draws.
        angle = np.linspace(1, N, N).T / N * 2 * np.pi
        radius = 2+np.cos(8*angle)
        points = np.vstack((radius*np.cos(angle), radius*np.sin(angle), np.sin(8*angle)))
        labels = angle
    elif type == 'twinpeaks':
        plane = rng.uniform(-1, 1, size=(N, 2))
        labels = np.sin(np.pi * plane[:, 0]) * np.tanh(plane[:, 1])
        labels = labels + 0.1 * rng.normal(size=labels.shape)
        # The noisy surface value is both the third coordinate and the label.
        points = np.vstack([plane.T, labels])
    elif type == 'klein':
        u = rng.uniform(0, 2 * np.pi, N)
        v = rng.uniform(0, 2 * np.pi, N)
        radial = 2 + np.cos(u / 2) * np.sin(v) - np.sin(u / 2) * np.sin(2 * v)
        coordinates = [
            radial * np.cos(u),
            radial * np.sin(u),
            np.sin(u / 2) * np.sin(v) + np.cos(u / 2) * np.sin(2 * v),
        ]
        noise = 0.01
        # Noise is drawn for x, then y, then z, in that order.
        coordinates = [c + noise * rng.normal(size=c.shape) for c in coordinates]
        points = np.vstack(coordinates)
        labels = u
    else:
        raise Exception('Dataset is not supported')

    return points, labels

def plot(data, embedded_data, colors='m', method=None):
    """Show the original 3D data next to its 2D embedding.

    Hovering over a point in either scatter plot highlights the point with
    the same index in both plots. The figure is displayed with plt.show().

    Parameters
    ----------
    data : indexable
        data[0], data[1] and data[2] hold the coordinates of the original points.
    embedded_data : indexable
        embedded_data[0] and embedded_data[1] hold the embedded coordinates.
    colors : optional
        Passed as `c` to both scatter plots.
    method : str, optional
        Name of the method, appended to the title of the embedding plot.
    """
    fig = plt.figure()
    fig.set_facecolor('white')

    ax_original = fig.add_subplot(121, projection='3d')
    scatter_original = ax_original.scatter(data[0], data[1], data[2], c=colors, cmap=plt.cm.Spectral, s=5, picker=True)
    plt.axis('tight')
    plt.axis('off')
    plt.title('Original', fontsize=9)

    ax_embedding = fig.add_subplot(122)
    scatter_embedding = ax_embedding.scatter(embedded_data[0], embedded_data[1], c=colors, cmap=plt.cm.Spectral, s=5, picker=True)
    plt.axis('tight')
    plt.axis('off')
    # The conditional must be parenthesized on its own: without that it binds
    # looser than '+', and the whole title becomes '' when no method is given.
    title = 'Embedding' + (' with ' + method if method else '')
    plt.title(title, fontsize=9, wrap=True)

    highlighted_points = []  # Markers currently drawn as highlights

    def highlight(index):
        """Highlight the point at `index` in both plots, replacing any previous highlight."""
        for point in highlighted_points:
            point.remove()
        highlighted_points.clear()

        point1 = ax_original.scatter([data[0][index]], [data[1][index]], [data[2][index]], color='white', s=25, edgecolor='black', zorder=3)
        point2 = ax_embedding.scatter([embedded_data[0][index]], [embedded_data[1][index]], color='white', s=25, edgecolor='black', zorder=3)
        highlighted_points.append(point1)
        highlighted_points.append(point2)
        fig.canvas.draw_idle()

    def on_hover(event):
        """Mouse-motion handler: highlight the first point under the cursor, if any."""
        if event.inaxes == ax_original:
            cont, ind = scatter_original.contains(event)
        elif event.inaxes == ax_embedding:
            cont, ind = scatter_embedding.contains(event)
        else:
            return

        if cont:
            index = ind['ind'][0]
            highlight(index)

    fig.canvas.mpl_connect('motion_notify_event', on_hover)

    plt.show()
1 change: 1 addition & 0 deletions include/stichwort/parameter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <stichwort/value_keeper.hpp>

#include <functional>
#include <iostream>
#include <list>
#include <map>
Expand Down
4 changes: 2 additions & 2 deletions include/tapkee/chain_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,8 @@ class ParametersInitializedState
/** Constructs the state from a set of parameters.
 *
 * @param that parameters used to initialize the `parameters` member
 */
ParametersInitializedState(const ParametersSet& that) : parameters(that)
{
}
ParametersInitializedState(const ParametersInitializedState&);
ParametersInitializedState& operator=(const ParametersInitializedState&);
/** Copy constructor and copy assignment, both explicitly defaulted
 * (compiler-generated member-wise copy).
 */
ParametersInitializedState(const ParametersInitializedState&) = default;
ParametersInitializedState& operator=(const ParametersInitializedState&) = default;

/** Sets kernel callback.
*
Expand Down
5 changes: 5 additions & 0 deletions src/cli/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ typename Mapping::mapped_type parse_multiple(Mapping mapping, const std::string&
throw std::logic_error(str);
}

/** Looks up a dimension reduction method by its textual name.
 *
 * Thin wrapper that forwards to parse_multiple() with the
 * DIMENSION_REDUCTION_METHODS mapping.
 *
 * Marked inline because this is a non-template function defined in a header:
 * without it, including the header from more than one translation unit of the
 * same binary produces multiple definitions.
 *
 * @param str name of the method to look up
 * @return the result of parse_multiple() for the given name
 * @note parse_multiple() may throw std::logic_error.
 */
inline auto parse_reduction_method(const std::string& str)
{
    return parse_multiple(DIMENSION_REDUCTION_METHODS, str);
}

template <class PairwiseCallback>
tapkee::DenseMatrix matrix_from_callback(const tapkee::IndexType N, PairwiseCallback callback)
{
Expand Down
Loading

0 comments on commit 04b33b5

Please sign in to comment.