Skip to content

Commit

Permalink
Parametrize strings of all command line keywords (#104)
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn authored May 24, 2024
1 parent d60b9dc commit 1475660
Showing 1 changed file with 72 additions and 29 deletions.
101 changes: 72 additions & 29 deletions src/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ template<typename T> auto with_default(T defs)
return cxxopts::value<T>()->default_value(std::to_string(defs));
}

// Input file option: short name, long name, and help text.
// run() registers it via either(INPUT_FILE_KEYWORD_SHORT, INPUT_FILE_KEYWORD).
static const char* INPUT_FILE_KEYWORD_SHORT = "i";
static const char* INPUT_FILE_KEYWORD = "input-file";
static const char* INPUT_FILE_DESCRIPTION = "Input filename to be used. Can be any file that can be opened for reading by the program. Expects delimiter-separated matrix of real values. See transposing options for more details on rows and columns.";

Expand All @@ -54,18 +55,23 @@ static const char* TRANSPOSE_INPUT_DESCRIPTION = "Whether input file should be c
// Transpose-output flag: long name and help text (no short name is declared here).
static const char* TRANSPOSE_OUTPUT_KEYWORD = "transpose-output";
static const char* TRANSPOSE_OUTPUT_DESCRIPTION = "Whether output file should be transposed. By default a line would be a row of embedding matrix (a single embedding vector)";

// Output file option: short name, long name, and help text.
static const char* OUTPUT_FILE_KEYWORD_SHORT = "o";
static const char* OUTPUT_FILE_KEYWORD = "output-file";
static const char* OUTPUT_FILE_DESCRIPTION = "Output filename to be used. Can be any file that can be opened for writing by the program";

// Projection matrix output file option.
// NOTE(review): the "short" name is multi-character ("opmat") — confirm that
// either() and the option parser accept a multi-character short form.
static const char* OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD_SHORT = "opmat";
static const char* OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD = "output-projection-matrix-file";
static const char* OUTPUT_PROJECTION_MATRIX_FILE_DESCRIPTION = "Filename to store the projection matrix calculated by the selected algorithm. Usually supported by linear algorithms such as PCA.";

// Projection mean output file option (same multi-character short-name caveat: "opmean").
static const char* OUTPUT_PROJECTION_MEAN_FILE_KEYWORD_SHORT = "opmean";
static const char* OUTPUT_PROJECTION_MEAN_FILE_KEYWORD = "output-projection-mean-file";
static const char* OUTPUT_PROJECTION_MEAN_FILE_DESCRIPTION = "Filename to store the mean vector calculated by the selected algorithm. Usually supported by linear algorithms such as PCA";

// Delimiter option: short name, long name, and help text.
static const char* DELIMITER_KEYWORD_SHORT = "d";
static const char* DELIMITER_KEYWORD = "delimiter";
static const char* DELIMITER_DESCRIPTION = "Delimiter to be used in reading and writing matrices";

// Help option: short name, long name, and help text.
static const char* HELP_KEYWORD_SHORT = "h";
static const char* HELP_KEYWORD = "help";
static const char* HELP_DESCRIPTION = "Print usage of the program";

Expand All @@ -78,37 +84,74 @@ static const char* VERBOSE_DESCRIPTION = "Be more verbose in logging";
// Debug flag: long name and help text.
static const char* DEBUG_KEYWORD = "debug";
static const char* DEBUG_DESCRIPTION = "Output debugging information such as intermediary steps, parameters, and other internals";

// The four selector options below share one pattern: a short name, a long
// name, and a std::string help text formed by appending the output of
// comma_separated_keys() over the matching registry to a fixed prefix.
// The descriptions are std::string (not const char*) because they are
// concatenated at initialization rather than being plain literals.
// NOTE(review): each description reads a registry object while being
// initialized — confirm the registries are defined earlier in this
// translation unit so they are already constructed at that point.

// Dimension reduction method selector.
static const char* METHOD_KEYWORD_SHORT = "m";
static const char* METHOD_KEYWORD = "method";
static const std::string METHOD_DESCRIPTION = "Dimension reduction method. One of the following: " +
comma_separated_keys(DIMENSION_REDUCTION_METHODS.begin(), DIMENSION_REDUCTION_METHODS.end());

// Neighbors search method selector.
static const char* NEIGHBORS_METHOD_KEYWORD_SHORT = "nm";
static const char* NEIGHBORS_METHOD_KEYWORD = "neighbors-method";
static const std::string NEIGHBORS_METHOD_DESCRIPTION = "Neighbors search method. One of the following: " +
comma_separated_keys(NEIGHBORS_METHODS.begin(), NEIGHBORS_METHODS.end());

// Eigendecomposition method selector.
static const char* EIGEN_METHOD_KEYWORD_SHORT = "em";
static const char* EIGEN_METHOD_KEYWORD = "eigen-method";
static const std::string EIGEN_METHOD_DESCRIPTION = "Eigendecomposition method. One of the following: " +
comma_separated_keys(EIGEN_METHODS.begin(), EIGEN_METHODS.end());

// Computation strategy selector.
static const char* COMPUTATION_STRATEGY_KEYWORD_SHORT = "cs";
static const char* COMPUTATION_STRATEGY_KEYWORD = "computation-strategy";
static const std::string COMPUTATION_STRATEGY_DESCRIPTION = "Computation strategy. One of the following: " +
comma_separated_keys(COMPUTATION_STRATEGIES.begin(), COMPUTATION_STRATEGIES.end());

// Target dimension option: short name, long name, and help text.
static const char* TARGET_DIMENSION_KEYWORD_SHORT = "td";
static const char* TARGET_DIMENSION_KEYWORD = "target-dimension";
static const char* TARGET_DIMENSION_DESCRIPTION = "Target dimension";

// Number-of-neighbors option: short name, long name, and help text.
static const char* NUM_NEIGHBORS_KEYWORD_SHORT = "k";
static const char* NUM_NEIGHBORS_KEYWORD = "num-neighbors";
static const char* NUM_NEIGHBORS_DESCRIPTION = "Number of neighbors";

// Gaussian kernel width option: short name, long name, and help text.
static const char* GAUSSIAN_WIDTH_KEYWORD_SHORT = "gw";
static const char* GAUSSIAN_WIDTH_KEYWORD = "gaussian-width";
static const char* GAUSSIAN_WIDTH_DESCRIPTION = "Width of gaussian kernel";

// Diffusion map timesteps option: long name only (no short name is declared).
static const char* TIMESTEPS_KEYWORD = "timesteps";
static const char* TIMESTEPS_DESCRIPTION = "Number of timesteps for diffusion map";

// SPE local-strategy flag: long name and help text (no short name is declared).
static const char* SPE_LOCAL_KEYWORD = "spe-local";
static const char* SPE_LOCAL_DESCRIPTION = "Local strategy in SPE (default is global)";

// Eigenshift option: long name and help text. Defined exactly once; a second
// definition of EIGENSHIFT_KEYWORD in this scope would be a redefinition error.
static const char* EIGENSHIFT_KEYWORD = "eigenshift";
static const char* EIGENSHIFT_DESCRIPTION = "Regularization diagonal shift for weight matrix";

// Algorithm-specific options. Each pair is a long option name plus its help
// text; none of these declares a short name.

// Landmark ratio.
static const char* LANDMARK_RATIO_KEYWORD = "landmark-ratio";
static const char* LANDMARK_RATIO_DESCRIPTION = "Ratio of landmarks. Should be in (0,1) range (0.2 means 20%)";

// SPE tolerance.
static const char* SPE_TOLERANCE_KEYWORD = "spe-tolerance";
static const char* SPE_TOLERANCE_DESCRIPTION = "Tolerance for SPE";

// Number of SPE updates.
static const char* SPE_NUM_UPDATES_KEYWORD = "spe-num-updates";
static const char* SPE_NUM_UPDATES_DESCRIPTION = "Number of SPE updates";

// Maximum number of iterations.
static const char* MAX_ITERS_KEYWORD = "max-iters";
static const char* MAX_ITERS_DESCRIPTION = "Maximum number of iterations";

// FA convergence threshold.
static const char* FA_EPSILON_KEYWORD = "fa-epsilon";
static const char* FA_EPSILON_DESCRIPTION = "FA convergence threshold";

// t-SNE perplexity.
static const char* SNE_PERPLEXITY_KEYWORD = "sne-perplexity";
static const char* SNE_PERPLEXITY_DESCRIPTION = "Perplexity for the t-SNE algorithm";

// t-SNE theta.
static const char* SNE_THETA_KEYWORD = "sne-theta";
static const char* SNE_THETA_DESCRIPTION = "Theta for the t-SNE algorithm";

// Manifold Sculpting squishing rate. The option string carries no "ms-"
// prefix even though the identifier does.
static const char* MS_SQUISHING_RATE_KEYWORD = "squishing-rate";
static const char* MS_SQUISHING_RATE_DESCRIPTION = "Squishing rate of the Manifold Sculpting algorithm";

// Precompute flag.
static const char* PRECOMPUTE_KEYWORD = "precompute";
static const char* PRECOMPUTE_DESCRIPTION = "Whether distance and kernel matrices should be precomputed";


int run(int argc, const char **argv)
{
Expand All @@ -123,7 +166,7 @@ int run(int argc, const char **argv)
.set_tab_expansion()
.add_options()
(
either("i", INPUT_FILE_KEYWORD),
either(INPUT_FILE_KEYWORD_SHORT, INPUT_FILE_KEYWORD),
INPUT_FILE_DESCRIPTION,
with_default("/dev/stdin"s)
)
Expand All @@ -136,27 +179,27 @@ int run(int argc, const char **argv)
TRANSPOSE_OUTPUT_DESCRIPTION
)
(
either("o", OUTPUT_FILE_KEYWORD),
either(OUTPUT_FILE_KEYWORD_SHORT, OUTPUT_FILE_KEYWORD),
OUTPUT_FILE_DESCRIPTION,
with_default("/dev/stdout"s)
)
(
either("opmat", OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD),
either(OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD_SHORT, OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD),
OUTPUT_PROJECTION_MATRIX_FILE_DESCRIPTION,
with_default("/dev/null"s)
)
(
either("opmean", OUTPUT_PROJECTION_MEAN_FILE_KEYWORD),
either(OUTPUT_PROJECTION_MEAN_FILE_KEYWORD_SHORT, OUTPUT_PROJECTION_MEAN_FILE_KEYWORD),
OUTPUT_PROJECTION_MEAN_FILE_DESCRIPTION,
with_default("/dev/null"s)
)
(
either("d", DELIMITER_KEYWORD),
either(DELIMITER_KEYWORD_SHORT, DELIMITER_KEYWORD),
DELIMITER_DESCRIPTION,
with_default(","s)
)
(
either("h", HELP_KEYWORD),
either(HELP_KEYWORD_SHORT, HELP_KEYWORD),
HELP_DESCRIPTION
)
(
Expand All @@ -172,12 +215,12 @@ int run(int argc, const char **argv)
DEBUG_DESCRIPTION
)
(
either("m", METHOD_KEYWORD),
either(METHOD_KEYWORD_SHORT, METHOD_KEYWORD),
METHOD_DESCRIPTION,
with_default("locally_linear_embedding"s)
)
(
either("nm", NEIGHBORS_METHOD_KEYWORD),
either(NEIGHBORS_METHOD_KEYWORD_SHORT, NEIGHBORS_METHOD_KEYWORD),
NEIGHBORS_METHOD_DESCRIPTION,
#ifdef TAPKEE_USE_LGPL_COVERTREE
with_default("covertree"s)
Expand All @@ -186,7 +229,7 @@ int run(int argc, const char **argv)
#endif
)
(
either("em", EIGEN_METHOD_KEYWORD),
either(EIGEN_METHOD_KEYWORD_SHORT, EIGEN_METHOD_KEYWORD),
EIGEN_METHOD_DESCRIPTION,
#ifdef TAPKEE_WITH_ARPACK
with_default("arpack"s)
Expand All @@ -195,82 +238,82 @@ int run(int argc, const char **argv)
#endif
)
(
either("cs", COMPUTATION_STRATEGY_KEYWORD),
either(COMPUTATION_STRATEGY_KEYWORD_SHORT, COMPUTATION_STRATEGY_KEYWORD),
COMPUTATION_STRATEGY_DESCRIPTION,
with_default("cpu"s)
)
(
either("td", TARGET_DIMENSION_KEYWORD),
"Target dimension",
either(TARGET_DIMENSION_KEYWORD_SHORT, TARGET_DIMENSION_KEYWORD),
TARGET_DIMENSION_DESCRIPTION,
with_default(2)
)
(
either("k", NUM_NEIGHBORS_KEYWORD),
"Number of neighbors",
either(NUM_NEIGHBORS_KEYWORD_SHORT, NUM_NEIGHBORS_KEYWORD),
NUM_NEIGHBORS_DESCRIPTION,
with_default(10)
)
(
either("gw", GAUSSIAN_WIDTH_KEYWORD),
"Width of gaussian kernel",
either(GAUSSIAN_WIDTH_KEYWORD_SHORT, GAUSSIAN_WIDTH_KEYWORD),
GAUSSIAN_WIDTH_DESCRIPTION,
with_default(1.0)
)
(
TIMESTEPS_KEYWORD,
"Number of timesteps for diffusion map",
TIMESTEPS_DESCRIPTION,
with_default(1)
)
(
EIGENSHIFT_KEYWORD,
"Regularization diagonal shift for weight matrix",
EIGENSHIFT_DESCRIPTION,
with_default(1e-9)
)
(
LANDMARK_RATIO_KEYWORD,
"Ratio of landmarks. Should be in (0,1) range (0.2 means 20%)",
LANDMARK_RATIO_DESCRIPTION,
with_default(0.2)
)
(
SPE_LOCAL_KEYWORD,
"Local strategy in SPE (default is global)"
SPE_LOCAL_DESCRIPTION
)
(
SPE_TOLERANCE_KEYWORD,
"Tolerance for SPE",
SPE_TOLERANCE_DESCRIPTION,
with_default(1e-5)
)
(
SPE_NUM_UPDATES_KEYWORD,
"Number of SPE updates",
SPE_NUM_UPDATES_DESCRIPTION,
with_default(100)
)
(
MAX_ITERS_KEYWORD,
"Maximum number of iterations",
MAX_ITERS_DESCRIPTION,
with_default(1000)
)
(
FA_EPSILON_KEYWORD,
"FA convergence threshold",
FA_EPSILON_DESCRIPTION,
with_default(1e-5)
)
(
SNE_PERPLEXITY_KEYWORD,
"Perplexity for the t-SNE algorithm",
SNE_PERPLEXITY_DESCRIPTION,
with_default(30.0)
)
(
SNE_THETA_KEYWORD,
"Theta for the t-SNE algorithm",
SNE_THETA_DESCRIPTION,
with_default(0.5)
)
(
MS_SQUISHING_RATE_KEYWORD,
"Squishing rate of the Manifold Sculpting algorithm",
MS_SQUISHING_RATE_DESCRIPTION,
with_default(0.99)
)
(
PRECOMPUTE_KEYWORD,
"Whether distance and kernel matrices should be precomputed (default false)"
PRECOMPUTE_DESCRIPTION
)
;

Expand Down

0 comments on commit 1475660

Please sign in to comment.