Skip to content

Commit

Permalink
caseo: capprox: approximate inference via enumeration by optimality
Browse files Browse the repository at this point in the history
  • Loading branch information
RenatoGeh committed Aug 5, 2023
1 parent 898bb78 commit 1360705
Show file tree
Hide file tree
Showing 6 changed files with 395 additions and 2 deletions.
3 changes: 2 additions & 1 deletion pasp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from .program import Program
from sample import sample
from .wlearn import learn
import approx

import numpy as np

__version__ = "0.0.4"
__version__ = "0.0.4-1"
97 changes: 97 additions & 0 deletions pasp/capprox.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "capprox.h"

#include "cinf.h"

bool approx_rec_obs_maxent(const clingo_model_t *cM, program_t *P, models_t *M, size_t model_idx,
observations_t *O, clingo_control_t *C) {
bool ok = false;
for (size_t i = 0; i < O->n; ++i) {
for (size_t j = 0; j < O->m; ++j) {
bool contains_atom;
if (O->dense) {
if (!O->V[i][j]) break;
if (!clingo_model_contains(cM, O->V[i][j], &contains_atom)) goto cleanup;
} else {
if (O->S[i][j] == OBSERVATION_MIS) continue;
if (!clingo_model_contains(cM, O->A[j], &contains_atom)) goto cleanup;
}
if (contains_atom != O->S[i][j]) goto next_obs;
}
/* Consistent, therefore record as such. */
models_TRUE(M, i, model_idx);
next_obs: ;
}
ok = true;
cleanup:
return ok;
}

bool approx_rec_query_maxent(const clingo_model_t *cM, program_t *P, models_t *M, size_t model_idx,
observations_t *O, clingo_control_t *C) {
bool ok = false, is_partial = P->sem != STABLE_SEMANTICS;

for (size_t i = 0; i < P->Q_n; ++i) {
query_t *q = P->Q+i;
bool all_e = true, all_q = true, c;
for (size_t j = 0; j < q->E_n; ++j) {
if (!model_contains(cM, q, j, &c, MODEL_CONTAINS_EVI, is_partial)) goto cleanup;
if (!c) { all_e = false; break; }
}
if (!all_e) continue;
for (size_t j = 0; j < q->Q_n; ++j) {
if (!model_contains(cM, q, j, &c, MODEL_CONTAINS_QUERY, is_partial)) goto cleanup;
if (!c) { all_q = false; break; }
}
models_TRUE(M, i+M->n, model_idx); /* count_e is set to the second half of M->Q. */
if (all_q) models_TRUE(M, i, model_idx); /* count_q_e is set to the first half of M->Q. */
}

ok = true;
cleanup:
return ok;
}

bool approx_query_credal(const clingo_model_t *cM, program_t *P, models_t *M, observations_t *O,
clingo_control_t *C) {
/* Approximate credal inference not yet implemented. */
return false;
}

bool approx_query_maxent(program_t *P, models_t *M, double **R) {
bool ok = false;
double *a, *b, *r = b = a = NULL;
size_t n = M->n;

/* The resulting (flattened) array has dimension n*k x 1, where n is the number of queries and k
* is the number of examples in the neural dataset. */
r = (double*) malloc(M->n*sizeof(double));
if (!r) goto cleanup;
/* Arrays a and b are the cumulated probabilities for each query. */
a = (double*) calloc(M->n, sizeof(double));
if (!a) goto cleanup;
b = (double*) calloc(M->n, sizeof(double));
if (!b) goto cleanup;

/*printf("n = %lu, m = %lu\n", n, M->m);*/
for (size_t i = 0; i < n; ++i) {
size_t l = i+n;
for (size_t j = 0; j < M->m; ++j) {
a[i] += models_prob(M, i, j);
/*printf("a[%lu] += %d * %f / %lu = %f = %f\n", i, counter_GET(&M->C, i, j), M->L[j]->pr, M->L[j]->n,*/
/*counter_GET(&M->C, i, j)*M->L[j]->pr/M->L[j]->n, a[i]);*/
b[i] += models_prob(M, l, j);
/*printf("b[%lu] += %d * %f / %lu = %f %f\n", i, counter_GET(&M->C, l, j), M->L[j]->pr, M->L[j]->n,*/
/*counter_GET(&M->C, l, j)*M->L[j]->pr/M->L[j]->n, b[i]);*/
}
r[i] = a[i]/b[i];
/*printf(" a[%lu] * b[%lu] = %f\n", i, i, r[i]);*/
}

*R = r;

ok = true;
cleanup:
free(a); free(b);
if (!ok) free(r);
return ok;
}
19 changes: 19 additions & 0 deletions pasp/capprox.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef _PASP_CAPPROX
#define _PASP_CAPPROX

#include <clingo.h>

#include "cprogram.h"
#include "cmodels.h"
#include "cdata.h"
#include "cstorage.h"

bool approx_rec_obs_maxent(const clingo_model_t *cM, program_t *P, models_t *M, size_t model_idx,
observations_t *O, clingo_control_t *C);
bool approx_rec_query_maxent(const clingo_model_t *cM, program_t *P, models_t *M, size_t model_idx,
observations_t *O, clingo_control_t *C);

bool approx_obs_maxent(program_t *P, models_t *M, observations_t *O, prob_obs_storage_t *Q);
bool approx_query_maxent(program_t *P, models_t *M, double **R);

#endif
240 changes: 240 additions & 0 deletions pasp/caseo.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
#include "caseo.h"

#include <stdio.h>
#include <math.h>

#include "cutils.h"

bool watch_minimize(clingo_weight_t p, const clingo_weighted_literal_t *W, size_t n, void *data) {
void **pack = (void**) data;
clingo_weighted_literal_t *wlits = (clingo_weighted_literal_t*) pack[0];
size_t *i = (size_t*) pack[1];

/* If we ever accept optimization inside dPASP, we may set p = 0 for ASEO and p > 0 for
* regular optimization. */

for (size_t j = 0; j < n; ++j) wlits[*i+j] = W[j];
*i += n;

return true;
}

bool control_set_nmodels(clingo_control_t *C, size_t n) {
clingo_configuration_t *cfg = NULL;
clingo_id_t cfg_root, cfg_sub;

/* String to integer. */
#define MAX_N_STR 30
char n_str[MAX_N_STR + 2];
size_t i, d;
n_str[MAX_N_STR+1] = '\0';
for (i = 0, d = n; d > 9; d /= 10) n_str[MAX_N_STR-(i++)] = '0' + (d % 10);
n_str[MAX_N_STR-i] = '0' + d;
char *nmodels = n_str + MAX_N_STR - i;
#undef MAX_N_STR

if (!clingo_control_configuration(C, &cfg)) return false;
if (!clingo_configuration_root(cfg, &cfg_root)) return false;
if (!clingo_configuration_map_at(cfg, cfg_root, "solve.models", &cfg_sub)) return false;
if (!clingo_configuration_value_set(cfg, cfg_sub, nmodels)) return false;

return true;
}

/** Probabilistic components to weak rules. */
bool pc2wr(program_t *P, clingo_control_t *C, clingo_backend_t *back, int scale,
clingo_weighted_literal_t *W) {
bool ok = false;
clingo_weighted_literal_t wl = {0};
clingo_atom_t choice;

if (!clingo_backend_begin(back)) goto cleanup;

for (size_t i = 0; i < P->PF_n; ++i) {
if (!clingo_backend_add_atom(back, &P->PF[i].cl_f, &choice)) goto cleanup;
wl.literal = choice;
wl.weight = -round(scale*log(P->PF[i].p/(1-P->PF[i].p)));
if (!clingo_backend_rule(back, true, &choice, 1, NULL, 0)) goto cleanup;
if (!clingo_backend_minimize(back, 0, &wl, 1)) goto cleanup;
W[i] = wl;
}

/* Clingo unfortunately does not support adding cardinality constraint bounds that are not lower
* bounds through backend. So instead, we have to do string translation. Maybe handling the AST
* might be more efficient, but the AST API is still a mistery to me. */
#define MAX_RULE_LENGTH 8192
char rule[MAX_RULE_LENGTH];
size_t offset;
for (size_t i = 0; i < P->AD_n; ++i) {
rule[0] = '{'; rule[1] = '\0';
offset = 1;
offset += sprintf(rule+offset, "%s", P->AD[i].F[0]);
if (!clingo_backend_add_atom(back, &P->AD[i].cl_F[0], &choice)) goto cleanup;
wl.literal = choice;
wl.weight = -round(scale*log(P->AD[i].P[0]/(1-P->AD[i].P[0])));
if (!clingo_backend_minimize(back, 0, &wl, 1)) goto cleanup;
W[P->PF_n] = wl;
for (size_t j = 1; j < P->AD[i].n; ++j) {
offset += sprintf(rule+offset, "; %s", P->AD[i].F[j]);
if (!clingo_backend_add_atom(back, &P->AD[i].cl_F[j], &choice)) goto cleanup;
wl.literal = choice;
wl.weight = -round(scale*log(P->AD[i].P[j]/(1-P->AD[i].P[j])));
if (!clingo_backend_minimize(back, 0, &wl, 1)) goto cleanup;
W[j+P->PF_n] = wl;
}
strcpy(rule+offset, "} = 1.");
if (!clingo_control_add(C, "base", NULL, 0, rule)) goto cleanup;
}
#undef MAX_RULE_LENGTH
if (!clingo_backend_end(back)) goto cleanup;

ok = true;
cleanup:
return ok;
}

bool aseo_solve(program_t *P, clingo_control_t *C, size_t k,
clingo_solve_result_bitset_t *solve_ret, size_t *N, models_t *models, observations_t *O,
bool (*f)(const clingo_model_t*, program_t*, models_t*, size_t, observations_t*,
clingo_control_t*)) {
bool ok = false, opt;
clingo_solve_handle_t *handle;
const clingo_model_t *M;
size_t m;

if (!clingo_control_solve(C, clingo_solve_mode_yield, NULL, 0, NULL, NULL, &handle)) goto cleanup;
for (m = 0; true;) {
if (!clingo_solve_handle_resume(handle)) goto cleanup;
if (!clingo_solve_handle_model(handle, &M)) goto cleanup;
if (M) {
/* Whether this model is optimal. */
if (!clingo_model_optimality_proven(M, &opt)) goto cleanup;
if (!opt) continue;
ctree_t *leaf = ctree_add(&models->root, M, P);
if (!leaf) goto cleanup;
models->L[*N+m] = leaf;
if (!f(M, P, models, *N+m, O, C)) goto cleanup;
++m;
/* DEBUG BEGIN */
/*printf("=================\n%lu-th model (run %lu):\n=================\n", *N+m, m);*/
/*print_solution(M);*/
/*int64_t cost;*/
/*if (!clingo_model_cost(M, &cost, 1)) goto cleanup;*/
/*printf("Cost: %ld\tOptimal: %d\n", cost, opt);*/
/* DEBUG END */
} else break;
}
if (!clingo_solve_handle_get(handle, solve_ret)) goto cleanup;

*N += m;

ok = true;
cleanup:
return ok;
}

bool set_upper_bound(clingo_backend_t *back, clingo_weighted_literal_t *W, size_t n,
clingo_weighted_literal_t *U, int cost) {
/* We assume there is only one optimization level, so there is only one cost: some objective
* function that is proportional to the log-likelihood. */
int l = -cost;
for (size_t i = 0; i < n; ++i)
if (W[i].weight > 0) {
l += W[i].weight;
U[i].literal = -W[i].literal;
U[i].weight = W[i].weight;
} else {
U[i].literal = W[i].literal;
U[i].weight = -W[i].weight;
}
if (!clingo_backend_begin(back)) return false;
bool ok = clingo_backend_weight_rule(back, false, NULL, 0, l, U, n);
if (!clingo_backend_end(back)) return false;
return ok;
}

models_t* aseo(program_t *P, size_t k, psemantics_t psem, observations_t *O, int scale,
bool (*f)(const clingo_model_t*, program_t*, models_t*, size_t, observations_t*,
clingo_control_t*)) {
bool ok = false;
clingo_control_t *C = NULL;
clingo_backend_t *back = NULL;
/*clingo_ground_program_observer_t obs = {NULL}; obs.minimize = watch_minimize;*/
models_t *M = NULL;

size_t n = num_prob_params(P);
clingo_weighted_literal_t *W = (clingo_weighted_literal_t*) malloc(n*sizeof(clingo_weighted_literal_t));
clingo_weighted_literal_t *U = (clingo_weighted_literal_t*) malloc(n*sizeof(clingo_weighted_literal_t));
if (!(W && U)) goto nomem;
M = O ? models_create(k, O->n, true) : models_create(k, P->Q_n, false);

clingo_configuration_t *cfg = NULL;
clingo_id_t cfg_root, cfg_sub;

/* Create new clingo controller. */
if (!clingo_control_new(NULL, 0, undef_atom_ignore, NULL, 20, &C)) return NULL;
/* Set parallel_mode to "NUM_PROCS,compete". */
if (!clingo_control_configuration(C, &cfg)) return NULL;
if (!clingo_configuration_root(cfg, &cfg_root)) return NULL;
if (!clingo_configuration_map_at(cfg, cfg_root, "solve.parallel_mode", &cfg_sub)) return NULL;
if (!clingo_configuration_value_set(cfg, cfg_sub, NUM_PROCS_CONFIG_STR)) return NULL;
/* Set optimization mode to enumeration of optimal models (optN). */
if (!clingo_configuration_map_at(cfg, cfg_root, "solve.opt_mode", &cfg_sub)) return NULL;
if (!clingo_configuration_value_set(cfg, cfg_sub, "optN")) return NULL;
/* Add logical part. */
if (!clingo_control_add(C, "base", NULL, 0, P->P)) return NULL;
/* Add grounded part. */
if (P->gr_P[0]) if (!clingo_control_add(C, "base", NULL, 0, P->gr_P)) return NULL;

/*size_t i_W = 0;*/
/*void *pack[] = {(void*) W, (void*) &i_W};*/
if (!clingo_control_backend(C, &back)) goto cleanup;
/* Convert probabilistic components into weak rules. */
if (!pc2wr(P, C, back, scale, W)) goto cleanup;
/*if (!clingo_control_register_observer(C, &obs, false, (void*) pack)) goto cleanup;*/
if (!control_set_nmodels(C, k)) goto cleanup;
if (!clingo_control_ground(C, GROUND_DEFAULT_PARTS, 1, NULL, NULL)) goto cleanup;

const clingo_statistics_t *stats;
uint64_t root_key, costs_key, cost_key;
if (!clingo_control_statistics(C, &stats)) goto cleanup;
if (!clingo_statistics_root(stats, &root_key)) goto cleanup;
if (!clingo_statistics_map_at(stats, root_key, "summary.costs", &costs_key)) goto cleanup;

clingo_solve_result_bitset_t res;

size_t m = 0; /* Number of optimal models seen so far. */
double cost;
if (!aseo_solve(P, C, k, &res, &m, M, O, f)) goto cleanup;
while ((res & clingo_solve_result_satisfiable) && !(res & clingo_solve_result_interrupted)) {
if (m >= k) break;
else if (!control_set_nmodels(C, k-m)) goto cleanup;

/* If clingo does not update keys on solving, then we can push this line up to the init block. */
if (!clingo_statistics_array_at(stats, costs_key, 0, &cost_key)) goto cleanup;
if (!clingo_statistics_value_get(stats, cost_key, &cost)) goto cleanup;

if (!set_upper_bound(back, W, n, U, (int) cost)) goto cleanup;
if (!aseo_solve(P, C, k, &res, &m, M, O, f)) goto cleanup;
}
M->m = m;

/* Debug: write models to dot. */
char buffer[1048576];
if (!ctree_dot(&M->root, P, buffer)) goto cleanup;
FILE *file = fopen("/tmp/test.dot", "w");
fputs(buffer, file);
fclose(file);

ok = true;
goto cleanup;
nomem:
PyErr_SetString(PyExc_MemoryError, "could not allocate enough memory for ASEO!");
cleanup:
if (clingo_error_code() != clingo_error_success) raise_clingo_error(NULL);
else if (!ok) PyErr_SetString(PyExc_RuntimeError, "an error has occurred during ASEO!");
clingo_control_free(C);
free(W); free(U);
if (!ok) { models_free(M); M = NULL; }
return M;
}
18 changes: 18 additions & 0 deletions pasp/caseo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef _PASP_CASEO
#define _PASP_CASEO

#include <stdbool.h>
#include "cprogram.h"
#include "cinf.h"
#include "cdata.h"
#include "cmodels.h"

/**
* Answer Set Enumeration by Optimality (ASEO).
*/

models_t* aseo(program_t *P, size_t k, psemantics_t psem, observations_t *O, int scale,
bool (*f)(const clingo_model_t*, program_t*, models_t*, size_t, observations_t*,
clingo_control_t*));

#endif
Loading

0 comments on commit 1360705

Please sign in to comment.