diff --git a/core/src/common/input_tag.rs b/core/src/common/input_tag.rs index 9fddf1d..dafb6ed 100644 --- a/core/src/common/input_tag.rs +++ b/core/src/common/input_tag.rs @@ -10,6 +10,8 @@ pub enum DynamicInputTag { Natural(usize), Float(f64), ExclusiveFloat(f64, usize), + FloatWithID(usize, f64), + ExclusiveFloatWithID(usize, f64, usize), Tensor(DynamicExternalTensor), } @@ -47,6 +49,8 @@ impl std::fmt::Display for DynamicInputTag { Self::Natural(n) => n.fmt(f), Self::Float(n) => n.fmt(f), Self::ExclusiveFloat(n, i) => f.write_str(&format!("{} [ME({})]", n, i)), + Self::FloatWithID(id, n) => f.write_str(&format!("{} [ID({})]", n, id)), + Self::ExclusiveFloatWithID(id, n, i) => f.write_str(&format!("{} [ID({}), ME({})]", n, id, i)), Self::Tensor(t) => t.fmt(f), } } diff --git a/core/src/compiler/back/pretty.rs b/core/src/compiler/back/pretty.rs index 80f0890..bef1d45 100644 --- a/core/src/compiler/back/pretty.rs +++ b/core/src/compiler/back/pretty.rs @@ -29,7 +29,7 @@ impl Display for Program { impl Display for Relation { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { self.attributes.fmt(f)?; - f.write_fmt(format_args!("{}(", self.predicate))?; + f.write_fmt(format_args!(" {}(", self.predicate))?; for (i, arg) in self.arg_types.iter().enumerate() { arg.fmt(f)?; if i < self.arg_types.len() - 1 { diff --git a/core/src/compiler/back/query_plan.rs b/core/src/compiler/back/query_plan.rs index c3a7c69..ce96f57 100644 --- a/core/src/compiler/back/query_plan.rs +++ b/core/src/compiler/back/query_plan.rs @@ -614,9 +614,12 @@ impl<'a> QueryPlanContext<'a> { }; // Note: We always apply constraint first and then assigns - let node = self.try_apply_non_new_assigns(&mut applied_assigns, node); - let node = self.try_apply_constraint(&mut applied_constraints, node); - let node = self.try_apply_foreign_predicate_atom(&mut applied_foreign_predicates, node); + let node = self.try_apply( + node, + &mut applied_assigns, + &mut applied_constraints, + &mut 
applied_foreign_predicates, + ); (node, 1) } } else { @@ -642,9 +645,12 @@ impl<'a> QueryPlanContext<'a> { } // Note: We always apply constraint first and then assigns - let node = self.try_apply_non_new_assigns(&mut applied_assigns, node); - let node = self.try_apply_constraint(&mut applied_constraints, node); - let node = self.try_apply_foreign_predicate_atom(&mut applied_foreign_predicates, node); + let node = self.try_apply( + node, + &mut applied_assigns, + &mut applied_constraints, + &mut applied_foreign_predicates, + ); (node, 0) }; diff --git a/core/src/compiler/front/ast/types.rs b/core/src/compiler/front/ast/types.rs index 9d1e7f6..57abe90 100644 --- a/core/src/compiler/front/ast/types.rs +++ b/core/src/compiler/front/ast/types.rs @@ -1,4 +1,4 @@ -use serde::ser::{Serialize, SerializeStruct, Serializer}; +use serde::ser::SerializeStruct; use crate::common::value_type::*; diff --git a/core/src/compiler/front/mod.rs b/core/src/compiler/front/mod.rs index c075571..2c06cfc 100644 --- a/core/src/compiler/front/mod.rs +++ b/core/src/compiler/front/mod.rs @@ -6,7 +6,6 @@ pub mod attribute; mod compile; mod error; mod f2b; -mod grammar; pub mod parser; mod pretty; mod source; @@ -14,6 +13,11 @@ mod transform; pub mod transformations; mod utils; +// Include grammar (generated file) +// It is okay to have dead code in generated file +#[allow(dead_code)] +mod grammar; + pub use analysis::*; pub use annotation::*; use ast::*; diff --git a/core/src/compiler/ram/ram2rs.rs b/core/src/compiler/ram/ram2rs.rs index cb6755d..048e2b0 100644 --- a/core/src/compiler/ram/ram2rs.rs +++ b/core/src/compiler/ram/ram2rs.rs @@ -555,6 +555,8 @@ fn input_tag_to_rs_input_tag(tag: &DynamicInputTag) -> TokenStream { DynamicInputTag::Natural(n) => quote! { DynamicInputTag::Natural(#n) }, DynamicInputTag::Float(f) => quote! { DynamicInputTag::Float(#f) }, DynamicInputTag::ExclusiveFloat(f, u) => quote! 
{ DynamicInputTag::ExclusiveFloat(#f, #u) }, + DynamicInputTag::FloatWithID(id, f) => quote! { DynamicInputTag::FloatWithID(#id, #f) }, + DynamicInputTag::ExclusiveFloatWithID(id, f, u) => quote! { DynamicInputTag::ExclusiveFloatWithID(#id, #f, #u) }, + DynamicInputTag::Tensor(_) => unimplemented!(), } } diff --git a/core/src/lib.rs b/core/src/lib.rs index 369b37e..3a35eac 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -1,6 +1,7 @@ #![feature(min_specialization)] #![feature(extract_if)] #![feature(hash_extract_if)] +#![feature(iter_repeat_n)] #![feature(proc_macro_span)] pub mod common; diff --git a/core/src/runtime/env/random.rs b/core/src/runtime/env/random.rs index efb1c73..d8a3967 100644 --- a/core/src/runtime/env/random.rs +++ b/core/src/runtime/env/random.rs @@ -1,8 +1,6 @@ use std::sync::*; use rand::prelude::*; -use rand::rngs::SmallRng; -use rand::SeedableRng; #[derive(Clone, Debug)] pub struct Random { diff --git a/core/src/runtime/provenance/common/cnf_dnf_formula.rs b/core/src/runtime/provenance/common/cnf_dnf_formula.rs index d036a9c..30b3dc2 100644 --- a/core/src/runtime/provenance/common/cnf_dnf_formula.rs +++ b/core/src/runtime/provenance/common/cnf_dnf_formula.rs @@ -1,5 +1,4 @@ use super::super::*; -use super::{AsBooleanFormula, Clause, Literal}; #[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum FormulaKind { diff --git a/core/src/runtime/provenance/common/diff_prob_storage.rs b/core/src/runtime/provenance/common/diff_prob_storage.rs index 05b7fc5..add3f89 100644 --- a/core/src/runtime/provenance/common/diff_prob_storage.rs +++ b/core/src/runtime/provenance/common/diff_prob_storage.rs @@ -14,6 +14,13 @@ impl DiffProbStorage { } } + pub fn new_with_placeholder() -> Self { + Self { + storage: P::new_rc_cell(Vec::new()), + num_requires_grad: P::new_cell(1), + } + } + /// Clone the internal storage pub fn clone_internal(&self) -> Self { Self { @@ -46,6 +53,25 @@ impl DiffProbStorage { fact_id } + pub fn add_prob_with_id(&self, prob: f64, external_tag: 
Option, id: usize) -> usize { + // Increment the `num_requires_grad` if the external tag is provided + if external_tag.is_some() { + P::get_cell_mut(&self.num_requires_grad, |n| *n += 1); + } + + // Add + P::get_rc_cell_mut(&self.storage, |s| { + if id >= s.len() { + s.extend(std::iter::repeat_n((0.0, None), id - s.len() + 1)); + } + + s[id] = (prob.clone(), external_tag.clone()) + }); + + // Return the id + id + } + pub fn get_diff_prob(&self, id: &usize) -> (f64, Option) { P::get_rc_cell(&self.storage, |d| d[id.clone()].clone()) } diff --git a/core/src/runtime/provenance/common/dnf_context.rs b/core/src/runtime/provenance/common/dnf_context.rs index 04b699e..d152b2f 100644 --- a/core/src/runtime/provenance/common/dnf_context.rs +++ b/core/src/runtime/provenance/common/dnf_context.rs @@ -3,7 +3,6 @@ use std::collections::*; use itertools::Itertools; use rand::distributions::WeightedIndex; use rand::prelude::*; -use rand::rngs::StdRng; use super::{Clause, DNFFormula, Literal}; diff --git a/core/src/runtime/provenance/common/dnf_formula.rs b/core/src/runtime/provenance/common/dnf_formula.rs index 0161843..77e23b8 100644 --- a/core/src/runtime/provenance/common/dnf_formula.rs +++ b/core/src/runtime/provenance/common/dnf_formula.rs @@ -1,7 +1,6 @@ use itertools::iproduct; use super::super::*; -use super::{AsBooleanFormula, Clause, Literal}; #[derive(Clone, PartialEq, PartialOrd, Eq)] pub struct DNFFormula { diff --git a/core/src/runtime/provenance/common/input_tags/float.rs b/core/src/runtime/provenance/common/input_tags/float.rs index 2625f80..41d95da 100644 --- a/core/src/runtime/provenance/common/input_tags/float.rs +++ b/core/src/runtime/provenance/common/input_tags/float.rs @@ -66,3 +66,9 @@ impl ConvertFromInputTag> for f64 { Some(t.prob) } } + +impl ConvertFromInputTag> for f64 { + fn from_input_tag(t: InputExclusiveDiffProbWithID) -> Option { + Some(t.prob) + } +} diff --git a/core/src/runtime/provenance/common/input_tags/input_diff_prob.rs 
b/core/src/runtime/provenance/common/input_tags/input_diff_prob.rs index 3424acd..4ca6fb9 100644 --- a/core/src/runtime/provenance/common/input_tags/input_diff_prob.rs +++ b/core/src/runtime/provenance/common/input_tags/input_diff_prob.rs @@ -36,6 +36,8 @@ impl StaticInputTag for InputDiffProb { DynamicInputTag::Exclusive(_) => None, DynamicInputTag::Float(f) => Some(Self(f.clone(), None)), DynamicInputTag::ExclusiveFloat(f, _) => Some(Self(f.clone(), None)), + DynamicInputTag::FloatWithID(_, f) => Some(Self(f.clone(), None)), + DynamicInputTag::ExclusiveFloatWithID(_, f, _) => Some(Self(f.clone(), None)), DynamicInputTag::Tensor(t) => Some(Self(t.get_f64(), T::from_tensor(t.clone()))), } } @@ -96,3 +98,9 @@ impl ConvertFromInputTag> for InputDiff Some(Self(t.prob, None)) } } + +impl ConvertFromInputTag> for InputDiffProb { + fn from_input_tag(t: InputExclusiveDiffProbWithID) -> Option { + Some(Self(t.prob, t.external_tag)) + } +} diff --git a/core/src/runtime/provenance/common/input_tags/input_exclusion.rs b/core/src/runtime/provenance/common/input_tags/input_exclusion.rs index 16637e2..421717c 100644 --- a/core/src/runtime/provenance/common/input_tags/input_exclusion.rs +++ b/core/src/runtime/provenance/common/input_tags/input_exclusion.rs @@ -72,3 +72,12 @@ impl ConvertFromInputTag> for Exclusion } } } + +impl ConvertFromInputTag> for Exclusion { + fn from_input_tag(t: InputExclusiveDiffProbWithID) -> Option { + match &t.exclusion { + Some(e) => Some(Self::Exclusive(e.clone())), + None => Some(Self::Independent), + } + } +} diff --git a/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob.rs b/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob.rs index 2052e40..777d714 100644 --- a/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob.rs +++ b/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob.rs @@ -78,6 +78,16 @@ impl StaticInputTag for InputExclusiveDiffProb { external_tag: None, 
exclusion: Some(i.clone()), }), + DynamicInputTag::FloatWithID(_, prob) => Some(Self { + prob: prob.clone(), + external_tag: None, + exclusion: None, + }), + DynamicInputTag::ExclusiveFloatWithID(_, prob, i) => Some(Self { + prob: prob.clone(), + external_tag: None, + exclusion: Some(i.clone()), + }), DynamicInputTag::Tensor(t) => Some(Self { prob: t.get_f64(), external_tag: T::from_tensor(t.clone()), @@ -145,3 +155,13 @@ impl ConvertFromInputTag> for InputExcl Some(t.clone()) } } + +impl ConvertFromInputTag> for InputExclusiveDiffProb { + fn from_input_tag(t: InputExclusiveDiffProbWithID) -> Option { + Some(Self { + prob: t.prob, + external_tag: t.external_tag, + exclusion: t.exclusion, + }) + } +} diff --git a/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob_with_id.rs b/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob_with_id.rs new file mode 100644 index 0000000..d75bf93 --- /dev/null +++ b/core/src/runtime/provenance/common/input_tags/input_exclusive_diff_prob_with_id.rs @@ -0,0 +1,130 @@ +use crate::common::foreign_tensor::*; +use crate::common::input_tag::*; + +use super::*; + +#[derive(Clone)] +pub struct InputExclusiveDiffProbWithID { + /// The probability of the tag + pub prob: f64, + + /// The ID of the tag + pub id: usize, + + /// The external tag for differentiability + pub external_tag: Option, + + /// An optional identifier of the mutual exclusion + pub exclusion: Option, +} + +impl InputExclusiveDiffProbWithID { + pub fn new(id: usize, prob: f64, tag: T, exclusion: Option) -> Self { + Self { + id, + prob, + external_tag: Some(tag), + exclusion, + } + } + + pub fn new_without_gradient(id: usize, prob: f64, exclusion: Option) -> Self { + Self { + id, + prob, + external_tag: None, + exclusion, + } + } +} + +impl std::fmt::Debug for InputExclusiveDiffProbWithID { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.prob.fmt(f) + } +} + +impl From<(usize, f64, T, Option)> for 
InputExclusiveDiffProbWithID { + fn from((id, prob, tag, exclusion): (usize, f64, T, Option)) -> Self { + Self { + id, + prob, + external_tag: Some(tag), + exclusion, + } + } +} + +impl StaticInputTag for InputExclusiveDiffProbWithID { + fn from_dynamic_input_tag(t: &DynamicInputTag) -> Option { + match t { + DynamicInputTag::None => None, + DynamicInputTag::Exclusive(_) => None, + DynamicInputTag::Bool(_) => None, + DynamicInputTag::Natural(_) => None, + DynamicInputTag::Float(_) => None, + DynamicInputTag::ExclusiveFloat(_, _) => None, + DynamicInputTag::FloatWithID(id, prob) => Some(Self { + id: id.clone(), + prob: prob.clone(), + external_tag: None, + exclusion: None, + }), + DynamicInputTag::ExclusiveFloatWithID(id, prob, i) => Some(Self { + id: id.clone(), + prob: prob.clone(), + external_tag: None, + exclusion: Some(i.clone()), + }), + DynamicInputTag::Tensor(_) => None, + } + } +} + +impl ConvertFromInputTag<()> for InputExclusiveDiffProbWithID { + fn from_input_tag(_: ()) -> Option { + None + } +} + +impl ConvertFromInputTag for InputExclusiveDiffProbWithID { + fn from_input_tag(_: bool) -> Option { + None + } +} + +impl ConvertFromInputTag for InputExclusiveDiffProbWithID { + fn from_input_tag(_: usize) -> Option { + None + } +} + +impl ConvertFromInputTag for InputExclusiveDiffProbWithID { + fn from_input_tag(_: Exclusion) -> Option { + None + } +} + +impl ConvertFromInputTag for InputExclusiveDiffProbWithID { + fn from_input_tag(_: f64) -> Option { + None + } +} + +impl ConvertFromInputTag for InputExclusiveDiffProbWithID { + fn from_input_tag(_: InputExclusiveProb) -> Option { + None + } +} + +impl ConvertFromInputTag> for InputExclusiveDiffProbWithID { + fn from_input_tag(_: InputDiffProb) -> Option { + None + } +} + +impl ConvertFromInputTag> for InputExclusiveDiffProbWithID { + fn from_input_tag(_: InputExclusiveDiffProbWithID) -> Option { + None + } +} diff --git a/core/src/runtime/provenance/common/input_tags/input_exclusive_prob.rs 
b/core/src/runtime/provenance/common/input_tags/input_exclusive_prob.rs index 2a4fba8..ecaff9d 100644 --- a/core/src/runtime/provenance/common/input_tags/input_exclusive_prob.rs +++ b/core/src/runtime/provenance/common/input_tags/input_exclusive_prob.rs @@ -118,3 +118,12 @@ impl ConvertFromInputTag> for InputExcl Some(Self::new(t.prob.clone(), t.exclusion.clone())) } } + +impl ConvertFromInputTag> for InputExclusiveProb { + fn from_input_tag(t: InputExclusiveDiffProbWithID) -> Option { + Some(Self { + prob: t.prob, + exclusion: t.exclusion, + }) + } +} diff --git a/core/src/runtime/provenance/common/input_tags/mod.rs b/core/src/runtime/provenance/common/input_tags/mod.rs index abc07a6..3018031 100644 --- a/core/src/runtime/provenance/common/input_tags/mod.rs +++ b/core/src/runtime/provenance/common/input_tags/mod.rs @@ -4,6 +4,7 @@ mod float; mod input_diff_prob; mod input_exclusion; mod input_exclusive_diff_prob; +mod input_exclusive_diff_prob_with_id; mod input_exclusive_prob; mod natural; mod static_input_tag; @@ -13,5 +14,6 @@ pub use convert_input_tag::*; pub use input_diff_prob::*; pub use input_exclusion::*; pub use input_exclusive_diff_prob::*; +pub use input_exclusive_diff_prob_with_id::*; pub use input_exclusive_prob::*; pub use static_input_tag::*; diff --git a/core/src/runtime/provenance/common/input_tags/natural.rs b/core/src/runtime/provenance/common/input_tags/natural.rs index 18e32e2..a4bf913 100644 --- a/core/src/runtime/provenance/common/input_tags/natural.rs +++ b/core/src/runtime/provenance/common/input_tags/natural.rs @@ -11,7 +11,9 @@ impl StaticInputTag for usize { DynamicInputTag::Natural(n) => Some(*n), DynamicInputTag::Bool(b) => Some(if *b { 1 } else { 0 }), DynamicInputTag::Float(f) => Some(if *f > 0.0 { 1 } else { 0 }), - DynamicInputTag::ExclusiveFloat(_, _) => Some(1), + DynamicInputTag::ExclusiveFloat(f, _) => Some(if *f > 0.0 { 1 } else { 0 }), + DynamicInputTag::FloatWithID(_, f) => Some(if *f > 0.0 { 1 } else { 0 }), + 
DynamicInputTag::ExclusiveFloatWithID(_, f, _) => Some(if *f > 0.0 { 1 } else { 0 }), DynamicInputTag::Tensor(_) => Some(1), } } diff --git a/core/src/runtime/provenance/common/input_tags/unit.rs b/core/src/runtime/provenance/common/input_tags/unit.rs index 95b692c..16ea0cf 100644 --- a/core/src/runtime/provenance/common/input_tags/unit.rs +++ b/core/src/runtime/provenance/common/input_tags/unit.rs @@ -56,3 +56,9 @@ impl ConvertFromInputTag> for () { Some(()) } } + +impl ConvertFromInputTag> for () { + fn from_input_tag(_: InputExclusiveDiffProbWithID) -> Option { + Some(()) + } +} diff --git a/core/src/runtime/provenance/common/mod.rs b/core/src/runtime/provenance/common/mod.rs index 3f5dc92..a3ecbe1 100644 --- a/core/src/runtime/provenance/common/mod.rs +++ b/core/src/runtime/provenance/common/mod.rs @@ -13,6 +13,7 @@ mod fact_id; mod input_tags; mod literal; mod output_diff_prob; +mod output_diff_prob_with_proofs; mod real; mod top_k_aggregation; @@ -31,5 +32,6 @@ pub use fact_id::*; pub use input_tags::*; pub use literal::*; pub use output_diff_prob::*; +pub use output_diff_prob_with_proofs::*; pub use real::*; pub use top_k_aggregation::*; diff --git a/core/src/runtime/provenance/common/output_diff_prob_with_proofs.rs b/core/src/runtime/provenance/common/output_diff_prob_with_proofs.rs new file mode 100644 index 0000000..a1dd18f --- /dev/null +++ b/core/src/runtime/provenance/common/output_diff_prob_with_proofs.rs @@ -0,0 +1,24 @@ +#[derive(Clone)] +pub struct OutputDiffProbWithProofs { + pub probability: f64, + pub gradient: Vec<(usize, f64)>, + pub proofs: Vec>, +} + +impl std::fmt::Debug for OutputDiffProbWithProofs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("") + .field(&self.probability) + .field(&self.gradient.iter().map(|(id, weight)| (id, weight)).collect::>()) + .finish() + } +} + +impl std::fmt::Display for OutputDiffProbWithProofs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
f.debug_tuple("") + .field(&self.probability) + .field(&self.gradient.iter().map(|(id, weight)| (id, weight)).collect::>()) + .finish() + } +} diff --git a/core/src/runtime/provenance/differentiable/diff_sample_k_proofs.rs b/core/src/runtime/provenance/differentiable/diff_sample_k_proofs.rs index ccf0129..d005eb6 100644 --- a/core/src/runtime/provenance/differentiable/diff_sample_k_proofs.rs +++ b/core/src/runtime/provenance/differentiable/diff_sample_k_proofs.rs @@ -1,5 +1,4 @@ use rand::prelude::*; -use rand::rngs::StdRng; use crate::common::foreign_tensor::*; use crate::utils::PointerFamily; diff --git a/core/src/runtime/provenance/differentiable/diff_top_k_proofs_debug.rs b/core/src/runtime/provenance/differentiable/diff_top_k_proofs_debug.rs new file mode 100644 index 0000000..7f7c99d --- /dev/null +++ b/core/src/runtime/provenance/differentiable/diff_top_k_proofs_debug.rs @@ -0,0 +1,164 @@ +use crate::common::foreign_tensor::*; +use crate::utils::*; + +use super::*; + +pub struct DiffTopKProofsDebugProvenance { + pub k: usize, + pub storage: DiffProbStorage, + pub disjunctions: P::Cell, + pub wmc_with_disjunctions: bool, +} + +impl Clone for DiffTopKProofsDebugProvenance { + fn clone(&self) -> Self { + Self { + k: self.k, + storage: self.storage.clone_internal(), + disjunctions: P::clone_cell(&self.disjunctions), + wmc_with_disjunctions: self.wmc_with_disjunctions, + } + } +} + +impl DiffTopKProofsDebugProvenance { + pub fn new(k: usize, wmc_with_disjunctions: bool) -> Self { + Self { + k, + storage: DiffProbStorage::new(), + disjunctions: P::new_cell(Disjunctions::new()), + wmc_with_disjunctions, + } + } + + pub fn input_tags(&self) -> Vec { + self.storage.input_tags() + } + + pub fn set_k(&mut self, k: usize) { + self.k = k; + } +} + +impl DNFContextTrait for DiffTopKProofsDebugProvenance { + fn fact_probability(&self, id: &usize) -> f64 { + self.storage.fact_probability(id) + } + + fn has_disjunction_conflict(&self, pos_facts: &std::collections::BTreeSet) 
-> bool { + P::get_cell(&self.disjunctions, |d| d.has_conflict(pos_facts)) + } +} + +impl Provenance for DiffTopKProofsDebugProvenance { + type Tag = DNFFormula; + + type InputTag = InputExclusiveDiffProbWithID; + + type OutputTag = OutputDiffProbWithProofs; + + fn name() -> &'static str { + "diff-top-k-proofs" + } + + fn tagging_fn(&self, input_tag: Self::InputTag) -> Self::Tag { + let InputExclusiveDiffProbWithID { + prob, + id, + external_tag, + exclusion, + } = input_tag; + + // First store the probability and generate the id + let fact_id = self.storage.add_prob_with_id(prob, external_tag, id); + + // Store the mutual exclusivity + if let Some(disjunction_id) = exclusion { + P::get_cell_mut(&self.disjunctions, |d| d.add_disjunction(disjunction_id, fact_id)); + } + + // Finally return the formula + DNFFormula::singleton(fact_id) + } + + fn recover_fn(&self, t: &Self::Tag) -> Self::OutputTag { + // Get the number of variables that requires grad + let num_var_requires_grad = self.storage.num_input_tags(); + let s = DualNumberSemiring::new(num_var_requires_grad + 1); + let v = |i: &usize| { + let (real, external_tag) = self.storage.get_diff_prob(i); + + // Check if this variable `i` requires grad or not + if external_tag.is_some() { + s.singleton(real.clone(), i.clone()) + } else { + s.constant(real.clone()) + } + }; + let wmc_result = if self.wmc_with_disjunctions { + P::get_cell(&self.disjunctions, |disj| t.wmc_with_disjunctions(&s, &v, disj)) + } else { + t.wmc(&s, &v) + }; + let prob = wmc_result.real; + let deriv = wmc_result + .deriv + .iter() + .map(|(id, weight)| (id, *weight)) + .collect::>(); + + let proofs = t + .iter() + .map(|clause| { + clause + .iter() + .map(|literal| { + match literal { + Literal::Pos(id) => (true, *id), + Literal::Neg(id) => (false, *id), + } + }) + .collect() + }) + .collect(); + + OutputDiffProbWithProofs { + probability: prob, + gradient: deriv, + proofs, + } + } + + fn discard(&self, t: &Self::Tag) -> bool { + t.is_empty() + 
} + + fn zero(&self) -> Self::Tag { + self.base_zero() + } + + fn one(&self) -> Self::Tag { + self.base_one() + } + + fn add(&self, t1: &Self::Tag, t2: &Self::Tag) -> Self::Tag { + self.top_k_add(t1, t2, self.k) + } + + fn saturated(&self, t_old: &Self::Tag, t_new: &Self::Tag) -> bool { + t_old == t_new + } + + fn mult(&self, t1: &Self::Tag, t2: &Self::Tag) -> Self::Tag { + self.top_k_mult(t1, t2, self.k) + } + + fn negate(&self, t: &Self::Tag) -> Option { + Some(self.top_k_negate(t, self.k)) + } + + fn weight(&self, t: &Self::Tag) -> f64 { + let v = |i: &usize| self.storage.get_prob(i); + t.wmc(&RealSemiring::new(), &v) + } +} diff --git a/core/src/runtime/provenance/differentiable/mod.rs b/core/src/runtime/provenance/differentiable/mod.rs index 8de7483..89c67d7 100644 --- a/core/src/runtime/provenance/differentiable/mod.rs +++ b/core/src/runtime/provenance/differentiable/mod.rs @@ -6,5 +6,6 @@ pub mod diff_nand_mult_prob; pub mod diff_sample_k_proofs; pub mod diff_top_bottom_k_clauses; pub mod diff_top_k_proofs; +pub mod diff_top_k_proofs_debug; use super::*; diff --git a/core/src/runtime/provenance/probabilistic/sample_k_proofs.rs b/core/src/runtime/provenance/probabilistic/sample_k_proofs.rs index a68f02f..05bc1eb 100644 --- a/core/src/runtime/provenance/probabilistic/sample_k_proofs.rs +++ b/core/src/runtime/provenance/probabilistic/sample_k_proofs.rs @@ -1,7 +1,6 @@ use std::collections::*; use rand::prelude::*; -use rand::rngs::StdRng; use crate::utils::*; diff --git a/core/src/runtime/statics/dataflow/batching/empty_batches.rs b/core/src/runtime/statics/dataflow/batching/empty_batches.rs index 78205b6..dd3aa11 100644 --- a/core/src/runtime/statics/dataflow/batching/empty_batches.rs +++ b/core/src/runtime/statics/dataflow/batching/empty_batches.rs @@ -1,4 +1,3 @@ -use std::iter::FromIterator; use std::marker::PhantomData; use super::*; diff --git a/core/src/runtime/statics/dataflow/batching/single_batch.rs 
b/core/src/runtime/statics/dataflow/batching/single_batch.rs index 6e526d6..ec18c8e 100644 --- a/core/src/runtime/statics/dataflow/batching/single_batch.rs +++ b/core/src/runtime/statics/dataflow/batching/single_batch.rs @@ -1,5 +1,3 @@ -use std::iter::FromIterator; - use super::batch::*; use super::*; use crate::runtime::provenance::*; diff --git a/core/tests/integrate/fp.rs b/core/tests/integrate/fp.rs index 5f68232..8a0b057 100644 --- a/core/tests/integrate/fp.rs +++ b/core/tests/integrate/fp.rs @@ -65,6 +65,18 @@ fn range_join_4() { ); } +#[test] +fn range_dt_1() { + expect_interpret_empty_result( + r#" + type f(bound i: usize, bound j: usize) + rel f(i, j) = range(i + 1, j, k) and c(i, k) and f(k + 1, j) + type c(bound i: usize, bound j: usize) + "#, + "f", + ) +} + #[test] fn string_chars_1() { expect_interpret_result( diff --git a/core/tests/runtime/provenance/disjunction.rs b/core/tests/runtime/provenance/disjunction.rs index 11bd05a..5bc8b77 100644 --- a/core/tests/runtime/provenance/disjunction.rs +++ b/core/tests/runtime/provenance/disjunction.rs @@ -1,5 +1,4 @@ use std::collections::BTreeSet; -use std::iter::FromIterator; use scallop_core::runtime::provenance::*; diff --git a/doc/src/probabilistic/debug.md b/doc/src/probabilistic/debug.md new file mode 100644 index 0000000..a957002 --- /dev/null +++ b/doc/src/probabilistic/debug.md @@ -0,0 +1 @@ +# Debugging Proofs diff --git a/doc/src/scallopy/debug_proofs.md b/doc/src/scallopy/debug_proofs.md new file mode 100644 index 0000000..400ff20 --- /dev/null +++ b/doc/src/scallopy/debug_proofs.md @@ -0,0 +1,126 @@ +# Debugging Proofs + +We offer capability in Scallop to debug the internal proofs generated by provenance. +This can be done through using special provenances specifically designed for debugging. + +## Debugging Top-K Proofs + +We can use the provenance `difftopkproofsdebug`. 
+Take the `sum_2` application as an example, we have + +``` py +ctx = scallopy.ScallopContext(provenance="difftopkproofsdebug", k=3) +ctx.add_relation("digit_a", int) +ctx.add_relation("digit_b", int) +ctx.add_rule("sum_2(a + b) = digit_a(a) and digit_b(b)") + +# !!! SPECIAL TREATMENT WHEN INSERTING FACTS !!! +ctx.add_facts("digit_a", [((torch.tensor(0.1), 1), (1,)), ((torch.tensor(0.9), 2), (2,))]) +ctx.add_facts("digit_b", [((torch.tensor(0.9), 3), (1,)), ((torch.tensor(0.1), 4), (2,))]) + +ctx.run() +result = ctx.relation("sum_2") +``` + +A majority of the code will look identical to the original example, +but special treatment is required when adding new facts to the context. +Originally, tags are just `torch.tensor(SOME_PROBABILITY)`. +But now, we need to supply an extra positive integer which we call **Fact ID**. +Each of the facts added should be of the following form: + +``` py +ctx.add_facts("SOME_RELATION", [ + ( ( torch.tensor(0.1), 1 ) , ( 0, ) ), # Fact 1 + # -- PROBABILITY -- -- FACT ID -- + # --------------- TAG -------------- -- TUPLE -- + ( ( torch.tensor(0.9), 2 ) , ( 1, ) ), # Fact 2 + ( ( torch.tensor(0.1), 3 ) , ( 0, ) ), # Fact 3 + # ... +]) +``` + +Basically, fact IDs are labels that the programmers can add to instrument the computation process. +All IDs should start from 1, and should be distinct and contiguous for all added facts. +The programmers will need to be responsible for managing the IDs so that there is no collision. +Let's say 10 facts are added with probability, then there should be 10 fact IDs ranging from +1 to 10, inclusive. +Of course, for non-probabilistic facts, the whole tag can be specified as `None`. + +### Debug in Forward Mode + +When used in `forward` mode, one should do the following. 
+The forward module should be setup just like before, with the only change being the provenance configuration: + +``` py +sum_2 = scallopy.Module( + provenance="difftopkproofsdebug", + k=3, + program=""" + type digit_a(a: i32), digit_b(b: i32) + rel sum_2(a + b) = digit_a(a) and digit_b(b) + """, + output_relation="sum_2", + output_mapping=[2, 3, 4]) +``` + +Let's assume that only 1 and 2 are added for each digit in the `sum_2` task. +That is, we have digit A and digit B. +Digit A can be 1 or 2, and the digit B can be 1 or 2 as well. +Looking at the following code, we have the fact IDs being + +- Digit `A` is 1: Fact ID 1 +- Digit `A` is 2: Fact ID 2 +- Digit `B` is 1: Fact ID 3 +- Digit `B` is 2: Fact ID 4 + +Notice that the fact IDs all start from 1 and are contiguous (i.e., no gap in the used fact IDs). +Also notice that, when in forward mode, the inputs need to be batched. +In this example, let's only focus on one single data-point, say `Datapoint 1`. + +``` py +digit_a = [ + [((torch.tensor(0.1), 1), (1,)), ((torch.tensor(0.9), 2), (2,))], # Datapoint 1 + # ... +] +digit_b = [ + [((torch.tensor(0.9), 3), (1,)), ((torch.tensor(0.1), 4), (2,))], # Datapoint 1 + # ... +] +``` + +After preparing the input facts, we can invoke the debug module as the following + +``` py +(result_tensor, proofs) = sum_2(digit_a=digit_a, digit_b=digit_b) +``` + +Here, the output will be a tuple `(result_tensor, proofs)`, unlike the non-debug version where only one single PyTorch tensor is returned. +Specifically, the two components will be + +``` python +result_tensor = torch.tensor([ + [0.09, 0.8119, 0.09], # Datapoint 1 + # ... 
+]) + +proofs = [ + [ # Datapoint 1 + [ # Proofs of (2,) + [ (True, 1), (True, 3) ], # The first proof is 1 + 1 (using positive fact 1 and 3) + ], + [ # Proofs of (3,) + [ (True, 1), (True, 4) ], # The first proof is 1 + 2 (using positive fact 1 and 4) + [ (True, 2), (True, 3) ], # The second proof is 2 + 1 (using positive fact 2 and 3) + ], + [ # Proofs of (4,) + [ (True, 2), (True, 4) ], # The first proof is 2 + 2 (using positive fact 2 and 4) + ] + ], + # ... +] +``` + +Notice that `result_tensor` is just the original output probability tensor. +The `proofs` will be `List[List[List[List[Tuple[bool, int]]]]]`. +In particular, it is `Batch -> Datapoint Results -> Proofs -> Proof -> Literal`. +Where each `Literal` is `Tuple[bool, int]` where the boolean indicates the positivity of the literal and the integer indicates the used fact ID. diff --git a/doc/src/summary.md b/doc/src/summary.md index 0871135..53d8936 100644 --- a/doc/src/summary.md +++ b/doc/src/summary.md @@ -32,6 +32,7 @@ - [Provenance Library](probabilistic/library.md) - [Aggregation and Probability](probabilistic/aggregation.md) - [Sampling with Probability](probabilistic/sampling.md) + - [Debugging Proofs](probabilistic/debug.md) - [`scallopy`](scallopy/index.md) - [Getting Started](scallopy/getting_started.md) - [Scallop Context](scallopy/context.md) diff --git a/etc/scallopy/scallopy/collection.py b/etc/scallopy/scallopy/collection.py index 1de87ab..4e0a65e 100644 --- a/etc/scallopy/scallopy/collection.py +++ b/etc/scallopy/scallopy/collection.py @@ -33,6 +33,11 @@ def __iter__(self): for ((p, deriv), t) in self._internal: diff_prob = diff_proofs_prob(p, deriv, input_tags) yield (diff_prob, t) + elif self.provenance == "difftopkproofsdebug": + input_tags = self._internal.input_tags() + for ((p, deriv, proofs), t) in self._internal: + diff_prob = diff_proofs_prob(p, deriv, input_tags) + yield ((diff_prob, proofs), t) else: for t in self._internal: yield t diff --git 
a/etc/scallopy/scallopy/context.py b/etc/scallopy/scallopy/context.py index 54be6d4..5a242d2 100644 --- a/etc/scallopy/scallopy/context.py +++ b/etc/scallopy/scallopy/context.py @@ -353,7 +353,7 @@ def forward_function( raise Exception("`forward_function` cannot be called when there is no PyTorch") # Needs to be a differentiable context - if ("diff" in self.provenance or self.provenance == "custom") and not "debug" in self.provenance: pass + if "diff" in self.provenance or self.provenance == "custom": pass else: raise Exception("`forward_function` can only be called on context with differentiable provenance") # Forward function @@ -738,6 +738,7 @@ def supports_disjunctions(self) -> bool: "diffsamplekproofs", "difftopkproofs", "difftopbottomkclauses", + "difftopkproofsdebug", ]) return self.provenance in PROVENANCE_SUPPORTING_DISJUNCTIONS @@ -771,7 +772,10 @@ def _process_disjunctive_elements(self, elems, disjunctions): if fact_id not in visited_fact_ids: if self.requires_tag(): (tag, tup) = fact - processed_elems[fact_id] = ((tag, None), tup) + if type(tag) == tuple: + processed_elems[fact_id] = ((*tag, None), tup) + else: + processed_elems[fact_id] = ((tag, None), tup) else: processed_elems[fact_id] = (None, fact) diff --git a/etc/scallopy/scallopy/forward.py b/etc/scallopy/scallopy/forward.py index 4c00c27..7344f28 100644 --- a/etc/scallopy/scallopy/forward.py +++ b/etc/scallopy/scallopy/forward.py @@ -444,6 +444,9 @@ def _process_input_facts(self, rela, rela_facts, disjunctions) -> List[Tuple]: # If the facts are provided as Tensor elif ty == torch_importer.torch.Tensor: + if self._has_debug_info(): + raise Exception(f"scallopy.forward with debug provenance `{self.ctx.provenance}` does not accept tensor inputs. Consider passing lists instead") + if rela not in self.ctx._input_mappings: raise Exception(f"scallopy.forward receives vectorized Tensor input. 
However there is no `input_mapping` provided for relation `{rela}`") @@ -572,6 +575,8 @@ def _process_one_output_wrapper(self, rel_index, rel_name, batch_size, input_tag return self._process_one_output(batch_size, input_tags, [r[rel_index] for r in output_results], False, None) def _process_one_output(self, batch_size, input_tags, output_results, single_element: bool, output_mapping: Optional[List[Tuple]]): + output_parts = [] + # If there is no given output mapping, try if output_mapping is not None: # Integrate the outputs @@ -581,7 +586,11 @@ def _process_one_output(self, batch_size, input_tags, output_results, single_ele v = v.view(-1) if single_element else v # Return the output - return v + output_parts.append(v) + + # If has debug information + if self._has_debug_info(): + output_parts.append(self._batched_debug_info(output_results)) else: # Collect all possible outputs, make them into a list possible_outputs = set() @@ -597,8 +606,13 @@ def _process_one_output(self, batch_size, input_tags, output_results, single_ele # Check if there is no output result if len(possible_outputs) == 0: - # Return empty tensor - return ([], torch_importer.torch.zeros(batch_size, 0, requires_grad=self.training)) + # Return empty mapping and empty tensor + output_parts.append([]) + output_parts.append(torch_importer.torch.zeros(batch_size, 0, requires_grad=self.training)) + + # If has debug information + if self._has_debug_info(): + output_parts.append([[] for _ in range(batch_size)]) else: # Integrate the outputs based on all the output results v = self._batched_prob(post_output_results) @@ -607,7 +621,17 @@ def _process_one_output(self, batch_size, input_tags, output_results, single_ele v = v.view(-1) if single_element else v # Return - return (post_output_mapping, v) + output_parts.append(post_output_mapping) + output_parts.append(v) + + # If has debug information + if self._has_debug_info(): + output_parts.append(self._batched_debug_info(post_output_results)) + + if 
len(output_parts) == 1: + return output_parts[0] + else: + return tuple(output_parts) def _batched_prob( self, @@ -640,13 +664,14 @@ def _batched_prob( self.ctx.provenance == "diffnandminprob" or \ self.ctx.provenance == "difftopkproofs" or \ self.ctx.provenance == "diffsamplekproofs" or \ - self.ctx.provenance == "difftopbottomkclauses": + self.ctx.provenance == "difftopbottomkclauses" or \ + self.ctx.provenance == "difftopkproofsdebug": tensor_results = [] for task_results in tasks: task_tensor_results = [] for task_tup_result in task_results: if task_tup_result is not None: - (p, _) = task_tup_result + p = task_tup_result[0] # The 0-th element of the differentiable result is always a single probability task_tensor_results.append(torch_importer.torch.tensor(p, requires_grad=True)) else: task_tensor_results.append(torch_importer.torch.tensor(0.0, requires_grad=True)) @@ -669,6 +694,11 @@ def _has_output_hook(self): elif self.ctx.provenance == "diffsamplekproofs": return True elif self.ctx.provenance == "difftopkproofs": return True elif self.ctx.provenance == "difftopbottomkclauses": return True + elif self.ctx.provenance == "difftopkproofsdebug": return True + else: return False + + def _has_debug_info(self): + if self.ctx.provenance == "difftopkproofsdebug": return True else: return False def _batched_output_hook( @@ -684,7 +714,8 @@ def _batched_output_hook( self.ctx.provenance == "diffnandminprob" or \ self.ctx.provenance == "difftopkproofs" or \ self.ctx.provenance == "diffsamplekproofs" or \ - self.ctx.provenance == "difftopbottomkclauses": + self.ctx.provenance == "difftopbottomkclauses" or \ + self.ctx.provenance == "difftopkproofsdebug": return self._diff_proofs_batched_output_hook(input_tags, tasks) else: raise Exception("[Internal Error] Should not happen") @@ -704,9 +735,12 @@ def _diff_proofs_batched_output_hook( def do_nothing_hook(): pass return do_nothing_hook - # mat_i: Input matrix def pad_input(l): - return l + 
[self._torch_tensor_apply(torch_importer.torch.tensor(0.0))] * (num_inputs - len(l)) if len(l) < num_inputs else l + preproc_l = [self._torch_tensor_apply(torch_importer.torch.tensor(0.0)) if e is None else e for e in l] + pad_zeros = [self._torch_tensor_apply(torch_importer.torch.tensor(0.0))] * (num_inputs - len(l)) if len(l) < num_inputs else [] + return preproc_l + pad_zeros + + # mat_i: Input matrix mat_i = torch_importer.torch.stack([torch_importer.torch.stack(pad_input(l)) for l in input_tags]) # mat_w: Weight matrix @@ -714,7 +748,7 @@ def pad_input(l): for (batch_id, task_result) in enumerate(output_batch): # batch_size for (output_id, output_tag) in enumerate(task_result): # output_size if output_tag is not None: - (_, deriv) = output_tag + deriv = output_tag[1] # The 1-st element of the differentiable result is always the derivative for (input_id, weight) in deriv: mat_w[batch_id, output_id, input_id] = weight @@ -726,3 +760,20 @@ def hook(grad): mat_i.backward(mat_f, retain_graph=retain_graph) return hook + + def _batched_debug_info( + self, + output_results: List[List[Any]], + ) -> List[List[Any]]: + if self.ctx.provenance == "difftopkproofsdebug": + results = [] + for task_results in output_results: + task_tensor_results = [] + for task_tup_result in task_results: + if task_tup_result is not None: + proofs = task_tup_result[2] # The 2-th element of the difftopkproofsdebug result is the proofs + task_tensor_results.append(proofs) + else: + task_tensor_results.append([]) + results.append(task_tensor_results) + return results diff --git a/etc/scallopy/scallopy/predicate.py b/etc/scallopy/scallopy/predicate.py index cd9b39b..01c18cf 100644 --- a/etc/scallopy/scallopy/predicate.py +++ b/etc/scallopy/scallopy/predicate.py @@ -192,8 +192,8 @@ def string_chars(s: str) -> scallopy.Facts[Tuple[int, char]]: # Find argument types for (arg_name, item) in signature.parameters.items(): optional = item.default != inspect.Parameter.empty - if item.annotation is None: 
- raise Exception(f"Argument {arg_name} type annotation not provided") + if item.annotation is None or 'inspect._empty' in str(item.annotation): + raise Exception(f"Argument `{arg_name}`'s type annotation not provided in the foreign predicate `{func_name}`") if item.kind == inspect.Parameter.VAR_POSITIONAL: raise Exception(f"Cannot have variable arguments in foreign predicate") elif not optional: diff --git a/etc/scallopy/src/collection.rs b/etc/scallopy/src/collection.rs index d5115c0..ea1dfc1 100644 --- a/etc/scallopy/src/collection.rs +++ b/etc/scallopy/src/collection.rs @@ -66,6 +66,10 @@ pub enum CollectionEnum { collection: P::Rc>>, tags: DiffProbStorage, }, + DiffTopKProofsDebug { + collection: P::Rc>>, + tags: DiffProbStorage, + }, Custom { collection: P::Rc>, }, @@ -88,6 +92,7 @@ macro_rules! match_collection { CollectionEnum::DiffSampleKProofs { collection: $v, .. } => $e, CollectionEnum::DiffTopKProofs { collection: $v, .. } => $e, CollectionEnum::DiffTopBottomKClauses { collection: $v, .. } => $e, + CollectionEnum::DiffTopKProofsDebug { collection: $v, .. } => $e, CollectionEnum::Custom { collection: $v } => $e, } }; @@ -110,6 +115,7 @@ impl CollectionEnum { Self::DiffSampleKProofs { tags, .. } => Some(tags.num_input_tags()), Self::DiffTopKProofs { tags, .. } => Some(tags.num_input_tags()), Self::DiffTopBottomKClauses { tags, .. } => Some(tags.num_input_tags()), + Self::DiffTopKProofsDebug { tags, .. } => Some(tags.num_input_tags() + 1), Self::Custom { .. } => None, } } @@ -130,6 +136,7 @@ impl CollectionEnum { Self::DiffSampleKProofs { tags, .. } => Some(tags.input_tags().into_vec()), Self::DiffTopKProofs { tags, .. } => Some(tags.input_tags().into_vec()), Self::DiffTopBottomKClauses { tags, .. } => Some(tags.input_tags().into_vec()), + Self::DiffTopKProofsDebug { tags, .. } => Some(tags.input_tags().into_none_prepended_vec()), Self::Custom { .. 
} => None, } } diff --git a/etc/scallopy/src/context.rs b/etc/scallopy/src/context.rs index d6c015b..1a08613 100644 --- a/etc/scallopy/src/context.rs +++ b/etc/scallopy/src/context.rs @@ -31,12 +31,17 @@ type AF = ArcFamily; #[derive(Clone)] pub enum ContextEnum { + // Basic Unit(IntegrateContext), Proofs(IntegrateContext, AF>), + + // Probabilistic MinMaxProb(IntegrateContext), AddMultProb(IntegrateContext), TopKProofs(IntegrateContext, AF>), TopBottomKClauses(IntegrateContext, AF>), + + // Differentiable DiffMinMaxProb(IntegrateContext, AF>), DiffAddMultProb(IntegrateContext, AF>), DiffNandMultProb(IntegrateContext, AF>), @@ -45,6 +50,11 @@ pub enum ContextEnum { DiffSampleKProofs(IntegrateContext, AF>), DiffTopKProofs(IntegrateContext, AF>), DiffTopBottomKClauses(IntegrateContext, AF>), + + // Debug + DiffTopKProofsDebug(IntegrateContext, AF>), + + // Custom Custom(IntegrateContext), } @@ -65,6 +75,7 @@ macro_rules! match_context { ContextEnum::DiffSampleKProofs($v) => $e, ContextEnum::DiffTopKProofs($v) => $e, ContextEnum::DiffTopBottomKClauses($v) => $e, + ContextEnum::DiffTopKProofsDebug($v) => $e, ContextEnum::Custom($v) => $e, } }; @@ -87,6 +98,7 @@ macro_rules! 
match_context_except_custom { ContextEnum::DiffSampleKProofs($v) => Ok($e), ContextEnum::DiffTopKProofs($v) => Ok($e), ContextEnum::DiffTopBottomKClauses($v) => Ok($e), + ContextEnum::DiffTopKProofsDebug($v) => Ok($e), ContextEnum::Custom(_) => Err(BindingError::CustomProvenanceUnsupported), } }; @@ -182,6 +194,11 @@ impl Context { diff_top_bottom_k_clauses::DiffTopBottomKClausesProvenance::new(k, wmc_with_disjunctions), )), }), + "difftopkproofsdebug" => Ok(Self { + ctx: ContextEnum::DiffTopKProofsDebug(IntegrateContext::new_incremental( + diff_top_k_proofs_debug::DiffTopKProofsDebugProvenance::new(k, wmc_with_disjunctions), + )), + }), "custom" => { if let Some(py_ctx) = custom_provenance { Ok(Self { @@ -445,6 +462,7 @@ impl Context { ContextEnum::DiffSampleKProofs(c) => Some(c.provenance_context().input_tags().into_vec()), ContextEnum::DiffTopKProofs(c) => Some(c.provenance_context().input_tags().into_vec()), ContextEnum::DiffTopBottomKClauses(c) => Some(c.provenance_context().input_tags().into_vec()), + ContextEnum::DiffTopKProofsDebug(c) => Some(c.provenance_context().input_tags().into_none_prepended_vec()), ContextEnum::Custom(_) => None, } } @@ -476,6 +494,9 @@ impl Context { ContextEnum::DiffTopBottomKClauses(c) => { c.provenance_context_mut().set_k(k); } + ContextEnum::DiffTopKProofsDebug(c) => { + c.provenance_context_mut().set_k(k); + } ContextEnum::Custom(_) => (), } } diff --git a/etc/scallopy/src/external_tag.rs b/etc/scallopy/src/external_tag.rs index e275077..7d5e13e 100644 --- a/etc/scallopy/src/external_tag.rs +++ b/etc/scallopy/src/external_tag.rs @@ -35,12 +35,19 @@ impl Into> for ExtTag { pub trait ExtTagVec { fn into_vec(self) -> Vec>; + + fn into_none_prepended_vec(self) -> Vec>; } impl ExtTagVec for Vec { fn into_vec(self) -> Vec> { self.into_iter().map(|v| v.tag).collect() } + + fn into_none_prepended_vec(self) -> Vec> { + let none: Option> = None; + std::iter::once(Python::with_gil(|py| none.to_object(py))).chain(self.into_iter().map(|v| 
v.tag)).collect() + } } pub trait ExtTagOption { diff --git a/etc/scallopy/src/provenance.rs b/etc/scallopy/src/provenance.rs index d9b0cf3..def1dbe 100644 --- a/etc/scallopy/src/provenance.rs +++ b/etc/scallopy/src/provenance.rs @@ -340,6 +340,35 @@ impl PythonProvenance for diff_top_bottom_k_clauses::DiffTopBottomKClausesProven } } +impl PythonProvenance for diff_top_k_proofs_debug::DiffTopKProofsDebugProvenance { + fn process_py_tag(tag: &PyAny) -> PyResult> { + let tag_disj_id: (&PyAny, usize, Option) = tag.extract()?; + if let Some(prob) = tag_disj_id.0.extract()? { + let tag: ExtTag = tag_disj_id.0.into(); + let id: usize = tag_disj_id.1.into(); + Ok(Some(Self::InputTag { + prob, + id, + external_tag: Some(tag), + exclusion: tag_disj_id.2, + })) + } else { + Ok(None) + } + } + + fn to_collection_enum(col: Arc>, ctx: &Self) -> CollectionEnum { + CollectionEnum::DiffTopKProofsDebug { + collection: col, + tags: ctx.storage.clone_rc(), + } + } + + fn to_output_py_tag(tag: &Self::OutputTag) -> Py { + Python::with_gil(|py| (tag.probability, tag.gradient.clone(), tag.proofs.clone()).to_object(py)) + } +} + impl PythonProvenance for custom_tag::CustomProvenance { fn process_py_tag(tag: &PyAny) -> PyResult> { let tag: Py = tag.into(); diff --git a/etc/scallopy/tests/debug.py b/etc/scallopy/tests/debug.py new file mode 100644 index 0000000..0a5e208 --- /dev/null +++ b/etc/scallopy/tests/debug.py @@ -0,0 +1,104 @@ +import unittest +import torch + +import scallopy + +class TestDebugProvenance(unittest.TestCase): + def test_debug_1(self): + ctx = scallopy.ScallopContext(provenance="difftopkproofsdebug") + ctx.add_relation("digit_a", int) + ctx.add_relation("digit_b", int) + ctx.add_rule("sum_2(a + b) = digit_a(a) and digit_b(b)") + ctx.add_facts("digit_a", [((torch.tensor(0.1), 1), (1,)), ((torch.tensor(0.9), 2), (2,))]) + ctx.add_facts("digit_b", [((torch.tensor(0.9), 3), (1,)), ((torch.tensor(0.1), 4), (2,))]) + ctx.run() + result = ctx.relation("sum_2") + for ((prob, 
proofs), (summation,)) in result: + if summation == 2: + self.assertAlmostEqual(float(prob), 0.09) + self.assertEqual(proofs, [[(True, 1), (True, 3)]]) + elif summation == 3: + self.assertAlmostEqual(float(prob), 0.82, 1) + self.assertEqual(proofs, [[(True, 2), (True, 3)], [(True, 1), (True, 4)]]) + elif summation == 4: + self.assertAlmostEqual(float(prob), 0.09) + self.assertEqual(proofs, [[(True, 2), (True, 4)]]) + + def test_debug_forward_1(self): + fn = scallopy.Module( + provenance="difftopkproofsdebug", + program=""" + type digit_a(a: i32), digit_b(b: i32) + rel sum_2(a + b) = digit_a(a) and digit_b(b) + """, + output_relation="sum_2", + ) + + digit_a = [ + [((torch.tensor(0.1), 1), (1,)), ((torch.tensor(0.9), 2), (2,))], # Datapoint 1 + ] + digit_b = [ + [((torch.tensor(0.9), 3), (1,)), ((torch.tensor(0.1), 4), (2,))], # Datapoint 1 + ] + + (mapping, result_tensor, proofs) = fn(digit_a=digit_a, digit_b=digit_b) + + for (i, (result,)) in enumerate(mapping): + if result == 2: + self.assertAlmostEqual(float(result_tensor[0][i]), 0.09) + self.assertEqual(proofs[0][i], [[(True, 1), (True, 3)]]) + if result == 3: + self.assertAlmostEqual(float(result_tensor[0][i]), 0.8119, 3) + self.assertEqual(proofs[0][i], [[(True, 2), (True, 3)], [(True, 1), (True, 4)]]) + if result == 4: + self.assertAlmostEqual(float(result_tensor[0][i]), 0.09) + self.assertEqual(proofs[0][i], [[(True, 2), (True, 4)]]) + + def test_debug_forward_2(self): + fn = scallopy.Module( + provenance="difftopkproofsdebug", + program=""" + type digit_a(a: i32), digit_b(b: i32) + rel sum_2(a + b) = digit_a(a) and digit_b(b) + """, + output_relation="sum_2", + output_mapping=[2, 3, 4], + ) + + digit_a = [ + [((torch.tensor(0.1), 1), (1,)), ((torch.tensor(0.9), 2), (2,))], # Datapoint 1 + ] + digit_b = [ + [((torch.tensor(0.9), 3), (1,)), ((torch.tensor(0.1), 4), (2,))], # Datapoint 1 + ] + + (result_tensor, proofs) = fn(digit_a=digit_a, digit_b=digit_b) + + 
self.assertAlmostEqual(float(result_tensor[0][0]), 0.09) + self.assertEqual(proofs[0][0], [[(True, 1), (True, 3)]]) + self.assertAlmostEqual(float(result_tensor[0][1]), 0.8119, 3) + self.assertEqual(proofs[0][1], [[(True, 2), (True, 3)], [(True, 1), (True, 4)]]) + self.assertAlmostEqual(float(result_tensor[0][2]), 0.09) + self.assertEqual(proofs[0][2], [[(True, 2), (True, 4)]]) + + @unittest.expectedFailure + def test_debug_forward_error_1(self): + fn = scallopy.Module( + provenance="difftopkproofsdebug", + program=""" + type digit_a(a: i32), digit_b(b: i32) + rel sum_2(a + b) = digit_a(a) and digit_b(b) + """, + input_mappings={ + "digit_a": range(10), + "digit_b": range(10), + }, + output_relation="sum_2", + ) + + digit_a = torch.randn((10,)) + digit_b = torch.randn((10,)) + result = fn(digit_a=digit_a, digit_b=digit_b) + +if __name__ == "__main__": + unittest.main() diff --git a/etc/scallopy/tests/test.py b/etc/scallopy/tests/test.py index fb5466f..1f05baf 100644 --- a/etc/scallopy/tests/test.py +++ b/etc/scallopy/tests/test.py @@ -4,6 +4,7 @@ from configurations import * from convert import * from dbio import * +from debug import * from entity import * from failure import * from forward import * diff --git a/etc/sclc/tests/common/check_exec.rs b/etc/sclc/tests/common/check_exec.rs index c66287e..4da1d4e 100644 --- a/etc/sclc/tests/common/check_exec.rs +++ b/etc/sclc/tests/common/check_exec.rs @@ -3,7 +3,6 @@ use std::io::prelude::*; use std::process::Command; use scallop_core::compiler::CompileOptions; -use tempfile; use scallop_core::compiler; use sclc_core::exec; diff --git a/etc/sclc/tests/common/check_pylib.rs b/etc/sclc/tests/common/check_pylib.rs index dceb88f..d8f7063 100644 --- a/etc/sclc/tests/common/check_pylib.rs +++ b/etc/sclc/tests/common/check_pylib.rs @@ -3,7 +3,6 @@ use std::io::prelude::*; use std::process::Command; use scallop_core::compiler::CompileOptions; -use tempfile; use scallop_core::compiler; use sclc_core::pylib; diff --git 
a/lib/parse_relative_duration/src/parse.rs b/lib/parse_relative_duration/src/parse.rs index b2d03d1..df89221 100644 --- a/lib/parse_relative_duration/src/parse.rs +++ b/lib/parse_relative_duration/src/parse.rs @@ -101,7 +101,9 @@ impl ProtoDuration { })?; let months = ::to_i32(&months).ok_or_else(|| Error::OutOfBounds(months))?; - Ok(RelativeDuration::months(months).with_duration(Duration::seconds(seconds) + Duration::nanoseconds(nanoseconds))) + Ok(RelativeDuration::months(months).with_duration( + Duration::try_seconds(seconds).ok_or(Error::NoValueFound("".to_string()))? + Duration::nanoseconds(nanoseconds), + )) } } @@ -189,9 +191,10 @@ pub fn parse(input: &str) -> Result { // Since the regex matched, the first group exists, so we can unwrap. let seconds = BigInt::parse_bytes(int.get(1).unwrap().as_str().as_bytes(), 10) .ok_or_else(|| Error::ParseInt(int.get(1).unwrap().as_str().to_owned()))?; - Ok(RelativeDuration::from(Duration::seconds( - seconds.to_i64().ok_or_else(|| Error::OutOfBounds(seconds))?, - ))) + Ok(RelativeDuration::from( + Duration::try_seconds(seconds.to_i64().ok_or_else(|| Error::OutOfBounds(seconds))?) + .ok_or(Error::NoValueFound("".to_string()))?, + )) } else if DURATION_RE.is_match(input) { // This means we have at least one "unit" (or plain word) and one value. let mut duration = ProtoDuration::default(); diff --git a/lib/parse_relative_duration/tests/basics.rs b/lib/parse_relative_duration/tests/basics.rs index 52c017e..1ee18ee 100644 --- a/lib/parse_relative_duration/tests/basics.rs +++ b/lib/parse_relative_duration/tests/basics.rs @@ -17,7 +17,7 @@ macro_rules! 
test_parse { parse($string), Ok( RelativeDuration::months($months) - .with_duration(Duration::seconds($seconds) + Duration::nanoseconds($nanoseconds)) + .with_duration(Duration::try_seconds($seconds).unwrap() + Duration::nanoseconds($nanoseconds)) ) ) } diff --git a/readme.md b/readme.md index 1918959..d87084d 100644 --- a/readme.md +++ b/readme.md @@ -9,7 +9,7 @@ Scallop is a language based on DataLog that supports differentiable logical and relational reasoning. Scallop program can be easily integrated in Python and even with a PyTorch learning module. You can also use it as another DataLog solver. -Internally, Scallop is built on a generalized [Provenance Semiring](https://dl.acm.org/doi/10.1145/1265530.1265535) framework. +Internally, Scallop is built on a generalized [Provenance Semiring](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1022&context=db_research) framework. It allows arbitrary semirings to be configured, supporting Scallop to perform discrete logical reasoning, probabilistic reasoning, and differentiable reasoning. ## Example