From ca7e4d56332d59782642b71687ad8000744c6a75 Mon Sep 17 00:00:00 2001
From: Ziyang Li
Date: Fri, 23 Feb 2024 09:21:08 -0500
Subject: [PATCH] Adding new documentations

---
 doc/src/language/aggregation.md      | 54 +++++++++++++++++++++++-
 doc/src/probabilistic/aggregation.md | 28 +++++++++++++
 doc/src/probabilistic/reasoning.md   |  1 -
 doc/src/probabilistic/sampling.md    | 16 +++++++
 doc/src/scallopy/context.md          | 40 ++++++++++++++++--
 doc/src/scallopy/getting_started.md  | 62 ++++++++++++++++++++++++++++
 doc/src/summary.md                   |  2 +-
 7 files changed, 195 insertions(+), 8 deletions(-)
 create mode 100644 doc/src/probabilistic/aggregation.md
 delete mode 100644 doc/src/probabilistic/reasoning.md

diff --git a/doc/src/language/aggregation.md b/doc/src/language/aggregation.md
index f564082..68ac036 100644
--- a/doc/src/language/aggregation.md
+++ b/doc/src/language/aggregation.md
@@ -66,6 +66,12 @@ rel num_edges(n) = n := count(a, b: edge(a, b))
 ```
 
 Here, we have two binding variables `a` and `b`, meaning that we are counting the number of *distinct* pairs of `a` and `b`.
 
+Note that we can use the syntactic sugar for aggregation to omit the repeated `n`:
+
+``` scl
+rel num_edges = count(a, b: edge(a, b))
+```
+
 ### Implicit Group-By
 
 With `group-by`, we may count the number of facts under a pre-defined group.
@@ -130,10 +136,22 @@ rel sales = {("alice", 1000.0), ("bob", 1200.0), ("christine", 1000.0)}
 ```
 
 We can compute the sum of all the sales:
 
 ``` scl
-rel total_sales(s) = s := sum(sp: sales_1(p, sp)) // 3700.0
+rel total_sales(s) = s := sum[p](sp: sales(p, sp)) // 3200.0
+// or
+rel total_sales = sum[p](sp: sales(p, sp)) // 3200.0
 ```
 
 Notice that the result type of `s` is the same as the type of the binding variable `sp`, which is `f32` as indicated by the decimals in the definition of `sales`.
+Here, the argument variable `p` is necessary since it serves as the key indexing each sale number.
+The above rule body is equivalent to the following math formula:
+
+\\[ s = \sum_p \text{sale}_p \\]
+
+If we do not use the argument variable, we get the following:
+
+``` scl
+rel total_sales_wrong(s) = s := sum(sp: sales(p, sp)) // 2200.0, since the two 1000.0 values are deduplicated without their keys
+```
 
 The product aggregator `prod` can be used in a similar manner as `sum`.
 
@@ -145,6 +163,8 @@ In the following example, we find the maximum grade of an exam:
 
 ``` scl
 rel exam_grades = {("a", 95.2), ("b", 87.3), ("c", 99.9)}
 rel min_score(m) = m := max(s: exam_grades(_, s)) // 99.9
+// or, succinctly
+rel min_score = max(s: exam_grades(_, s)) // 99.9
 ```
 
 The number (and types) of binding variables can be arbitrary, but the result variables must match the binding variables.
@@ -155,6 +175,8 @@ Suppose we want to get the person (along with their grade) who scored the best,
 ``` scl
 rel best_student(n, s) = (n, s) := max[n](s: exam_grades(n, s))
+// or, succinctly
+rel best_student = max[n](s: exam_grades(n, s))
 ```
 
 Here, we are still finding the maximum score `s`, but along with `max` we have specified the "arg" (`[n]`) which associates with the maximum score.
@@ -171,6 +193,8 @@ Alternatively, we can also use `argmax`:
 
 ``` scl
 rel best_student(n) = n := argmax[n](s: exam_grades(n, s))
+// or, succinctly
+rel best_student = argmax[n](s: exam_grades(n, s))
 ```
 
 ## Exists and Forall
@@ -218,7 +242,8 @@ Note that there can be arbitrary amount of binding variables.
 
 ### Universal Quantifier
 
 We can also have universal quantifier `forall`.
-For this, there is a special requirement for universal quantification, that the body formula has to be an `implies` formula.
+For this, there is a special requirement for universal quantification: the body formula has to be an `implies` (`=>`) formula.
+This restriction is enforced so that every binding variable is bounded by the left-hand-side of the `implies` formula.
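To make this restriction concrete, here is a small sketch (the relation `obj_shape` and its contents are illustrative): a `forall` whose body is not an `implies` formula is rejected, since the binding variable would have no bound.

``` scl
rel obj_shape = {(0, "cube"), (1, "cube")}
// rejected: the body is not an `implies` formula, so `o` has no bound
// rel all_cubes() = forall(o: obj_shape(o, "cube"))
// accepted: `o` is bounded by `obj_shape(o, _)` on the left-hand-side
rel all_cubes() = forall(o: obj_shape(o, _) implies obj_shape(o, "cube"))
```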
 In the following example, we check if all the objects are spherical:
 
 ``` scl
@@ -248,3 +273,28 @@ rel target() = forall(o: obj_color(o, RED) implies obj_shape(o, CUBE)) // {()}
 
 Here, we directly use `obj_color` to serve as the left-hand-side of the `implies`.
 There will be one empty tuple being derived, suggesting that the statement is true.
+
+## String Join
+
+If you have multiple facts containing strings and you want to join them together, you can use the `string_join` aggregator:
+
+``` scl
+rel R = {"hello", "world"}
+rel P1(n) = n := string_join(s: R(s)) // P1("helloworld")
+rel P2(n) = n := string_join<" ">(s: R(s)) // P2("hello world")
+```
+
+In the above example,
+we can either directly join, producing the string "helloworld",
+or join with separator `" "`, producing the string "hello world".
+Note that without an argument variable, the order of the strings in the joined string is determined by the lexicographical order of the strings themselves.
+Here, `"hello"` starts with `"h"`, which is smaller than the `"w"` in `"world"`, therefore occurring before `"world"`.
+If you want to specify an explicit order, use the argument variable:
+
+``` scl
+rel R = {(2, "hello"), (1, "world")}
+rel P(n) = n := string_join<" ">[i](s: R(i, s)) // P("world hello")
+```
+
+Since we have specified the variable `i` to be the argument of `string_join`, it serves to order the tuples.
+Here, we have `(1, "world")` and `(2, "hello")`, so the joined string will be `"world hello"` instead of `"hello world"`.
diff --git a/doc/src/probabilistic/aggregation.md b/doc/src/probabilistic/aggregation.md
new file mode 100644
index 0000000..ff433dd
--- /dev/null
+++ b/doc/src/probabilistic/aggregation.md
@@ -0,0 +1,28 @@
+# Aggregation with Probability
+
+With the introduction of probabilities, many existing aggregators are augmented with new semantics, which we typically call *multi-world semantics*.
+Moreover, there are new aggregators, such as `softmax`, `rank`, and `weighted_avg`, that make use of the probabilities.
+We introduce these aggregators one-by-one in this section.
+
+## Multi-world Semantics with Aggregators
+
+Let us take the `count` aggregator as an example.
+Suppose we have 2 objects, each of which could be big or small with its respective probabilities:
+
+``` scl
+type OBJ = OBJ_A | OBJ_B
+rel size = {0.8::(OBJ_A, "big"); 0.2::(OBJ_A, "small")} // obj A is very likely big
+rel size = {0.1::(OBJ_B, "big"); 0.9::(OBJ_B, "small")} // obj B is very likely small
+```
+
+Now let's say we want to count how many big objects there are, using the `count` aggregator; under the multi-world semantics, the result is not a single number but a distribution over the possible counts (here 0, 1, or 2), each weighted by the probability of the worlds producing it.
+
+Note that even when using probabilities, one can opt out of the multi-world semantics by appending a `!` sign to the end of the aggregator.
+
+## New Aggregators using Probabilities
+
+### Softmax and Normalize
+
+### Rank
+
+### Weighted Average and Weighted Sum
diff --git a/doc/src/probabilistic/reasoning.md b/doc/src/probabilistic/reasoning.md
deleted file mode 100644
index c17b54a..0000000
--- a/doc/src/probabilistic/reasoning.md
+++ /dev/null
@@ -1 +0,0 @@
-# Aggregation and Probability
diff --git a/doc/src/probabilistic/sampling.md b/doc/src/probabilistic/sampling.md
index f40a645..b32e910 100644
--- a/doc/src/probabilistic/sampling.md
+++ b/doc/src/probabilistic/sampling.md
@@ -1 +1,17 @@
 # Sampling with Probability
+
+In Scallop, samplers share the same syntax as aggregators.
+They usually work with probabilistic provenances, but can also work without them.
+Here are some example samplers:
+
+- `top`: get the $k$ facts with the highest probabilities
+- `categorical`: treat the relation as a categorical distribution and sample from it
+- `uniform`: treat the relation as a uniform distribution and sample from it
+
+Let's take `top` as an example.
+We can obtain the top-ranked symbol by using the following rule:
+
+``` scl
+rel symbols = {0.9::"+", 0.05::"-", 0.02::"3"}
+rel top_symbol(s) = s := top<1>(s: symbols(s)) // 0.9::top_symbol("+")
+```
diff --git a/doc/src/scallopy/context.md b/doc/src/scallopy/context.md
index 3525cc8..acf660d 100644
--- a/doc/src/scallopy/context.md
+++ b/doc/src/scallopy/context.md
@@ -121,10 +121,6 @@ For instance,
 ctx.add_relation("digit", int)
 ```
 
-### Configuring Relations
-
-#### `non_probabilistic`
-
 ## Adding Facts
 
 The most basic version of adding facts into an existing relation inside of an existing context.
@@ -134,6 +130,42 @@
 We are assuming that the context has a provenance of `"unit"`.
 
 ``` py
 ctx.add_facts("edge", [(1, 2), (2, 3)])
 ```
 
+If the relation has arity 1 and its type is declared as a singleton type instead of a 1-tuple, then the facts inside of the list do not need to be tuples.
+
+``` py
+ctx.add_relation("digit", int)
+ctx.add_facts("digit", [1, 2, 3])
+```
+
+### Probabilistic Facts (Tagged Facts)
+
+When the Scallop context is configured to use a provenance other than `"unit"`, each fact can carry a tag.
+If one wants to add facts along with probabilities, they can wrap their non-probabilistic facts into tuples whose first element is a simple probability.
+For example, if originally we have a fact `1`, wrapping it with a corresponding probability gives us `(0.1, 1)`, where `0.1` is the probability.
+
+``` py
+ctx.add_facts("digit", [1, 2, 3]) # without probability
+ctx.add_facts("digit", [(0.1, 1), (0.2, 2), (0.7, 3)]) # with probability
+```
+
+Of course, if the original facts are tuples, the ones with probability need to be wrapped further:
+
+``` py
+ctx.add_facts("color", [("A", "blue"), ("A", "green"), ...]) # without probability
+ctx.add_facts("color", [(0.1, ("A", "blue")), (0.2, ("A", "green")), ...]) # with probability
+```
+
+We can extend this syntax into tagged facts in general.
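In general, a tagged fact is simply a pair of a tag and a tuple. As a pure-Python sketch (the helper `tag_facts` is hypothetical and not part of the scallopy API), pairing tags with facts is just a `zip`:

``` py
def tag_facts(tags, facts):
    # Pair each tag with its fact; exactly one tag per fact
    if len(tags) != len(facts):
        raise ValueError("each fact needs exactly one tag")
    return list(zip(tags, facts))

tagged = tag_facts([0.1, 0.2], [("A", "blue"), ("A", "green")])
# tagged == [(0.1, ("A", "blue")), (0.2, ("A", "green"))]
```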
+Suppose we are using the boolean semiring (`boolean`); then we tag each fact with a boolean value such as `True` or `False`.
+
+``` py
+ctx = scallopy.Context(provenance="boolean")
+ctx.add_relation("edge", (int, int))
+ctx.add_facts("edge", [(True, (1, 2)), (False, (2, 3))])
+```
+
+### Non-tagged Facts in Tagged Context
+
 ## Adding Rules
 
 ### Tagged Rules
diff --git a/doc/src/scallopy/getting_started.md b/doc/src/scallopy/getting_started.md
index e69de29..26542a6 100644
--- a/doc/src/scallopy/getting_started.md
+++ b/doc/src/scallopy/getting_started.md
@@ -0,0 +1,62 @@
+# Getting Started with Scallopy
+
+## Motivating Example
+
+Let's start with a very simple example illustrating the usage of `scallopy`.
+
+``` python
+import scallopy
+
+ctx = scallopy.Context()
+
+ctx.add_relation("edge", (int, int))
+ctx.add_facts("edge", [(1, 2), (2, 3)])
+
+ctx.add_rule("path(a, c) = edge(a, c) or path(a, b) and edge(b, c)")
+
+ctx.run()
+
+print(list(ctx.relation("path"))) # [(1, 2), (1, 3), (2, 3)]
+```
+
+In this very simple edge-path example, we are interacting with Scallop through a Python class called `Context`.
+Basically, a `Context` manages a Scallop program, along with the relations, facts, and execution results corresponding to that program.
+We create a `Context` with `ctx = scallopy.Context()`.
+Relations, facts, and rules are added through the functions `add_relation(...)`, `add_facts(...)`, and `add_rule(...)`.
+With everything set, we can execute the program inside the context by calling `run()`.
+Lastly, we pull the result from `ctx` by using `relation(...)`.
+Please refer to [Scallop Context](context.md) for a more detailed explanation of this example.
+
+## Machine Learning with Scallopy and PyTorch
+
+When doing machine learning, we usually want to have batched inputs and outputs.
+Instead of building the Scallop context incrementally and explicitly running the program, we can create a `Module` all at once and run the program on a batch of inputs.
+This offers a few advantages, such as optimization during compilation, batched execution for integration with machine learning pipelines, simplified interaction between data structures, and so on.
+For example, we can create a module and run it like the following:
+
+``` python
+import scallopy
+import torch
+
+# Creating a module for execution
+my_sum2 = scallopy.Module(
+  program="""
+    type digit_1(a: i32), digit_2(b: i32)
+    rel sum_2(a + b) = digit_1(a) and digit_2(b)
+  """,
+  input_mappings={"digit_1": range(10), "digit_2": range(10)},
+  output_mappings={"sum_2": range(19)},
+  provenance="difftopkproofs")
+
+# Invoking the module with torch tensors; `result` is a 16 x 19 tensor.
+# Note that softmax is applied over dim=1, so each row is a distribution over the 10 digits
+result = my_sum2(
+  digit_1=torch.softmax(torch.randn(16, 10), dim=1),
+  digit_2=torch.softmax(torch.randn(16, 10), dim=1))
+```
+
+As can be seen in this example, we have defined a `Module` which can also be treated as a PyTorch module.
+Similar to other PyTorch modules, it can take in torch tensors and return torch tensors.
+The logical symbols (such as the `i32` numbers used in `digit_1` and `digit_2`) are configured in `input_mappings` and `output_mappings`, and are automatically converted to and from tensors.
+We also see that it is capable of handling a batch of inputs (here, the batch size is 16).
+Internally, Scallop also knows to execute in parallel, making it perform much faster than a sequential execution.
+Please refer to [Scallop Module](module.md) for more information.
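To build intuition for what `my_sum2` computes, here is a pure-Python sketch (independent of scallopy and torch; `sum2_distribution` is our own illustrative helper, not a scallopy function) of the probabilistic semantics for a single element of the batch: the probability of each sum accumulates the products of the two digit probabilities. The `difftopkproofs` provenance computes a differentiable top-k-proofs approximation of this exact enumeration.

``` python
def sum2_distribution(p1, p2):
    """Exact distribution over sums 0..18 of two independent digit distributions."""
    out = [0.0] * 19
    for a in range(10):
        for b in range(10):
            # the world where digit_1 = a and digit_2 = b contributes to sum a + b
            out[a + b] += p1[a] * p2[b]
    return out

# With two uniform digit distributions, P(sum = 0) = 1/100 and P(sum = 9) = 10/100
uniform = [0.1] * 10
dist = sum2_distribution(uniform, uniform)
```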
diff --git a/doc/src/summary.md b/doc/src/summary.md
index e8354ff..0871135 100644
--- a/doc/src/summary.md
+++ b/doc/src/summary.md
@@ -30,7 +30,7 @@
   - [Fact with Probability](probabilistic/facts.md)
   - [Logic and Probability](probabilistic/logic.md)
   - [Provenance Library](probabilistic/library.md)
-  - [Aggregation and Probability](probabilistic/reasoning.md)
+  - [Aggregation and Probability](probabilistic/aggregation.md)
   - [Sampling with Probability](probabilistic/sampling.md)
 - [`scallopy`](scallopy/index.md)
   - [Getting Started](scallopy/getting_started.md)