diff --git a/site/images/mercury_graph.png b/site/images/mercury_graph.png new file mode 100644 index 0000000..a3dc2d9 Binary files /dev/null and b/site/images/mercury_graph.png differ diff --git a/site/reference/embeddings/index.html b/site/reference/embeddings/index.html index 2bfc30c..3a9a088 100644 --- a/site/reference/embeddings/index.html +++ b/site/reference/embeddings/index.html @@ -1451,6 +1451,7 @@

Create an embedding mapping the nodes of a graph.

+

Includes contributions by David Muelas Recuenco.

Parameters:

@@ -1576,9 +1577,7 @@

Source code in mercury/graph/embeddings/graphembeddings.py -
 88
- 89
- 90
+                    
 90
  91
  92
  93
@@ -1600,7 +1599,9 @@ 

109 110 111 -112

def __init__(
+112
+113
+114
def __init__(
     self,
     dimension=None,
     n_jumps=None,
@@ -1709,9 +1710,7 @@ 

Source code in mercury/graph/embeddings/graphembeddings.py -
114
-115
-116
+              
116
 117
 118
 119
@@ -1720,7 +1719,9 @@ 

122 123 124 -125

def __getitem__(self, arg):
+125
+126
+127
def __getitem__(self, arg):
     """
     Method to access rows in the embedding by ID.
 
@@ -1776,9 +1777,7 @@ 

Source code in mercury/graph/embeddings/graphembeddings.py -
203
-204
-205
+              
205
 206
 207
 208
@@ -1786,7 +1785,9 @@ 

210 211 212 -213

def embedding(self):
+213
+214
+215
def embedding(self):
     """
     Return the internal Embeddings object.
 
@@ -1879,9 +1880,7 @@ 

Source code in mercury/graph/embeddings/graphembeddings.py -
127
-128
-129
+              
129
 130
 131
 132
@@ -1953,7 +1952,9 @@ 

198 199 200 -201

def fit(self, g: Graph):
+201
+202
+203
def fit(self, g: Graph):
     """
     Train the embedding by doing random walks.
 
@@ -2161,9 +2162,7 @@ 

Source code in mercury/graph/embeddings/graphembeddings.py -
215
-216
-217
+              
217
 218
 219
 220
@@ -2188,7 +2187,9 @@ 

239 240 241 -242

def get_most_similar_nodes(
+242
+243
+244
def get_most_similar_nodes(
     self, node_id, k=5, metric="cosine", return_as_indices=False
 ):
     """
@@ -2287,9 +2288,7 @@ 

Source code in mercury/graph/embeddings/graphembeddings.py -
244
-245
-246
+              
246
 247
 248
 249
@@ -2316,7 +2315,9 @@ 

270 271 272 -273

def save(self, file_name, save_embedding=False):
+273
+274
+275
def save(self, file_name, save_embedding=False):
     """
     Saves a GraphEmbedding to a compressed binary file with or without the embedding itself. It saves the graph's node names
     and the adjacency matrix as a sparse matrix.
diff --git a/site/reference/ml/index.html b/site/reference/ml/index.html
index c1e9932..f6ba05b 100644
--- a/site/reference/ml/index.html
+++ b/site/reference/ml/index.html
@@ -774,6 +774,8 @@ 

all nodes are reassigned at the same time and conflicts (i.e., 1 -> C2 and 2 -> C1) are resolved with a simple tie-breaking rule. This version also introduces the resolution parameter gamma, as in 2.

+

Contributed by Arturo Soberon Cedillo, Jose Antonio Guzman Vazquez and +Isaac Dodanim Hernandez Garcia.


    @@ -916,10 +918,7 @@

    Source code in mercury/graph/ml/louvain.py -
    67
    -68
    -69
    -70
    +                    
    70
     71
     72
     73
    @@ -935,7 +934,10 @@ 

    83 84 85 -86

    def __init__(
    +86
    +87
    +88
    +89
    def __init__(
         self,
         min_modularity_gain=1e-03,
         max_pass=2,
    @@ -1038,10 +1040,7 @@ 

    Source code in mercury/graph/ml/louvain.py -
    102
    -103
    -104
    -105
    +              
    105
     106
     107
     108
    @@ -1157,7 +1156,10 @@ 

    218 219 220 -221

    def fit(self, g: Graph):
    +221
    +222
    +223
    +224
    def fit(self, g: Graph):
         """
         Args:
             g (Graph): A mercury graph structure.
    @@ -1940,6 +1942,7 @@ 

    Implementation of the spectral clustering algorithm which detect communities inside a graph.

    +

    Contributed by Gibran Gabriel Otazo Sanchez.

    Parameters:

    @@ -2028,16 +2031,16 @@

    Source code in mercury/graph/ml/spectral.py -
    24
    -25
    -26
    +                    
    26
     27
     28
     29
     30
     31
     32
    -33
    def __init__(
    +33
    +34
    +35
    def __init__(
         self, n_clusters=2, mode="networkx", max_iterations=10, random_state=0
     ):
         self.n_clusters = n_clusters
    @@ -2132,9 +2135,7 @@ 

    Source code in mercury/graph/ml/spectral.py -
    49
    -50
    -51
    +              
    51
     52
     53
     54
    @@ -2149,7 +2150,9 @@ 

    63 64 65 -66

    def fit(self, graph: Graph):
    +66
    +67
    +68
    def fit(self, graph: Graph):
         """
         Find the optimal clusters of a given graph. The function returns nothing, but saves the clusters and
         the modularity in the object self.
    diff --git a/site/reference/viz/index.html b/site/reference/viz/index.html
    index 8fbf8fa..58b271d 100644
    --- a/site/reference/viz/index.html
    +++ b/site/reference/viz/index.html
    @@ -681,6 +681,11 @@ 

    Moebius class for visualizing graphs using JavaScript and HTML.

    +
    + Note +

    Moebius is currently only compatible with Google Colab and Jupyter Notebooks Classic (prior to v7).

    +
    +
    Usage
    from mercury.graph.viz import Moebius
    @@ -766,15 +771,27 @@ 

    Source code in mercury/graph/viz/moebius.py -
    34
    -35
    -36
    -37
    +                    
    37
     38
     39
     40
     41
    -42
    def __init__(self, G):
    +42
    +43
    +44
    +45
    +46
    +47
    +48
    +49
    +50
    +51
    +52
    +53
    +54
    +55
    +56
    +57
    def __init__(self, G):
     
         if HTML is None:
             raise ImportError('IPython is not installed')
    @@ -783,6 +800,18 @@ 

    self.use_spark = self.G._as_networkx is None self.front_pat = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + '/frontend' self._int_id_map = {node['id'] : i for i, node in enumerate(self.G.nodes)} + + # Define callback for JS interactions within Google Colab + if importlib.util.find_spec('google') is not None and importlib.util.find_spec('google.colab') is not None: + from google.colab import output + from IPython import get_ipython + + def colab_execute_python(code): + # Use get_ipython() to access the Moebius object defined by the user in a Colab cell + get_ipython().run_cell(f"_temp_colab_execute_python_result = {code}") + return get_ipython().user_ns["_temp_colab_execute_python_result"] + + output.register_callback("notebook.colab_execute_python", colab_execute_python)

    @@ -834,12 +863,12 @@

    Source code in mercury/graph/viz/moebius.py -
    86
    -87
    -88
    -89
    -90
    -91
    def FHT(self, fn):
    +              
    100
    +101
    +102
    +103
    +104
    +105
    def FHT(self, fn):
         """
         Syntactic sugar for display(HTML(filename = fn))
         """
    @@ -866,12 +895,12 @@ 

    Source code in mercury/graph/viz/moebius.py -
    78
    -79
    -80
    -81
    -82
    -83
    def FJS(self, fn):
    +              
    92
    +93
    +94
    +95
    +96
    +97
    def FJS(self, fn):
         """
         Syntactic sugar for display(Javascript(filename = fn))
         """
    @@ -898,12 +927,12 @@ 

    Source code in mercury/graph/viz/moebius.py -
    70
    -71
    -72
    -73
    -74
    -75
    def JS(self, s):
    +              
    84
    +85
    +86
    +87
    +88
    +89
    def JS(self, s):
         """
         Syntactic sugar for display(Javascript())
         """
    @@ -930,12 +959,12 @@ 

    Source code in mercury/graph/viz/moebius.py -
    53
    -54
    -55
    -56
    -57
    -58
    def __getitem__(self, item):
    +              
    67
    +68
    +69
    +70
    +71
    +72
    def __getitem__(self, item):
         """
         Add support for the [] operator.
         """
    @@ -962,12 +991,12 @@ 

    Source code in mercury/graph/viz/moebius.py -
    45
    -46
    -47
    -48
    -49
    -50
    def __str__(self):
    +              
    59
    +60
    +61
    +62
    +63
    +64
    def __str__(self):
         """
         Convert the object via str()
         """
    @@ -1138,21 +1167,7 @@ 

    Source code in mercury/graph/viz/moebius.py -
     94
    - 95
    - 96
    - 97
    - 98
    - 99
    -100
    -101
    -102
    -103
    -104
    -105
    -106
    -107
    -108
    +              
    108
     109
     110
     111
    @@ -1193,7 +1208,21 @@ 

    146 147 148 -149

    def node_or_edge_config(self, text_is = None, color_is = None, colors = None, size_is = None, size_range = None, size_scale = 'linear'):
    +149
    +150
    +151
    +152
    +153
    +154
    +155
    +156
    +157
    +158
    +159
    +160
    +161
    +162
    +163
    def node_or_edge_config(self, text_is = None, color_is = None, colors = None, size_is = None, size_range = None, size_scale = 'linear'):
         """
         Create a `node_config` or `edge_config` configuration dictionary for `show()` in an understandable way.
     
    @@ -1349,21 +1378,7 @@ 

    Source code in mercury/graph/viz/moebius.py -
    152
    -153
    -154
    -155
    -156
    -157
    -158
    -159
    -160
    -161
    -162
    -163
    -164
    -165
    -166
    +              
    166
     167
     168
     169
    @@ -1371,7 +1386,21 @@ 

    171 172 173 -174

    def show(self, initial_id = None, initial_depth = 1, node_config = None, edge_config = None):
    +174
    +175
    +176
    +177
    +178
    +179
    +180
    +181
    +182
    +183
    +184
    +185
    +186
    +187
    +188
    def show(self, initial_id = None, initial_depth = 1, node_config = None, edge_config = None):
         """
         Start the interactive graph visualization in a Jupyter notebook.
     
    diff --git a/site/search/search_index.json b/site/search/search_index.json
    index 4959885..e5619b2 100644
    --- a/site/search/search_index.json
    +++ b/site/search/search_index.json
    @@ -1 +1 @@
    -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"mercury-graph","text":"

    mercury-graph is a Python library that offers graph analytics capabilities with a technology-agnostic API, enabling users to apply a curated range of performant and scalable algorithms and utilities regardless of the underlying data framework. The consistent, scikit-like interface abstracts away the complexities of internal transformations, allowing users to effortlessly switch between different graph representations to leverage optimized algorithms implemented using pure Python, numba, networkx and PySpark GraphFrames.

    Currently implemented submodules in mercury.graph include:

    • mercury.graph.core, with the main classes of the library that create and store the graphs' data and properties.

    • mercury.graph.ml, with graph theory and machine learning algorithms such as Louvain community detection, spectral clustering, Markov chains, spreading activation-based diffusion models and graph random walkers.

    • mercury.graph.embeddings, with classes that calculate graph embeddings in different ways, such as following the Node2Vec algorithm.

    • mercury.graph.viz, with capabilities for graph visualization.

    "},{"location":"#repository","title":"Repository","text":"

    The website for the GitHub repository can be found here.

    "},{"location":"reference/core/","title":"mercury.graph.core","text":""},{"location":"reference/core/#mercury.graph.core.Graph","title":"mercury.graph.core.Graph(data=None, keys=None, nodes=None)","text":"

    This is the main class in mercury.graph.

    This class seamlessly abstracts the underlying technology used to represent the graph. You can create a graph passing the following objects to the constructor:

    • A pandas DataFrame containing edges (with a keys dictionary to specify the columns and possibly a nodes DataFrame)
    • A pyspark DataFrame containing edges (with a keys dictionary to specify the columns and possibly a nodes DataFrame)
    • A networkx graph
    • A graphframes graph

    Bear in mind that the graph object is immutable. This means that you can't modify the graph object once it has been created. If you want to modify it, you have to create a new graph object.

    The graph object provides:

    • Properties to access the graph in different formats (networkx, graphframes, dgl)
    • Properties with metrics and summary information that are calculated on demand and technology independent.
    • It is inherited by other graph classes in mercury-graph providing ML algorithms such as graph embedding, visualization, etc.

    Using this class from the other classes in mercury-graph:

    The other classes in mercury-graph define models or functionalities that are based on graphs. They use a Scikit-learn-like API to interact with the graph object. This means that the graph object is passed to the class constructor and the class follow the Scikit-learn conventions. It is recommended to follow the same conventions when creating your own classes to work with mercury-graph.

    The conventions can be found here:

    • Scikit API
    • On scikit conventions

    Parameters:

    Name Type Description Default data (DataFrame, Graph or DataFrame)

    The data to create the graph from. It can be a pandas DataFrame, a networkx Graph, a pyspark DataFrame, or a Graphframe. In case it already contains a graph (networkx or graphframes), the keys and nodes arguments are ignored.

    None keys dict

    A dictionary with keys to specify the columns in the data DataFrame. The keys are:

    • 'src': The name of the column with the source node.
    • 'dst': The name of the column with the destination node.
    • 'id': The name of the column with the node id.
    • 'weight': The name of the column with the edge weight.
    • 'directed': A boolean to specify if the graph is directed. (Only for pyspark DataFrames)

    When the keys argument is not provided or the key is missing, the default values are:

    • 'src': 'src'
    • 'dst': 'dst'
    • 'id': 'id'
    • 'weight': 'weight'
    • 'directed': True
    None nodes DataFrame

    A pandas DataFrame or a pyspark DataFrame with the nodes data. (Only when data is pandas or pyspark DataFrame and with the same type as data) If not given, the nodes are inferred from the edges DataFrame.

    None Source code in mercury/graph/core/graph.py
    def __init__(self, data = None, keys = None, nodes = None):\n    self._as_networkx = None\n    self._as_graphframe = None\n    self._as_dgl = None\n    self._degree = None\n    self._in_degree = None\n    self._out_degree = None\n    self._closeness_centrality = None\n    self._betweenness_centrality = None\n    self._pagerank = None\n    self._connected_components = None\n    self._nodes_colnames = None\n    self._edges_colnames = None\n\n    self._number_of_nodes = 0\n    self._number_of_edges = 0\n    self._node_ix = 0\n    self._is_directed = False\n    self._is_weighted = False\n\n    self._init_values = {k: v for k, v in locals().items() if k in inspect.signature(self.__init__).parameters}\n\n    if type(data) == pd.core.frame.DataFrame:\n        self._from_pandas(data, nodes, keys)\n        return\n\n    if isinstance(data, nx.Graph):      # This is the most general case, including: ...Graph, ...DiGraph and ...MultiGraph\n        self._from_networkx(data)\n        return\n\n    spark_int = SparkInterface()\n\n    if pyspark_installed and graphframes_installed:\n        if type(data) == spark_int.type_spark_dataframe:\n            self._from_dataframe(data, nodes, keys)\n            return\n\n        if type(data) == spark_int.type_graphframe:\n            self._from_graphframes(data)\n            return\n\n    raise ValueError('Invalid input data. (Expected: pandas DataFrame, a networkx Graph, a pyspark DataFrame, a graphframes Graph.)')\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.betweenness_centrality","title":"betweenness_centrality property","text":"

    Returns the betweenness centrality of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.closeness_centrality","title":"closeness_centrality property","text":"

    Returns the closeness centrality of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.connected_components","title":"connected_components property","text":"

    Returns the connected components of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.degree","title":"degree property","text":"

    Returns the degree of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.dgl","title":"dgl property","text":"

    Returns the graph as a DGL graph.

    If the graph has not been converted to a DGL graph yet, it will be converted and cached for future use.

    Returns:

    Type Description DGLGraph

    The graph represented as a DGL graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.edges","title":"edges property","text":"

    Returns an iterator over the edges in the graph.

    Returns:

    Type Description EdgeIterator

    An iterator object that allows iterating over the edges in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.edges_colnames","title":"edges_colnames property","text":"

    Returns the column names of the edges DataFrame.

    "},{"location":"reference/core/#mercury.graph.core.Graph.graphframe","title":"graphframe property","text":"

    Returns the graph as a GraphFrame.

    If the graph has not been converted to a GraphFrame yet, it will be converted and cached for future use.

    Returns:

    Type Description GraphFrame

    The graph represented as a GraphFrame.

    "},{"location":"reference/core/#mercury.graph.core.Graph.in_degree","title":"in_degree property","text":"

    Returns the in-degree of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.is_directed","title":"is_directed property","text":"

    Returns True if the graph is directed, False otherwise.

    Note

    Graphs created using graphframes are always directed. The way around it is to add the reverse edges to the graph. This can be done by creating the Graph with pyspark DataFrame() and defining a key 'directed' set as False in the dict argument. Otherwise, the graph will be considered directed even if these reversed edges have been created by other means this class cannot be aware of.

    "},{"location":"reference/core/#mercury.graph.core.Graph.is_weighted","title":"is_weighted property","text":"

    Returns True if the graph is weighted, False otherwise.

    A graph is considered weight if it has a column named 'weight' in the edges DataFrame or the column has a different name and that name is passed in the dict argument as the 'weight' key.

    "},{"location":"reference/core/#mercury.graph.core.Graph.networkx","title":"networkx property","text":"

    Returns the graph representation as a NetworkX graph.

    If the graph has not been converted to NetworkX format yet, it will be converted and cached for future use.

    Returns:

    Type Description Graph

    The graph representation as a NetworkX graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes","title":"nodes property","text":"

    Returns an iterator over all the nodes in the graph.

    Returns:

    Type Description NodeIterator

    An iterator that yields each node in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes_colnames","title":"nodes_colnames property","text":"

    Returns the column names of the nodes DataFrame.

    "},{"location":"reference/core/#mercury.graph.core.Graph.number_of_edges","title":"number_of_edges property","text":"

    Returns the number of edges in the graph.

    Returns:

    Type Description int

    The number of edges in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.number_of_nodes","title":"number_of_nodes property","text":"

    Returns the number of nodes in the graph.

    Returns:

    Type Description int

    The number of nodes in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.out_degree","title":"out_degree property","text":"

    Returns the out-degree of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.pagerank","title":"pagerank property","text":"

    Returns the PageRank of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.edges_as_dataframe","title":"edges_as_dataframe()","text":"

    Returns the edges as a pyspark DataFrame.

    If the graph is represented as a graphframes graph, the edges are extracted from it. Otherwise, the edges are converted from the pandas DataFrame representation. The columns used as the source and destination nodes are always named 'src' and 'dst', respectively, regardless of the original column names passed to the constructor.

    Source code in mercury/graph/core/graph.py
    def edges_as_dataframe(self):\n    \"\"\"\n    Returns the edges as a pyspark DataFrame.\n\n    If the graph is represented as a graphframes graph, the edges are extracted from it. Otherwise, the edges are converted from the\n    pandas DataFrame representation. The columns used as the source and destination nodes are always named 'src' and 'dst',\n    respectively, regardless of the original column names passed to the constructor.\n    \"\"\"\n    if self._as_graphframe is not None:\n        return self._as_graphframe.edges\n\n    return SparkInterface().spark.createDataFrame(self.edges_as_pandas())\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.edges_as_pandas","title":"edges_as_pandas()","text":"

    Returns the edges as a pandas DataFrame.

    If the graph is represented as a networkx graph, the edges are extracted from it. Otherwise, the graphframes graph will be used. This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The columns used as the source and destination nodes are always named 'src' and 'dst', respectively.

    Source code in mercury/graph/core/graph.py
    def edges_as_pandas(self):\n    \"\"\"\n    Returns the edges as a pandas DataFrame.\n\n    If the graph is represented as a networkx graph, the edges are extracted from it. Otherwise, the graphframes graph will be used.\n    This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The columns used\n    as the source and destination nodes are always named 'src' and 'dst', respectively.\n    \"\"\"\n    if self._as_networkx is not None:\n        edges_data = self._as_networkx.edges(data = True)\n        edges_df   = pd.DataFrame([(src, dst, attr) for src, dst, attr in edges_data], columns = ['src', 'dst', 'attributes'])\n\n        attrs_df   = pd.json_normalize(edges_df['attributes'])\n\n        return pd.concat([edges_df.drop('attributes', axis = 1), attrs_df], axis = 1)\n\n    return self.graphframe.edges.toPandas()\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes_as_dataframe","title":"nodes_as_dataframe()","text":"

    Returns the nodes as a pyspark DataFrame.

    If the graph is represented as a graphframes graph, the nodes are extracted from it. Otherwise, the nodes are converted from the pandas DataFrame representation. The column used as the node id is always named 'id', regardless of the original column name passed to the constructor.

    Source code in mercury/graph/core/graph.py
    def nodes_as_dataframe(self):\n    \"\"\"\n    Returns the nodes as a pyspark DataFrame.\n\n    If the graph is represented as a graphframes graph, the nodes are extracted from it. Otherwise, the nodes are converted from the\n    pandas DataFrame representation. The column used as the node id is always named 'id', regardless of the original column name passed\n    to the constructor.\n    \"\"\"\n    if self._as_graphframe is not None:\n        return self._as_graphframe.vertices\n\n    return SparkInterface().spark.createDataFrame(self.nodes_as_pandas())\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes_as_pandas","title":"nodes_as_pandas()","text":"

    Returns the nodes as a pandas DataFrame.

    If the graph is represented as a networkx graph, the nodes are extracted from it. Otherwise, the graphframes graph will be used. This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The column used as the node id is always named 'id'.

    Source code in mercury/graph/core/graph.py
    def nodes_as_pandas(self):\n    \"\"\"\n    Returns the nodes as a pandas DataFrame.\n\n    If the graph is represented as a networkx graph, the nodes are extracted from it. Otherwise, the graphframes graph will be used.\n    This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The column used\n    as the node id is always named 'id'.\n    \"\"\"\n    if self._as_networkx is not None:\n        nodes_data = self._as_networkx.nodes(data = True)\n        nodes_df   = pd.DataFrame([(node, attr) for node, attr in nodes_data], columns = ['id', 'attributes'])\n\n        attrs_df = pd.json_normalize(nodes_df['attributes'])\n\n        return pd.concat([nodes_df.drop('attributes', axis = 1), attrs_df], axis = 1)\n\n    return self.graphframe.vertices.toPandas()\n
    "},{"location":"reference/core/#mercury.graph.core.SparkInterface","title":"mercury.graph.core.SparkInterface(config=None, session=None)","text":"

    A class that provides an interface for interacting with Apache Spark, graphframes and dgl.

    Attributes:

    Name Type Description _spark_session SparkSession

    The shared Spark session.

    _graphframes module

    The shared graphframes namespace.

    Methods:

    Name Description _create_spark_session

    Creates a Spark session.

    spark

    Property that returns the shared Spark session.

    pyspark

    Property that returns the pyspark namespace.

    graphframes

    Property that returns the shared graphframes namespace.

    dgl

    Property that returns the shared dgl namespace.

    read_csv

    Reads a CSV file into a DataFrame.

    read_parquet

    Reads a Parquet file into a DataFrame.

    read_json

    Reads a JSON file into a DataFrame.

    read_text

    Reads a text file into a DataFrame.

    read

    Reads a file into a DataFrame.

    sql

    Executes a SQL query.

    udf

    Registers a user-defined function (UDF).

    stop

    Stops the Spark session.

    Parameters:

    Name Type Description Default config dict

    A dictionary of Spark configuration options. If not provided, the configuration in the global variable default_spark_config will be used.

    None Source code in mercury/graph/core/spark_interface.py
    def __init__(self, config=None, session=None):\n    if SparkInterface._spark_session is None:\n        if session is not None:\n            SparkInterface._spark_session = session\n        else:\n            SparkInterface._spark_session = self._create_spark_session(config)\n            # Set checkpoint directory\n            SparkInterface._spark_session.sparkContext.setCheckpointDir(\".checkpoint\")\n\n    if SparkInterface._graphframes is None and graphframes_installed:\n        SparkInterface._graphframes = gf\n\n    if SparkInterface._dgl is None and dgl_installed:\n        SparkInterface._dgl = dgl\n
    "},{"location":"reference/embeddings/","title":"mercury.graph.embeddings","text":""},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings","title":"mercury.graph.embeddings.Embeddings(dimension, num_elements=0, mean=0, sd=1, learn_step=3, bidirectional=False)","text":"

    Bases: BaseClass

    This class holds a matrix object that is interpreted as the embeddings for any list of objects, not only the nodes of a graph. You can see this class as the internal object holding the embedding for other classes such as class GraphEmbedding.

    Parameters:

    Name Type Description Default dimension int

    The number of columns in the embedding. See note below.

    required num_elements int

    The number of rows in the embedding. You can leave this empty on creation and then use initialize_as() to automatically match the nodes in a graph.

    0 mean float

    The (expected) mean of the initial values.

    0 sd float

    The (expected) standard deviation of the initial values.

    1 learn_step float

    The size of the learning step elements get approached or moved away. Units are hexadecimal degrees in along an ellipse.

    3 bidirectional bool

    Should the changes apply only to the elements of first column (False) or to both.

    False Note

    On dimension: Embeddings cannot be zero (that is against the whole concept). Smaller dimension embeddings can only hold few elements without introducing spurious correlations by some form of 'birthday attack' phenomenon as elements increase. Later it is very hard to get rid of that spurious 'knowledge'.

    Solution: With may elements, you have to go to high enough dimension even if the structure is simple. Pretending to fit many embeddings in low dimension without them being correlated is like pretending to plot a trillion random points in a square centimeter while keeping them 1 mm apart from each other: It's simply impossible!

    Source code in mercury/graph/embeddings/embeddings.py
    def __init__(\n    self, dimension, num_elements=0, mean=0, sd=1, learn_step=3, bidirectional=False\n):\n    self.dimension = dimension\n    self.num_elements = num_elements\n    self.mean = mean\n    self.sd = sd\n    self.learn_step = learn_step\n    self.bidirectional = bidirectional\n\n    if self.num_elements > 0:\n        self.embeddings_matrix_ = np.random.normal(\n            self.mean, self.sd, (self.num_elements, self.dimension)\n        )\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings.as_numpy","title":"as_numpy()","text":"

    Return the embedding as a numpy matrix where each row is an embedding.

    Source code in mercury/graph/embeddings/embeddings.py
    def as_numpy(self):\n    \"\"\"\n    Return the embedding as a numpy matrix where each row is an embedding.\n    \"\"\"\n    if not hasattr(self, \"embeddings_matrix_\"):\n        return\n\n    return self.embeddings_matrix_\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings.fit","title":"fit(converge=None, diverge=None)","text":"

    Apply a learning step to the embedding.

    Parameters:

    Name Type Description Default converge numpy matrix of two columns

    A matrix of indices to elements meaning (first column) should be approached to (second column).

    None diverge numpy matrix of two columns

    A matrix of indices to elements meaning (first column) should be moved away from (second column).

    None

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Note

    Embeddings start being randomly distributed and hold no structure other than spurious correlations. Each time you apply a learning step by calling this method, you are tweaking the embedding to approach some rows and/or move others away. You can use both converge and diverge or just one of them and call this as many times you want with varying learning step. A proxy of how much an embedding can learn can be estimated by measuring how row correlations are converging towards some asymptotic values.

    Source code in mercury/graph/embeddings/embeddings.py
    def fit(self, converge=None, diverge=None):\n    \"\"\"\n    Apply a learning step to the embedding.\n\n    Args:\n        converge (numpy matrix of two columns): A matrix of indices to elements meaning (first column) should be approached to\n            (second column).\n        diverge (numpy matrix of two columns): A matrix of indices to elements meaning (first column) should be moved away from\n            (second column).\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    Note:\n        Embeddings start being randomly distributed and hold no structure other than spurious correlations. Each time you apply a\n        learning step by calling this method, you are tweaking the embedding to approach some rows and/or move others away. You can use\n        both converge and diverge or just one of them and call this as many times you want with varying learning step. A proxy of how\n        much an embedding can learn can be estimated by measuring how row correlations are converging towards some asymptotic values.\n    \"\"\"\n\n    w = self.learn_step * np.pi / 180\n\n    cos_w = np.cos(w)\n    sin_w = np.sin(w)\n\n    if converge is not None:\n        self.embeddings_matrix_ = _elliptic_rotate(\n            self.embeddings_matrix_, converge[:, 0], converge[:, 1], cos_w, sin_w\n        )\n\n        if self.bidirectional:\n            self.embeddings_matrix_ = _elliptic_rotate(\n                self.embeddings_matrix_,\n                converge[:, 1],\n                converge[:, 0],\n                cos_w,\n                sin_w,\n            )\n\n    if diverge is not None:\n        self.embeddings_matrix_ = _elliptic_rotate(\n            self.embeddings_matrix_, diverge[:, 0], diverge[:, 1], cos_w, -sin_w\n        )\n\n        if self.bidirectional:\n            self.embeddings_matrix_ = _elliptic_rotate(\n                self.embeddings_matrix_, diverge[:, 1], diverge[:, 0], cos_w, -sin_w\n            )\n\n    return self\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings.get_most_similar_embeddings","title":"get_most_similar_embeddings(index, k=5, metric='cosine')","text":"

    Given an index of a vector in the embedding matrix, returns the k most similar embeddings in the matrix

    Parameters:

    Name Type Description Default index int

    index of the vector in the matrix that we want to compute the similar embeddings

    required k int

    Number of most similar embeddings to return

    5 metric str

    metric to use as a similarity.

    'cosine'

    Returns:

    Type Description list

    list of k most similar nodes as indices and list of similarities of the most similar nodes

    Source code in mercury/graph/embeddings/embeddings.py
    def get_most_similar_embeddings(self, index, k=5, metric=\"cosine\"):\n    \"\"\"\n    Given an index of a vector in the embedding matrix, returns the k most similar embeddings in the matrix\n\n    Args:\n        index (int): index of the vector in the matrix that we want to compute the similar embeddings\n        k (int): Number of most similar embeddings to return\n        metric (str): metric to use as a similarity.\n\n    Returns:\n        (list): list of k most similar nodes as indices and list of similarities of the most similar nodes\n    \"\"\"\n    if metric == \"cosine\":\n        similarities = (\n            1\n            - cdist(\n                np.expand_dims(self.as_numpy()[index], axis=0),\n                self.as_numpy(),\n                \"cosine\",\n            )[0]\n        )\n\n    elif metric == \"euclidean\":\n        similarities = 1 / (\n            1\n            + cdist(\n                np.expand_dims(self.as_numpy()[index], axis=0),\n                self.as_numpy(),\n                \"euclidean\",\n            )[0]\n        )\n\n    else:\n        raise ValueError(\"Unknown Distance Metric: %s\" % metric)\n\n    ordered_indices = np.argsort(similarities)[::-1][1 : (k + 1)]\n    ordered_similarities = similarities[ordered_indices]\n\n    return ordered_indices, ordered_similarities\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding","title":"mercury.graph.embeddings.GraphEmbedding(dimension=None, n_jumps=None, max_per_epoch=None, learn_step=3, bidirectional=False, load_file=None)","text":"

    Bases: BaseClass

    Create an embedding mapping the nodes of a graph.

    Parameters:

    Name Type Description Default dimension int

    The number of columns in the embedding. See the notes in Embeddings for details. (This parameter will be ignored when load_file is used.)

    None n_jumps int

    Number of random jumps from node to node.

    None max_per_epoch int

    Maximum number of consecutive random jumps without randomly jumping outside the edges. Note that normal random jumps are not going to explore outside a connected component.

    None learn_step float

    The size of the learning step elements get approached or moved away. Units are hexadecimal degrees along an ellipse.

    3 bidirectional bool

    Should the changes apply only to the elements of first column (False) or to both.

    False load_file str

    (optional) The full path to a binary file containing a serialized GraphEmbedding object. This file must be created using GraphEmbedding.save().

    None

    GraphEmbedding class constructor

    Source code in mercury/graph/embeddings/graphembeddings.py
    def __init__(\n    self,\n    dimension=None,\n    n_jumps=None,\n    max_per_epoch=None,\n    learn_step=3,\n    bidirectional=False,\n    load_file=None,\n):\n    \"\"\"GraphEmbedding class constructor\"\"\"\n    if load_file is None and (dimension is None or n_jumps is None):\n        raise ValueError(\n            \"Parameters dimension and n_jumps are required when load_file is None\"\n        )\n\n    self.dimension = dimension\n    self.n_jumps = n_jumps\n    self.max_per_epoch = max_per_epoch\n    self.learn_step = learn_step\n    self.bidirectional = bidirectional\n    self.load_file = load_file\n\n    if self.load_file is not None:\n        self._load(self.load_file)\n        return\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.__getitem__","title":"__getitem__(arg)","text":"

    Method to access rows in the embedding by ID.

    Parameters:

    Name Type Description Default arg same as node ids in the graph

    A node ID in the graph

    required

    Returns:

    Type Description matrix

    A numpy matrix of one row

    Source code in mercury/graph/embeddings/graphembeddings.py
    def __getitem__(self, arg):\n    \"\"\"\n    Method to access rows in the embedding by ID.\n\n    Args:\n        arg (same as node ids in the graph): A node ID in the graph\n\n    Returns:\n        (numpy.matrix): A numpy matrix of one row\n\n    \"\"\"\n    return self.embeddings_.embeddings_matrix_[self.node_ids.index(arg)]\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.embedding","title":"embedding()","text":"

    Return the internal Embeddings object.

    Returns:

    Type Description Embeddings

    The embedding which is a dense matrix of float that can be used with numpy functions.

    Source code in mercury/graph/embeddings/graphembeddings.py
    def embedding(self):\n    \"\"\"\n    Return the internal Embeddings object.\n\n    Returns:\n        (mercury.graph.embeddings.Embeddings): The embedding which is a dense matrix of `float` that can be used with `numpy` functions.\n    \"\"\"\n    if not hasattr(self, \"embeddings_\"):\n        return\n\n    return self.embeddings_\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.fit","title":"fit(g)","text":"

    Train the embedding by doing random walks.

    Parameters:

    Name Type Description Default g mercury.graph Graph asset

    A mercury.graph Graph object. The embedding will be created so that each row in the embedding maps a node ID in g.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    This does a number of random walks starting from a random node and selecting the edges with a probability that is proportional to the weight of the edge. If the destination node also has outgoing edges, the next step will start from it, otherwise, a new random node will be selected. The edges visited (concordant pairs) will get some reinforcement in the embedding while randomly selected non-existent edges will get divergence instead (discordant pairs).

    Internally, this stores the node IDs of the nodes visited and calls Embeddings.fit() to transfer the structure to the embedding. Of course, it can be called many times on the same GraphEmbedding.

    Source code in mercury/graph/embeddings/graphembeddings.py
    def fit(self, g: Graph):\n    \"\"\"\n    Train the embedding by doing random walks.\n\n    Args:\n        g (mercury.graph Graph asset): A `mercury.graph` Graph object. The embedding will be created so that each row in the embedding maps\n            a node ID in g.\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    This does a number of random walks starting from a random node and selecting the edges with a probability that is proportional to\n    the weight of the edge. If the destination node also has outgoing edges, the next step will start from it, otherwise, a new random\n    node will be selected. The edges visited (concordant pairs) will get some reinforcement in the embedding while a randomly selected\n    non-existent edges will get divergence instead (discordant pairs).\n\n    Internally, this stores the node IDS of the node visited and calls Embeddings.fit() to transfer the structure to the embedding.\n    Of course, it can be called many times on the same GraphEmbedding.\n\n    \"\"\"\n\n    self.node_ids = list(g.networkx.nodes)\n\n    j_matrix = nx.adjacency_matrix(g.networkx)\n\n    N = j_matrix.shape[1]\n    M = j_matrix.nnz\n\n    self.r_ini = np.zeros(N, dtype=int)\n    self.r_len = np.zeros(N, dtype=int)\n    self.r_sum = np.zeros(N, dtype=float)\n    self.r_col = np.zeros(M, dtype=int)\n    self.r_wgt = np.zeros(M, dtype=float)\n\n    i = 0\n    for r in range(N):\n        self.r_ini[r] = i\n\n        i_col = j_matrix[[r], :].nonzero()[1]\n        L = len(i_col)\n\n        self.r_len[r] = L\n\n        for k in range(L):\n            c = i_col[k]\n            w = j_matrix[r, c]\n\n            self.r_sum[r] += w\n            self.r_col[i] = c\n            self.r_wgt[i] = w\n\n            i += 1\n\n    self.TotW = sum(self.r_sum)\n\n    converge, diverge = _random_walks(\n        self.r_ini,\n        self.r_len,\n        self.r_sum,\n        self.r_col,\n        self.r_wgt,\n        self.TotW,\n        self.n_jumps,\n    
    self.max_per_epoch if self.max_per_epoch is not None else self.n_jumps,\n    )\n\n    self.embeddings_ = Embeddings(\n        dimension=self.dimension,\n        num_elements=len(self.node_ids),\n        learn_step=self.learn_step,\n        bidirectional=self.bidirectional,\n    )\n    self.embeddings_.fit(converge, diverge)\n\n    return self\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.get_most_similar_nodes","title":"get_most_similar_nodes(node_id, k=5, metric='cosine', return_as_indices=False)","text":"

    Returns the k most similar nodes and the similarities

    Parameters:

    Name Type Description Default node_id object

    Id of the node that we want to search the similar nodes.

    required k int

    Number of most similar nodes to return

    5 metric str

    metric to use as a similarity.

    'cosine' return_as_indices bool

    if return the nodes as indices (False), or as node ids (True)

    False

    Returns:

    Type Description list

    list of k most similar nodes and list of similarities of the most similar nodes

    DataFrame

    A list of k most similar nodes as a pd.DataFrame[word: string, similarity: double]

    Source code in mercury/graph/embeddings/graphembeddings.py
    def get_most_similar_nodes(\n    self, node_id, k=5, metric=\"cosine\", return_as_indices=False\n):\n    \"\"\"\n    Returns the k most similar nodes and the similarities\n\n    Args:\n        node_id (object): Id of the node that we want to search the similar nodes.\n        k (int): Number of most similar nodes to return\n        metric (str): metric to use as a similarity.\n        return_as_indices (bool): if return the nodes as indices (False), or as node ids (True)\n\n    Returns:\n        (list): list of k most similar nodes and list of similarities of the most similar nodes\n        (DataFrame): A list of k most similar nodes as a `pd.DataFrame[word: string, similarity: double]`\n    \"\"\"\n    node_index = self.node_ids.index(node_id)\n\n    ordered_indices, ordered_similarities = (\n        self.embeddings_.get_most_similar_embeddings(node_index, k, metric)\n    )\n\n    if not return_as_indices:\n        nodes = list(np.array(self.node_ids)[ordered_indices])\n    else:\n        nodes = list(ordered_indices)\n\n    return pd.DataFrame({\"word\": nodes, \"similarity\": ordered_similarities})\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.save","title":"save(file_name, save_embedding=False)","text":"

    Saves a GraphEmbedding to a compressed binary file with or without the embedding itself. It saves the graph's node names and the adjacency matrix as a sparse matrix.

    Parameters:

    Name Type Description Default file_name str

    The name of the file to which the GraphEmbedding will be saved.

    required save_embedding bool

    Since the embedding can be big and, if not trained, it is just a matrix of uniform random numbers it is possible avoiding saving it. In case it is not saved, loading the file will create a new random embedding. This parameter controls if the embedding is saved or not (the default value).

    False Source code in mercury/graph/embeddings/graphembeddings.py
    def save(self, file_name, save_embedding=False):\n    \"\"\"\n    Saves a GraphEmbedding to a compressed binary file with or without the embedding itself. It saves the graph's node names\n    and the adjacency matrix as a sparse matrix.\n\n    Args:\n        file_name (str): The name of the file to which the GraphEmbedding will be saved.\n        save_embedding (bool): Since the embedding can be big and, if not trained, it is just a matrix of uniform random numbers it is\n            possible avoiding saving it. In case it is not saved, loading the file will create a new random embedding. This parameter\n            controls if the embedding is saved or not (the default value).\n    \"\"\"\n    with bz2.BZ2File(file_name, \"w\") as f:\n        pickle.dump(GraphEmbedding.FILE_HEAD, f)\n        pickle.dump(save_embedding, f)\n        pickle.dump(self.embeddings_.dimension, f)\n\n        pickle.dump(self.node_ids, f)\n\n        np.save(f, self.r_ini)\n        np.save(f, self.r_len)\n        np.save(f, self.r_sum)\n        np.save(f, self.r_col)\n        np.save(f, self.r_wgt)\n\n        pickle.dump(self.TotW, f)\n\n        if save_embedding:\n            np.save(f, self.embeddings_.embeddings_matrix_)\n\n        pickle.dump(GraphEmbedding.FILE_END, f)\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec","title":"mercury.graph.embeddings.SparkNode2Vec(dimension=None, sampling_ratio=1.0, num_epochs=10, num_paths_per_node=1, batch_size=1000000, w2v_max_iter=1, w2v_num_partitions=1, w2v_step_size=0.025, w2v_min_count=5, path_cache=None, use_cached_rw=False, n_partitions_cache=10, load_file=None)","text":"

    Bases: BaseClass

    Create or reload a SparkNode2Vec embedding mapping the nodes of a graph.

    Parameters:

    Name Type Description Default dimension int

    The number of columns in the embedding. See the notes in Embeddings for details. (This parameter will be ignored when load_file is used.)

    None sampling_ratio float

    The proportion from the total number of nodes to be used in parallel at each step (whenever possible).

    1.0 num_epochs int

    Number of epochs. This is the total number of steps the iteration goes through. At each step, sampling_ratio times the total number of nodes paths will be computed in parallel.

    10 num_paths_per_node int

    The amount of random walks to source from each node.

    1 batch_size int

    This forces caching the random walks computed so far and breaks planning each time this number of epochs is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high number of epochs and nodes resource requirements for the active part of your random walks can be high. This allows to \"cache a continue\" so to say.

    1000000 w2v_max_iter int

    This is the Spark Word2Vec parameter maxIter, the default value is the original default value.

    1 w2v_num_partitions int

    This is the Spark Word2Vec parameter numPartitions, the default value is the original default value.

    1 w2v_step_size float

    This is the Spark Word2Vec parameter stepSize, the default value is the original default value.

    0.025 w2v_min_count int

    This is the Spark Word2Vec parameter minCount, the default value is the original default value (5). Is the minimum number of times that a node has to appear to generate an embedding.

    5 path_cache str

    Folder where random walks will be stored, the default value is None which entails that random walks will not be stored.

    None use_cached_rw bool

    Flag that indicates if random walks should be read from disk (hence, they will not be computed again). Setting this parameter to True requires a valid path_cache.

    False n_partitions_cache int

    Number of partitions that will be used when storing the random walks, to optimize read access. The default value is 10.

    10 load_file str

    (optional) The full path to a parquet file containing a serialized SparkNode2Vec object. This file must be created using SparkNode2Vec.save().

    None Source code in mercury/graph/embeddings/spark_node2vec.py
    def __init__(\n    self,\n    dimension=None,\n    sampling_ratio=1.0,\n    num_epochs=10,\n    num_paths_per_node=1,\n    batch_size=1000000,\n    w2v_max_iter=1,\n    w2v_num_partitions=1,\n    w2v_step_size=0.025,\n    w2v_min_count=5,\n    path_cache=None,\n    use_cached_rw=False,\n    n_partitions_cache=10,\n    load_file=None,\n):\n    \"\"\"\n    Create or reload a SparkNode2Vec embedding mapping the nodes of a graph.\n\n    Args:\n        dimension (int): The number of columns in the embedding. See note the notes in `Embeddings` for details. (This parameter will be\n            ignored when `load_file` is used.)\n        sampling_ratio (float): The proportion from the total number of nodes to be used in parallel at each step (whenever possible).\n        num_epochs (int): Number of epochs. This is the total number of steps the iteration goes through. At each step, sampling_ratio\n            times the total number of nodes paths will be computed in parallel.\n        num_paths_per_node (int): The amount of random walks to source from each node.\n        batch_size (int): This forces caching the random walks computed so far and breaks planning each time this number of epochs\n            is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to\n            set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high\n            number of epochs and nodes resource requirements for the active part of your random walks can be high. 
This allows to\n            \"cache a continue\" so to say.\n        w2v_max_iter (int): This is the Spark Word2Vec parameter maxIter, the default value is the original default value.\n        w2v_num_partitions (int): This is the Spark Word2Vec parameter numPartitions, the default value is the original default value.\n        w2v_step_size (float): This is the Spark Word2Vec parameter stepSize, the default value is the original default value.\n        w2v_min_count (int): This is the Spark Word2Vec parameter minCount, the default value is the original default value (5). Is the\n            minimum number of times that a node has to appear to generate an embedding.\n        path_cache (str): Folder where random walks will be stored, the default value is None which entails that random walks will not\n            be stored.\n        use_cached_rw (bool): Flag that indicates if random walks should be read from disk (hence, they will not be computed again).\n            Setting this parameter to True requires a valid path_cache.\n        n_partitions_cache (int): Number of partitions that will be used when storing the random walks, to optimize read access.\n            The default value is 10.\n        load_file (str): (optional) The full path to a parquet file containing a serialized SparkNode2Vec object. 
This file must be created\n            using SparkNode2Vec.save().\n    \"\"\"\n    self.dimension = dimension\n    self.sampling_ratio = sampling_ratio\n    self.num_epochs = num_epochs\n    self.num_paths_per_node = num_paths_per_node\n    self.batch_size = batch_size\n    self.w2v_max_iter = w2v_max_iter\n    self.w2v_num_partitions = w2v_num_partitions\n    self.w2v_step_size = w2v_step_size\n    self.w2v_min_count = w2v_min_count\n    self.path_cache = path_cache\n    self.use_cached_rw = use_cached_rw\n    self.n_partitions_cache = n_partitions_cache\n    self.load_file = load_file\n\n    if self.load_file is not None:\n        self._load(self.load_file)\n        return\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.embedding","title":"embedding()","text":"

    Return all embeddings.

    Returns:

    Type Description DataFrame

    All embeddings as a DataFrame[word: string, vector: vector].

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def embedding(self):\n    \"\"\"\n    Return all embeddings.\n\n    Returns:\n        (DataFrame): All embeddings as a `DataFrame[word: string, vector: vector]`.\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_.getVectors()\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.fit","title":"fit(G)","text":"

    Train the embedding by doing random walks.

    Random walk paths are available in attribute paths_.

    Parameters:

    Name Type Description Default G Graph

    A mercury.graph Graph object. The embedding will be created so that each row in the embedding maps a node ID in G. (This parameter will be ignored when load_file is used.)

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def fit(self, G: Graph):\n    \"\"\"\n    Train the embedding by doing random walks.\n\n    Random walk paths are available in attribute `paths_`.\n\n    Args:\n        G (mercury.graph.core.Graph): A `mercury.graph` Graph object. The embedding will be created so that each row in the embedding maps\n            a node ID in G. (This parameter will be ignored when `load_file` is used.)\n\n    Returns:\n        (self): Fitted self (or raises an error)\n    \"\"\"\n\n    if self.path_cache is None:\n        if self.use_cached_rw:\n            logging.warning(\n                \"Wrong options (use_cached_rw and no path_cache). \"\n                \"Paths will be recomputed.\"\n            )\n        self.use_cached_rw = False\n\n    if not self.use_cached_rw:\n        paths = (\n            self._run_rw(G)\n            .withColumn(\"size\", f.size(\"random_walks\"))\n            .where(f.col(\"size\") > 1)\n            .drop(\"size\")\n        )\n\n        if self.path_cache is not None:\n            (\n                paths.repartition(self.n_partitions_cache)\n                .write.mode(\"overwrite\")\n                .parquet(\"%s/block=0\" % self.path_cache)\n            )\n\n        if self.num_paths_per_node > 1:\n            for block_id in range(1, self.num_paths_per_node):\n                new_paths = (\n                    self._run_rw(G)\n                    .withColumn(\"size\", f.size(\"random_walks\"))\n                    .where(f.col(\"size\") > 1)\n                    .drop(\"size\")\n                )\n                if self.path_cache is None:\n                    paths = paths.unionByName(new_paths)\n                else:\n                    (\n                        new_paths.repartition(self.n_partitions_cache)\n                        .write.mode(\"overwrite\")\n                        .parquet(\"%s/block=%d\" % (self.path_cache, block_id))\n                    )\n                    # With this, we clear the persisted dataframe\n         
           new_paths.unpersist()\n\n    if self.path_cache is None:\n        self.paths_ = paths.persist()\n    else:\n        self.paths_ = (\n            SparkInterface()\n            .read_parquet(self.path_cache)\n            .drop(\"block\")\n            .repartition(self.n_partitions_cache)\n            .persist()\n        )\n\n    w2v = Word2Vec(\n        vectorSize=self.dimension,\n        maxIter=self.w2v_max_iter,\n        numPartitions=self.w2v_num_partitions,\n        stepSize=self.w2v_step_size,\n        inputCol=\"random_walks\",\n        outputCol=\"model\",\n        minCount=self.w2v_min_count,\n    )\n\n    self.node2vec_ = w2v.fit(self.paths_)\n\n    return self\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.get_most_similar_nodes","title":"get_most_similar_nodes(node_id, k=5)","text":"

    Returns the k most similar nodes and a similarity measure.

    Parameters:

    Name Type Description Default node_id str

    Id of the node we want to search.

    required k int

    Number of most similar nodes to return

    5

    Returns:

    Type Description DataFrame

    A list of k most similar nodes (using cosine similarity) as a DataFrame[word: string, similarity: double]

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def get_most_similar_nodes(self, node_id, k=5):\n    \"\"\"\n    Returns the k most similar nodes and a similarity measure.\n\n    Args:\n        node_id (str): Id of the node we want to search.\n        k (int): Number of most similar nodes to return\n\n    Returns:\n        (DataFrame): A list of k most similar nodes (using cosine similarity) as a `DataFrame[word: string, similarity: double]`\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_.findSynonyms(node_id, k)\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.model","title":"model()","text":"

    Returns the Spark Word2VecModel object.

    Returns:

    Type Description Word2VecModel

    The Spark Word2VecModel of the embedding to use its API directly.

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def model(self):\n    \"\"\"\n    Returns the Spark Word2VecModel object.\n\n    Returns:\n        (pyspark.ml.feature.Word2VecModel): The Spark Word2VecModel of the embedding to use its API directly.\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.save","title":"save(file_name)","text":"

    Saves the internal Word2VecModel to a human-readable (JSON) model metadata as a Parquet formatted data file.

    The model may be loaded using SparkNode2Vec(load_file='path/file')

    Parameters:

    Name Type Description Default file_name str

    The name of the file to which the Word2VecModel will be saved.

    required Source code in mercury/graph/embeddings/spark_node2vec.py
    def save(self, file_name):\n    \"\"\"\n    Saves the internal Word2VecModel to a human-readable (JSON) model metadata as a Parquet formatted data file.\n\n    The model may be loaded using SparkNode2Vec(load_file='path/file')\n\n    Args:\n        file_name (str): The name of the file to which the Word2VecModel will be saved.\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_.save(file_name)\n
    "},{"location":"reference/ml/","title":"mercury.graph.ml","text":""},{"location":"reference/ml/#mercury.graph.ml.LouvainCommunities","title":"mercury.graph.ml.LouvainCommunities(min_modularity_gain=0.001, max_pass=2, max_iter=10, resolution=1, all_partitions=True, verbose=True)","text":"

    Bases: BaseClass

    Class that defines the functions that run a PySpark implementation of the Louvain algorithm to find the partition that maximizes the modularity of an undirected graph (as in 1).

    This version of the algorithm differs from 1 in that the reassignment of nodes to new communities is calculated in parallel, not sequentially. That is, all nodes are reassigned at the same time and conflicts (i.e., 1 -> C2 and 2 -> C1) are resolved with a simple tie-breaking rule. This version also introduces the resolution parameter gamma, as in 2.

    1. Blondel V D, Guillaume J-L, Lambiotte R and Lefebvre E (2008). Fast unfolding of communities in large networks. Journal of Statistical Mechanics: Theory and Experiment, 2008. https://doi.org/10.1088/1742-5468/2008/10/p10008 \u21a9\u21a9

    2. Aynaud T, Blondel V D, Guillaume J-L and Lambiotte R (2013). Multilevel local optimization of modularity. Graph Partitioning (315--345), 2013.\u00a0\u21a9

    Parameters:

    Name Type Description Default min_modularity_gain float

    Modularity gain threshold between each pass. The algorithm stops if the gain in modularity between the current pass and the previous one is less than the given threshold.

    0.001 max_pass int

    Maximum number of passes.

    2 max_iter int

    Maximum number of iterations within each pass.

    10 resolution float

    The resolution parameter gamma. Its value must be greater or equal to zero. If resolution is less than 1, modularity favors larger communities, while values greater than 1 favor smaller communities.

    1 all_partitions bool

    If True, the function will return all the partitions found at each step of the algorithm (i.e., pass0, pass1, pass2, ..., pass20). If False, only the last (and best) partition will be returned.

    True verbose bool

    If True, print progress information during the Louvain algorithm execution. Defaults to True.

    True Source code in mercury/graph/ml/louvain.py
    def __init__(\n    self,\n    min_modularity_gain=1e-03,\n    max_pass=2,\n    max_iter=10,\n    resolution: Union[float, int] = 1,\n    all_partitions=True,\n    verbose=True,\n):\n    self.min_modularity_gain = min_modularity_gain\n    self.max_pass = max_pass\n    self.max_iter = max_iter\n    self.resolution = resolution\n    self.all_partitions = all_partitions\n    self.verbose = verbose\n\n    # Check resolution\n    if resolution < 0:\n        exceptionMsg = f\"Resolution value is {resolution} and cannot be < 0.\"\n        raise ValueError(exceptionMsg)\n
    "},{"location":"reference/ml/#mercury.graph.ml.LouvainCommunities.fit","title":"fit(g)","text":"

    Parameters:

    Name Type Description Default g Graph

    A mercury graph structure.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error).

    Source code in mercury/graph/ml/louvain.py
    def fit(self, g: Graph):\n    \"\"\"\n    Args:\n        g (Graph): A mercury graph structure.\n\n    Returns:\n        (self): Fitted self (or raises an error).\n    \"\"\"\n    edges = g.graphframe.edges\n\n    # Verify edges input\n    self._verify_data(\n        df=edges,\n        expected_cols_grouping=[\"src\", \"dst\"],\n        expected_cols_others=[\"weight\"],\n    )\n\n    # Init dataframe to be returned\n    ret = (\n        edges.selectExpr(\"src as id\")\n        .unionByName(edges.selectExpr(\"dst as id\"))\n        .distinct()\n        .withColumn(\"pass0\", F.row_number().over(Window.orderBy(\"id\")))\n    ).checkpoint()\n\n    # Convert edges to anonymized src's and dst's\n    edges = (\n        edges.selectExpr(\"src as src0\", \"dst as dst0\", \"weight\")\n        .join(other=ret.selectExpr(\"id as src0\", \"pass0 as src\"), on=\"src0\")\n        .join(other=ret.selectExpr(\"id as dst0\", \"pass0 as dst\"), on=\"dst0\")\n        .select(\"src\", \"dst\", \"weight\")\n    ).checkpoint()\n\n    # Calculate m and initialize modularity\n    m = self._calculate_m(edges)\n    modularity0 = -1.0\n\n    # Begin pass\n    canPass, _pass = True, 0\n    while canPass:\n\n        # Declare naive partition\n        p1 = (\n            edges.selectExpr(\"src as id\")\n            .unionByName(edges.selectExpr(\"dst as id\"))\n            .distinct()\n            .withColumn(\"c\", F.col(\"id\"))\n        )\n\n        # Begin iterations within pass\n        canIter, _iter = True, 0\n        # Carry reference to previously cached p2 to call unpersist()\n        prev_p2 = None\n        while canIter:\n\n            if _iter >= self.max_iter:\n                break\n\n            # Print progress\n            if self.verbose:\n                print(f\"Starting Pass {_pass} Iteration {_iter}.\")\n\n            # Create new partition and check if movements were made\n            p2 = self._reassign_all(edges, p1)\n            # Break complex lineage caused by 
loops first\n            p2 = p2.checkpoint()\n            p2.cache()\n\n            canIter = len(p2.where(\"cx != cj\").take(1)) > 0\n            if canIter:\n                p1 = p2.selectExpr(\"id\", \"cj as c\")\n            if prev_p2 is not None:\n                prev_p2.unpersist()\n            prev_p2 = p2\n            _iter += 1\n\n        # Calculate new modularity and update pass counter\n        modularity1 = self._calculate_modularity(edges=edges, partition=p1, m=m)\n\n        # Declare stopping criterion and update old modularity\n        canPass = (modularity1 - modularity0 > self.min_modularity_gain) and (\n            _pass < self.max_pass\n        )\n        modularity0 = modularity1\n\n        self.modularity_ = modularity0\n\n        # Update ret and compress graph\n        if canPass:\n            ret = ret.join(\n                other=p1.selectExpr(f\"id as pass{_pass}\", f\"c as pass{_pass + 1}\"),\n                on=f\"pass{_pass}\",\n            ).checkpoint()\n\n            edges = (\n                self._label_edges(edges, p1)\n                .select(\"cSrc\", \"cDst\", \"weight\")\n                .groupBy(\"cSrc\", \"cDst\")\n                .agg(F.sum(\"weight\").alias(\"weight\"))\n                .selectExpr(\"cSrc as src\", \"cDst as dst\", \"weight\")\n            ).checkpoint()\n\n        prev_p2.unpersist()\n        _pass += 1\n\n    # Return final dataframe with sorted columns\n    if self.all_partitions:\n\n        # Return sorted columns\n        cols = self._sort_passes(ret)\n        ret = ret.select(cols)\n\n    # Return final dataframe with id & community\n    else:\n        _last = self._last_pass(ret)\n        ret = ret.selectExpr(\"id as node_id\", f\"{_last} as cluster\")\n\n    self.labels_ = ret\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkRandomWalker","title":"mercury.graph.ml.SparkRandomWalker(num_epochs=10, batch_size=1, n_sampling_edges=None)","text":"

    Bases: BaseClass

    Class to perform random walks from a specific source_id node within a given Graph

    Parameters:

    Name Type Description Default num_epochs int

    Number of epochs. This is the total number of steps the iteration goes through.

    10 batch_size int

    This forces caching the random walks computed so far and breaks planning each time this number of epochs is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high number of epochs and nodes resource requirements for the active part of your random walks can be high. This allows to \"cache a continue\" so to say.

    1 n_sampling_edges int

    by setting this parameter you can limit at each timestep the number of new paths opened from each node. This is useful when the graph contains nodes with very high out-degree, where running the algorithm several epochs is not feasible. When using this parameter, the graph will consider only at most edge_sampling outgoing edges at each epoch for each path. If the last node of the path contains more than edge_sampling edges, the selected edges are sampled using their weight.

    None Source code in mercury/graph/ml/spark_randomwalker.py
    def __init__(self, num_epochs=10, batch_size=1, n_sampling_edges=None):\n    \"\"\"\n    Class to perform random walks from a specific source_id node within a given Graph\n\n    Args:\n        num_epochs (int): Number of epochs. This is the total number of steps the iteration goes through.\n        batch_size (int): This forces caching the random walks computed so far and breaks planning each time this number of epochs\n            is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to\n            set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high\n            number of epochs and nodes resource requirements for the active part of your random walks can be high. This allows to\n            \"cache a continue\" so to say.\n        n_sampling_edges (int): by setting this parameter you can limit at each timestep the number of new paths opened from each node.\n            This is useful when the graph contains nodes with very high out-degree, where running the algorithm several epochs is\n            not feasible. When using this parameter, the graph will consider only at most `edge_sampling` outgoing edges at each\n            epoch for each path. If the last node of the path contains more than `edge_sampling` the selected edges are sampled\n            using its weight.\n    \"\"\"\n    self.num_epochs = num_epochs\n    self.batch_size = batch_size\n    self.n_sampling_edges = n_sampling_edges\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkRandomWalker.fit","title":"fit(G, source_id)","text":"

    Perform random walks from a specific source_id node within a given Graph

    Parameters:

    Name Type Description Default G mercury.graph Graph asset

    A mercury.graph Graph

    required source_id int / str / list

    the source vertex or list for vertices to start the random walks.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Attribute paths_ contains a Spark Dataframe with a column random_walks containing an array of the elements of the path walked and another column with the corresponding weights. The weights represent the probability of following that specific path starting from source_id.

    Source code in mercury/graph/ml/spark_randomwalker.py
    def fit(self, G: Graph, source_id):\n    \"\"\"\n    Perform random walks from a specific source_id node within a given Graph\n\n    Args:\n        G (mercury.graph Graph asset): A `mercury.graph` Graph\n        source_id (int/str/list): the source vertex or list for vertices to start the random walks.\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    Attribute `paths_` contains a Spark Dataframe with a columns `random_walks` containing an array of the elements\n    of the path walked and another column with the corresponding weights. The weights represent the probability of\n    following that specific path starting from source_id.\n    \"\"\"\n    self.paths_ = self._run_rw(G, source_id)\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkSpreadingActivation","title":"mercury.graph.ml.SparkSpreadingActivation(attribute='influence', spreading_factor=0.2, transfer_function='weighted', steps=1, influenced_by=False)","text":"

    Bases: BaseClass

    This class is a model that represents a \u201cword-of-mouth\u201d scenario where a node influences his neighbors, from where the influence spreads to other neighbors, and so on.

    At the end of the diffusion process, we inspect the amount of influence received by each node. Using a threshold-based technique, a node that is currently not influenced can be declared to be a potential future one, based on the influence that has been accumulated.

    The diffusion model is based on Spreading Activation (SPA) techniques proposed in cognitive psychology and later used for trust metric computations. For more details, please see paper entitled \"Social Ties and their Relevance to Churn in Mobile Telecom Networks\"

    Parameters:

    Name Type Description Default attribute str

    Column name which will store the amount of influence spread

    'influence' spreading_factor float

    Percentage of influence to distribute. Low values favor influence proximity to the source of injection, while high values allow the influence to also reach nodes which are further away. It must be a value in the range (0,1). Default value is 0.2

    0.2 transfer_function str

    Allowed values: \"weighted\" or \"unweighted\". Once a node decides what fraction of energy to distribute, the next step is to decide what fraction of the energy is transferred to each neighbor. This is controlled by the Transfer Function. If \"weighted\" then the energy distributed along the directed edge depends on its relative weight compared to the sum of weights of all outgoing edges of X. If \"unweighted\", then the energy distributed along the edge is independent of its relative weight. 'weighted' steps int

    Number of steps to perform

    1 influenced_by bool

    if True, an extra column \"influenced_by\" is calculated which contains the seed nodes that have spread some influence to a given node. When True, the ids of the nodes cannot contain commas \",\". Note that seed_nodes will have at least their own (remaining) influence

    False Source code in mercury/graph/ml/spark_spreadactivation.py
    def __init__(\n    self,\n    attribute: str = \"influence\",\n    spreading_factor: float = 0.2,\n    transfer_function: str = \"weighted\",\n    steps: int = 1,\n    influenced_by: bool = False,\n):\n    self.attribute = attribute\n    self.spreading_factor = spreading_factor\n    self.transfer_function = transfer_function\n    self.steps = steps\n    self.influenced_by = influenced_by\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkSpreadingActivation.fit","title":"fit(g, seed_nodes)","text":"

    Perform all iterations of spread_activation

    Parameters:

    Name Type Description Default g Graph

    A mercury.graph Graph object.

    required seed_nodes Union[List, DataFrame]

    Collection of nodes that are the \"seed\" or are the source to spread the influence. It must be a pyspark dataframe with column 'id' or a python list

    required

    Returns:

    Type Description self

    Fitted self

    Source code in mercury/graph/ml/spark_spreadactivation.py
    def fit(\n    self,\n    g: Graph,\n    seed_nodes: Union[List, \"pyspark.sql.DataFrame\"],\n):\n    \"\"\"\n    Perform all iterations of spread_activation\n\n    Args:\n        g (mercury.graph.core.Graph): A `mercury.graph` Graph object.\n        seed_nodes (Union[List, pyspark.sql.DataFrame]): Collection of nodes that are the \"seed\" or are the source to spread\n            the influence. It must be pyspark dataframe with column 'id' or python list\n\n    Returns:\n        (self): Fitted self\n    \"\"\"\n\n    # Set seed nodes which are the source of influence\n    g = self._set_seed_nodes(g, seed_nodes)\n\n    # Compute degrees\n    g = self._compute_degrees(g)\n\n    # Number of iterations specified for spread activation\n    for _ in range(0, self.steps, 1):\n        g = self._spread_activation_step(\n            g,\n        )\n\n    # Graph with updated attributes\n    self.fitted_graph_ = g\n    # Influences as DataFrame\n    self.influences_ = self.fitted_graph_.nodes_as_dataframe().select(\n        \"id\", \"influence\"\n    )\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.SpectralClustering","title":"mercury.graph.ml.SpectralClustering(n_clusters=2, mode='networkx', max_iterations=10, random_state=0)","text":"

    Bases: BaseClass

    Implementation of the spectral clustering algorithm which detects communities inside a graph.

    Parameters:

    Name Type Description Default n_clusters int

    The number of clusters that you want to detect.

    2 random_state int

    Seed for reproducibility

    0 mode str

    Calculation mode. Pass 'networkx' for using pandas + networkx or 'spark' for spark + graphframes

    'networkx' max_iterations int

    Max iterations parameter (only used if mode==spark)

    10 Source code in mercury/graph/ml/spectral.py
    def __init__(\n    self, n_clusters=2, mode=\"networkx\", max_iterations=10, random_state=0\n):\n    self.n_clusters = n_clusters\n    self.mode = mode\n    self.max_iterations = max_iterations\n    self.random_state = random_state\n\n    if self.mode not in (\"networkx\", \"spark\"):\n        raise ValueError(\"Error: Mode must be either 'networkx' or 'spark'\")\n
    "},{"location":"reference/ml/#mercury.graph.ml.SpectralClustering.fit","title":"fit(graph)","text":"

    Find the optimal clusters of a given graph. The function returns nothing, but saves the clusters and the modularity in the object self.

    Parameters:

    Name Type Description Default graph Graph

    A mercury graph structure.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Source code in mercury/graph/ml/spectral.py
    def fit(self, graph: Graph):\n    \"\"\"\n    Find the optimal clusters of a given graph. The function returns nothing, but saves the clusters and\n    the modularity in the object self.\n\n    Args:\n        graph (Graph): A mercury graph structure.\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    \"\"\"\n    if self.mode == \"networkx\":\n        self._fit_networkx(graph)\n    else:\n        self._fit_spark(graph)\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.Transition","title":"mercury.graph.ml.Transition()","text":"

    Bases: BaseClass

    Create an interface class to manage the adjacency matrix of a directed graph as a transition matrix. This enables computing distributions of probabilities over the nodes after a given number of iterations.

    Source code in mercury/graph/ml/transition.py
    def __init__(self):\n    self.fitted_graph_ = None\n
    "},{"location":"reference/ml/#mercury.graph.ml.Transition.fit","title":"fit(G)","text":"

    Converts the adjacency matrix into a transition matrix. Transition matrices are used to compute the distribution of probability of being in each of the nodes (or states) of a directed graph (or Markov process). The distribution for state s is:

    • \\(s_t = T*s_{t-1}\\)

    Where:

    T is the transition matrix. After calling .fit(), the adjacency matrix is the transition matrix. You can use .to_pandas() to see it. \(s_{t-1}\) is the previous state.

    What .fit() does is scaling the non-zero rows to make them sum 1 as they are probability distributions and make the zero rows recurrent states. A recurrent state is a final state, a state whose next state is itself.

    Parameters:

    Name Type Description Default G Graph

    A mercury.graph Graph.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error).

    Note

    If created using NetworkX directly, the name of the weight must be 'weight' and must be positive. The recommended way to create the graph is using .set_row() which will always name the weight as 'weight' but does not check the value.

    Source code in mercury/graph/ml/transition.py
    def fit(self, G: Graph):\n    \"\"\"\n    Converts the adjacency matrix into a transition matrix. Transition matrices are used to compute the distribution of probability\n    of being in each of the nodes (or states) of a directed graph (or Markov process). The distribution for state s is:\n\n    * $s_t = T*s_{t-1}$\n\n    Where:\n\n    T is the transition matrix. After calling.fit(), the adjacency matrix is the transition matrix. You can use .to_pandas() to see it.\n    $s_{t-1}$ is the previous state.\n\n    What .fit() does is scaling the non-zero rows to make them sum 1 as they are probability distributions and make the zero rows\n    recurrent states. A recurrent state is a final state, a state whose next state is itself.\n\n    Args:\n        G (Graph): A `mercury.graph` Graph.\n\n    Returns:\n        (self): Fitted self (or raises an error).\n\n    Note:\n        If created using NetworkX directly, the name of the weight must be 'weight' and must be positive. The recommended way\n        to create the graph is using .set_row() which will always name the weight as 'weight' but does not check the value.\n\n    \"\"\"\n    names = list(G.networkx.nodes)\n    adj_m = nx.adjacency_matrix(G.networkx, weight=\"weight\", dtype=float)\n\n    with warnings.catch_warnings():\n        warnings.simplefilter(\"ignore\")\n\n        for i in range(adj_m.shape[0]):\n            row = adj_m[[i], :]\n            tot = row.sum()\n\n            if tot == 0:\n                row[0, i] = 1\n            else:\n                row = row / tot\n\n            adj_m[[i], :] = row\n\n    df = pd.DataFrame(adj_m.todense(), index=names, columns=names)\n    self.fitted_graph_ = Graph(nx.from_pandas_adjacency(df, create_using=nx.DiGraph))\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.Transition.to_pandas","title":"to_pandas(num_iterations=1)","text":"

    Returns the adjacency (which is the transition matrix after fit() was called) for a given number of iterations as a pandas dataframe with labeled rows and columns.

    Parameters:

    Name Type Description Default num_iterations int

    If you want to compute the matrix for a different number of iterations, k, you can use this argument to raise the matrix to any non negative integer, since \\(s_{t+k} = T^k*s_t\\)

    1

    Returns:

    Type Description DataFrame

    The transition matrix for num_iterations.

    Note

    This method does not automatically call fit(). This allows inspecting the adjacency matrix as a pandas dataframe. The result of computing num_iterations will not make sense if fit() has not been called before to_pandas().

    Source code in mercury/graph/ml/transition.py
    def to_pandas(self, num_iterations=1):\n    \"\"\"\n    Returns the adjacency (which is the transition matrix after `fit()` was called) for a given number of iterations as a pandas\n    dataframe with labeled rows and columns.\n\n    Args:\n        num_iterations (int): If you want to compute the matrix for a different number of iterations, k, you can use this argument to\n            raise the matrix to any non negative integer, since $s_{t+k} = T^k*s_t$\n\n    Returns:\n        (pd.DataFrame): The transition matrix for num_iterations.\n\n    Note:\n        This method does not automatically call `fit()`. This allows inspecting the adjacency matrix as a pandas dataframe.\n        The result of computing num_iterations will not make sense if `fit()` has not been called before `to_pandas()`.\n\n    \"\"\"\n    if self.fitted_graph_ is None:\n        raise ValueError(\"Error: fit() must be called first.\")\n\n    names = list(self.fitted_graph_.networkx.nodes)\n    adj_m = nx.adjacency_matrix(self.fitted_graph_.networkx, weight=\"weight\").todense()\n\n    if num_iterations != 1:\n        adj_m = matrix_power(adj_m, num_iterations)\n\n    return pd.DataFrame(adj_m, index=names, columns=names)\n
    "},{"location":"reference/viz/","title":"mercury.graph.viz","text":""},{"location":"reference/viz/#mercury.graph.viz.Moebius","title":"mercury.graph.viz.Moebius(G)","text":"

    Moebius class for visualizing graphs using JavaScript and HTML.

    Usage
    from mercury.graph.viz import Moebius\n\nG = ... # A graph object\nmoebius = Moebius(G)\nmoebius.show()\n

    Attributes:

    Name Type Description G Graph

    The graph to be visualized.

    use_spark bool

    Flag indicating if Spark is used.

    front_pat str

    Path to the frontend resources.

    _int_id_map dict

    A dictionary mapping node IDs to integer IDs.

    name() dict

    The instance name of the object required by the JS callback mechanism.

    Source code in mercury/graph/viz/moebius.py
    def __init__(self, G):\n\n    if HTML is None:\n        raise ImportError('IPython is not installed')\n\n    self.G = G\n    self.use_spark = self.G._as_networkx is None\n    self.front_pat = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + '/frontend'\n    self._int_id_map = {node['id'] : i for i, node in enumerate(self.G.nodes)}\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.name","title":"name property","text":"

    Get the instance name of the object which is required by the JS callback mechanism.

    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.FHT","title":"FHT(fn)","text":"

    Syntactic sugar for display(HTML(filename = fn))

    Source code in mercury/graph/viz/moebius.py
    def FHT(self, fn):\n    \"\"\"\n    Syntactic sugar for display(HTML(filename = fn))\n    \"\"\"\n\n    display(HTML(filename = fn))\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.FJS","title":"FJS(fn)","text":"

    Syntactic sugar for display(Javascript(filename = fn))

    Source code in mercury/graph/viz/moebius.py
    def FJS(self, fn):\n    \"\"\"\n    Syntactic sugar for display(Javascript(filename = fn))\n    \"\"\"\n\n    display(Javascript(filename = fn))\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.JS","title":"JS(s)","text":"

    Syntactic sugar for display(Javascript())

    Source code in mercury/graph/viz/moebius.py
    def JS(self, s):\n    \"\"\"\n    Syntactic sugar for display(Javascript())\n    \"\"\"\n\n    display(Javascript(s))\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.__getitem__","title":"__getitem__(item)","text":"

    Add support for the [] operator.

    Source code in mercury/graph/viz/moebius.py
    def __getitem__(self, item):\n    \"\"\"\n    Add support for the [] operator.\n    \"\"\"\n\n    return self._get_adjacent_nodes_moebius(item)\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.__str__","title":"__str__()","text":"

    Convert the object via str()

    Source code in mercury/graph/viz/moebius.py
    def __str__(self):\n    \"\"\"\n    Convert the object via str()\n    \"\"\"\n\n    return 'Moebius(%s)' % str(self.G)\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.node_or_edge_config","title":"node_or_edge_config(text_is=None, color_is=None, colors=None, size_is=None, size_range=None, size_scale='linear')","text":"

    Create a node_config or edge_config configuration dictionary for show() in an understandable way.

    Parameters:

    Name Type Description Default text_is str

    The node/edge attribute to be displayed as text. Use the string id to draw the node id (regardless of the column having another name) or any valid node attribute name.

    None color_is str

    A categorical node/edge attribute that can be represented as a color. This will also enable a legend interface where categories can be individually shown or hidden.

    None colors dict

    The colors for each category defined as a dictionary. The keys are possible outcomes of category. The values are html RGB strings. E.g., .draw(category = 'size', colors = {'big' : '#c0a080', 'small' : '#a0c080'}) where 'big' and 'small' are possible values of the category 'size'.

    None size_is str

    The node attribute to be displayed as the size of the nodes. Use the string id to set the node id (regardless of the column having another name) or any valid node attribute name. See the options in the Moebius configuration menu to set minimum, maximum sizes, linear or logarithmic scale, etc.

    None size_range List of two numbers

    Combined with edge_label, this parameter controls the values in the variable that correspond to the minimum and maximum displayed sizes. The values below or equal the first value will be displayed with the base radius (that depends on the zoom) and the values above or equal to the second value will be shown with the maximum radius.

    None size_scale (linear, power, sqrt or log)

    Combined with edge_label, the scale used to convert the value in the variable to the displayed radius.

    'linear'

    Returns:

    Type Description dict

    The node configuration dictionary

    Source code in mercury/graph/viz/moebius.py
    def node_or_edge_config(self, text_is = None, color_is = None, colors = None, size_is = None, size_range = None, size_scale = 'linear'):\n    \"\"\"\n    Create a `node_config` or `edge_config` configuration dictionary for `show()` in an understandable way.\n\n    Args:\n        text_is (str): The node/edge attribute to be displayed as text. Use the string `\u00ecd` to draw the node id (regardless of the\n            column having another name) or any valid node attribute name.\n        color_is (str): A categorical node/edge attribute that can be represented as a color. This will also enable a legend interface\n            where categories can be individually shown or hidden.\n        colors (dict): The colors for each category defined as a dictionary. The keys are possible outcomes of category.\n            The values are html RGB strings. E.g., .draw(category = 'size', colors = {'big' : '#c0a080', 'small' : '#a0c080'})\n            where 'big' and 'small' are possible values of the category 'size'.\n        size_is (str): The node attribute to be displayed as the size of the nodes. Use the string `id` to set the node id (regardless\n            of the column having another name) or any valid node attribute name. See the options in the Moebius configuration menu to\n            set minimum, maximum sizes, linear or logarithmic scale, etc.\n        size_range (List of two numbers): Combined with edge_label, this parameter controls the values in the variable that\n            correspond to the minimum and maximum displayed sizes. 
The values below or equal the first value will be displayed with the\n            base radius (that depends on the zoom) and the values above or equal to the second value will be shown with the maximum\n            radius.\n        size_scale ('linear', 'power', 'sqrt' or 'log'): Combined with edge_label, the scale used to convert the value in the variable\n            to the displayed radius.\n\n    Returns:\n        (dict): The node configuration dictionary\n    \"\"\"\n\n    config = {}\n\n    if text_is is not None:\n        config['label'] = text_is\n\n    if color_is is not None:\n        config['color'] = color_is\n\n    if colors is not None:\n        config['color_palette'] = colors\n    else:\n        config['color_palette'] = {}\n\n    if size_is is None:\n        config['size_thresholds'] = []\n    else:\n        config['size'] = size_is\n\n        if size_range is None:\n            config['size_thresholds'] = []\n        else:\n            assert type(size_range) == list and len(size_range) == 2\n            config['size_thresholds'] = size_range\n\n        if size_scale != 'linear':\n            assert size_scale in {'power', 'sqrt', 'log'}\n\n        config['scale'] = size_scale\n\n    return config\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.show","title":"show(initial_id=None, initial_depth=1, node_config=None, edge_config=None)","text":"

    Start the interactive graph visualization in a Jupyter notebook.

    Parameters:

    Name Type Description Default initial_id str

    The id of the node to start the visualization.

    None initial_depth int

    The initial depth of the graph (starting with initial_id as 0) to be shown.

    1 node_config dict

    A node configuration dictionary created by node_config().

    None edge_config dict

    An edge configuration dictionary created by edge_config().

    None Source code in mercury/graph/viz/moebius.py
    def show(self, initial_id = None, initial_depth = 1, node_config = None, edge_config = None):\n    \"\"\"\n    Start the interactive graph visualization in a Jupyter notebook.\n\n    Args:\n        initial_id (str): The id of the node to start the visualization.\n        initial_depth (int): The initial depth of the graph (starting with `initial_id` as 0) to be shown.\n        node_config (dict): A node configuration dictionary created by `node_config()`.\n        edge_config (dict): An edge configuration dictionary created by `edge_config()`.\n    \"\"\"\n\n    if initial_id is None:\n        initial_id = next(iter(self._int_id_map))\n\n    initial_json = self._get_adjacent_nodes_moebius(initial_id, depth = initial_depth)\n\n    if node_config is None:\n        node_config = self.node_or_edge_config()\n\n    if edge_config is None:\n        edge_config = self.node_or_edge_config()\n\n    self._load_moebius_js(initial_json, self.name, node_config, edge_config)\n
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"mercury-graph","text":"

    mercury-graph is a Python library that offers graph analytics capabilities with a technology-agnostic API, enabling users to apply a curated range of performant and scalable algorithms and utilities regardless of the underlying data framework. The consistent, scikit-like interface abstracts away the complexities of internal transformations, allowing users to effortlessly switch between different graph representations to leverage optimized algorithms implemented using pure Python, numba, networkx and PySpark GraphFrames.

    Currently implemented submodules in mercury.graph include:

    • mercury.graph.core, with the main classes of the library that create and store the graphs' data and properties.

    • mercury.graph.ml, with graph theory and machine learning algorithms such as Louvain community detection, spectral clustering, Markov chains, spreading activation-based diffusion models and graph random walkers.

    • mercury.graph.embeddings, with classes that calculate graph embeddings in different ways, such as following the Node2Vec algorithm.

    • mercury.graph.viz, with capabilities for graph visualization.

    "},{"location":"#repository","title":"Repository","text":"

    The website for the GitHub repository can be found here.

    "},{"location":"reference/core/","title":"mercury.graph.core","text":""},{"location":"reference/core/#mercury.graph.core.Graph","title":"mercury.graph.core.Graph(data=None, keys=None, nodes=None)","text":"

    This is the main class in mercury.graph.

    This class seamlessly abstracts the underlying technology used to represent the graph. You can create a graph passing the following objects to the constructor:

    • A pandas DataFrame containing edges (with a keys dictionary to specify the columns and possibly a nodes DataFrame)
    • A pyspark DataFrame containing edges (with a keys dictionary to specify the columns and possibly a nodes DataFrame)
    • A networkx graph
    • A graphframes graph

    Bear in mind that the graph object is immutable. This means that you can't modify the graph object once it has been created. If you want to modify it, you have to create a new graph object.

    The graph object provides:

    • Properties to access the graph in different formats (networkx, graphframes, dgl)
    • Properties with metrics and summary information that are calculated on demand and technology independent.
    • It is inherited by other graph classes in mercury-graph providing ML algorithms such as graph embedding, visualization, etc.

    Using this class from the other classes in mercury-graph:

    The other classes in mercury-graph define models or functionalities that are based on graphs. They use a Scikit-learn-like API to interact with the graph object. This means that the graph object is passed to the class constructor and the class follow the Scikit-learn conventions. It is recommended to follow the same conventions when creating your own classes to work with mercury-graph.

    The conventions can be found here:

    • Scikit API
    • On scikit conventions

    Parameters:

    Name Type Description Default data (DataFrame, Graph or DataFrame)

    The data to create the graph from. It can be a pandas DataFrame, a networkx Graph, a pyspark DataFrame, or a Graphframe. In case it already contains a graph (networkx or graphframes), the keys and nodes arguments are ignored.

    None keys dict

    A dictionary with keys to specify the columns in the data DataFrame. The keys are:

    • 'src': The name of the column with the source node.
    • 'dst': The name of the column with the destination node.
    • 'id': The name of the column with the node id.
    • 'weight': The name of the column with the edge weight.
    • 'directed': A boolean to specify if the graph is directed. (Only for pyspark DataFrames)

    When the keys argument is not provided or the key is missing, the default values are:

    • 'src': 'src'
    • 'dst': 'dst'
    • 'id': 'id'
    • 'weight': 'weight'
    • 'directed': True
    None nodes DataFrame

    A pandas DataFrame or a pyspark DataFrame with the nodes data. (Only when data is pandas or pyspark DataFrame and with the same type as data) If not given, the nodes are inferred from the edges DataFrame.

    None Source code in mercury/graph/core/graph.py
    def __init__(self, data = None, keys = None, nodes = None):\n    self._as_networkx = None\n    self._as_graphframe = None\n    self._as_dgl = None\n    self._degree = None\n    self._in_degree = None\n    self._out_degree = None\n    self._closeness_centrality = None\n    self._betweenness_centrality = None\n    self._pagerank = None\n    self._connected_components = None\n    self._nodes_colnames = None\n    self._edges_colnames = None\n\n    self._number_of_nodes = 0\n    self._number_of_edges = 0\n    self._node_ix = 0\n    self._is_directed = False\n    self._is_weighted = False\n\n    self._init_values = {k: v for k, v in locals().items() if k in inspect.signature(self.__init__).parameters}\n\n    if type(data) == pd.core.frame.DataFrame:\n        self._from_pandas(data, nodes, keys)\n        return\n\n    if isinstance(data, nx.Graph):      # This is the most general case, including: ...Graph, ...DiGraph and ...MultiGraph\n        self._from_networkx(data)\n        return\n\n    spark_int = SparkInterface()\n\n    if pyspark_installed and graphframes_installed:\n        if type(data) == spark_int.type_spark_dataframe:\n            self._from_dataframe(data, nodes, keys)\n            return\n\n        if type(data) == spark_int.type_graphframe:\n            self._from_graphframes(data)\n            return\n\n    raise ValueError('Invalid input data. (Expected: pandas DataFrame, a networkx Graph, a pyspark DataFrame, a graphframes Graph.)')\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.betweenness_centrality","title":"betweenness_centrality property","text":"

    Returns the betweenness centrality of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.closeness_centrality","title":"closeness_centrality property","text":"

    Returns the closeness centrality of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.connected_components","title":"connected_components property","text":"

    Returns the connected components of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.degree","title":"degree property","text":"

    Returns the degree of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.dgl","title":"dgl property","text":"

    Returns the graph as a DGL graph.

    If the graph has not been converted to a DGL graph yet, it will be converted and cached for future use.

    Returns:

    Type Description DGLGraph

    The graph represented as a DGL graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.edges","title":"edges property","text":"

    Returns an iterator over the edges in the graph.

    Returns:

    Type Description EdgeIterator

    An iterator object that allows iterating over the edges in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.edges_colnames","title":"edges_colnames property","text":"

    Returns the column names of the edges DataFrame.

    "},{"location":"reference/core/#mercury.graph.core.Graph.graphframe","title":"graphframe property","text":"

    Returns the graph as a GraphFrame.

    If the graph has not been converted to a GraphFrame yet, it will be converted and cached for future use.

    Returns:

    Type Description GraphFrame

    The graph represented as a GraphFrame.

    "},{"location":"reference/core/#mercury.graph.core.Graph.in_degree","title":"in_degree property","text":"

    Returns the in-degree of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.is_directed","title":"is_directed property","text":"

    Returns True if the graph is directed, False otherwise.

    Note

    Graphs created using graphframes are always directed. The way around it is to add the reverse edges to the graph. This can be done by creating the Graph with pyspark DataFrame() and defining a key 'directed' set as False in the dict argument. Otherwise, the graph will be considered directed even if these reversed edges have been created by other means this class cannot be aware of.

    "},{"location":"reference/core/#mercury.graph.core.Graph.is_weighted","title":"is_weighted property","text":"

    Returns True if the graph is weighted, False otherwise.

    A graph is considered weighted if it has a column named 'weight' in the edges DataFrame or the column has a different name and that name is passed in the dict argument as the 'weight' key.

    "},{"location":"reference/core/#mercury.graph.core.Graph.networkx","title":"networkx property","text":"

    Returns the graph representation as a NetworkX graph.

    If the graph has not been converted to NetworkX format yet, it will be converted and cached for future use.

    Returns:

    Type Description Graph

    The graph representation as a NetworkX graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes","title":"nodes property","text":"

    Returns an iterator over all the nodes in the graph.

    Returns:

    Type Description NodeIterator

    An iterator that yields each node in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes_colnames","title":"nodes_colnames property","text":"

    Returns the column names of the nodes DataFrame.

    "},{"location":"reference/core/#mercury.graph.core.Graph.number_of_edges","title":"number_of_edges property","text":"

    Returns the number of edges in the graph.

    Returns:

    Type Description int

    The number of edges in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.number_of_nodes","title":"number_of_nodes property","text":"

    Returns the number of nodes in the graph.

    Returns:

    Type Description int

    The number of nodes in the graph.

    "},{"location":"reference/core/#mercury.graph.core.Graph.out_degree","title":"out_degree property","text":"

    Returns the out-degree of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.pagerank","title":"pagerank property","text":"

    Returns the PageRank of each node in the graph as a Python dictionary.

    "},{"location":"reference/core/#mercury.graph.core.Graph.edges_as_dataframe","title":"edges_as_dataframe()","text":"

    Returns the edges as a pyspark DataFrame.

    If the graph is represented as a graphframes graph, the edges are extracted from it. Otherwise, the edges are converted from the pandas DataFrame representation. The columns used as the source and destination nodes are always named 'src' and 'dst', respectively, regardless of the original column names passed to the constructor.

    Source code in mercury/graph/core/graph.py
    def edges_as_dataframe(self):\n    \"\"\"\n    Returns the edges as a pyspark DataFrame.\n\n    If the graph is represented as a graphframes graph, the edges are extracted from it. Otherwise, the edges are converted from the\n    pandas DataFrame representation. The columns used as the source and destination nodes are always named 'src' and 'dst',\n    respectively, regardless of the original column names passed to the constructor.\n    \"\"\"\n    if self._as_graphframe is not None:\n        return self._as_graphframe.edges\n\n    return SparkInterface().spark.createDataFrame(self.edges_as_pandas())\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.edges_as_pandas","title":"edges_as_pandas()","text":"

    Returns the edges as a pandas DataFrame.

    If the graph is represented as a networkx graph, the edges are extracted from it. Otherwise, the graphframes graph will be used. This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The columns used as the source and destination nodes are always named 'src' and 'dst', respectively.

    Source code in mercury/graph/core/graph.py
    def edges_as_pandas(self):\n    \"\"\"\n    Returns the edges as a pandas DataFrame.\n\n    If the graph is represented as a networkx graph, the edges are extracted from it. Otherwise, the graphframes graph will be used.\n    This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The columns used\n    as the source and destination nodes are always named 'src' and 'dst', respectively.\n    \"\"\"\n    if self._as_networkx is not None:\n        edges_data = self._as_networkx.edges(data = True)\n        edges_df   = pd.DataFrame([(src, dst, attr) for src, dst, attr in edges_data], columns = ['src', 'dst', 'attributes'])\n\n        attrs_df   = pd.json_normalize(edges_df['attributes'])\n\n        return pd.concat([edges_df.drop('attributes', axis = 1), attrs_df], axis = 1)\n\n    return self.graphframe.edges.toPandas()\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes_as_dataframe","title":"nodes_as_dataframe()","text":"

    Returns the nodes as a pyspark DataFrame.

    If the graph is represented as a graphframes graph, the nodes are extracted from it. Otherwise, the nodes are converted from the pandas DataFrame representation. The column used as the node id is always named 'id', regardless of the original column name passed to the constructor.

    Source code in mercury/graph/core/graph.py
    def nodes_as_dataframe(self):\n    \"\"\"\n    Returns the nodes as a pyspark DataFrame.\n\n    If the graph is represented as a graphframes graph, the nodes are extracted from it. Otherwise, the nodes are converted from the\n    pandas DataFrame representation. The column used as the node id is always named 'id', regardless of the original column name passed\n    to the constructor.\n    \"\"\"\n    if self._as_graphframe is not None:\n        return self._as_graphframe.vertices\n\n    return SparkInterface().spark.createDataFrame(self.nodes_as_pandas())\n
    "},{"location":"reference/core/#mercury.graph.core.Graph.nodes_as_pandas","title":"nodes_as_pandas()","text":"

    Returns the nodes as a pandas DataFrame.

    If the graph is represented as a networkx graph, the nodes are extracted from it. Otherwise, the graphframes graph will be used. This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The column used as the node id is always named 'id'.

    Source code in mercury/graph/core/graph.py
    def nodes_as_pandas(self):\n    \"\"\"\n    Returns the nodes as a pandas DataFrame.\n\n    If the graph is represented as a networkx graph, the nodes are extracted from it. Otherwise, the graphframes graph will be used.\n    This dataset may differ from possible pandas DataFrame passed to the constructor in the column names and order. The column used\n    as the node id is always named 'id'.\n    \"\"\"\n    if self._as_networkx is not None:\n        nodes_data = self._as_networkx.nodes(data = True)\n        nodes_df   = pd.DataFrame([(node, attr) for node, attr in nodes_data], columns = ['id', 'attributes'])\n\n        attrs_df = pd.json_normalize(nodes_df['attributes'])\n\n        return pd.concat([nodes_df.drop('attributes', axis = 1), attrs_df], axis = 1)\n\n    return self.graphframe.vertices.toPandas()\n
    "},{"location":"reference/core/#mercury.graph.core.SparkInterface","title":"mercury.graph.core.SparkInterface(config=None, session=None)","text":"

    A class that provides an interface for interacting with Apache Spark, graphframes and dgl.

    Attributes:

    Name Type Description _spark_session SparkSession

    The shared Spark session.

    _graphframes module

    The shared graphframes namespace.

    Methods:

    Name Description _create_spark_session

    Creates a Spark session.

    spark

    Property that returns the shared Spark session.

    pyspark

    Property that returns the pyspark namespace.

    graphframes

    Property that returns the shared graphframes namespace.

    dgl

    Property that returns the shared dgl namespace.

    read_csv

    Reads a CSV file into a DataFrame.

    read_parquet

    Reads a Parquet file into a DataFrame.

    read_json

    Reads a JSON file into a DataFrame.

    read_text

    Reads a text file into a DataFrame.

    read

    Reads a file into a DataFrame.

    sql

    Executes a SQL query.

    udf

    Registers a user-defined function (UDF).

    stop

    Stops the Spark session.

    Parameters:

    Name Type Description Default config dict

    A dictionary of Spark configuration options. If not provided, the configuration in the global variable default_spark_config will be used.

    None Source code in mercury/graph/core/spark_interface.py
    def __init__(self, config=None, session=None):\n    if SparkInterface._spark_session is None:\n        if session is not None:\n            SparkInterface._spark_session = session\n        else:\n            SparkInterface._spark_session = self._create_spark_session(config)\n            # Set checkpoint directory\n            SparkInterface._spark_session.sparkContext.setCheckpointDir(\".checkpoint\")\n\n    if SparkInterface._graphframes is None and graphframes_installed:\n        SparkInterface._graphframes = gf\n\n    if SparkInterface._dgl is None and dgl_installed:\n        SparkInterface._dgl = dgl\n
    "},{"location":"reference/embeddings/","title":"mercury.graph.embeddings","text":""},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings","title":"mercury.graph.embeddings.Embeddings(dimension, num_elements=0, mean=0, sd=1, learn_step=3, bidirectional=False)","text":"

    Bases: BaseClass

    This class holds a matrix object that is interpreted as the embeddings for any list of objects, not only the nodes of a graph. You can see this class as the internal object holding the embedding for other classes such as class GraphEmbedding.

    Parameters:

    Name Type Description Default dimension int

    The number of columns in the embedding. See note below.

    required num_elements int

    The number of rows in the embedding. You can leave this empty on creation and then use initialize_as() to automatically match the nodes in a graph.

    0 mean float

    The (expected) mean of the initial values.

    0 sd float

    The (expected) standard deviation of the initial values.

    1 learn_step float

    The size of the learning step by which elements are approached or moved away. Units are degrees along an ellipse.

    3 bidirectional bool

    Should the changes apply only to the elements of first column (False) or to both.

    False Note

    On dimension: Embeddings cannot be zero (that is against the whole concept). Smaller dimension embeddings can only hold few elements without introducing spurious correlations by some form of 'birthday attack' phenomenon as elements increase. Later it is very hard to get rid of that spurious 'knowledge'.

    Solution: With many elements, you have to go to a high enough dimension even if the structure is simple. Pretending to fit many embeddings in a low dimension without them being correlated is like pretending to plot a trillion random points in a square centimeter while keeping them 1 mm apart from each other: It's simply impossible!

    Source code in mercury/graph/embeddings/embeddings.py
    def __init__(\n    self, dimension, num_elements=0, mean=0, sd=1, learn_step=3, bidirectional=False\n):\n    self.dimension = dimension\n    self.num_elements = num_elements\n    self.mean = mean\n    self.sd = sd\n    self.learn_step = learn_step\n    self.bidirectional = bidirectional\n\n    if self.num_elements > 0:\n        self.embeddings_matrix_ = np.random.normal(\n            self.mean, self.sd, (self.num_elements, self.dimension)\n        )\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings.as_numpy","title":"as_numpy()","text":"

    Return the embedding as a numpy matrix where each row is an embedding.

    Source code in mercury/graph/embeddings/embeddings.py
    def as_numpy(self):\n    \"\"\"\n    Return the embedding as a numpy matrix where each row is an embedding.\n    \"\"\"\n    if not hasattr(self, \"embeddings_matrix_\"):\n        return\n\n    return self.embeddings_matrix_\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings.fit","title":"fit(converge=None, diverge=None)","text":"

    Apply a learning step to the embedding.

    Parameters:

    Name Type Description Default converge numpy matrix of two columns

    A matrix of indices to elements meaning (first column) should be approached to (second column).

    None diverge numpy matrix of two columns

    A matrix of indices to elements meaning (first column) should be moved away from (second column).

    None

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Note

    Embeddings start being randomly distributed and hold no structure other than spurious correlations. Each time you apply a learning step by calling this method, you are tweaking the embedding to approach some rows and/or move others away. You can use both converge and diverge or just one of them and call this as many times as you want with a varying learning step. A proxy of how much an embedding can learn can be estimated by measuring how row correlations are converging towards some asymptotic values.

    Source code in mercury/graph/embeddings/embeddings.py
    def fit(self, converge=None, diverge=None):\n    \"\"\"\n    Apply a learning step to the embedding.\n\n    Args:\n        converge (numpy matrix of two columns): A matrix of indices to elements meaning (first column) should be approached to\n            (second column).\n        diverge (numpy matrix of two columns): A matrix of indices to elements meaning (first column) should be moved away from\n            (second column).\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    Note:\n        Embeddings start being randomly distributed and hold no structure other than spurious correlations. Each time you apply a\n        learning step by calling this method, you are tweaking the embedding to approach some rows and/or move others away. You can use\n        both converge and diverge or just one of them and call this as many times you want with varying learning step. A proxy of how\n        much an embedding can learn can be estimated by measuring how row correlations are converging towards some asymptotic values.\n    \"\"\"\n\n    w = self.learn_step * np.pi / 180\n\n    cos_w = np.cos(w)\n    sin_w = np.sin(w)\n\n    if converge is not None:\n        self.embeddings_matrix_ = _elliptic_rotate(\n            self.embeddings_matrix_, converge[:, 0], converge[:, 1], cos_w, sin_w\n        )\n\n        if self.bidirectional:\n            self.embeddings_matrix_ = _elliptic_rotate(\n                self.embeddings_matrix_,\n                converge[:, 1],\n                converge[:, 0],\n                cos_w,\n                sin_w,\n            )\n\n    if diverge is not None:\n        self.embeddings_matrix_ = _elliptic_rotate(\n            self.embeddings_matrix_, diverge[:, 0], diverge[:, 1], cos_w, -sin_w\n        )\n\n        if self.bidirectional:\n            self.embeddings_matrix_ = _elliptic_rotate(\n                self.embeddings_matrix_, diverge[:, 1], diverge[:, 0], cos_w, -sin_w\n            )\n\n    return self\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.Embeddings.get_most_similar_embeddings","title":"get_most_similar_embeddings(index, k=5, metric='cosine')","text":"

    Given an index of a vector in the embedding matrix, returns the k most similar embeddings in the matrix

    Parameters:

    Name Type Description Default index int

    index of the vector in the matrix that we want to compute the similar embeddings

    required k int

    Number of most similar embeddings to return

    5 metric str

    metric to use as a similarity.

    'cosine'

    Returns:

    Type Description list

    list of k most similar nodes as indices and list of similarities of the most similar nodes

    Source code in mercury/graph/embeddings/embeddings.py
    def get_most_similar_embeddings(self, index, k=5, metric=\"cosine\"):\n    \"\"\"\n    Given an index of a vector in the embedding matrix, returns the k most similar embeddings in the matrix\n\n    Args:\n        index (int): index of the vector in the matrix that we want to compute the similar embeddings\n        k (int): Number of most similar embeddings to return\n        metric (str): metric to use as a similarity.\n\n    Returns:\n        (list): list of k most similar nodes as indices and list of similarities of the most similar nodes\n    \"\"\"\n    if metric == \"cosine\":\n        similarities = (\n            1\n            - cdist(\n                np.expand_dims(self.as_numpy()[index], axis=0),\n                self.as_numpy(),\n                \"cosine\",\n            )[0]\n        )\n\n    elif metric == \"euclidean\":\n        similarities = 1 / (\n            1\n            + cdist(\n                np.expand_dims(self.as_numpy()[index], axis=0),\n                self.as_numpy(),\n                \"euclidean\",\n            )[0]\n        )\n\n    else:\n        raise ValueError(\"Unknown Distance Metric: %s\" % metric)\n\n    ordered_indices = np.argsort(similarities)[::-1][1 : (k + 1)]\n    ordered_similarities = similarities[ordered_indices]\n\n    return ordered_indices, ordered_similarities\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding","title":"mercury.graph.embeddings.GraphEmbedding(dimension=None, n_jumps=None, max_per_epoch=None, learn_step=3, bidirectional=False, load_file=None)","text":"

    Bases: BaseClass

    Create an embedding mapping the nodes of a graph.

    Includes contributions by David Muelas Recuenco.

    Parameters:

    Name Type Description Default dimension int

    The number of columns in the embedding. See the notes in Embeddings for details. (This parameter will be ignored when load_file is used.)

    None n_jumps int

    Number of random jumps from node to node.

    None max_per_epoch int

    Maximum number of consecutive random jumps without randomly jumping outside the edges. Note that normal random jumps are not going to explore outside a connected component.

    None learn_step float

    The size of the learning step by which elements are approached or moved away. Units are degrees along an ellipse.

    3 bidirectional bool

    Should the changes apply only to the elements of first column (False) or to both.

    False load_file str

    (optional) The full path to a binary file containing a serialized GraphEmbedding object. This file must be created using GraphEmbedding.save().

    None

    GraphEmbedding class constructor

    Source code in mercury/graph/embeddings/graphembeddings.py
    def __init__(\n    self,\n    dimension=None,\n    n_jumps=None,\n    max_per_epoch=None,\n    learn_step=3,\n    bidirectional=False,\n    load_file=None,\n):\n    \"\"\"GraphEmbedding class constructor\"\"\"\n    if load_file is None and (dimension is None or n_jumps is None):\n        raise ValueError(\n            \"Parameters dimension and n_jumps are required when load_file is None\"\n        )\n\n    self.dimension = dimension\n    self.n_jumps = n_jumps\n    self.max_per_epoch = max_per_epoch\n    self.learn_step = learn_step\n    self.bidirectional = bidirectional\n    self.load_file = load_file\n\n    if self.load_file is not None:\n        self._load(self.load_file)\n        return\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.__getitem__","title":"__getitem__(arg)","text":"

    Method to access rows in the embedding by ID.

    Parameters:

    Name Type Description Default arg same as node ids in the graph

    A node ID in the graph

    required

    Returns:

    Type Description matrix

    A numpy matrix of one row

    Source code in mercury/graph/embeddings/graphembeddings.py
    def __getitem__(self, arg):\n    \"\"\"\n    Method to access rows in the embedding by ID.\n\n    Args:\n        arg (same as node ids in the graph): A node ID in the graph\n\n    Returns:\n        (numpy.matrix): A numpy matrix of one row\n\n    \"\"\"\n    return self.embeddings_.embeddings_matrix_[self.node_ids.index(arg)]\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.embedding","title":"embedding()","text":"

    Return the internal Embeddings object.

    Returns:

    Type Description Embeddings

    The embedding which is a dense matrix of float that can be used with numpy functions.

    Source code in mercury/graph/embeddings/graphembeddings.py
    def embedding(self):\n    \"\"\"\n    Return the internal Embeddings object.\n\n    Returns:\n        (mercury.graph.embeddings.Embeddings): The embedding which is a dense matrix of `float` that can be used with `numpy` functions.\n    \"\"\"\n    if not hasattr(self, \"embeddings_\"):\n        return\n\n    return self.embeddings_\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.fit","title":"fit(g)","text":"

    Train the embedding by doing random walks.

    Parameters:

    Name Type Description Default g mercury.graph Graph asset

    A mercury.graph Graph object. The embedding will be created so that each row in the embedding maps a node ID in g.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    This does a number of random walks starting from a random node and selecting the edges with a probability that is proportional to the weight of the edge. If the destination node also has outgoing edges, the next step will start from it, otherwise, a new random node will be selected. The edges visited (concordant pairs) will get some reinforcement in the embedding while a randomly selected non-existent edges will get divergence instead (discordant pairs).

    Internally, this stores the node IDS of the node visited and calls Embeddings.fit() to transfer the structure to the embedding. Of course, it can be called many times on the same GraphEmbedding.

    Source code in mercury/graph/embeddings/graphembeddings.py
    def fit(self, g: Graph):\n    \"\"\"\n    Train the embedding by doing random walks.\n\n    Args:\n        g (mercury.graph Graph asset): A `mercury.graph` Graph object. The embedding will be created so that each row in the embedding maps\n            a node ID in g.\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    This does a number of random walks starting from a random node and selecting the edges with a probability that is proportional to\n    the weight of the edge. If the destination node also has outgoing edges, the next step will start from it, otherwise, a new random\n    node will be selected. The edges visited (concordant pairs) will get some reinforcement in the embedding while a randomly selected\n    non-existent edges will get divergence instead (discordant pairs).\n\n    Internally, this stores the node IDS of the node visited and calls Embeddings.fit() to transfer the structure to the embedding.\n    Of course, it can be called many times on the same GraphEmbedding.\n\n    \"\"\"\n\n    self.node_ids = list(g.networkx.nodes)\n\n    j_matrix = nx.adjacency_matrix(g.networkx)\n\n    N = j_matrix.shape[1]\n    M = j_matrix.nnz\n\n    self.r_ini = np.zeros(N, dtype=int)\n    self.r_len = np.zeros(N, dtype=int)\n    self.r_sum = np.zeros(N, dtype=float)\n    self.r_col = np.zeros(M, dtype=int)\n    self.r_wgt = np.zeros(M, dtype=float)\n\n    i = 0\n    for r in range(N):\n        self.r_ini[r] = i\n\n        i_col = j_matrix[[r], :].nonzero()[1]\n        L = len(i_col)\n\n        self.r_len[r] = L\n\n        for k in range(L):\n            c = i_col[k]\n            w = j_matrix[r, c]\n\n            self.r_sum[r] += w\n            self.r_col[i] = c\n            self.r_wgt[i] = w\n\n            i += 1\n\n    self.TotW = sum(self.r_sum)\n\n    converge, diverge = _random_walks(\n        self.r_ini,\n        self.r_len,\n        self.r_sum,\n        self.r_col,\n        self.r_wgt,\n        self.TotW,\n        self.n_jumps,\n    
    self.max_per_epoch if self.max_per_epoch is not None else self.n_jumps,\n    )\n\n    self.embeddings_ = Embeddings(\n        dimension=self.dimension,\n        num_elements=len(self.node_ids),\n        learn_step=self.learn_step,\n        bidirectional=self.bidirectional,\n    )\n    self.embeddings_.fit(converge, diverge)\n\n    return self\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.get_most_similar_nodes","title":"get_most_similar_nodes(node_id, k=5, metric='cosine', return_as_indices=False)","text":"

    Returns the k most similar nodes and the similarities

    Parameters:

    Name Type Description Default node_id object

    Id of the node that we want to search the similar nodes.

    required k int

    Number of most similar nodes to return

    5 metric str

    metric to use as a similarity.

    'cosine' return_as_indices bool

    whether to return the nodes as indices (False), or as node ids (True)

    False

    Returns:

    Type Description list

    list of k most similar nodes and list of similarities of the most similar nodes

    DataFrame

    A list of k most similar nodes as a pd.DataFrame[word: string, similarity: double]

    Source code in mercury/graph/embeddings/graphembeddings.py
    def get_most_similar_nodes(\n    self, node_id, k=5, metric=\"cosine\", return_as_indices=False\n):\n    \"\"\"\n    Returns the k most similar nodes and the similarities\n\n    Args:\n        node_id (object): Id of the node that we want to search the similar nodes.\n        k (int): Number of most similar nodes to return\n        metric (str): metric to use as a similarity.\n        return_as_indices (bool): if return the nodes as indices (False), or as node ids (True)\n\n    Returns:\n        (list): list of k most similar nodes and list of similarities of the most similar nodes\n        (DataFrame): A list of k most similar nodes as a `pd.DataFrame[word: string, similarity: double]`\n    \"\"\"\n    node_index = self.node_ids.index(node_id)\n\n    ordered_indices, ordered_similarities = (\n        self.embeddings_.get_most_similar_embeddings(node_index, k, metric)\n    )\n\n    if not return_as_indices:\n        nodes = list(np.array(self.node_ids)[ordered_indices])\n    else:\n        nodes = list(ordered_indices)\n\n    return pd.DataFrame({\"word\": nodes, \"similarity\": ordered_similarities})\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.GraphEmbedding.save","title":"save(file_name, save_embedding=False)","text":"

    Saves a GraphEmbedding to a compressed binary file with or without the embedding itself. It saves the graph's node names and the adjacency matrix as a sparse matrix.

    Parameters:

    Name Type Description Default file_name str

    The name of the file to which the GraphEmbedding will be saved.

    required save_embedding bool

    Since the embedding can be big and, if not trained, it is just a matrix of uniform random numbers it is possible avoiding saving it. In case it is not saved, loading the file will create a new random embedding. This parameter controls if the embedding is saved or not (the default value).

    False Source code in mercury/graph/embeddings/graphembeddings.py
    def save(self, file_name, save_embedding=False):\n    \"\"\"\n    Saves a GraphEmbedding to a compressed binary file with or without the embedding itself. It saves the graph's node names\n    and the adjacency matrix as a sparse matrix.\n\n    Args:\n        file_name (str): The name of the file to which the GraphEmbedding will be saved.\n        save_embedding (bool): Since the embedding can be big and, if not trained, it is just a matrix of uniform random numbers it is\n            possible avoiding saving it. In case it is not saved, loading the file will create a new random embedding. This parameter\n            controls if the embedding is saved or not (the default value).\n    \"\"\"\n    with bz2.BZ2File(file_name, \"w\") as f:\n        pickle.dump(GraphEmbedding.FILE_HEAD, f)\n        pickle.dump(save_embedding, f)\n        pickle.dump(self.embeddings_.dimension, f)\n\n        pickle.dump(self.node_ids, f)\n\n        np.save(f, self.r_ini)\n        np.save(f, self.r_len)\n        np.save(f, self.r_sum)\n        np.save(f, self.r_col)\n        np.save(f, self.r_wgt)\n\n        pickle.dump(self.TotW, f)\n\n        if save_embedding:\n            np.save(f, self.embeddings_.embeddings_matrix_)\n\n        pickle.dump(GraphEmbedding.FILE_END, f)\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec","title":"mercury.graph.embeddings.SparkNode2Vec(dimension=None, sampling_ratio=1.0, num_epochs=10, num_paths_per_node=1, batch_size=1000000, w2v_max_iter=1, w2v_num_partitions=1, w2v_step_size=0.025, w2v_min_count=5, path_cache=None, use_cached_rw=False, n_partitions_cache=10, load_file=None)","text":"

    Bases: BaseClass

    Create or reload a SparkNode2Vec embedding mapping the nodes of a graph.

    Parameters:

    Name Type Description Default dimension int

    The number of columns in the embedding. See the notes in Embeddings for details. (This parameter will be ignored when load_file is used.)

    None sampling_ratio float

    The proportion from the total number of nodes to be used in parallel at each step (whenever possible).

    1.0 num_epochs int

    Number of epochs. This is the total number of steps the iteration goes through. At each step, sampling_ratio times the total number of nodes paths will be computed in parallel.

    10 num_paths_per_node int

    The amount of random walks to source from each node.

    1 batch_size int

    This forces caching the random walks computed so far and breaks planning each time this number of epochs is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high number of epochs and nodes resource requirements for the active part of your random walks can be high. This allows to \"cache a continue\" so to say.

    1000000 w2v_max_iter int

    This is the Spark Word2Vec parameter maxIter, the default value is the original default value.

    1 w2v_num_partitions int

    This is the Spark Word2Vec parameter numPartitions, the default value is the original default value.

    1 w2v_step_size float

    This is the Spark Word2Vec parameter stepSize, the default value is the original default value.

    0.025 w2v_min_count int

    This is the Spark Word2Vec parameter minCount, the default value is the original default value (5). Is the minimum number of times that a node has to appear to generate an embedding.

    5 path_cache str

    Folder where random walks will be stored, the default value is None which entails that random walks will not be stored.

    None use_cached_rw bool

    Flag that indicates if random walks should be read from disk (hence, they will not be computed again). Setting this parameter to True requires a valid path_cache.

    False n_partitions_cache int

    Number of partitions that will be used when storing the random walks, to optimize read access. The default value is 10.

    10 load_file str

    (optional) The full path to a parquet file containing a serialized SparkNode2Vec object. This file must be created using SparkNode2Vec.save().

    None Source code in mercury/graph/embeddings/spark_node2vec.py
    def __init__(\n    self,\n    dimension=None,\n    sampling_ratio=1.0,\n    num_epochs=10,\n    num_paths_per_node=1,\n    batch_size=1000000,\n    w2v_max_iter=1,\n    w2v_num_partitions=1,\n    w2v_step_size=0.025,\n    w2v_min_count=5,\n    path_cache=None,\n    use_cached_rw=False,\n    n_partitions_cache=10,\n    load_file=None,\n):\n    \"\"\"\n    Create or reload a SparkNode2Vec embedding mapping the nodes of a graph.\n\n    Args:\n        dimension (int): The number of columns in the embedding. See note the notes in `Embeddings` for details. (This parameter will be\n            ignored when `load_file` is used.)\n        sampling_ratio (float): The proportion from the total number of nodes to be used in parallel at each step (whenever possible).\n        num_epochs (int): Number of epochs. This is the total number of steps the iteration goes through. At each step, sampling_ratio\n            times the total number of nodes paths will be computed in parallel.\n        num_paths_per_node (int): The amount of random walks to source from each node.\n        batch_size (int): This forces caching the random walks computed so far and breaks planning each time this number of epochs\n            is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to\n            set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high\n            number of epochs and nodes resource requirements for the active part of your random walks can be high. 
This allows to\n            \"cache a continue\" so to say.\n        w2v_max_iter (int): This is the Spark Word2Vec parameter maxIter, the default value is the original default value.\n        w2v_num_partitions (int): This is the Spark Word2Vec parameter numPartitions, the default value is the original default value.\n        w2v_step_size (float): This is the Spark Word2Vec parameter stepSize, the default value is the original default value.\n        w2v_min_count (int): This is the Spark Word2Vec parameter minCount, the default value is the original default value (5). Is the\n            minimum number of times that a node has to appear to generate an embedding.\n        path_cache (str): Folder where random walks will be stored, the default value is None which entails that random walks will not\n            be stored.\n        use_cached_rw (bool): Flag that indicates if random walks should be read from disk (hence, they will not be computed again).\n            Setting this parameter to True requires a valid path_cache.\n        n_partitions_cache (int): Number of partitions that will be used when storing the random walks, to optimize read access.\n            The default value is 10.\n        load_file (str): (optional) The full path to a parquet file containing a serialized SparkNode2Vec object. 
This file must be created\n            using SparkNode2Vec.save().\n    \"\"\"\n    self.dimension = dimension\n    self.sampling_ratio = sampling_ratio\n    self.num_epochs = num_epochs\n    self.num_paths_per_node = num_paths_per_node\n    self.batch_size = batch_size\n    self.w2v_max_iter = w2v_max_iter\n    self.w2v_num_partitions = w2v_num_partitions\n    self.w2v_step_size = w2v_step_size\n    self.w2v_min_count = w2v_min_count\n    self.path_cache = path_cache\n    self.use_cached_rw = use_cached_rw\n    self.n_partitions_cache = n_partitions_cache\n    self.load_file = load_file\n\n    if self.load_file is not None:\n        self._load(self.load_file)\n        return\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.embedding","title":"embedding()","text":"

    Return all embeddings.

    Returns:

    Type Description DataFrame

    All embeddings as a DataFrame[word: string, vector: vector].

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def embedding(self):\n    \"\"\"\n    Return all embeddings.\n\n    Returns:\n        (DataFrame): All embeddings as a `DataFrame[word: string, vector: vector]`.\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_.getVectors()\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.fit","title":"fit(G)","text":"

    Train the embedding by doing random walks.

    Random walk paths are available in attribute paths_.

    Parameters:

    Name Type Description Default G Graph

    A mercury.graph Graph object. The embedding will be created so that each row in the embedding maps a node ID in G. (This parameter will be ignored when load_file is used.)

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def fit(self, G: Graph):\n    \"\"\"\n    Train the embedding by doing random walks.\n\n    Random walk paths are available in attribute `paths_`.\n\n    Args:\n        G (mercury.graph.core.Graph): A `mercury.graph` Graph object. The embedding will be created so that each row in the embedding maps\n            a node ID in G. (This parameter will be ignored when `load_file` is used.)\n\n    Returns:\n        (self): Fitted self (or raises an error)\n    \"\"\"\n\n    if self.path_cache is None:\n        if self.use_cached_rw:\n            logging.warning(\n                \"Wrong options (use_cached_rw and no path_cache). \"\n                \"Paths will be recomputed.\"\n            )\n        self.use_cached_rw = False\n\n    if not self.use_cached_rw:\n        paths = (\n            self._run_rw(G)\n            .withColumn(\"size\", f.size(\"random_walks\"))\n            .where(f.col(\"size\") > 1)\n            .drop(\"size\")\n        )\n\n        if self.path_cache is not None:\n            (\n                paths.repartition(self.n_partitions_cache)\n                .write.mode(\"overwrite\")\n                .parquet(\"%s/block=0\" % self.path_cache)\n            )\n\n        if self.num_paths_per_node > 1:\n            for block_id in range(1, self.num_paths_per_node):\n                new_paths = (\n                    self._run_rw(G)\n                    .withColumn(\"size\", f.size(\"random_walks\"))\n                    .where(f.col(\"size\") > 1)\n                    .drop(\"size\")\n                )\n                if self.path_cache is None:\n                    paths = paths.unionByName(new_paths)\n                else:\n                    (\n                        new_paths.repartition(self.n_partitions_cache)\n                        .write.mode(\"overwrite\")\n                        .parquet(\"%s/block=%d\" % (self.path_cache, block_id))\n                    )\n                    # With this, we clear the persisted dataframe\n         
           new_paths.unpersist()\n\n    if self.path_cache is None:\n        self.paths_ = paths.persist()\n    else:\n        self.paths_ = (\n            SparkInterface()\n            .read_parquet(self.path_cache)\n            .drop(\"block\")\n            .repartition(self.n_partitions_cache)\n            .persist()\n        )\n\n    w2v = Word2Vec(\n        vectorSize=self.dimension,\n        maxIter=self.w2v_max_iter,\n        numPartitions=self.w2v_num_partitions,\n        stepSize=self.w2v_step_size,\n        inputCol=\"random_walks\",\n        outputCol=\"model\",\n        minCount=self.w2v_min_count,\n    )\n\n    self.node2vec_ = w2v.fit(self.paths_)\n\n    return self\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.get_most_similar_nodes","title":"get_most_similar_nodes(node_id, k=5)","text":"

    Returns the k most similar nodes and a similarity measure.

    Parameters:

    Name Type Description Default node_id str

    Id of the node we want to search.

    required k int

    Number of most similar nodes to return

    5

    Returns:

    Type Description DataFrame

    A list of k most similar nodes (using cosine similarity) as a DataFrame[word: string, similarity: double]

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def get_most_similar_nodes(self, node_id, k=5):\n    \"\"\"\n    Returns the k most similar nodes and a similarity measure.\n\n    Args:\n        node_id (str): Id of the node we want to search.\n        k (int): Number of most similar nodes to return\n\n    Returns:\n        (DataFrame): A list of k most similar nodes (using cosine similarity) as a `DataFrame[word: string, similarity: double]`\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_.findSynonyms(node_id, k)\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.model","title":"model()","text":"

    Returns the Spark Word2VecModel object.

    Returns:

    Type Description Word2VecModel

    The Spark Word2VecModel of the embedding to use its API directly.

    Source code in mercury/graph/embeddings/spark_node2vec.py
    def model(self):\n    \"\"\"\n    Returns the Spark Word2VecModel object.\n\n    Returns:\n        (pyspark.ml.feature.Word2VecModel): The Spark Word2VecModel of the embedding to use its API directly.\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_\n
    "},{"location":"reference/embeddings/#mercury.graph.embeddings.SparkNode2Vec.save","title":"save(file_name)","text":"

    Saves the internal Word2VecModel as human-readable (JSON) model metadata together with a Parquet formatted data file.

    The model may be loaded using SparkNode2Vec(load_file='path/file')

    Parameters:

    Name Type Description Default file_name str

    The name of the file to which the Word2VecModel will be saved.

    required Source code in mercury/graph/embeddings/spark_node2vec.py
    def save(self, file_name):\n    \"\"\"\n    Saves the internal Word2VecModel to a human-readable (JSON) model metadata as a Parquet formatted data file.\n\n    The model may be loaded using SparkNode2Vec(load_file='path/file')\n\n    Args:\n        file_name (str): The name of the file to which the Word2VecModel will be saved.\n    \"\"\"\n    if not hasattr(self, \"node2vec_\"):\n        return\n\n    return self.node2vec_.save(file_name)\n
    "},{"location":"reference/ml/","title":"mercury.graph.ml","text":""},{"location":"reference/ml/#mercury.graph.ml.LouvainCommunities","title":"mercury.graph.ml.LouvainCommunities(min_modularity_gain=0.001, max_pass=2, max_iter=10, resolution=1, all_partitions=True, verbose=True)","text":"

    Bases: BaseClass

    Class that defines the functions that run a PySpark implementation of the Louvain algorithm to find the partition that maximizes the modularity of an undirected graph (as in 1).

    This version of the algorithm differs from 1 in that the reassignment of nodes to new communities is calculated in parallel, not sequentially. That is, all nodes are reassigned at the same time and conflicts (i.e., 1 -> C2 and 2 -> C1) are resolved with a simple tie-breaking rule. This version also introduces the resolution parameter gamma, as in 2.

    Contributed by Arturo Soberon Cedillo, Jose Antonio Guzman Vazquez and Isaac Dodanim Hernandez Garcia.

    1. Blondel V D, Guillaume J-L, Lambiotte R and Lefebvre E (2008). Fast unfolding of communities in large networks. Journal of Statistical Mechanics: Theory and Experiment, 2008. https://doi.org/10.1088/1742-5468/2008/10/p10008 \u21a9\u21a9

    2. Aynaud T, Blondel V D, Guillaume J-L and Lambiotte R (2013). Multilevel local optimization of modularity. Graph Partitioning (315--345), 2013.\u00a0\u21a9

    Parameters:

    Name Type Description Default min_modularity_gain float

    Modularity gain threshold between each pass. The algorithm stops if the gain in modularity between the current pass and the previous one is less than the given threshold.

    0.001 max_pass int

    Maximum number of passes.

    2 max_iter int

    Maximum number of iterations within each pass.

    10 resolution float

    The resolution parameter gamma. Its value must be greater or equal to zero. If resolution is less than 1, modularity favors larger communities, while values greater than 1 favor smaller communities.

    1 all_partitions bool

    If True, the function will return all the partitions found at each step of the algorithm (i.e., pass0, pass1, pass2, ..., pass20). If False, only the last (and best) partition will be returned.

    True verbose bool

    If True, print progress information during the Louvain algorithm execution. Defaults to True.

    True Source code in mercury/graph/ml/louvain.py
    def __init__(\n    self,\n    min_modularity_gain=1e-03,\n    max_pass=2,\n    max_iter=10,\n    resolution: Union[float, int] = 1,\n    all_partitions=True,\n    verbose=True,\n):\n    self.min_modularity_gain = min_modularity_gain\n    self.max_pass = max_pass\n    self.max_iter = max_iter\n    self.resolution = resolution\n    self.all_partitions = all_partitions\n    self.verbose = verbose\n\n    # Check resolution\n    if resolution < 0:\n        exceptionMsg = f\"Resolution value is {resolution} and cannot be < 0.\"\n        raise ValueError(exceptionMsg)\n
    "},{"location":"reference/ml/#mercury.graph.ml.LouvainCommunities.fit","title":"fit(g)","text":"

    Parameters:

    Name Type Description Default g Graph

    A mercury graph structure.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error).

    Source code in mercury/graph/ml/louvain.py
    def fit(self, g: Graph):\n    \"\"\"\n    Args:\n        g (Graph): A mercury graph structure.\n\n    Returns:\n        (self): Fitted self (or raises an error).\n    \"\"\"\n    edges = g.graphframe.edges\n\n    # Verify edges input\n    self._verify_data(\n        df=edges,\n        expected_cols_grouping=[\"src\", \"dst\"],\n        expected_cols_others=[\"weight\"],\n    )\n\n    # Init dataframe to be returned\n    ret = (\n        edges.selectExpr(\"src as id\")\n        .unionByName(edges.selectExpr(\"dst as id\"))\n        .distinct()\n        .withColumn(\"pass0\", F.row_number().over(Window.orderBy(\"id\")))\n    ).checkpoint()\n\n    # Convert edges to anonymized src's and dst's\n    edges = (\n        edges.selectExpr(\"src as src0\", \"dst as dst0\", \"weight\")\n        .join(other=ret.selectExpr(\"id as src0\", \"pass0 as src\"), on=\"src0\")\n        .join(other=ret.selectExpr(\"id as dst0\", \"pass0 as dst\"), on=\"dst0\")\n        .select(\"src\", \"dst\", \"weight\")\n    ).checkpoint()\n\n    # Calculate m and initialize modularity\n    m = self._calculate_m(edges)\n    modularity0 = -1.0\n\n    # Begin pass\n    canPass, _pass = True, 0\n    while canPass:\n\n        # Declare naive partition\n        p1 = (\n            edges.selectExpr(\"src as id\")\n            .unionByName(edges.selectExpr(\"dst as id\"))\n            .distinct()\n            .withColumn(\"c\", F.col(\"id\"))\n        )\n\n        # Begin iterations within pass\n        canIter, _iter = True, 0\n        # Carry reference to previously cached p2 to call unpersist()\n        prev_p2 = None\n        while canIter:\n\n            if _iter >= self.max_iter:\n                break\n\n            # Print progress\n            if self.verbose:\n                print(f\"Starting Pass {_pass} Iteration {_iter}.\")\n\n            # Create new partition and check if movements were made\n            p2 = self._reassign_all(edges, p1)\n            # Break complex lineage caused by 
loops first\n            p2 = p2.checkpoint()\n            p2.cache()\n\n            canIter = len(p2.where(\"cx != cj\").take(1)) > 0\n            if canIter:\n                p1 = p2.selectExpr(\"id\", \"cj as c\")\n            if prev_p2 is not None:\n                prev_p2.unpersist()\n            prev_p2 = p2\n            _iter += 1\n\n        # Calculate new modularity and update pass counter\n        modularity1 = self._calculate_modularity(edges=edges, partition=p1, m=m)\n\n        # Declare stopping criterion and update old modularity\n        canPass = (modularity1 - modularity0 > self.min_modularity_gain) and (\n            _pass < self.max_pass\n        )\n        modularity0 = modularity1\n\n        self.modularity_ = modularity0\n\n        # Update ret and compress graph\n        if canPass:\n            ret = ret.join(\n                other=p1.selectExpr(f\"id as pass{_pass}\", f\"c as pass{_pass + 1}\"),\n                on=f\"pass{_pass}\",\n            ).checkpoint()\n\n            edges = (\n                self._label_edges(edges, p1)\n                .select(\"cSrc\", \"cDst\", \"weight\")\n                .groupBy(\"cSrc\", \"cDst\")\n                .agg(F.sum(\"weight\").alias(\"weight\"))\n                .selectExpr(\"cSrc as src\", \"cDst as dst\", \"weight\")\n            ).checkpoint()\n\n        prev_p2.unpersist()\n        _pass += 1\n\n    # Return final dataframe with sorted columns\n    if self.all_partitions:\n\n        # Return sorted columns\n        cols = self._sort_passes(ret)\n        ret = ret.select(cols)\n\n    # Return final dataframe with id & community\n    else:\n        _last = self._last_pass(ret)\n        ret = ret.selectExpr(\"id as node_id\", f\"{_last} as cluster\")\n\n    self.labels_ = ret\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkRandomWalker","title":"mercury.graph.ml.SparkRandomWalker(num_epochs=10, batch_size=1, n_sampling_edges=None)","text":"

    Bases: BaseClass

    Class to perform random walks from a specific source_id node within a given Graph

    Parameters:

    Name Type Description Default num_epochs int

    Number of epochs. This is the total number of steps the iteration goes through.

    10 batch_size int

    This forces caching the random walks computed so far and breaks planning each time this number of epochs is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high number of epochs and nodes resource requirements for the active part of your random walks can be high. This allows to \"cache a continue\" so to say.

    1 n_sampling_edges int

    by setting this parameter you can limit at each timestep the number of new paths opened from each node. This is useful when the graph contains nodes with very high out-degree, where running the algorithm several epochs is not feasible. When using this parameter, the graph will consider only at most edge_sampling outgoing edges at each epoch for each path. If the last node of the path contains more than edge_sampling edges, the selected edges are sampled using its weight.

    None Source code in mercury/graph/ml/spark_randomwalker.py
    def __init__(self, num_epochs=10, batch_size=1, n_sampling_edges=None):\n    \"\"\"\n    Class to perform random walks from a specific source_id node within a given Graph\n\n    Args:\n        num_epochs (int): Number of epochs. This is the total number of steps the iteration goes through.\n        batch_size (int): This forces caching the random walks computed so far and breaks planning each time this number of epochs\n            is reached. The default value is a high number to avoid this entering at all. In really large jobs, you may want to\n            set this parameter to avoid possible overflows even if it can add some extra time to the process. Note that with a high\n            number of epochs and nodes resource requirements for the active part of your random walks can be high. This allows to\n            \"cache a continue\" so to say.\n        n_sampling_edges (int): by setting this parameter you can limit at each timestep the number of new paths opened from each node.\n            This is useful when the graph contains nodes with very high out-degree, where running the algorithm several epochs is\n            not feasible. When using this parameter, the graph will consider only at most `edge_sampling` outgoing edges at each\n            epoch for each path. If the last node of the path contains more than `edge_sampling` the selected edges are sampled\n            using its weight.\n    \"\"\"\n    self.num_epochs = num_epochs\n    self.batch_size = batch_size\n    self.n_sampling_edges = n_sampling_edges\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkRandomWalker.fit","title":"fit(G, source_id)","text":"

    Perform random walks from a specific source_id node within a given Graph

    Parameters:

    Name Type Description Default G mercury.graph Graph asset

    A mercury.graph Graph

    required source_id int / str / list

    the source vertex or list of vertices to start the random walks.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Attribute paths_ contains a Spark Dataframe with a columns random_walks containing an array of the elements of the path walked and another column with the corresponding weights. The weights represent the probability of following that specific path starting from source_id.

    Source code in mercury/graph/ml/spark_randomwalker.py
    def fit(self, G: Graph, source_id):\n    \"\"\"\n    Perform random walks from a specific source_id node within a given Graph\n\n    Args:\n        G (mercury.graph Graph asset): A `mercury.graph` Graph\n        source_id (int/str/list): the source vertex or list for vertices to start the random walks.\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    Attribute `paths_` contains a Spark Dataframe with a columns `random_walks` containing an array of the elements\n    of the path walked and another column with the corresponding weights. The weights represent the probability of\n    following that specific path starting from source_id.\n    \"\"\"\n    self.paths_ = self._run_rw(G, source_id)\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkSpreadingActivation","title":"mercury.graph.ml.SparkSpreadingActivation(attribute='influence', spreading_factor=0.2, transfer_function='weighted', steps=1, influenced_by=False)","text":"

    Bases: BaseClass

    This class is a model that represents a \u201cword-of-mouth\u201d scenario where a node influences his neighbors, from where the influence spreads to other neighbors, and so on.

    At the end of the diffusion process, we inspect the amount of influence received by each node. Using a threshold-based technique, a node that is currently not influenced can be declared to be a potential future one, based on the influence that has been accumulated.

    The diffusion model is based on Spreading Activation (SPA) techniques proposed in cognitive psychology and later used for trust metric computations. For more details, please see paper entitled \"Social Ties and their Relevance to Churn in Mobile Telecom Networks\"

    Parameters:

    Name Type Description Default attribute str

    Column name which will store the amount of influence spread

    'influence' spreading_factor float

    Percentage of influence to distribute. Low values favor influence proximity to the source of injection, while high values allow the influence to also reach nodes which are further away. It must be a value in the range (0,1). Default value is 0.2

    0.2 transfer_function str

    Allowed values: \"weighted\" or \"unweighted\". Once a node decides what fraction of energy to distribute, the next step is to decide what fraction of the energy is transferred to each neighbor. This is controlled by the Transfer Function. If \"weighted\" then the energy distributed along the directed edge depends on its relatively weight compared to the sum of weights of all outgoing edges of X. If \"unweighted\", then the energy distributed along the edge is independent of its relatively weight. 'weighted' steps int

    Number of steps to perform

    1 influenced_by bool

    if True, an extra column \"influenced_by\" is calculated which contains the seed nodes that have spread some influence to a given node. When True, the ids of the nodes cannot contain commas \",\". Note that seed_nodes will have at least their own (remaining) influence

    False Source code in mercury/graph/ml/spark_spreadactivation.py
    def __init__(\n    self,\n    attribute: str = \"influence\",\n    spreading_factor: float = 0.2,\n    transfer_function: str = \"weighted\",\n    steps: int = 1,\n    influenced_by: bool = False,\n):\n    self.attribute = attribute\n    self.spreading_factor = spreading_factor\n    self.transfer_function = transfer_function\n    self.steps = steps\n    self.influenced_by = influenced_by\n
    "},{"location":"reference/ml/#mercury.graph.ml.SparkSpreadingActivation.fit","title":"fit(g, seed_nodes)","text":"

    Perform all iterations of spread_activation

    Parameters:

    Name Type Description Default g Graph

    A mercury.graph Graph object.

    required seed_nodes Union[List, DataFrame]

    Collection of nodes that are the \"seed\" or are the source to spread the influence. It must be a pyspark dataframe with column 'id' or a python list

    required

    Returns:

    Type Description self

    Fitted self

    Source code in mercury/graph/ml/spark_spreadactivation.py
    def fit(\n    self,\n    g: Graph,\n    seed_nodes: Union[List, \"pyspark.sql.DataFrame\"],\n):\n    \"\"\"\n    Perform all iterations of spread_activation\n\n    Args:\n        g (mercury.graph.core.Graph): A `mercury.graph` Graph object.\n        seed_nodes (Union[List, pyspark.sql.DataFrame]): Collection of nodes that are the \"seed\" or are the source to spread\n            the influence. It must be pyspark dataframe with column 'id' or python list\n\n    Returns:\n        (self): Fitted self\n    \"\"\"\n\n    # Set seed nodes which are the source of influence\n    g = self._set_seed_nodes(g, seed_nodes)\n\n    # Compute degrees\n    g = self._compute_degrees(g)\n\n    # Number of iterations specified for spread activation\n    for _ in range(0, self.steps, 1):\n        g = self._spread_activation_step(\n            g,\n        )\n\n    # Graph with updated attributes\n    self.fitted_graph_ = g\n    # Influences as DataFrame\n    self.influences_ = self.fitted_graph_.nodes_as_dataframe().select(\n        \"id\", \"influence\"\n    )\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.SpectralClustering","title":"mercury.graph.ml.SpectralClustering(n_clusters=2, mode='networkx', max_iterations=10, random_state=0)","text":"

    Bases: BaseClass

    Implementation of the spectral clustering algorithm which detect communities inside a graph.

    Contributed by Gibran Gabriel Otazo Sanchez.

    Parameters:

    Name Type Description Default n_clusters int

    The number of clusters that you want to detect.

    2 random_state int

    Seed for reproducibility

    0 mode str

    Calculation mode. Pass 'networkx' for using pandas + networkx or 'spark' for spark + graphframes

    'networkx' max_iterations int

    Max iterations parameter (only used if mode==spark)

    10 Source code in mercury/graph/ml/spectral.py
    def __init__(\n    self, n_clusters=2, mode=\"networkx\", max_iterations=10, random_state=0\n):\n    self.n_clusters = n_clusters\n    self.mode = mode\n    self.max_iterations = max_iterations\n    self.random_state = random_state\n\n    if self.mode not in (\"networkx\", \"spark\"):\n        raise ValueError(\"Error: Mode must be either 'networkx' or 'spark'\")\n
    "},{"location":"reference/ml/#mercury.graph.ml.SpectralClustering.fit","title":"fit(graph)","text":"

    Find the optimal clusters of a given graph. The function returns nothing, but saves the clusters and the modularity in the object self.

    Parameters:

    Name Type Description Default graph Graph

    A mercury graph structure.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error)

    Source code in mercury/graph/ml/spectral.py
    def fit(self, graph: Graph):\n    \"\"\"\n    Find the optimal clusters of a given graph. The function returns nothing, but saves the clusters and\n    the modularity in the object self.\n\n    Args:\n        graph (Graph): A mercury graph structure.\n\n    Returns:\n        (self): Fitted self (or raises an error)\n\n    \"\"\"\n    if self.mode == \"networkx\":\n        self._fit_networkx(graph)\n    else:\n        self._fit_spark(graph)\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.Transition","title":"mercury.graph.ml.Transition()","text":"

    Bases: BaseClass

    Create an interface class to manage the adjacency matrix of a directed graph as a transition matrix. This enables computing distributions of probabilities over the nodes after a given number of iterations.

    Source code in mercury/graph/ml/transition.py
    def __init__(self):\n    self.fitted_graph_ = None\n
    "},{"location":"reference/ml/#mercury.graph.ml.Transition.fit","title":"fit(G)","text":"

    Converts the adjacency matrix into a transition matrix. Transition matrices are used to compute the distribution of probability of being in each of the nodes (or states) of a directed graph (or Markov process). The distribution for state s is:

    • \\(s_t = T*s_{t-1}\\)

    Where:

    T is the transition matrix. After calling .fit(), the adjacency matrix is the transition matrix. You can use .to_pandas() to see it. \(s_{t-1}\) is the previous state.

    What .fit() does is scaling the non-zero rows to make them sum 1 as they are probability distributions and make the zero rows recurrent states. A recurrent state is a final state, a state whose next state is itself.

    Parameters:

    Name Type Description Default G Graph

    A mercury.graph Graph.

    required

    Returns:

    Type Description self

    Fitted self (or raises an error).

    Note

    If created using NetworkX directly, the name of the weight must be 'weight' and must be positive. The recommended way to create the graph is using .set_row() which will always name the weight as 'weight' but does not check the value.

    Source code in mercury/graph/ml/transition.py
    def fit(self, G: Graph):\n    \"\"\"\n    Converts the adjacency matrix into a transition matrix. Transition matrices are used to compute the distribution of probability\n    of being in each of the nodes (or states) of a directed graph (or Markov process). The distribution for state s is:\n\n    * $s_t = T*s_{t-1}$\n\n    Where:\n\n    T is the transition matrix. After calling.fit(), the adjacency matrix is the transition matrix. You can use .to_pandas() to see it.\n    $s_{t-1}$ is the previous state.\n\n    What .fit() does is scaling the non-zero rows to make them sum 1 as they are probability distributions and make the zero rows\n    recurrent states. A recurrent state is a final state, a state whose next state is itself.\n\n    Args:\n        G (Graph): A `mercury.graph` Graph.\n\n    Returns:\n        (self): Fitted self (or raises an error).\n\n    Note:\n        If created using NetworkX directly, the name of the weight must be 'weight' and must be positive. The recommended way\n        to create the graph is using .set_row() which will always name the weight as 'weight' but does not check the value.\n\n    \"\"\"\n    names = list(G.networkx.nodes)\n    adj_m = nx.adjacency_matrix(G.networkx, weight=\"weight\", dtype=float)\n\n    with warnings.catch_warnings():\n        warnings.simplefilter(\"ignore\")\n\n        for i in range(adj_m.shape[0]):\n            row = adj_m[[i], :]\n            tot = row.sum()\n\n            if tot == 0:\n                row[0, i] = 1\n            else:\n                row = row / tot\n\n            adj_m[[i], :] = row\n\n    df = pd.DataFrame(adj_m.todense(), index=names, columns=names)\n    self.fitted_graph_ = Graph(nx.from_pandas_adjacency(df, create_using=nx.DiGraph))\n\n    return self\n
    "},{"location":"reference/ml/#mercury.graph.ml.Transition.to_pandas","title":"to_pandas(num_iterations=1)","text":"

    Returns the adjacency (which is the transition matrix after fit() was called) for a given number of iterations as a pandas dataframe with labeled rows and columns.

    Parameters:

    Name Type Description Default num_iterations int

    If you want to compute the matrix for a different number of iterations, k, you can use this argument to raise the matrix to any non negative integer, since \\(s_{t+k} = T^k*s_t\\)

    1

    Returns:

    Type Description DataFrame

    The transition matrix for num_iterations.

    Note

    This method does not automatically call fit(). This allows inspecting the adjacency matrix as a pandas dataframe. The result of computing num_iterations will not make sense if fit() has not been called before to_pandas().

    Source code in mercury/graph/ml/transition.py
    def to_pandas(self, num_iterations=1):\n    \"\"\"\n    Returns the adjacency (which is the transition matrix after `fit()` was called) for a given number of iterations as a pandas\n    dataframe with labeled rows and columns.\n\n    Args:\n        num_iterations (int): If you want to compute the matrix for a different number of iterations, k, you can use this argument to\n            raise the matrix to any non negative integer, since $s_{t+k} = T^k*s_t$\n\n    Returns:\n        (pd.DataFrame): The transition matrix for num_iterations.\n\n    Note:\n        This method does not automatically call `fit()`. This allows inspecting the adjacency matrix as a pandas dataframe.\n        The result of computing num_iterations will not make sense if `fit()` has not been called before `to_pandas()`.\n\n    \"\"\"\n    if self.fitted_graph_ is None:\n        raise ValueError(\"Error: fit() must be called first.\")\n\n    names = list(self.fitted_graph_.networkx.nodes)\n    adj_m = nx.adjacency_matrix(self.fitted_graph_.networkx, weight=\"weight\").todense()\n\n    if num_iterations != 1:\n        adj_m = matrix_power(adj_m, num_iterations)\n\n    return pd.DataFrame(adj_m, index=names, columns=names)\n
    "},{"location":"reference/viz/","title":"mercury.graph.viz","text":""},{"location":"reference/viz/#mercury.graph.viz.Moebius","title":"mercury.graph.viz.Moebius(G)","text":"

    Moebius class for visualizing graphs using JavaScript and HTML.

    Note

    Moebius is currently only compatible with Google Colab and Jupyter Notebooks Classic (prior to v7).

    Usage
    from mercury.graph.viz import Moebius\n\nG = ... # A graph object\nmoebius = Moebius(G)\nmoebius.show()\n

    Attributes:

    Name Type Description G Graph

    The graph to be visualized.

    use_spark bool

    Flag indicating if Spark is used.

    front_pat str

    Path to the frontend resources.

    _int_id_map dict

    A dictionary mapping node IDs to integer IDs.

    name() dict

    The instance name of the object required by the JS callback mechanism.

    Source code in mercury/graph/viz/moebius.py
    def __init__(self, G):\n\n    if HTML is None:\n        raise ImportError('IPython is not installed')\n\n    self.G = G\n    self.use_spark = self.G._as_networkx is None\n    self.front_pat = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + '/frontend'\n    self._int_id_map = {node['id'] : i for i, node in enumerate(self.G.nodes)}\n\n    # Define callback for JS interactions within Google Colab\n    if importlib.util.find_spec('google') is not None and importlib.util.find_spec('google.colab') is not None:\n        from google.colab import output\n        from IPython import get_ipython\n\n        def colab_execute_python(code):\n            # Use get_ipython() to access the Moebius object defined by the user in a Colab cell\n            get_ipython().run_cell(f\"_temp_colab_execute_python_result = {code}\")\n            return get_ipython().user_ns[\"_temp_colab_execute_python_result\"]\n\n        output.register_callback(\"notebook.colab_execute_python\", colab_execute_python)\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.name","title":"name property","text":"

    Get the instance name of the object which is required by the JS callback mechanism.

    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.FHT","title":"FHT(fn)","text":"

    Syntactic sugar for display(HTML(filename = fn))

    Source code in mercury/graph/viz/moebius.py
    def FHT(self, fn):\n    \"\"\"\n    Syntactic sugar for display(HTML(filename = fn))\n    \"\"\"\n\n    display(HTML(filename = fn))\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.FJS","title":"FJS(fn)","text":"

    Syntactic sugar for display(Javascript(filename = fn))

    Source code in mercury/graph/viz/moebius.py
    def FJS(self, fn):\n    \"\"\"\n    Syntactic sugar for display(Javascript(filename = fn))\n    \"\"\"\n\n    display(Javascript(filename = fn))\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.JS","title":"JS(s)","text":"

    Syntactic sugar for display(Javascript())

    Source code in mercury/graph/viz/moebius.py
    def JS(self, s):\n    \"\"\"\n    Syntactic sugar for display(Javascript())\n    \"\"\"\n\n    display(Javascript(s))\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.__getitem__","title":"__getitem__(item)","text":"

    Add support for the [] operator.

    Source code in mercury/graph/viz/moebius.py
    def __getitem__(self, item):\n    \"\"\"\n    Add support for the [] operator.\n    \"\"\"\n\n    return self._get_adjacent_nodes_moebius(item)\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.__str__","title":"__str__()","text":"

    Convert the object via str()

    Source code in mercury/graph/viz/moebius.py
    def __str__(self):\n    \"\"\"\n    Convert the object via str()\n    \"\"\"\n\n    return 'Moebius(%s)' % str(self.G)\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.node_or_edge_config","title":"node_or_edge_config(text_is=None, color_is=None, colors=None, size_is=None, size_range=None, size_scale='linear')","text":"

    Create a node_config or edge_config configuration dictionary for show() in an understandable way.

    Parameters:

    Name Type Description Default text_is str

    The node/edge attribute to be displayed as text. Use the string \u00ecd to draw the node id (regardless of the column having another name) or any valid node attribute name.

    None color_is str

    A categorical node/edge attribute that can be represented as a color. This will also enable a legend interface where categories can be individually shown or hidden.

    None colors dict

    The colors for each category defined as a dictionary. The keys are possible outcomes of category. The values are html RGB strings. E.g., .draw(category = 'size', colors = {'big' : '#c0a080', 'small' : '#a0c080'}) where 'big' and 'small' are possible values of the category 'size'.

    None size_is str

    The node attribute to be displayed as the size of the nodes. Use the string id to set the node id (regardless of the column having another name) or any valid node attribute name. See the options in the Moebius configuration menu to set minimum, maximum sizes, linear or logarithmic scale, etc.

    None size_range List of two numbers

    Combined with edge_label, this parameter controls the values in the variable that correspond to the minimum and maximum displayed sizes. The values below or equal the first value will be displayed with the base radius (that depends on the zoom) and the values above or equal to the second value will be shown with the maximum radius.

    None size_scale (linear, power, sqrt or log)

    Combined with edge_label, the scale used to convert the value in the variable to the displayed radius.

    'linear'

    Returns:

    Type Description dict

    The node configuration dictionary

    Source code in mercury/graph/viz/moebius.py
    def node_or_edge_config(self, text_is = None, color_is = None, colors = None, size_is = None, size_range = None, size_scale = 'linear'):\n    \"\"\"\n    Create a `node_config` or `edge_config` configuration dictionary for `show()` in an understandable way.\n\n    Args:\n        text_is (str): The node/edge attribute to be displayed as text. Use the string `\u00ecd` to draw the node id (regardless of the\n            column having another name) or any valid node attribute name.\n        color_is (str): A categorical node/edge attribute that can be represented as a color. This will also enable a legend interface\n            where categories can be individually shown or hidden.\n        colors (dict): The colors for each category defined as a dictionary. The keys are possible outcomes of category.\n            The values are html RGB strings. E.g., .draw(category = 'size', colors = {'big' : '#c0a080', 'small' : '#a0c080'})\n            where 'big' and 'small' are possible values of the category 'size'.\n        size_is (str): The node attribute to be displayed as the size of the nodes. Use the string `id` to set the node id (regardless\n            of the column having another name) or any valid node attribute name. See the options in the Moebius configuration menu to\n            set minimum, maximum sizes, linear or logarithmic scale, etc.\n        size_range (List of two numbers): Combined with edge_label, this parameter controls the values in the variable that\n            correspond to the minimum and maximum displayed sizes. 
The values below or equal the first value will be displayed with the\n            base radius (that depends on the zoom) and the values above or equal to the second value will be shown with the maximum\n            radius.\n        size_scale ('linear', 'power', 'sqrt' or 'log'): Combined with edge_label, the scale used to convert the value in the variable\n            to the displayed radius.\n\n    Returns:\n        (dict): The node configuration dictionary\n    \"\"\"\n\n    config = {}\n\n    if text_is is not None:\n        config['label'] = text_is\n\n    if color_is is not None:\n        config['color'] = color_is\n\n    if colors is not None:\n        config['color_palette'] = colors\n    else:\n        config['color_palette'] = {}\n\n    if size_is is None:\n        config['size_thresholds'] = []\n    else:\n        config['size'] = size_is\n\n        if size_range is None:\n            config['size_thresholds'] = []\n        else:\n            assert type(size_range) == list and len(size_range) == 2\n            config['size_thresholds'] = size_range\n\n        if size_scale != 'linear':\n            assert size_scale in {'power', 'sqrt', 'log'}\n\n        config['scale'] = size_scale\n\n    return config\n
    "},{"location":"reference/viz/#mercury.graph.viz.Moebius.show","title":"show(initial_id=None, initial_depth=1, node_config=None, edge_config=None)","text":"

    Start the interactive graph visualization in a Jupyter notebook.

    Parameters:

    Name Type Description Default initial_id str

    The id of the node to start the visualization.

    None initial_depth int

    The initial depth of the graph (starting with initial_id as 0) to be shown.

    1 node_config dict

    A node configuration dictionary created by node_config().

    None edge_config dict

    An edge configuration dictionary created by edge_config().

    None Source code in mercury/graph/viz/moebius.py
    def show(self, initial_id = None, initial_depth = 1, node_config = None, edge_config = None):\n    \"\"\"\n    Start the interactive graph visualization in a Jupyter notebook.\n\n    Args:\n        initial_id (str): The id of the node to start the visualization.\n        initial_depth (int): The initial depth of the graph (starting with `initial_id` as 0) to be shown.\n        node_config (dict): A node configuration dictionary created by `node_config()`.\n        edge_config (dict): An edge configuration dictionary created by `edge_config()`.\n    \"\"\"\n\n    if initial_id is None:\n        initial_id = next(iter(self._int_id_map))\n\n    initial_json = self._get_adjacent_nodes_moebius(initial_id, depth = initial_depth)\n\n    if node_config is None:\n        node_config = self.node_or_edge_config()\n\n    if edge_config is None:\n        edge_config = self.node_or_edge_config()\n\n    self._load_moebius_js(initial_json, self.name, node_config, edge_config)\n
    "}]} \ No newline at end of file diff --git a/site/sitemap.xml.gz b/site/sitemap.xml.gz index 8c0754f..412ffb4 100644 Binary files a/site/sitemap.xml.gz and b/site/sitemap.xml.gz differ