From 51716244d919572e8a6f39003d5088d157fee415 Mon Sep 17 00:00:00 2001
From: nunofachada <3018963+nunofachada@users.noreply.github.com>
Date: Tue, 23 Jan 2024 12:33:50 +0000
Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20clugen/p?=
 =?UTF-8?q?yclugen@49fcc577c353a920630755cd795cae641e88379e=20=F0=9F=9A=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 generated/gallery/gallery_jupyter.zip         | Bin 73057 -> 73057 bytes
 generated/gallery/gallery_python.zip          | Bin 35575 -> 35575 bytes
 .../gallery/mg_execution_times/index.html     |  14 +++++++-------
 .../gallery/plot_1_1d_examples/index.html     |   2 +-
 .../gallery/plot_1_1d_examples_codeobj.pickle | Bin 671 -> 671 bytes
 .../gallery/plot_2_2d_examples/index.html     |   2 +-
 .../gallery/plot_2_2d_examples_codeobj.pickle | Bin 5657 -> 5642 bytes
 .../gallery/plot_3_3d_examples/index.html     |   2 +-
 .../gallery/plot_3_3d_examples_codeobj.pickle | Bin 5807 -> 5789 bytes
 .../gallery/plot_4_nd_examples/index.html     |   2 +-
 .../gallery/plot_4_nd_examples_codeobj.pickle | Bin 863 -> 863 bytes
 .../gallery/plot_5_mrg_examples/index.html    |   2 +-
 .../plot_5_mrg_examples_codeobj.pickle        | Bin 2335 -> 2335 bytes
 generated/gallery/plot_functions/index.html   |   2 +-
 .../gallery/plot_functions_codeobj.pickle     | Bin 3437 -> 3434 bytes
 search/search_index.json                      |   2 +-
 sitemap.xml.gz                                | Bin 307 -> 307 bytes
 17 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/generated/gallery/gallery_jupyter.zip b/generated/gallery/gallery_jupyter.zip
index aceda97c9a773f56d225cb44c9bdd5453082e42a..500ee36cdedda439ac50bf59def345878b210445 100644
GIT binary patch
delta 111
zcmaF3i{;@i7M=iaW)=|!5YSKA$m1QuWS+7)GUkj7h*qx@RA(|x*=)67z9^Gs%I4c!
z!+4mCQZ^eOYK~;mPno_$n$ZQQZu&oIMpqE+D#PdiR6o641}LtXGW{%wHcFW;EX(Kz
E0BPMJ0ssI2

delta 111
zcmaF3i{;@i7M=iaW)=|!5LlbMk;glRX>0Q4$e1%SAX>drP@QRW@@A_A^F^6fByYaG
zHH?R8L-J<hL(P#)Ym=w%kY;oNs+;~#n$Z<RyUH*+0M$=#mjQ~eNS=NcL~lr*E-cIF
F2LP0vCg1=7

diff --git a/generated/gallery/gallery_python.zip b/generated/gallery/gallery_python.zip
index b5f2ea81068e3d37c97d8144e326141877eb86ef..e187a7c5a51a58043d189a164bb4c511bbc4d022 100644
GIT binary patch
delta 105
zcmex9mFfFbCY}IqW)=|!5YSB7$g@tH$uwp29_?x#Ci9fdmKN;NOhzf2or1mDm^4y0
z_eZL;FzKgk-VwV@n@KZeazdvgQ2FG=olZd2lb?6m0u@hI>v9CDotz1z^iw9U>GB2u
DZ0R90

delta 105
zcmex9mFfFbCY}IqW)=|!5Ll7Ck!PJY)8^#Od$g;0n6@TwwzOcEX4;Uv*(unYjcG;l
z=Ke@^7N)hyn|H)6(+2V<Cv-Xjl}}#W=>$|g`FW=;Q1N87E=Q1DCXia2Jb6u*HvoK-
BClvqy

diff --git a/generated/gallery/mg_execution_times/index.html b/generated/gallery/mg_execution_times/index.html
index 23fbd3c..c6e94b9 100644
--- a/generated/gallery/mg_execution_times/index.html
+++ b/generated/gallery/mg_execution_times/index.html
@@ -629,19 +629,19 @@
 
 
 <h1 id="computation-times">Computation times<a class="headerlink" href="#computation-times" title="Permanent link">&para;</a></h1>
-<p><strong>00:26.400</strong> total execution time for <strong>generated_gallery</strong> files:</p>
+<p><strong>00:29.296</strong> total execution time for <strong>generated_gallery</strong> files:</p>
 <p>+----------------------------------------------------------------------------------------+-----------+--------+
-| <a href="../plot_2_2d_examples/">plot_2_2d_examples</a> (docs/examples/plot_2_2d_examples.py)    | 00:10.292 | 0.0 MB |
+| <a href="../plot_2_2d_examples/">plot_2_2d_examples</a> (docs/examples/plot_2_2d_examples.py)    | 00:11.459 | 0.0 MB |
 +----------------------------------------------------------------------------------------+-----------+--------+
-| <a href="../plot_4_nd_examples/">plot_4_nd_examples</a> (docs/examples/plot_4_nd_examples.py)    | 00:06.376 | 0.0 MB |
+| <a href="../plot_4_nd_examples/">plot_4_nd_examples</a> (docs/examples/plot_4_nd_examples.py)    | 00:07.035 | 0.0 MB |
 +----------------------------------------------------------------------------------------+-----------+--------+
-| <a href="../plot_3_3d_examples/">plot_3_3d_examples</a> (docs/examples/plot_3_3d_examples.py)    | 00:05.237 | 0.0 MB |
+| <a href="../plot_3_3d_examples/">plot_3_3d_examples</a> (docs/examples/plot_3_3d_examples.py)    | 00:05.724 | 0.0 MB |
 +----------------------------------------------------------------------------------------+-----------+--------+
-| <a href="../plot_5_mrg_examples/">plot_5_mrg_examples</a> (docs/examples/plot_5_mrg_examples.py) | 00:03.156 | 0.0 MB |
+| <a href="../plot_5_mrg_examples/">plot_5_mrg_examples</a> (docs/examples/plot_5_mrg_examples.py) | 00:03.588 | 0.0 MB |
 +----------------------------------------------------------------------------------------+-----------+--------+
-| <a href="../plot_1_1d_examples/">plot_1_1d_examples</a> (docs/examples/plot_1_1d_examples.py)    | 00:01.333 | 0.0 MB |
+| <a href="../plot_1_1d_examples/">plot_1_1d_examples</a> (docs/examples/plot_1_1d_examples.py)    | 00:01.484 | 0.0 MB |
 +----------------------------------------------------------------------------------------+-----------+--------+
-| <a href="../plot_functions/">plot_functions</a> (docs/examples/plot_functions.py)                | 00:00.005 | 0.0 MB |
+| <a href="../plot_functions/">plot_functions</a> (docs/examples/plot_functions.py)                | 00:00.006 | 0.0 MB |
 +----------------------------------------------------------------------------------------+-----------+--------+</p>
 
 
diff --git a/generated/gallery/plot_1_1d_examples/index.html b/generated/gallery/plot_1_1d_examples/index.html
index 307d43e..25032f3 100644
--- a/generated/gallery/plot_1_1d_examples/index.html
+++ b/generated/gallery/plot_1_1d_examples/index.html
@@ -749,7 +749,7 @@ <h2 id="basic-1d-example-with-density-plot">Basic 1D example with density plot<a
 <a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a>    <span class="n">e084</span><span class="p">,</span> <span class="s2">&quot;e084: custom proj_dist_fn (Weibull)&quot;</span><span class="p">)</span>
 </code></pre></div>
 <p><img alt="e082: proj_dist_fn = 'norm' (default), e083: proj_dist_fn = 'unif', e084: custom proj_dist_fn (Weibull)" class="mkd-glr-single-img" src="../images/mkd_glr_plot_1_1d_examples_001.png" srcset="../images/mkd_glr_plot_1_1d_examples_001.png" /></p>
-<p><strong>Total running time of the script:</strong> ( 0 minutes  1.333 seconds)</p>
+<p><strong>Total running time of the script:</strong> ( 0 minutes  1.484 seconds)</p>
 <div id="download_links"></div>
 
 <p><a class="md-button center" href="../plot_1_1d_examples.py"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M288 32c0-17.7-14.3-32-32-32s-32 14.3-32 32v242.7l-73.4-73.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l128 128c12.5 12.5 32.8 12.5 45.3 0l128-128c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L288 274.7V32zM64 352c-35.3 0-64 28.7-64 64v32c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64v-32c0-35.3-28.7-64-64-64H346.5l-45.3 45.3c-25 25-65.5 25-90.5 0L165.5 352H64zm368 56a24 24 0 1 1 0 48 24 24 0 1 1 0-48z"/></svg></span> Download Python source code: plot_1_1d_examples.py</a></p>
diff --git a/generated/gallery/plot_1_1d_examples_codeobj.pickle b/generated/gallery/plot_1_1d_examples_codeobj.pickle
index c9fdce14d51a3d4e4d1e0bc5d27411275ad08cd2..9599db03eeb6fe59b3e3fd4b970ca89839381ee3 100644
GIT binary patch
delta 229
zcmXv{Jqp4w7$v6GiWXd4oJ5y`P%DCn=;Sd(N^OG)q)|*WD0l!NgVJyBncTgHt7(hV
zd%WMGSu~&19SN3m*n@WmPg^2N320}_mCVRRg0|CBqPWrA^EKw`NQ9q8On~mzMkwj&
zl9|fiHk8RQ=8=?;-_R)NQTL&Uwj?2m`7t-Nr7;s3(6)9Cit&sI3A97|WiraYXl4&8
s`uePpTaMr{b@KvEv1jm&hW4Zy&Hcs#2Sz$J0z5akMw`0etgC?Xe^;MR1ONa4

delta 188
zcmbQwI-hlden4?*YRZ(@DYa8HdRX!jb5p1EFlXkKOzC0E%}*)K0dhH#N;7jxGV_Y3
z^zcB$;)^r#i%O<saP@Fx7RM*&Bo-G>=`2m`VM#TxFrMtdWWvZZ*^g0)M>K<{vosYf
zY=RJGmdoIoY|dm0QN*kc<P|gOGwT59rHl$}`e02H?-;U~fLMu>Pcxb^TLL9nm<%{<
Iz>=kU07`s30ssI2

diff --git a/generated/gallery/plot_2_2d_examples/index.html b/generated/gallery/plot_2_2d_examples/index.html
index 6f76309..e4bc257 100644
--- a/generated/gallery/plot_2_2d_examples/index.html
+++ b/generated/gallery/plot_2_2d_examples/index.html
@@ -1175,7 +1175,7 @@ <h2 id="direct-specification-of-optional-parameters">Direct specification of opt
 <a id="__codelineno-54-4" name="__codelineno-54-4" href="#__codelineno-54-4"></a>    <span class="n">e042</span><span class="p">,</span> <span class="s2">&quot;e042: direct params 3&quot;</span><span class="p">)</span>
 </code></pre></div>
 <p><img alt="e040: direct params 1, e041: direct params 2, e042: direct params 3" class="mkd-glr-single-img" src="../images/mkd_glr_plot_2_2d_examples_014.png" srcset="../images/mkd_glr_plot_2_2d_examples_014.png" /></p>
-<p><strong>Total running time of the script:</strong> ( 0 minutes  10.292 seconds)</p>
+<p><strong>Total running time of the script:</strong> ( 0 minutes  11.459 seconds)</p>
 <div id="download_links"></div>
 
 <p><a class="md-button center" href="../plot_2_2d_examples.py"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M288 32c0-17.7-14.3-32-32-32s-32 14.3-32 32v242.7l-73.4-73.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l128 128c12.5 12.5 32.8 12.5 45.3 0l128-128c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L288 274.7V32zM64 352c-35.3 0-64 28.7-64 64v32c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64v-32c0-35.3-28.7-64-64-64H346.5l-45.3 45.3c-25 25-65.5 25-90.5 0L165.5 352H64zm368 56a24 24 0 1 1 0 48 24 24 0 1 1 0-48z"/></svg></span> Download Python source code: plot_2_2d_examples.py</a></p>
diff --git a/generated/gallery/plot_2_2d_examples_codeobj.pickle b/generated/gallery/plot_2_2d_examples_codeobj.pickle
index 973cbac0e325619bb4898ceb00136295c15989e0..0641ffd6a7fcae278e1d7fb1f1513423854e7bbd 100644
GIT binary patch
delta 816
zcmY*XOK1~O6zzMN$>cMW$)|CpB3)>esF|d;R;^emLd7n+Ng7BkQyIMuro=_WidE5t
zA1WlAxQIKeSnz``3gX6H5nQO?LPQsS7Gf8*MFe-6H<M4@%{}Mba}VcDEpa0uUy^VA
zjaTFZPa2i<6|Cbq=uAPylV@EhNB3~rf;Uk^qiv*JFIlsal{C^=jb%8cvEMhz_F+Cg
z#Ay&K@eLXcwN<-?N}&-4!4WpjGI+oy*#ypoOilzjc7Rg>hhug3VtcPo?5Q-}LQ+_X
zWjS3&nDwex1#F5KDJO<PW%s~^&b&&s)>7JCn#1-$ADhSBaAG4p66{R1o0M5zJXt0G
ztUOgJ6sM~cq$fC}8=Mw!USH4Y8SeNdG2m0M7}>1QqH`2SBYA~hyXSsKk{Z2pa1o{_
z>2~r7AHDtRmlmGvaq%T6!9JCKwg>~GY)t**m^q~ybzzn<9E=Zc^|Fo*!MIsH1@r8G
zVQqhdw_pc$1?*5`G%=&(5l}-=!#RCZQzY!u{lXsaRSiEvISgnT49^(1_u^a3(udYc
zQ;t*`sbd(G%z&L~ve+3N*C(2j^9D|8iRxJ8vB~YzI3DizpJ_-;d{*+@z6e#d*freK
zhS^OlYh8MItw&n?C|Op+Ow3|)xUE~#!#a-zsW)IhZCk11>^X)z6Z(rL=JY0t&q^=F
z#<Ith?YH9VmA{_*Lqlg_Rm#iuS3wkcvfKXNAl!Pr$4-?_SW}{5UvauQ1nni<4_d)r
IO_S1&f1o@BhX4Qo

delta 741
zcmYL_PiPZC6vi{_PBxqDY_>_`L8?#@R9IxU8=J&~B&{d$CXyBsVOLhh(bPf+R<Yo}
z9+XgsFQv3vv<E>14S2BNN$@0ywH{Ls7Ck7Uh#tJUGfmdheDizXyzjlaV=kHa8eY(M
zwy-HV`Id7Ny2%O9Q4G4+O6;NJcEfGTzLkaHbXlYOIq*3c;XtBR>#2tdFqW|-dK4~Y
zh9z189ee0F)QBt5lVBhZRlpbwE<QZagJTLlrbZQ77tLX7u%ghbqAwWHDw>0hsE6j^
ze9|R!G0d}UD9L5?5Ej&e_DDPxV~ldFJ^Sx-1Xv4!lM~=o2yF2Ly#XcWp%1XCxeDDB
zRhW)fwJ)JDyB+xhvPv%cn`0O5B@Pq%S7;t@=iz#)EJsiz0weJpW|*sW)s>hI%NZYX
z8i7y7B&-DIm_Df<4C>oiL+L7<BSgwtyalY2IE)VzQ4wwp?3KrPn|E<9d7|$Z%ZfB~
z3S{ETr@7^YVq<oOt8dDX#LhxJ<)e#`*L*YsS)(A&^4bN(#OwxF%gQc;%>^o4(sp>f
zI^Soo602ag$jvNf#@YQ|!M&?mNw(U*@o^m3GA86F+*(IRFzayDLBT}NxmuEon5}WO
zDccfz2_%^9HM}Q2dJ9+5W^yB_g+FQK#2noo^P!WjqR(I_O8dU{*lha%Rhm0$?o6Ai
hO+3}8O<!))1pCf29!tkC`^D8yX}8Ay^!qdp{R8N++Y|r*

diff --git a/generated/gallery/plot_3_3d_examples/index.html b/generated/gallery/plot_3_3d_examples/index.html
index 2da1a7e..e9d9218 100644
--- a/generated/gallery/plot_3_3d_examples/index.html
+++ b/generated/gallery/plot_3_3d_examples/index.html
@@ -1154,7 +1154,7 @@ <h2 id="manipulating-cluster-sizes">Manipulating cluster sizes<a class="headerli
 <a id="__codelineno-50-4" name="__codelineno-50-4" href="#__codelineno-50-4"></a>    <span class="n">e084</span><span class="p">,</span> <span class="s2">&quot;e084: equal size (custom)&quot;</span><span class="p">)</span>
 </code></pre></div>
 <p><img alt="e082: normal dist. (default), e083: unif. dist. (custom), e084: equal size (custom)" class="mkd-glr-single-img" src="../images/mkd_glr_plot_3_3d_examples_014.png" srcset="../images/mkd_glr_plot_3_3d_examples_014.png" /></p>
-<p><strong>Total running time of the script:</strong> ( 0 minutes  5.237 seconds)</p>
+<p><strong>Total running time of the script:</strong> ( 0 minutes  5.724 seconds)</p>
 <div id="download_links"></div>
 
 <p><a class="md-button center" href="../plot_3_3d_examples.py"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M288 32c0-17.7-14.3-32-32-32s-32 14.3-32 32v242.7l-73.4-73.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l128 128c12.5 12.5 32.8 12.5 45.3 0l128-128c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L288 274.7V32zM64 352c-35.3 0-64 28.7-64 64v32c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64v-32c0-35.3-28.7-64-64-64H346.5l-45.3 45.3c-25 25-65.5 25-90.5 0L165.5 352H64zm368 56a24 24 0 1 1 0 48 24 24 0 1 1 0-48z"/></svg></span> Download Python source code: plot_3_3d_examples.py</a></p>
diff --git a/generated/gallery/plot_3_3d_examples_codeobj.pickle b/generated/gallery/plot_3_3d_examples_codeobj.pickle
index 79799d1f45a56b025718a19594d08f0c9ee9984d..f8b286529d8ac89b35e94108bd8d9472868ca918 100644
GIT binary patch
literal 5789
zcmcJTJ!lj`6oAj<E;+qRqD>Sl?GlR^llu!+;$f$PNIDtDySeNRd%K(c(Ig@Stb}dU
zwXm_Uv9PfT*eHUPAT}b1kT#8=r4|alx!IeYO*lO<@5*D|`{w=3``OLH!~4^@-Y+V(
z9Lr*@z;tXsj!KukHuG4!X|{s+K|FgUo{5UKJGa3+$B(bYGq>T7yiL0-j*edsO)D^M
z(h2RJJaIhIWr5+e<7lkwv_lI{6_Zn-ye$7CzTtR5Y#fS;rcYWH_5Jv62#SsIs31Yc
z!Kh$|U3Wb;O2(8?z8kVA&*rQ3<O*=Ic-{*AfO&olhbP?imKClrd#+1O8-$`_|DRY$
zOXV36?Lf&mmLisr5GNI)aRv!-N+LwOsOQo36!WK0f&0=JfxAzq4l77CnP@C1JE{vQ
zAmiHqjmBAps5FoeO@*jpMdPAE)EAMgSXPJ{mK8)H8uLh27z$Crdf%%GQLP|Zv7rz(
ztZ?5^hzeGfiX>ths;TRkcHk46oFM`0x)u$X=DL2(7u7dYKJEz(vfd8TB1F>iTrzqk
znnh)5ue6y(mgY&v22@hC2a+BM5*2-+j+L5EQjga(<M*@mJd=ot7PCb+BOP;%wTH0&
zxwO6j#ojg3yA?W?L!r6uiprKaEgd=n$1@fEg}lKyXmV`U@A9I>4s`v?R9(I&lTr9u
zDijSH>nXgIh*8%HQeRzyt6W`o*&gfPi_<hJyPm^|MIo14BA5S`U>~H|R6jIrldcSx
znxCZPBs?SE++aRot0A?9KyFJxk<(ZSw<8cN8iR0m=^ETg%yba#W8|C|8ZIQmT+{Sj
z8ng`NWwYgr5SJ38LOv7uuTr<TK2|CJE)hqv6^gC7LwNV6knL5B22x`3!hu{)IiZ2n
zZ1|*3sU!7%el$bDvgL$MsbEoNbxI9sa^(vd3exb(&+3#qmMu-4QmdwgVn1hnelbHq
zDjIxQr!=rUMs!LIseW@KLqQq@`KqFn;q!n^Ry^nCP`!`eNF<@owoO>i`+YWVnf{)^
zgx^x95BAnpm;tph^B?wBzL{u}y=jbkJym*p;&+r$Vm84FP^44pSOI#dQ>s|c^RZ5;
zU^UIBI;Doy0-h_%gqq7j+w7<e9XRjutwdCA8(7YMr7oEFnYILs!XmbNrsZ!kSHfyF
z@6s!=`3jcd9~ET`A_JfCv}QAC2jQO+q2#?Mg7Dt;-I8xB)$rtNNP8mR(J56d%C`&!
zE95^E<*;ix0by$twtv`R7R6ecEDM95%CAIRhGnk`2+(9tZ$Gf`0k_0+Bgxnt&iw{f
Cvtld&

literal 5807
zcmcJTJ!lkB5XYm*<%4_}oA`ksh>!>p6LWiaDWgFlg@Gv8DC_g?-R&-WyPN&MBq%6W
zk?XSQ#76Bzu+c`aQ3MgJ1Q9H3ENm=nEEJq~Z*TT(j*#<XQYE>a`OUnU`M=q_ncI9Y
z*46xJ43=Hn=H<|Iogi)uF8USj^UAzg4&#mZ*it;v=;2evnfP)%aYOv!usV+${ip3H
z2)Pd?y>+(|+3>$%Z?$YkHSSE-nd!uhA^CwG7_J|JN$X?y)eQ8q&4M6akHD$Xr}#4l
zlqZcrW7HU4kGKdTUr2)3`urBKF{pX~>zxn>5K)>ULL8BZA?DO7+}3BO^r{1>B(iGc
zGz$=m<gZ4i9hy)KNPb+Hv1SD{qR6H&d2N~qF`FXtM2J%o(JLEJfoyxUXN844|CZI(
z)?2{1^RtReY<+nCUt8}yA~dQ3UqVEl>hw7xic|<kB8H*Bfw{&5J#x${6lJFg3WS}U
zAgbBEE<@W>=8jNAy<)OjXXAYfgUh!B_J3>J9T^<Rr$!~88;B@TWpNh~MXD?wA|hWR
z8urJC(27KeXA&{o>cy(L!Ydt&$#Y@X9Fqc-)V-3S?S%>UT+;~y-O;CYzT$Nd-WwU-
z2u?p=j+otn;XBMLPj%!UWL_{~1uF6ToFb@1>}!gk8shJWD4ir~|DT8`5G^xSZdX@V
zN+D7sYa~M<YF%p|Qu-Zla*g|LkQU}(vWV)f6-sw~z0O0!t!&*>twV_;c^l`XCs>bi
zPE_|+VUx8EBWeIp%!fso{~haShSi(6%D~1E)(0@x;YwQlt#sOwIG%Cb(=^MtJ%Mh;
zuoLZTR!ODgsVH+QrARc7)&f#SJ=+aI?OENkVSAeqmy&p)6#LG$6<JKo<#X*WpgJQ>
zzby~qrO8@C=n%ZwI(p4_uXVJKSr-$_=0&IxNv(AS<I6#JdS-eOxvK55u&pt9<`{N^
zMW!tc^apKOBwAjqWtE~4EeY0|N-0p~a$BWnRL$Q|DMhOLXj7$VM0dA!FQrsW_|~WO
z;?-lDL3_jAPo8ehQHd(^M;KGLXNMS4|2a_Z3#%S)o1-Tf){y7BmToi8W@T{=?Ic7`
z(Nc^U)ph&=6B^4d$sq^TWTA3lZm*dy6DKmI?HqxCq~FJZX1+#rPr#KohHhBY+s^T=
zdKx97rPq3oA@zbx*4av`ll%2K-)F05lCRC?&A?+}+2DRw7(Zr?qFcDg2<ImxKvgv=
vvH7A>id4e+O{Em5B<%-M#KOo#5z?ys$-?+6aU{DmCp~3~)3rO<v!lu1AZuWH

diff --git a/generated/gallery/plot_4_nd_examples/index.html b/generated/gallery/plot_4_nd_examples/index.html
index 80b11b5..1c7e06d 100644
--- a/generated/gallery/plot_4_nd_examples/index.html
+++ b/generated/gallery/plot_4_nd_examples/index.html
@@ -802,7 +802,7 @@ <h2 id="4d-example-with-custom-projection-placement-using-the-beta-distribution"
 <div class="highlight"><pre><span></span><code><a id="__codelineno-14-1" name="__codelineno-14-1" href="#__codelineno-14-1"></a><span class="n">plot_examples_nd</span><span class="p">(</span><span class="n">e087</span><span class="p">,</span> <span class="s2">&quot;e087: 4D with custom proj_dist_fn (Beta)&quot;</span><span class="p">)</span>
 </code></pre></div>
 <p><img alt="e087: 4D with custom proj_dist_fn (Beta)" class="mkd-glr-single-img" src="../images/mkd_glr_plot_4_nd_examples_003.png" srcset="../images/mkd_glr_plot_4_nd_examples_003.png" /></p>
-<p><strong>Total running time of the script:</strong> ( 0 minutes  6.376 seconds)</p>
+<p><strong>Total running time of the script:</strong> ( 0 minutes  7.035 seconds)</p>
 <div id="download_links"></div>
 
 <p><a class="md-button center" href="../plot_4_nd_examples.py"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M288 32c0-17.7-14.3-32-32-32s-32 14.3-32 32v242.7l-73.4-73.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l128 128c12.5 12.5 32.8 12.5 45.3 0l128-128c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L288 274.7V32zM64 352c-35.3 0-64 28.7-64 64v32c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64v-32c0-35.3-28.7-64-64-64H346.5l-45.3 45.3c-25 25-65.5 25-90.5 0L165.5 352H64zm368 56a24 24 0 1 1 0 48 24 24 0 1 1 0-48z"/></svg></span> Download Python source code: plot_4_nd_examples.py</a></p>
diff --git a/generated/gallery/plot_4_nd_examples_codeobj.pickle b/generated/gallery/plot_4_nd_examples_codeobj.pickle
index 88be522b49e038fa2176fa9860efb3402182baee..6b6684b87cb2c3e13f30bb67a93b92fe8d25106e 100644
GIT binary patch
delta 170
zcmcc5cAss6-sDY;5`sM}d5O8HQ+hbkO7oISGV}8$IvY=Z!>Gr|Kk=$2n@9#vXKCW(
z8%(<Lk|1X76pak_9+v#P)Z!@_oIR|0rMU%_Q!=<SWF|{9B{8Z_R$(?~*2>_SSdzeL
z4APL=!<0AizC51=*q|P^<ebv<)V#?LnN%2UCvRX<Vsikin#{te!RR_!mr<J0W3ne>
L5=S6dxKs}SU{N;?

delta 171
zcmcc5cAss6o+wjZ%9PkCwNo^DSn?8cQ>XMWXXcem3^$&9o-vM5bmC4;HmMAr&eFum
z8<;d0<tN`{l4ewz{FNz*O&cVfI#E}0;tyG7gAAUD9R@tcV6h&SR09k1NzCDFHemkb
zXeJ#-$H^T`N^C9=O)qriJwYs>wHfR^Ectn<#Zxjkdsy>Ia|<e`WN>HrOrFkY%n=Au
IlUS+;0E|mHQUCw|

diff --git a/generated/gallery/plot_5_mrg_examples/index.html b/generated/gallery/plot_5_mrg_examples/index.html
index fc03666..64f16fa 100644
--- a/generated/gallery/plot_5_mrg_examples/index.html
+++ b/generated/gallery/plot_5_mrg_examples/index.html
@@ -844,7 +844,7 @@ <h2 id="merging-with-data-not-generated-with-clugen">Merging with data not gener
 <a id="__codelineno-14-5" name="__codelineno-14-5" href="#__codelineno-14-5"></a>    <span class="n">clusters_field</span><span class="o">=</span><span class="s2">&quot;hclusters&quot;</span><span class="p">)</span>
 </code></pre></div>
 <p><img alt="e097: generated w/ make_moons(), e098: generated w/ clugen(), e099: merged data" class="mkd-glr-single-img" src="../images/mkd_glr_plot_5_mrg_examples_005.png" srcset="../images/mkd_glr_plot_5_mrg_examples_005.png" /></p>
-<p><strong>Total running time of the script:</strong> ( 0 minutes  3.156 seconds)</p>
+<p><strong>Total running time of the script:</strong> ( 0 minutes  3.588 seconds)</p>
 <div id="download_links"></div>
 
 <p><a class="md-button center" href="../plot_5_mrg_examples.py"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M288 32c0-17.7-14.3-32-32-32s-32 14.3-32 32v242.7l-73.4-73.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l128 128c12.5 12.5 32.8 12.5 45.3 0l128-128c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L288 274.7V32zM64 352c-35.3 0-64 28.7-64 64v32c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64v-32c0-35.3-28.7-64-64-64H346.5l-45.3 45.3c-25 25-65.5 25-90.5 0L165.5 352H64zm368 56a24 24 0 1 1 0 48 24 24 0 1 1 0-48z"/></svg></span> Download Python source code: plot_5_mrg_examples.py</a></p>
diff --git a/generated/gallery/plot_5_mrg_examples_codeobj.pickle b/generated/gallery/plot_5_mrg_examples_codeobj.pickle
index e06040347f6d9e44017d7ac085592dc6679b95e8..85d487ac7f8af6b8a0bda3ec2180070c68c5b87b 100644
GIT binary patch
delta 597
zcmYLG&ubGw7-jd{%x*Ocp=vL&X&Vx{Pzk|ENGVc(Tr5~Y3Zf7Ru`b!b?lxw-V3msE
zMPUlo84rpOkV1<Ty!28Jf)^3J6vV$mi-I@t;M>`Xmw9i#@4b05-wcKa!%r$r!>u+N
z^15_ZrNfP|9k`NP=lwWZcKea!ELgf3`>5Hyl|?`9xM3w|cSFguX`>Z+OZ{b7{<T57
zQESm}N0AiUdNG^x?8mXokuyCl_GpZbdA7K1&J)UF&CKWiTfM_x0nK0LWNzZX)$50y
z<aXk$aa0^JrZij38g;df2yPr=6(tl8GIgrK`!-W$^|Vyb%h{QPwpDmdg`a^Y)KB98
z+>Onp$~n+WO1Zw}(1K^Le<{%w!0!TGgCVbtQWH3%6{rPlYRBmYaMW^GNBP8MvqU~h
zfAkvN0-mH<Q@vZeDlY0{bO*I>>>!sOV)q>8N2&RvR%UA{jp+nEMR$Y7=^1c8<IoG>
z1*=K};9V<EuM*UzikpcA-{zTI?{%d}+W^j0M1_&VviVgdFykpHk8+5!I5oZPNT
z;ugL$aQhustWt^I;kEaR+4st(=5;EW7yF>Z;uDqG=ak-K=h$9KUmElD6{mhT8Z3#6
GD}Mp_6Tz(j

delta 588
zcmbO)G+$_fUJyrePHApxQF`i>*eM#dQ#5*5@)C1Xr}S{7mF6XvWaj5h>0!&wPbtj-
zi6xb0=9FaS6;J8mfr!NyXXF=^Ov&Kt;m9nGPtHj!E}k;^G`kWTe+ExyY3k%kW({VM
z44%nLm{pj?f%Iu+Ic6y!{f1eVSr$n1u*fkh0BKzo8D?c5eUeRENdu%6=#mWf9;SlK
zDH)tSta+um1(j1WxHGgN#$I4mWi|jR&tcbKGX)7HPM*P}%4P*&1~Mx#+XH1^Gublo
zOs-{B;_(1$>tRVXur!`Lk4c`{7bv}u#h5t=NMC2MX8}9*H>(bFG*Cc`O@S>AVv!t&
zDqAwd)FwtxplN3q&D7z}<8)8WOD#$)$uB~6WdTI<L=Io(QlNPcIV>1?Ca%|DtAm&%
z26S~ZgsI77#@qo^lEh@d+zq6C*)(`2fTci=wXm4%!D<5W1KUi9p`IL0Z1W(RrC61i
z7XvlfvqJsJwg#f6ibaKaV+PN}TfWRdw@!R7Ex=Jw35@6T)I3li?gHuwWmIS03#5Hm
zG?)(o=?XRj=HozmHJdcgDX=Nv&@u;w7SDOGeP96-aPVCLs&r)6;J68vDAfZ1#|69V

diff --git a/generated/gallery/plot_functions/index.html b/generated/gallery/plot_functions/index.html
index f25db20..44b1a5d 100644
--- a/generated/gallery/plot_functions/index.html
+++ b/generated/gallery/plot_functions/index.html
@@ -1015,7 +1015,7 @@ <h2 id="plot_examples_nd">plot_examples_nd<a class="headerlink" href="#plot_exam
 <a id="__codelineno-6-34" name="__codelineno-6-34" href="#__codelineno-6-34"></a>                <span class="n">g</span><span class="o">.</span><span class="n">axes</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">([</span><span class="n">xmins</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">xmaxs</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">]])</span>
 <a id="__codelineno-6-35" name="__codelineno-6-35" href="#__codelineno-6-35"></a>                <span class="n">g</span><span class="o">.</span><span class="n">axes</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span><span class="o">.</span><span class="n">set_aspect</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
 </code></pre></div>
-<p><strong>Total running time of the script:</strong> ( 0 minutes  0.005 seconds)</p>
+<p><strong>Total running time of the script:</strong> ( 0 minutes  0.006 seconds)</p>
 <div id="download_links"></div>
 
 <p><a class="md-button center" href="../plot_functions.py"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M288 32c0-17.7-14.3-32-32-32s-32 14.3-32 32v242.7l-73.4-73.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l128 128c12.5 12.5 32.8 12.5 45.3 0l128-128c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L288 274.7V32zM64 352c-35.3 0-64 28.7-64 64v32c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64v-32c0-35.3-28.7-64-64-64H346.5l-45.3 45.3c-25 25-65.5 25-90.5 0L165.5 352H64zm368 56a24 24 0 1 1 0 48 24 24 0 1 1 0-48z"/></svg></span> Download Python source code: plot_functions.py</a></p>
diff --git a/generated/gallery/plot_functions_codeobj.pickle b/generated/gallery/plot_functions_codeobj.pickle
index 56c8375d9615a7dc65e8a83e754e628c077fe22e..01117e602048b27d37a63e6e5e366672d80be493 100644
GIT binary patch
delta 556
zcmY*VJ7^OD819#QCAq5wEfm_6v`r$ZO2A^ULL?m|D2Np6Ad2X<X}pWgCH8t&i$!r!
z%MD(>a}XRH6a*av2VV$+9Sa@YL<PkzfeN|^4(eSC2A7}z#rIn8ZS{&*rQ4@HaR)EN
zlNndE^xCxVMk|grHD}f>-<50ivNIFzCLMET&a`arWQMQf1L&gMnU5IV9P1oZj2=U~
z=}dD~%_Bic+m0lk$(NDk;dlx=d0a75BZhXvS@g4p;wncDlx9OJBj(Bt-74$0Z>UC|
zUzIuvJQf{7kuRcsY(|fw6n4#+ibK?5^B50mg7<=Xf{%mwL{JkPi>o-bL3W1AI0uuT
z#cg~kme3%iDa`X*B!vYQ)IGSu^XdSuu_f)pb*AMaN3`9z#j;lNR&%)H{Z8O6mxKx)
z@VhXGhg=cTc*LdP{|T3+44$$NyYQR`(T`WWukBA7Z^8gJ-3c}QV(3h=oLb{jXzVRV
zMGGtcG`$NoN$<lzI4*W`QPS{{pW>tV!X7n^HC9kew%U-QZ7yu2)y7=+nD6=#zUZ!M
zTXt^NIm-`f7C-6BL-@s}x_8T;&AV(ha{8prVf1IJB04(S$U?CHPi$2Q?%XOArK%Qo
K6G9>4<o*IN$+j>6

delta 498
zcmY*VPe>GT81?PU{AR|PrNTdvtmNvhZnbM3TsA>kq7WjIphLz|FzVK{Iit7@s402z
z5Ddlerb9u8BIp)%6Y5gZEeN|5U7{cgAxwz6ba2Lm@b<p<z4t!8x7PNfO<q*)j0W<-
zf^esLt<idTA<Qj=IbU`yryBZ-cgb_dLSHzO<K9F}xSlgL8%$@Lq*!h1+e~O1_Aag(
zMbT`VH$96l)x+4w4K;-nQ+gh0p48isVcjV2KM+fzE4J#(^(%ALoiTY9l&dK6u5uoO
z^c%-;oaaOrhWOnqC+t(qCkNT2Ss0<GrBUG-ErW|((Oq2LGPzqOk3SnTnBfa;01N!B
zWwFRrF@Pl+YMDv154U*K>_ot8n8!VCpbPhTBhku3iL-bVp)?-zL1Ge5xJT;5Gxkdk
zmibWX#&aH%Q~WAb@G3$*c+Jnzyd+ofCPF=U8>fkP@tf74(7_K{yY${RgWEZLWQ%bG
zpEzt}@P*e!Kfdv)=)f8+vn1C4mA4l~{NibJ;}1`$CH&>QIz&e=Worr=8vfTOIo8aM
Y_G((R50;BcEg6ILv{~D|?WlUao5IzhQvd(}

diff --git a/search/search_index.json b/search/search_index.json
index d036dc0..3750f22 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"pyclugen","text":"<p>pyclugen is Python package for generating multidimensional clusters. Each cluster is supported by a line segment, the position, orientation and length of which guide where the respective points are placed. The <code>clugen()</code> function is provided for this purpose, as well as a number of auxiliary functions, used internally and modularly by <code>clugen()</code>. Users can swap these auxiliary functions by their own customized versions, fine-tuning their cluster generation strategies, or even use them as the basis for their own generation algorithms.</p>"},{"location":"#installation","title":"Installation","text":"<p>Install from PyPI:</p> <pre><code>pip install --upgrade pip\npip install pyclugen\n</code></pre> <p>Or directly from GitHub:</p> <pre><code>pip install --upgrade pip\npip install git+https://github.com/clugen/pyclugen.git#egg=pyclugen\n</code></pre>"},{"location":"#quick-start","title":"Quick start","text":"<pre><code>from pyclugen import clugen\nimport matplotlib.pyplot as plt\n</code></pre> <pre><code>out2 = clugen(2, 4, 400, [1, 0], 0.4, [50, 10], 20, 1, 2)\nplt.scatter(out2.points[:, 0], out2.points[:, 1], c=out2.clusters)\nplt.show()\n</code></pre> <pre><code>out3 = clugen(3, 5, 10000, [0.5, 0.5, 0.5], 0.2, [10, 10, 10], 10, 1, 2)\nfig = plt.figure()\nax = fig.add_subplot(projection=\"3d\")\nax.scatter(out3.points[:, 0], out3.points[:, 1], out3.points[:, 2], c=out3.clusters)\nplt.show()\n</code></pre>"},{"location":"#further-reading","title":"Further reading","text":"<p>The clugen algorithm and its several implementations are detailed in the following reference (please cite it if you use this software):</p> <ul> <li>Fachada, N. &amp; de Andrade, D. (2023). Generating multidimensional clusters   with support lines. Knowledge-Based Systems, 277, 110836.   
https://doi.org/10.1016/j.knosys.2023.110836   (arXiv preprint)</li> </ul>"},{"location":"#also-in-this-documentation","title":"Also in this documentation","text":"<ul> <li>Theory: the clugen algorithm in detail</li> <li>Detailed usage examples</li> <li>Reference</li> <li>Developing this package</li> </ul>"},{"location":"dev/","title":"Development","text":""},{"location":"dev/#installing-for-development-andor-improving-the-package","title":"Installing for development and/or improving the package","text":"<pre><code>$ git clone https://github.com/clugen/pyclugen.git\n$ cd pyclugen\n$ python -m venv env\n$ source env/bin/activate\n$ pip install -e .[dev]\n$ pre-commit install\n</code></pre> <p>On Windows replace <code>source env/bin/activate</code> with <code>. env\\Scripts\\activate</code>.</p>"},{"location":"dev/#run-tests","title":"Run tests","text":"<p>Tests can be executed with the following command:</p> <pre><code>$ pytest\n</code></pre> <p>The previous command runs the tests at <code>normal</code> level by default. This test level can also be specified explicitly:</p> <pre><code>$ pytest --test-level=normal\n</code></pre> <p>There are four test levels, from fastest to slowest (i.e., from less thorough to more exhaustive): <code>fast</code>, <code>ci</code>, <code>normal</code> and <code>full</code>. The <code>fast</code> level tests all functions using typical parameters, just to check if everything is working. The <code>ci</code> level performs the minimal amount of testing that yields complete test coverage. Beyond complete coverage, the <code>normal</code> and <code>full</code> levels also test increasing combinations of parameters and PRNG seeds, which may be important to root out rare corner cases. 
Note that the <code>full</code> level can be extremely slow.</p> <p>To generate a test coverage report, run pytest as follows:</p> <pre><code>$ pytest --cov=pyclugen --cov-report=html --test-level=ci\n</code></pre>"},{"location":"dev/#build-docs","title":"Build docs","text":"<p>Considering we're in the <code>pyclugen</code> folder, run the following commands:</p> <pre><code>$ cd docs\n$ mkdocs build\n</code></pre> <p>The generated documentation will be placed in <code>docs/site</code>. Alternatively, the documentation can be generated and served locally with:</p> <pre><code>$ mkdocs serve\n</code></pre>"},{"location":"dev/#code-style","title":"Code style","text":"<p>Code style is enforced with flake8 (and a number of plugins), black, and isort. Some highlights include, but are not limited to:</p> <ul> <li>Encoding: UTF-8</li> <li>Indentation: 4 spaces (no tabs)</li> <li>Line size limit: 88 chars</li> <li>Newlines: Unix style, i.e. LF or \\n</li> </ul>"},{"location":"reference/","title":"Reference","text":"<p>Various functions for multidimensional cluster generation in Python.</p> <p>Note that:</p> <ol> <li><code>clugen()</code> is the main function of the pyclugen    package, and possibly the only function most users will need.</li> <li>Functions which accept <code>rng</code> as the last parameter are stochastic. 
Thus, in    order to obtain the same result on separate invocations of these functions,    pass them an instance of same pseudo-random number    <code>Generator</code> initialized with the same seed.</li> </ol>"},{"location":"reference/#pyclugen.Clusters","title":"Clusters","text":"<p>             Bases: <code>NamedTuple</code></p> <p>Read-only container for results returned by <code>clugen()</code>.</p> <p>The symbols presented in the instances variable below have the following meanings:</p> <ul> <li>\\(n\\) : Number of dimensions.</li> <li>\\(p\\) : Number of points.</li> <li>\\(c\\) : Number of clusters.</li> </ul> Source code in <code>pyclugen/main.py</code> <pre><code>class Clusters(NamedTuple):\n    r\"\"\"Read-only container for results returned by [`clugen()`][pyclugen.main.clugen].\n\n    The symbols presented in the instances variable below have the following\n    meanings:\n\n    - $n$ : Number of dimensions.\n    - $p$ : Number of points.\n    - $c$ : Number of clusters.\n    \"\"\"\n\n    points: NDArray\n    r\"\"\"$p \\times n$ matrix containing the generated points for all clusters.\"\"\"\n\n    clusters: NDArray\n    r\"\"\"Vector of size $p$ indicating the cluster each point in `points`\n    belongs to.\"\"\"\n\n    projections: NDArray\n    r\"\"\"$p \\times n$ matrix with the point projections on the cluster-supporting\n    lines.\"\"\"\n\n    sizes: NDArray\n    r\"\"\"Vector of size $c$ with the number of points in each cluster.\"\"\"\n\n    centers: NDArray\n    r\"\"\"$c \\times n$ matrix with the coordinates of the cluster centers.\"\"\"\n\n    directions: NDArray\n    r\"\"\"$c \\times n$ matrix with the direction of each cluster-supporting line.\"\"\"\n\n    angles: NDArray\n    r\"\"\"Vector of size $c$ with the angles between the cluster-supporting lines and\n    the main direction.\"\"\"\n\n    lengths: NDArray\n    r\"\"\"Vector of size $c$ with the lengths of the cluster-supporting 
lines.\"\"\"\n</code></pre>"},{"location":"reference/#pyclugen.Clusters.angles","title":"angles  <code>instance-attribute</code>","text":"<pre><code>angles: NDArray\n</code></pre> <p>Vector of size \\(c\\) with the angles between the cluster-supporting lines and the main direction.</p>"},{"location":"reference/#pyclugen.Clusters.centers","title":"centers  <code>instance-attribute</code>","text":"<pre><code>centers: NDArray\n</code></pre> <p>\\(c \\times n\\) matrix with the coordinates of the cluster centers.</p>"},{"location":"reference/#pyclugen.Clusters.clusters","title":"clusters  <code>instance-attribute</code>","text":"<pre><code>clusters: NDArray\n</code></pre> <p>Vector of size \\(p\\) indicating the cluster each point in <code>points</code> belongs to.</p>"},{"location":"reference/#pyclugen.Clusters.directions","title":"directions  <code>instance-attribute</code>","text":"<pre><code>directions: NDArray\n</code></pre> <p>\\(c \\times n\\) matrix with the direction of each cluster-supporting line.</p>"},{"location":"reference/#pyclugen.Clusters.lengths","title":"lengths  <code>instance-attribute</code>","text":"<pre><code>lengths: NDArray\n</code></pre> <p>Vector of size \\(c\\) with the lengths of the cluster-supporting lines.</p>"},{"location":"reference/#pyclugen.Clusters.points","title":"points  <code>instance-attribute</code>","text":"<pre><code>points: NDArray\n</code></pre> <p>\\(p \\times n\\) matrix containing the generated points for all clusters.</p>"},{"location":"reference/#pyclugen.Clusters.projections","title":"projections  <code>instance-attribute</code>","text":"<pre><code>projections: NDArray\n</code></pre> <p>\\(p \\times n\\) matrix with the point projections on the cluster-supporting lines.</p>"},{"location":"reference/#pyclugen.Clusters.sizes","title":"sizes  <code>instance-attribute</code>","text":"<pre><code>sizes: NDArray\n</code></pre> <p>Vector of size \\(c\\) with the number of points in each 
cluster.</p>"},{"location":"reference/#pyclugen.angle_btw","title":"angle_btw","text":"<pre><code>angle_btw(v1: NDArray, v2: NDArray) -&gt; float\n</code></pre> <p>Angle between two \\(n\\)-dimensional vectors.</p> <p>Typically, the angle between two vectors <code>v1</code> and <code>v2</code> can be obtained with:</p> <pre><code>arccos(dot(u, v) / (norm(u) * norm(v)))\n</code></pre> <p>However, this approach is numerically unstable. The version provided here is numerically stable and based on the AngleBetweenVectors Julia package by Jeffrey Sarnoff (MIT license), implementing an algorithm provided by Prof. W. Kahan in these notes (see page 15).</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array, degrees\n&gt;&gt;&gt; from pyclugen import angle_btw\n&gt;&gt;&gt; v1 = array([1.0, 1.0, 1.0, 1.0])\n&gt;&gt;&gt; v2 = array([1.0, 0.0, 0.0, 0.0])\n&gt;&gt;&gt; degrees(angle_btw(v1, v2))\n60.00000000000001\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>v1</code> <code>NDArray</code> <p>First vector.</p> required <code>v2</code> <code>NDArray</code> <p>Second vector.</p> required <p>Returns:</p> Type Description <code>float</code> <p>Angle between <code>v1</code> and <code>v2</code> in radians.</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def angle_btw(v1: NDArray, v2: NDArray) -&gt; float:\n    r\"\"\"Angle between two $n$-dimensional vectors.\n\n    Typically, the angle between two vectors `v1` and `v2` can be obtained with:\n\n    ```python\n    arccos(dot(u, v) / (norm(u) * norm(v)))\n    ```\n\n    However, this approach is numerically unstable. The version provided here is\n    numerically stable and based on the\n    [AngleBetweenVectors](https://github.com/JeffreySarnoff/AngleBetweenVectors.jl)\n    Julia package by Jeffrey Sarnoff (MIT license), implementing an algorithm\n    provided by Prof. W. 
Kahan in\n    [these notes](https://people.eecs.berkeley.edu/~wkahan/MathH110/Cross.pdf)\n    (see page 15).\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array, degrees\n        &gt;&gt;&gt; from pyclugen import angle_btw\n        &gt;&gt;&gt; v1 = array([1.0, 1.0, 1.0, 1.0])\n        &gt;&gt;&gt; v2 = array([1.0, 0.0, 0.0, 0.0])\n        &gt;&gt;&gt; degrees(angle_btw(v1, v2))\n        60.00000000000001\n\n    Args:\n      v1: First vector.\n      v2: Second vector.\n\n    Returns:\n      Angle between `v1` and `v2` in radians.\n    \"\"\"\n    u1 = v1 / norm(v1)\n    u2 = v2 / norm(v2)\n\n    y = u1 - u2\n    x = u1 + u2\n\n    return 2 * arctan(norm(y) / norm(x))\n</code></pre>"},{"location":"reference/#pyclugen.angle_deltas","title":"angle_deltas","text":"<pre><code>angle_deltas(\n    num_clusters: int, angle_disp: float, rng: Generator = _default_rng\n) -&gt; NDArray\n</code></pre> <p>Get angles between average cluster direction and cluster-supporting lines.</p> <p>Determine the angles between the average cluster direction and the cluster-supporting lines. These angles are obtained from a wrapped normal distribution ( \\(\\mu=0\\), \\(\\sigma=\\)<code>angle_disp</code>) with support in the interval \\(\\left[-\\pi/2,\\pi/2\\right]\\). 
Note this is different from the standard wrapped normal distribution, the support of which is given by the interval \\(\\left[-\\pi,\\pi\\right]\\).</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import angle_deltas\n&gt;&gt;&gt; from numpy import degrees, pi\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; a_rad = angle_deltas(4, pi/8, rng=prng) # Angle dispersion of 22.5 degrees\n&gt;&gt;&gt; a_rad\narray([-0.38842705, -0.14442948,  0.50576707,  0.07617358])\n&gt;&gt;&gt; degrees(a_rad) # Show angle deltas in degrees\narray([-22.25523038,  -8.27519966,  28.97831838,   4.36442443])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>angle_disp</code> <code>float</code> <p>Angle dispersion, in radians.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Angles between the average cluster direction and the cluster-supporting lines, given in radians in the interval \\(\\left[-\\pi/2,\\pi/2\\right]\\).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def angle_deltas(\n    num_clusters: int, angle_disp: float, rng: Generator = _default_rng\n) -&gt; NDArray:\n    r\"\"\"Get angles between average cluster direction and cluster-supporting lines.\n\n    Determine the angles between the average cluster direction and the\n    cluster-supporting lines. These angles are obtained from a wrapped normal\n    distribution ( $\\mu=0$, $\\sigma=$`angle_disp`) with support in the interval\n    $\\left[-\\pi/2,\\pi/2\\right]$. 
Note this is different from the standard\n    wrapped normal distribution, the support of which is given by the interval\n    $\\left[-\\pi,\\pi\\right]$.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import angle_deltas\n        &gt;&gt;&gt; from numpy import degrees, pi\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; a_rad = angle_deltas(4, pi/8, rng=prng) # Angle dispersion of 22.5 degrees\n        &gt;&gt;&gt; a_rad\n        array([-0.38842705, -0.14442948,  0.50576707,  0.07617358])\n        &gt;&gt;&gt; degrees(a_rad) # Show angle deltas in degrees\n        array([-22.25523038,  -8.27519966,  28.97831838,   4.36442443])\n\n    Args:\n      num_clusters: Number of clusters.\n      angle_disp: Angle dispersion, in radians.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Angles between the average cluster direction and the cluster-supporting\n        lines, given in radians in the interval $\\left[-\\pi/2,\\pi/2\\right]$.\n    \"\"\"\n    # Get random angle differences using the normal distribution\n    angles = angle_disp * rng.normal(size=num_clusters)\n\n    # Reduce angle differences to the interval [-\u03c0, \u03c0]\n    angles = arctan2(sin(angles), cos(angles))\n\n    # Make sure angle differences are within interval [-\u03c0/2, \u03c0/2]\n    return where(abs(angles) &gt; pi / 2, angles - sign(angles) * pi / 2, angles)\n</code></pre>"},{"location":"reference/#pyclugen.clucenters","title":"clucenters","text":"<pre><code>clucenters(\n    num_clusters: int,\n    clu_sep: NDArray,\n    clu_offset: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Determine cluster centers using the uniform distribution.</p> <p>The number of clusters (<code>num_clusters</code>) and the average cluster separation (<code>clu_sep</code>) are taken into account.</p> <p>More specifically, let \\(c=\\)<code>num_clusters</code>, 
\\(\\mathbf{s}=\\)<code>clu_sep.reshape(-1,1)</code>, \\(\\mathbf{o}=\\)<code>clu_offset.reshape(-1,1)</code>, \\(n=\\)<code>clu_sep.size</code> (i.e., number of dimensions). Cluster centers are obtained according to the following equation:</p> \\[ \\mathbf{C}=c\\mathbf{U} \\cdot \\operatorname{diag}(\\mathbf{s}) +     \\mathbf{1}\\,\\mathbf{o}^T \\] <p>where \\(\\mathbf{C}\\) is the \\(c \\times n\\) matrix of cluster centers, \\(\\mathbf{U}\\) is an \\(c \\times n\\) matrix of random values drawn from the uniform distribution between -0.5 and 0.5, and \\(\\mathbf{1}\\) is an \\(c \\times 1\\) vector with all entries equal to 1.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clucenters\n&gt;&gt;&gt; from numpy import array\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; clucenters(3, array([30,10]), array([-50,50]), rng=prng)\narray([[-33.58833231,  36.61463056],\n       [-75.16761145,  40.53115432],\n       [-79.1684689 ,  59.3628352 ]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>clu_sep</code> <code>NDArray</code> <p>Average cluster separation ( \\(n \\times 1\\) vector).</p> required <code>clu_offset</code> <code>NDArray</code> <p>Cluster offsets ( \\(n \\times 1\\) vector).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>A \\(c \\times n\\) matrix containing the cluster centers.</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clucenters(\n    num_clusters: int,\n    clu_sep: NDArray,\n    clu_offset: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Determine cluster centers using the uniform distribution.\n\n    The number of clusters (`num_clusters`) and the average cluster separation\n    
(`clu_sep`) are taken into account.\n\n    More specifically, let $c=$`num_clusters`, $\\mathbf{s}=$`clu_sep.reshape(-1,1)`,\n    $\\mathbf{o}=$`clu_offset.reshape(-1,1)`, $n=$`clu_sep.size` (i.e., number of\n    dimensions). Cluster centers are obtained according to the following equation:\n\n    $$\n    \\mathbf{C}=c\\mathbf{U} \\cdot \\operatorname{diag}(\\mathbf{s}) +\n        \\mathbf{1}\\,\\mathbf{o}^T\n    $$\n\n    where $\\mathbf{C}$ is the $c \\times n$ matrix of cluster centers,\n    $\\mathbf{U}$ is an $c \\times n$ matrix of random values drawn from the\n    uniform distribution between -0.5 and 0.5, and $\\mathbf{1}$ is an $c \\times\n    1$ vector with all entries equal to 1.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clucenters\n        &gt;&gt;&gt; from numpy import array\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; clucenters(3, array([30,10]), array([-50,50]), rng=prng)\n        array([[-33.58833231,  36.61463056],\n               [-75.16761145,  40.53115432],\n               [-79.1684689 ,  59.3628352 ]])\n\n    Args:\n      num_clusters: Number of clusters.\n      clu_sep: Average cluster separation ( $n \\times 1$ vector).\n      clu_offset: Cluster offsets ( $n \\times 1$ vector).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n        A $c \\times n$ matrix containing the cluster centers.\n    \"\"\"\n    # Obtain a num_clusters x num_dims matrix of uniformly distributed values\n    # between -0.5 and 0.5 representing the relative cluster centers\n    ctr_rel = rng.random((num_clusters, clu_sep.size)) - 0.5\n\n    return num_clusters * (ctr_rel @ diag(clu_sep)) + clu_offset\n</code></pre>"},{"location":"reference/#pyclugen.clugen","title":"clugen","text":"<pre><code>clugen(\n    num_dims: int,\n    num_clusters: int,\n    num_points: int,\n    direction: ArrayLike,\n    angle_disp: float,\n    cluster_sep: 
ArrayLike,\n    llength: float,\n    llength_disp: float,\n    lateral_disp: float,\n    allow_empty: bool = False,\n    cluster_offset: Optional[ArrayLike] = None,\n    proj_dist_fn: str | Callable[[float, int, Generator], NDArray] = \"norm\",\n    point_dist_fn: str\n    | Callable[\n        [NDArray, float, float, NDArray, NDArray, Generator], NDArray\n    ] = \"n-1\",\n    clusizes_fn: Callable[[int, int, bool, Generator], NDArray]\n    | ArrayLike = clusizes,\n    clucenters_fn: Callable[[int, NDArray, NDArray, Generator], NDArray]\n    | ArrayLike = clucenters,\n    llengths_fn: Callable[[int, float, float, Generator], NDArray]\n    | ArrayLike = llengths,\n    angle_deltas_fn: Callable[[int, float, Generator], NDArray]\n    | ArrayLike = angle_deltas,\n    rng: int | Generator = _default_rng,\n) -&gt; Clusters\n</code></pre> <p>Generate multidimensional clusters.</p> <p>Tip</p> <p>This is the main function of the pyclugen package, and possibly the only function most users will need.</p>"},{"location":"reference/#pyclugen.clugen--examples","title":"Examples:","text":"<pre><code>&gt;&gt;&gt; import matplotlib.pyplot as plt\n&gt;&gt;&gt; from pyclugen import clugen\n&gt;&gt;&gt; from numpy import pi\n&gt;&gt;&gt; out = clugen(2, 5, 10000, [1, 0.5], pi/16, [10, 40], 10, 1, 2, rng=321)\n&gt;&gt;&gt; out.centers # What are the cluster centers?\narray([[ 20.02876212,  36.59611434],\n       [-15.60290734, -26.52169579],\n       [ 23.09775166,  91.66309916],\n       [ -5.76816015,  54.9775074 ],\n       [ -4.64224681,  78.40990876]])\n&gt;&gt;&gt; plt.scatter(out.points[:,0],\n...             out.points[:,1],\n...             c=out.clusters) # doctest: +SKIP\n&gt;&gt;&gt; plt.show() # doctest: +SKIP\n</code></pre> <p>Note</p> <p>In the descriptions below, the terms \"average\" and \"dispersion\" refer to measures of central tendency and statistical dispersion, respectively. 
Their exact meaning depends on several optional arguments.</p> <p>Parameters:</p> Name Type Description Default <code>num_dims</code> <code>int</code> <p>Number of dimensions.</p> required <code>num_clusters</code> <code>int</code> <p>Number of clusters to generate.</p> required <code>num_points</code> <code>int</code> <p>Total number of points to generate.</p> required <code>direction</code> <code>ArrayLike</code> <p>Average direction of the cluster-supporting lines. Can be a vector of length <code>num_dims</code> (same direction for all clusters) or a matrix of size <code>num_clusters</code> x <code>num_dims</code> (one direction per cluster).</p> required <code>angle_disp</code> <code>float</code> <p>Angle dispersion of cluster-supporting lines (radians).</p> required <code>cluster_sep</code> <code>ArrayLike</code> <p>Average cluster separation in each dimension (vector of size <code>num_dims</code>).</p> required <code>llength</code> <code>float</code> <p>Average length of cluster-supporting lines.</p> required <code>llength_disp</code> <code>float</code> <p>Length dispersion of cluster-supporting lines.</p> required <code>lateral_disp</code> <code>float</code> <p>Cluster lateral dispersion, i.e., dispersion of points from their projection on the cluster-supporting line.</p> required <code>allow_empty</code> <code>bool</code> <p>Allow empty clusters? <code>False</code> by default.</p> <code>False</code> <code>cluster_offset</code> <code>Optional[ArrayLike]</code> <p>Offset to add to all cluster centers (vector of size <code>num_dims</code>). 
By default the offset will be equal to <code>numpy.zeros(num_dims)</code>.</p> <code>None</code> <code>proj_dist_fn</code> <code>str | Callable[[float, int, Generator], NDArray]</code> <p>Distribution of point projections along cluster-supporting lines, with three possible values:</p> <ul> <li><code>\"norm\"</code> (default): Distribute point projections along lines using a normal   distribution (\u03bc=line center, \u03c3=<code>llength/6</code>).</li> <li><code>\"unif\"</code>: Distribute points uniformly along the line.</li> <li>User-defined function, which accepts three parameters, line length (<code>float</code>),   number of points (<code>int</code>), and an instance of   <code>Generator</code>,   and returns an array containing the distance of each point projection to   the center of the line. For example, the <code>\"norm\"</code> option roughly corresponds   to <code>lambda l, n, rg: l * rg.random((n, 1)) / 6</code>.</li> </ul> <code>'norm'</code> <code>point_dist_fn</code> <code>str | Callable[[NDArray, float, float, NDArray, NDArray, Generator], NDArray]</code> <p>Controls how the final points are created from their projections on the cluster-supporting lines, with three possible values:</p> <ul> <li><code>\"n-1\"</code> (default): Final points are placed on a hyperplane orthogonal to   the cluster-supporting line, centered at each point's projection, using the   normal distribution (\u03bc=0, \u03c3=<code>lateral_disp</code>). This is done by the   <code>clupoints_n_1()</code> function.</li> <li><code>\"n\"</code>: Final points are placed around their projection on the   cluster-supporting line using the normal distribution (\u03bc=0,   \u03c3=<code>lateral_disp</code>). 
This is done by the   <code>clupoints_n()</code> function.</li> <li>User-defined function: The user can specify a custom point placement   strategy by passing a function with the same signature as   <code>clupoints_n_1()</code> and   <code>clupoints_n()</code>.</li> </ul> <code>'n-1'</code> <code>clusizes_fn</code> <code>Callable[[int, int, bool, Generator], NDArray] | ArrayLike</code> <p>Distribution of cluster sizes. By default, cluster sizes are determined by the <code>clusizes()</code> function, which uses the normal distribution (\u03bc=<code>num_points</code>/<code>num_clusters</code>, \u03c3=\u03bc/3), and assures that the final cluster sizes add up to <code>num_points</code>. This parameter allows the user to specify a custom function for this purpose, which must follow <code>clusizes()</code> signature. Note that custom functions are not required to strictly obey the <code>num_points</code> parameter. Alternatively, the user can specify an array of cluster sizes directly.</p> <code>clusizes</code> <code>clucenters_fn</code> <code>Callable[[int, NDArray, NDArray, Generator], NDArray] | ArrayLike</code> <p>Distribution of cluster centers. By default, cluster centers are determined by the <code>clucenters()</code> function, which uses the uniform distribution, and takes into account the <code>num_clusters</code> and <code>cluster_sep</code> parameters for generating well-distributed cluster centers. This parameter allows the user to specify a custom function for this purpose, which must follow <code>clucenters()</code> signature. Alternatively, the user can specify a matrix of size <code>num_clusters</code> x <code>num_dims</code> with the exact cluster centers.</p> <code>clucenters</code> <code>llengths_fn</code> <code>Callable[[int, float, float, Generator], NDArray] | ArrayLike</code> <p>Distribution of line lengths. 
By default, the lengths of cluster-supporting lines are determined by the <code>llengths()</code> function, which uses the folded normal distribution (\u03bc=<code>llength</code>, \u03c3=<code>llength_disp</code>). This parameter allows the user to specify a custom function for this purpose, which must follow <code>llengths()</code> signature. Alternatively, the user can specify an array of line lengths directly.</p> <code>llengths</code> <code>angle_deltas_fn</code> <code>Callable[[int, float, Generator], NDArray] | ArrayLike</code> <p>Distribution of line angle differences with respect to <code>direction</code>. By default, the angles between <code>direction</code> and the direction of cluster-supporting lines are determined by the <code>angle_deltas()</code> function, which uses the wrapped normal distribution (\u03bc=0, \u03c3=<code>angle_disp</code>) with support in the interval [-\u03c0/2, \u03c0/2]. This parameter allows the user to specify a custom function for this purpose, which must follow <code>angle_deltas()</code> signature. 
Alternatively, the user can specify an array of angle deltas directly.</p> <code>angle_deltas</code> <code>rng</code> <code>int | Generator</code> <p>The seed for the random number generator or an instance of <code>Generator</code> for reproducible executions.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>Clusters</code> <p>The generated clusters and associated information in the form of a <code>Clusters</code> object.</p> Source code in <code>pyclugen/main.py</code> <pre><code>def clugen(\n    num_dims: int,\n    num_clusters: int,\n    num_points: int,\n    direction: ArrayLike,\n    angle_disp: float,\n    cluster_sep: ArrayLike,\n    llength: float,\n    llength_disp: float,\n    lateral_disp: float,\n    allow_empty: bool = False,\n    cluster_offset: Optional[ArrayLike] = None,\n    proj_dist_fn: str | Callable[[float, int, Generator], NDArray] = \"norm\",\n    point_dist_fn: str\n    | Callable[[NDArray, float, float, NDArray, NDArray, Generator], NDArray] = \"n-1\",\n    clusizes_fn: Callable[[int, int, bool, Generator], NDArray] | ArrayLike = clusizes,\n    clucenters_fn: Callable[[int, NDArray, NDArray, Generator], NDArray]\n    | ArrayLike = clucenters,\n    llengths_fn: Callable[[int, float, float, Generator], NDArray]\n    | ArrayLike = llengths,\n    angle_deltas_fn: Callable[[int, float, Generator], NDArray]\n    | ArrayLike = angle_deltas,\n    rng: int | Generator = _default_rng,\n) -&gt; Clusters:\n    \"\"\"Generate multidimensional clusters.\n\n    !!! 
tip\n        This is the main function of the **pyclugen** package, and possibly the\n        only function most users will need.\n\n    ## Examples:\n\n        &gt;&gt;&gt; import matplotlib.pyplot as plt\n        &gt;&gt;&gt; from pyclugen import clugen\n        &gt;&gt;&gt; from numpy import pi\n        &gt;&gt;&gt; out = clugen(2, 5, 10000, [1, 0.5], pi/16, [10, 40], 10, 1, 2, rng=321)\n        &gt;&gt;&gt; out.centers # What are the cluster centers?\n        array([[ 20.02876212,  36.59611434],\n               [-15.60290734, -26.52169579],\n               [ 23.09775166,  91.66309916],\n               [ -5.76816015,  54.9775074 ],\n               [ -4.64224681,  78.40990876]])\n        &gt;&gt;&gt; plt.scatter(out.points[:,0],\n        ...             out.points[:,1],\n        ...             c=out.clusters) # doctest: +SKIP\n        &gt;&gt;&gt; plt.show() # doctest: +SKIP\n\n    ![clugen](https://user-images.githubusercontent.com/3018963/151056890-c83c9509-b40d-4ab2-a842-f2a4706344c6.png)\n\n    !!! Note\n        In the descriptions below, the terms \"average\" and \"dispersion\" refer to\n        measures of central tendency and statistical dispersion, respectively.\n        Their exact meaning depends on several optional arguments.\n\n    Args:\n      num_dims: Number of dimensions.\n      num_clusters: Number of clusters to generate.\n      num_points: Total number of points to generate.\n      direction: Average direction of the cluster-supporting lines. 
Can be a\n        vector of length `num_dims` (same direction for all clusters) or a\n        matrix of size `num_clusters` x `num_dims` (one direction per cluster).\n      angle_disp: Angle dispersion of cluster-supporting lines (radians).\n      cluster_sep: Average cluster separation in each dimension (vector of size\n        `num_dims`).\n      llength: Average length of cluster-supporting lines.\n      llength_disp: Length dispersion of cluster-supporting lines.\n      lateral_disp: Cluster lateral dispersion, i.e., dispersion of points from their\n        projection on the cluster-supporting line.\n      allow_empty: Allow empty clusters? `False` by default.\n      cluster_offset: Offset to add to all cluster centers (vector of size `num_dims`).\n        By default the offset will be equal to `numpy.zeros(num_dims)`.\n      proj_dist_fn: Distribution of point projections along cluster-supporting lines,\n        with three possible values:\n\n        - `\"norm\"` (default): Distribute point projections along lines using a normal\n          distribution (\u03bc=_line center_, \u03c3=`llength/6`).\n        - `\"unif\"`: Distribute points uniformly along the line.\n        - User-defined function, which accepts three parameters, line length (`float`),\n          number of points (`int`), and an instance of\n          [`Generator`](https://numpy.org/doc/stable/reference/random/generator.html?highlight=generator#numpy.random.Generator),\n          and returns an array containing the distance of each point projection to\n          the center of the line. 
For example, the `\"norm\"` option roughly corresponds\n          to `lambda l, n, rg: l * rg.random((n, 1)) / 6`.\n\n      point_dist_fn: Controls how the final points are created from their projections\n        on the cluster-supporting lines, with three possible values:\n\n        - `\"n-1\"` (default): Final points are placed on a hyperplane orthogonal to\n          the cluster-supporting line, centered at each point's projection, using the\n          normal distribution (\u03bc=0, \u03c3=`lateral_disp`). This is done by the\n          [`clupoints_n_1()`][pyclugen.module.clupoints_n_1] function.\n        - `\"n\"`: Final points are placed around their projection on the\n          cluster-supporting line using the normal distribution (\u03bc=0,\n          \u03c3=`lateral_disp`). This is done by the\n          [`clupoints_n()`][pyclugen.module.clupoints_n] function.\n        - User-defined function: The user can specify a custom point placement\n          strategy by passing a function with the same signature as\n          [`clupoints_n_1()`][pyclugen.module.clupoints_n_1] and\n          [`clupoints_n()`][pyclugen.module.clupoints_n].\n\n      clusizes_fn: Distribution of cluster sizes. By default, cluster sizes are\n        determined by the [`clusizes()`][pyclugen.module.clusizes] function, which\n        uses the normal distribution (\u03bc=`num_points`/`num_clusters`, \u03c3=\u03bc/3), and\n        assures that the final cluster sizes add up to `num_points`. This parameter\n        allows the user to specify a custom function for this purpose, which must\n        follow [`clusizes()`][pyclugen.module.clusizes] signature. Note that custom\n        functions are not required to strictly obey the `num_points` parameter.\n        Alternatively, the user can specify an array of cluster sizes directly.\n      clucenters_fn: Distribution of cluster centers. 
By default, cluster centers\n        are determined by the [`clucenters()`][pyclugen.module.clucenters] function,\n        which uses the uniform distribution, and takes into account the `num_clusters`\n        and `cluster_sep` parameters for generating well-distributed cluster centers.\n        This parameter allows the user to specify a custom function for this purpose,\n        which must follow [`clucenters()`][pyclugen.module.clucenters] signature.\n        Alternatively, the user can specify a matrix of size `num_clusters` x\n        `num_dims` with the exact cluster centers.\n      llengths_fn: Distribution of line lengths. By default, the lengths of\n        cluster-supporting lines are determined by the\n        [`llengths()`][pyclugen.module.llengths] function, which uses the folded\n        normal distribution (\u03bc=`llength`, \u03c3=`llength_disp`). This parameter allows\n        the user to specify a custom function for this purpose, which must follow\n        [`llengths()`][pyclugen.module.llengths] signature. Alternatively, the user\n        can specify an array of line lengths directly.\n      angle_deltas_fn: Distribution of line angle differences with respect to\n        `direction`. By default, the angles between `direction` and the direction of\n        cluster-supporting lines are determined by the\n        [`angle_deltas()`][pyclugen.module.angle_deltas] function, which uses the\n        wrapped normal distribution (\u03bc=0, \u03c3=`angle_disp`) with support in the interval\n        [-\u03c0/2, \u03c0/2]. This parameter allows the user to specify a custom function for\n        this purpose, which must follow [`angle_deltas()`][pyclugen.module.angle_deltas]\n        signature. 
Alternatively, the user can specify an array of angle deltas\n        directly.\n      rng: The seed for the random number generator or an instance of\n        [`Generator`][numpy.random.Generator] for reproducible executions.\n\n    Returns:\n      The generated clusters and associated information in the form of a\n        [`Clusters`][pyclugen.main.Clusters] object.\n    \"\"\"\n    # ############### #\n    # Validate inputs #\n    # ############### #\n\n    # Check that number of dimensions is &gt; 0\n    if num_dims &lt; 1:\n        raise ValueError(\"Number of dimensions, `num_dims`, must be &gt; 0\")\n\n    # Check that number of clusters is &gt; 0\n    if num_clusters &lt; 1:\n        raise ValueError(\"Number of clusters, `num_clust`, must be &gt; 0\")\n\n    # Convert given direction into a NumPy array\n    arrdir: NDArray = asarray(direction)\n\n    # Get number of dimensions in `direction` array\n    dir_ndims = arrdir.ndim\n\n    # Is direction a vector or a matrix?\n    if dir_ndims == 1:\n        # It's a vector, let's convert it into a row matrix, since this will be\n        # useful down the road\n        arrdir = arrdir.reshape((1, -1))\n    elif dir_ndims == 2:\n        # If a matrix was given (i.e. 
a main direction is given for each cluster),\n        # check if the number of directions is the same as the number of clusters\n        dir_size_1 = arrdir.shape[0]\n        if dir_size_1 != num_clusters:\n            raise ValueError(\n                \"Number of rows in `direction` must be the same as the \"\n                + f\"number of clusters ({dir_size_1} != {num_clusters})\"\n            )\n    else:\n        # The `directions` array must be a vector or a matrix, so if we get here\n        # it means we have invalid arguments\n        raise ValueError(\n            \"`direction` must be a vector (1D array) or a matrix (2D array), \"\n            + f\"but is {dir_ndims}D\"\n        )\n\n    # Check that direction has num_dims dimensions\n    dir_size_2 = arrdir.shape[1]\n    if dir_size_2 != num_dims:\n        raise ValueError(\n            \"Length of directions in `direction` must be equal to \"\n            + f\"`num_dims` ({dir_size_2} != {num_dims})\"\n        )\n\n    # Check that directions have magnitude &gt; 0\n    dir_magnitudes = apply_along_axis(norm, 1, arrdir)\n    if any(isclose(dir_magnitudes, 0)):\n        raise ValueError(\"Directions in `direction` must have magnitude &gt; 0\")\n\n    # If allow_empty is false, make sure there are enough points to distribute\n    # by the clusters\n    if (not allow_empty) and num_points &lt; num_clusters:\n        raise ValueError(\n            f\"A total of {num_points} points is not enough for \"\n            + f\"{num_clusters} non-empty clusters\"\n        )\n\n    # Check that cluster_sep has num_dims dimensions\n    cluster_sep = asarray(cluster_sep)\n    if cluster_sep.size != num_dims:\n        raise ValueError(\n            \"Length of `cluster_sep` must be equal to `num_dims` \"\n            + f\"({cluster_sep.size} != {num_dims})\"\n        )\n\n    # If given, cluster_offset must have the correct number of dimensions,\n    # if not given then it will be a num_dims x 1 vector of zeros\n    
if cluster_offset is None:\n        cluster_offset = zeros(num_dims)\n    else:\n        cluster_offset = asarray(cluster_offset)\n        if cluster_offset.size != num_dims:\n            raise ValueError(\n                \"Length of `cluster_offset` must be equal to `num_dims` \"\n                + f\"({cluster_offset.size} != {num_dims})\"\n            )\n\n    # If the user specified rng as an int, create a proper rng object\n    rng_sel: Generator\n    if isinstance(rng, Generator):\n        rng_sel = cast(Generator, rng)\n    elif isinstance(rng, int):\n        rng_sel = Generator(PCG64(cast(int, rng)))\n    else:\n        raise ValueError(\n            f\"`rng` must be an instance of int or Generator, but is {type(rng)}\"\n        )\n\n    # Check that proj_dist_fn specifies a valid way for projecting points along\n    # cluster-supporting lines i.e., either \"norm\" (default), \"unif\" or a\n    # user-defined function\n    pointproj_fn: Callable[[float, int, Generator], NDArray]\n\n    if callable(proj_dist_fn):\n        # Use user-defined distribution; assume function accepts length of line\n        # and number of points, and returns a number of points x 1 vector\n        pointproj_fn = proj_dist_fn\n\n    elif proj_dist_fn == \"unif\":\n        # Point projections will be uniformly placed along cluster-supporting lines\n        def pointproj_fn(length, n, rg):\n            return length * rg.random(n) - length / 2\n\n    elif proj_dist_fn == \"norm\":\n        # Use normal distribution for placing point projections along cluster-supporting\n        # lines, mean equal to line center, standard deviation equal to 1/6 of line\n        # length such that the line length contains \u224899.73% of the points\n        def pointproj_fn(length, n, rg):\n            return (1.0 / 6.0) * length * rg.normal(size=n)\n\n    else:\n        raise ValueError(\n            \"`proj_dist_fn` has to be either 'norm', 'unif' or user-defined function\"\n        )\n\n    # 
Check that point_dist_fn specifies a valid way for generating points given\n    # their projections along cluster-supporting lines, i.e., either \"n-1\"\n    # (default), \"n\" or a user-defined function\n    pt_from_proj_fn: Callable[\n        [NDArray, float, float, NDArray, NDArray, Generator], NDArray\n    ]\n\n    if num_dims == 1:\n        # If 1D was specified, point projections are the points themselves\n        def pt_from_proj_fn(projs, lat_disp, length, clu_dir, clu_ctr, rng=rng_sel):\n            return projs\n\n    elif callable(point_dist_fn):\n        # Use user-defined distribution; assume function accepts point projections\n        # on the line, lateral disp., cluster direction and cluster center, and\n        # returns a num_points x num_dims matrix containing the final points\n        # for the current cluster\n        pt_from_proj_fn = point_dist_fn\n\n    elif point_dist_fn == \"n-1\":\n        # Points will be placed on a hyperplane orthogonal to the cluster-supporting\n        # line using a normal distribution centered at their intersection\n        pt_from_proj_fn = clupoints_n_1\n\n    elif point_dist_fn == \"n\":\n        # Points will be placed using a multivariate normal distribution\n        # centered at the point projection\n        pt_from_proj_fn = clupoints_n\n\n    else:\n        raise ValueError(\n            \"point_dist_fn has to be either 'n-1', 'n' or a user-defined function\"\n        )\n\n    # ############################ #\n    # Determine cluster properties #\n    # ############################ #\n\n    # Normalize main direction(s)\n    arrdir = apply_along_axis(lambda a: a / norm(a), 1, arrdir)\n\n    # If only one main direction was given, expand it for all clusters\n    if dir_ndims == 1:\n        arrdir = repeat(arrdir, num_clusters, axis=0)\n\n    # Determine cluster sizes\n    if callable(clusizes_fn):\n        cluster_sizes = clusizes_fn(num_clusters, num_points, allow_empty, rng_sel)\n    elif 
len(asarray(clusizes_fn)) == num_clusters:\n        cluster_sizes = asarray(clusizes_fn)\n    else:\n        raise ValueError(\n            \"clusizes_fn has to be either a function or a `num_clusters`-sized array\"\n        )\n\n    # Custom clusizes_fn's are not required to obey num_points, so we update\n    # it here just in case it's different from what the user specified\n    num_points = sum(cluster_sizes)\n\n    # Determine cluster centers\n    if callable(clucenters_fn):\n        cluster_centers = clucenters_fn(\n            num_clusters, cluster_sep, cluster_offset, rng_sel\n        )\n    elif asarray(clucenters_fn).shape == (num_clusters, num_dims):\n        cluster_centers = asarray(clucenters_fn)\n    else:\n        raise ValueError(\n            \"clucenters_fn has to be either a function or a matrix of size \"\n            + \"`num_clusters` x `num_dims`\"\n        )\n\n    # Determine length of lines supporting clusters\n    if callable(llengths_fn):\n        cluster_lengths = llengths_fn(num_clusters, llength, llength_disp, rng_sel)\n    elif len(asarray(llengths_fn)) == num_clusters:\n        cluster_lengths = asarray(llengths_fn)\n    else:\n        raise ValueError(\n            \"llengths_fn has to be either a function or a `num_clusters`-sized array\"\n        )\n\n    # Obtain angles between main direction and cluster-supporting lines\n    if callable(angle_deltas_fn):\n        cluster_angles = angle_deltas_fn(num_clusters, angle_disp, rng_sel)\n    elif len(asarray(angle_deltas_fn)) == num_clusters:\n        cluster_angles = asarray(angle_deltas_fn)\n    else:\n        raise ValueError(\n            \"angle_deltas_fn has to be either a function or a \"\n            + \"`num_clusters`-sized array\"\n        )\n\n    # Determine normalized cluster directions by applying the obtained angles\n    cluster_directions = apply_along_axis(\n        lambda v, a: rand_vector_at_angle(v, next(a), rng_sel),\n        1,\n        arrdir,\n        
iter(cluster_angles),\n    )\n\n    # ################################# #\n    # Determine points for each cluster #\n    # ################################# #\n\n    # Aux. vector with cumulative sum of number of points in each cluster\n    cumsum_points = concatenate((asarray([0]), cumsum(cluster_sizes)))\n\n    # Pre-allocate data structures for holding cluster info and points\n    point_clusters: NDArray = empty(\n        num_points, dtype=int32\n    )  # Cluster indices of each point\n    point_projections = empty((num_points, num_dims))  # Point projections on\n    #                                                  # cluster-supporting lines\n    points = empty((num_points, num_dims))  # Final points to be generated\n\n    # Loop through clusters and create points for each one\n    for i in range(num_clusters):\n        # Start and end indexes for points in current cluster\n        idx_start = cumsum_points[i]\n        idx_end = cumsum_points[i + 1]\n\n        # Update cluster indices of each point\n        point_clusters[idx_start:idx_end] = i\n\n        # Determine distance of point projections from the center of the line\n        ptproj_dist_fn_center = pointproj_fn(\n            cluster_lengths[i], cluster_sizes[i], rng_sel\n        )\n\n        # Determine coordinates of point projections on the line using the\n        # parametric line equation (this works since cluster direction is normalized)\n        point_projections[idx_start:idx_end, :] = points_on_line(\n            cluster_centers[i, :], cluster_directions[i, :], ptproj_dist_fn_center\n        )\n\n        # Determine points from their projections on the line\n        points[idx_start:idx_end, :] = pt_from_proj_fn(\n            point_projections[idx_start:idx_end, :],\n            lateral_disp,\n            cluster_lengths[i],\n            cluster_directions[i, :],\n            cluster_centers[i, :],\n            rng_sel,\n        )\n\n    return Clusters(\n        points,\n        
point_clusters,\n        point_projections,\n        cluster_sizes,\n        cluster_centers,\n        cluster_directions,\n        cluster_angles,\n        cluster_lengths,\n    )\n</code></pre>"},{"location":"reference/#pyclugen.clumerge","title":"clumerge","text":"<pre><code>clumerge(\n    *data: NamedTuple | Mapping[str, ArrayLike],\n    fields: tuple[str, ...] = (\"points\", \"clusters\"),\n    clusters_field: str | None = \"clusters\"\n) -&gt; dict[str, NDArray]\n</code></pre> <p>Merges the fields (specified in <code>fields</code>) of two or more <code>data</code> sets.</p> <p>Merges the fields (specified in <code>fields</code>) of two or more <code>data</code> sets (named tuples or dictionaries). The fields to be merged need to have the same number of columns. The corresponding merged field will contain the rows of the fields to be merged, and will have a common supertype.</p> <p>The <code>clusters_field</code> parameter specifies a field containing integers that identify the cluster to which the respective points belongs to. If <code>clusters_field</code> is specified (by default it's specified as <code>\"clusters\"</code>), cluster assignments in individual datasets will be updated in the merged dataset so that clusters are considered separate. This parameter can be set to <code>None</code>, in which case no field will be considered as a special cluster assignments field.</p> <p>This function can be used to merge data sets generated with the <code>clugen()</code> function, by default merging the <code>points</code> and <code>clusters</code> fields in those data sets. It also works with arbitrary data by specifying alternative fields in the <code>fields</code> parameter. 
It can be used, for example, to merge third-party data with <code>clugen()</code>-generated data.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clugen, clumerge\n&gt;&gt;&gt; data1 = clugen(2, 5, 1000, [1, 1], 0.01, [20, 20], 14, 1.2, 1.5);\n&gt;&gt;&gt; data2 = clugen(2, 3, 450, [0.8, -0.3], 0, [25, 21], 6, 0.4, 3.5);\n&gt;&gt;&gt; data3 = clugen(2, 2, 600, [0, -0.7], 0.2, [15, 10], 1, 0.1, 5.2);\n&gt;&gt;&gt; data_merged = clumerge(data1, data2, data3)\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>*data</code> <code>NamedTuple | Mapping[str, ArrayLike]</code> <p>One or more cluster data sets whose <code>fields</code> are to be merged.</p> <code>()</code> <code>fields</code> <code>tuple[str, ...]</code> <p>Fields to be merged, which must exist in the data set given in <code>*data</code>.</p> <code>('points', 'clusters')</code> <code>clusters_field</code> <code>str | None</code> <p>Field containing the integer cluster labels. If specified, cluster assignments in individual datasets will be updated in the merged dataset so that clusters are considered separate.</p> <code>'clusters'</code> <p>Returns:</p> Type Description <code>dict[str, NDArray]</code> <p>A dictionary, where keys correspond to field names, and values to the merged numerical arrays.</p> Source code in <code>pyclugen/main.py</code> <pre><code>def clumerge(\n    *data: NamedTuple | Mapping[str, ArrayLike],\n    fields: tuple[str, ...] = (\"points\", \"clusters\"),\n    clusters_field: str | None = \"clusters\",\n) -&gt; dict[str, NDArray]:\n    r\"\"\"Merges the fields (specified in `fields`) of two or more `data` sets.\n\n    Merges the fields (specified in `fields`) of two or more `data` sets (named\n    tuples or dictionaries). The fields to be merged need to have the same\n    number of columns. 
The corresponding merged field will contain the rows of\n    the fields to be merged, and will have a common supertype.\n\n    The `clusters_field` parameter specifies a field containing integers that\n    identify the cluster to which the respective points belongs to. If\n    `clusters_field` is specified (by default it's specified as `\"clusters\"`),\n    cluster assignments in individual datasets will be updated in the merged\n    dataset so that clusters are considered separate. This parameter can be set\n    to `None`, in which case no field will be considered as a special cluster\n    assignments field.\n\n    This function can be used to merge data sets generated with the\n    [`clugen()`][pyclugen.main.clugen] function, by default merging the\n    `points` and `clusters` fields in those data sets. It also works with\n    arbitrary data by specifying alternative fields in the `fields` parameter.\n    It can be used, for example, to merge third-party data with\n    [`clugen()`][pyclugen.main.clugen]-generated data.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clugen, clumerge\n        &gt;&gt;&gt; data1 = clugen(2, 5, 1000, [1, 1], 0.01, [20, 20], 14, 1.2, 1.5);\n        &gt;&gt;&gt; data2 = clugen(2, 3, 450, [0.8, -0.3], 0, [25, 21], 6, 0.4, 3.5);\n        &gt;&gt;&gt; data3 = clugen(2, 2, 600, [0, -0.7], 0.2, [15, 10], 1, 0.1, 5.2);\n        &gt;&gt;&gt; data_merged = clumerge(data1, data2, data3)\n\n    Args:\n      *data: One or more cluster data sets whose `fields` are to be merged.\n      fields: Fields to be merged, which must exist in the data set given in\n        `*data`.\n      clusters_field: Field containing the integer cluster labels. 
If specified,\n        cluster assignments in individual datasets will be updated in the merged\n        dataset so that clusters are considered separate.\n\n    Returns:\n      A dictionary, where keys correspond to field names, and values to the\n        merged numerical arrays.\n    \"\"\"\n    # Number of elements in each array the merged dataset\n    numel: int = 0\n\n    # Number of columns of values in each field\n    fields_info: dict[str, _FieldInfo] = {}\n\n    # Merged dataset to output, initially empty\n    output: dict[str, NDArray] = {}\n\n    # Create a fields set\n    fields_set: MutableSet[str] = set(fields)\n\n    # If a clusters field is given, add it\n    if clusters_field is not None:\n        fields_set.add(str(clusters_field))\n\n    # Data in dictionary format with NDArray views on data\n    ddata: MutableSequence[Mapping[str, NDArray]] = []\n    for dt in data:\n        # If dt is a named tuple, convert it into a dictionary\n        ddt: Mapping[str, ArrayLike]\n        if isinstance(dt, dict):\n            ddt = cast(dict, dt)\n        else:\n            ntdt = cast(NamedTuple, dt)\n            ddt = ntdt._asdict()\n\n        # Convert dictionary values to NDArrays\n        ddtnp: Mapping[str, NDArray] = {k: asarray(v) for k, v in ddt.items()}\n\n        # Add converted dictionary to our sequence of dictionaries\n        ddata.append(ddtnp)\n\n    # Cycle through data items\n    for dt in ddata:\n        # Number of elements in the current item\n        numel_i: int = -1\n\n        # Cycle through fields for the current item\n        for field in fields_set:\n            if field not in dt:\n                raise ValueError(f\"Data item does not contain required field `{field}`\")\n            elif field == clusters_field and not can_cast(\n                dt[clusters_field].dtype, int64\n            ):\n                raise ValueError(f\"`{clusters_field}` must contain integer types\")\n\n            # Get the field value\n            
value: NDArray = dt[field]\n\n            # Number of elements in field value\n            numel_tmp = len(value)\n\n            # Check the number of elements in the field value\n            if numel_i == -1:\n                # First field: get number of elements in value (must be the same\n                # for the remaining field values)\n                numel_i = numel_tmp\n\n            elif numel_tmp != numel_i:\n                # Fields values after the first must have the same number of\n                # elements\n                raise ValueError(\n                    \"Data item contains fields with different sizes \"\n                    + f\"({numel_tmp} != {numel_i})\"\n                )\n\n            # Get/check info about the field value type\n            if field not in fields_info:\n                # If it's the first time this field appears, just get the info\n                fields_info[field] = _FieldInfo(value.dtype, _getcols(value))\n\n            else:\n                # If this field already appeared in previous data items, get the\n                # info and check/determine its compatibility with respect to\n                # previous data items\n                if _getcols(value) != fields_info[field].ncol:\n                    # Number of columns must be the same\n                    raise ValueError(f\"Dimension mismatch in field `{field}`\")\n\n                # Get the common supertype\n                fields_info[field].dtype = promote_types(\n                    fields_info[field].dtype, value.dtype\n                )\n\n        # Update total number of elements\n        numel += numel_i\n\n    # Initialize output dictionary fields with room for all items\n    for field in fields_info:\n        if fields_info[field].ncol == 1:\n            output[field] = empty((numel,), dtype=fields_info[field].dtype)\n        else:\n            output[field] = empty(\n                (numel, fields_info[field].ncol), 
dtype=fields_info[field].dtype\n            )\n\n    # Copy items from input data to output dictionary, field-wise\n    copied: int = 0\n    last_cluster: int = 0\n\n    # Create merged output\n    for dt in ddata:\n        # How many elements to copy for the current data item?\n        tocopy: int = len(dt[fields[0]])\n\n        # Cycle through each field and its information\n        for field in fields_info:\n            # Copy elements\n            if field == clusters_field:\n                # If this is a clusters field, update the cluster IDs\n                old_clusters = unique(dt[clusters_field])\n                new_clusters = list(\n                    range(last_cluster + 1, last_cluster + len(old_clusters) + 1)\n                )\n                old2new = zip(old_clusters, new_clusters)\n                mapping = dict(old2new)\n                last_cluster = new_clusters[-1]\n\n                output[field][copied : (copied + tocopy)] = [\n                    mapping[val] for val in dt[clusters_field]\n                ]\n\n            else:\n                # Otherwise just copy the elements\n                ncol: int = fields_info[field].ncol\n                output[field].flat[copied * ncol : (copied + tocopy) * ncol] = dt[field]\n\n        # Update how many were copied so far\n        copied += tocopy\n\n    # Return result\n    return output\n</code></pre>"},{"location":"reference/#pyclugen.clupoints_n","title":"clupoints_n","text":"<pre><code>clupoints_n(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Generate points from their \\(n\\)-D projections on a cluster-supporting line.</p> <p>Each point is placed around its projection using the normal distribution ( \\(\\mu=0\\), \\(\u03c3=\\)<code>lat_disp</code>).</p> <p>This function's main intended use is by the <code>clugen()</code> function, generating the final 
points when the <code>point_dist_fn</code> parameter is set to <code>\"n\"</code>.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clupoints_n, points_on_line\n&gt;&gt;&gt; from numpy import array, linspace\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n...                        array([1,0]),     # on a 2D line\n...                        linspace(-4,4,5))\n&gt;&gt;&gt; projs\narray([[1., 5.],\n       [3., 5.],\n       [5., 5.],\n       [7., 5.],\n       [9., 5.]])\n&gt;&gt;&gt; clupoints_n(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\narray([[0.50543932, 4.81610667],\n       [3.64396263, 5.09698721],\n       [5.46011545, 5.2885519 ],\n       [6.68176818, 5.27097611],\n       [8.84170227, 4.83880544]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>projs</code> <code>NDArray</code> <p>Point projections on the cluster-supporting line ( \\(p \\times n\\) matrix).</p> required <code>lat_disp</code> <code>float</code> <p>Standard deviation for the normal distribution, i.e., cluster lateral dispersion.</p> required <code>line_len</code> <code>float</code> <p>Length of cluster-supporting line (ignored).</p> required <code>clu_dir</code> <code>NDArray</code> <p>Direction of the cluster-supporting line.</p> required <code>clu_ctr</code> <code>NDArray</code> <p>Center position of the cluster-supporting line (ignored).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Generated points ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clupoints_n(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    
r\"\"\"Generate points from their $n$-D projections on a cluster-supporting line.\n\n    Each point is placed around its projection using the normal distribution\n    ( $\\mu=0$, $\u03c3=$`lat_disp`).\n\n    This function's main intended use is by the [`clugen()`][pyclugen.main.clugen]\n    function, generating the final points when the `point_dist_fn` parameter is\n    set to `\"n\"`.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clupoints_n, points_on_line\n        &gt;&gt;&gt; from numpy import array, linspace\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n        ...                        array([1,0]),     # on a 2D line\n        ...                        linspace(-4,4,5))\n        &gt;&gt;&gt; projs\n        array([[1., 5.],\n               [3., 5.],\n               [5., 5.],\n               [7., 5.],\n               [9., 5.]])\n        &gt;&gt;&gt; clupoints_n(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\n        array([[0.50543932, 4.81610667],\n               [3.64396263, 5.09698721],\n               [5.46011545, 5.2885519 ],\n               [6.68176818, 5.27097611],\n               [8.84170227, 4.83880544]])\n\n    Args:\n      projs: Point projections on the cluster-supporting line ( $p \\times n$ matrix).\n      lat_disp: Standard deviation for the normal distribution, i.e., cluster\n        lateral dispersion.\n      line_len: Length of cluster-supporting line (ignored).\n      clu_dir: Direction of the cluster-supporting line.\n      clu_ctr: Center position of the cluster-supporting line (ignored).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Generated points ( $p \\times n$ matrix).\n    \"\"\"\n    # Number of dimensions\n    num_dims = clu_dir.size\n\n    # Number of points in this cluster\n    clu_num_points = projs.shape[0]\n\n    # 
Get random displacement vectors for each point projection\n    displ = lat_disp * rng.normal(size=(clu_num_points, num_dims))\n\n    # Add displacement vectors to each point projection\n    points = projs + displ\n\n    return points\n</code></pre>"},{"location":"reference/#pyclugen.clupoints_n_1","title":"clupoints_n_1","text":"<pre><code>clupoints_n_1(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Generate points from their \\(n\\)-D projections on a cluster-supporting line.</p> <p>Each point is placed on a hyperplane orthogonal to that line and centered at the point's projection, using the normal distribution ( \\(\\mu=0\\), \\(\u03c3=\\)<code>lat_disp</code>).</p> <p>This function's main intended use is by the <code>clugen()</code> function, generating the final points when the <code>point_dist_fn</code> parameter is set to <code>\"n-1\"</code>.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clupoints_n_1, points_on_line\n&gt;&gt;&gt; from numpy import array, linspace\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n...                        array([1,0]),     # on a 2D line\n...                        linspace(-4,4,5))\n&gt;&gt;&gt; projs\narray([[1., 5.],\n       [3., 5.],\n       [5., 5.],\n       [7., 5.],\n       [9., 5.]])\n&gt;&gt;&gt; clupoints_n_1(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\narray([[1.        , 5.49456068],\n       [3.        , 5.18389333],\n       [5.        , 5.64396263],\n       [7.        , 5.09698721],\n       [9.        
, 5.46011545]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>projs</code> <code>NDArray</code> <p>Point projections on the cluster-supporting line ( \\(p \\times n\\) matrix).</p> required <code>lat_disp</code> <code>float</code> <p>Standard deviation for the normal distribution, i.e., cluster lateral dispersion.</p> required <code>line_len</code> <code>float</code> <p>Length of cluster-supporting line (ignored).</p> required <code>clu_dir</code> <code>NDArray</code> <p>Direction of the cluster-supporting line.</p> required <code>clu_ctr</code> <code>NDArray</code> <p>Center position of the cluster-supporting line (ignored).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Generated points ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clupoints_n_1(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Generate points from their $n$-D projections on a cluster-supporting line.\n\n    Each point is placed on a hyperplane orthogonal to that line and centered at\n    the point's projection, using the normal distribution ( $\\mu=0$,\n    $\u03c3=$`lat_disp`).\n\n    This function's main intended use is by the [`clugen()`][pyclugen.main.clugen]\n    function, generating the final points when the `point_dist_fn` parameter is\n    set to `\"n-1\"`.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clupoints_n_1, points_on_line\n        &gt;&gt;&gt; from numpy import array, linspace\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n        ...                        
array([1,0]),     # on a 2D line\n        ...                        linspace(-4,4,5))\n        &gt;&gt;&gt; projs\n        array([[1., 5.],\n               [3., 5.],\n               [5., 5.],\n               [7., 5.],\n               [9., 5.]])\n        &gt;&gt;&gt; clupoints_n_1(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\n        array([[1.        , 5.49456068],\n               [3.        , 5.18389333],\n               [5.        , 5.64396263],\n               [7.        , 5.09698721],\n               [9.        , 5.46011545]])\n\n    Args:\n      projs: Point projections on the cluster-supporting line ( $p \\times n$ matrix).\n      lat_disp: Standard deviation for the normal distribution, i.e., cluster\n        lateral dispersion.\n      line_len: Length of cluster-supporting line (ignored).\n      clu_dir: Direction of the cluster-supporting line.\n      clu_ctr: Center position of the cluster-supporting line (ignored).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Generated points ( $p \\times n$ matrix).\n    \"\"\"\n    # No blank line allowed here\n\n    # Define function to get distances from points to their projections on the\n    # line (i.e., using the normal distribution)\n    def dist_fn(clu_num_points, ldisp, rg):\n        return ldisp * rg.normal(size=clu_num_points)\n\n    # Use clupoints_n_1_template() to do the heavy lifting\n    return clupoints_n_1_template(projs, lat_disp, clu_dir, dist_fn, rng=rng)\n</code></pre>"},{"location":"reference/#pyclugen.clupoints_n_1_template","title":"clupoints_n_1_template","text":"<pre><code>clupoints_n_1_template(\n    projs: NDArray,\n    lat_disp: float,\n    clu_dir: NDArray,\n    dist_fn: Callable[[int, float, Generator], NDArray],\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Create \\(p\\) points from their \\(n\\)-D projections on a cluster-supporting line.</p> <p>Each point is placed on a hyperplane orthogonal to that line and centered 
at the point's projection. The function specified in <code>dist_fn</code> is used to perform the actual placement.</p> <p>This function is used internally by <code>clupoints_n_1()</code> and may be useful for constructing user-defined final point placement strategies for the <code>point_dist_fn</code> parameter of the main <code>clugen()</code> function.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array, zeros\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; from pyclugen import clupoints_n_1_template, points_on_line\n&gt;&gt;&gt; ctr = zeros(2)\n&gt;&gt;&gt; dir = array([1, 0])\n&gt;&gt;&gt; pdist = array([-0.5, -0.2, 0.1, 0.3])\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; proj = points_on_line(ctr, dir, pdist)\n&gt;&gt;&gt; clupoints_n_1_template(proj, 0, dir, lambda p, l, r: r.random(p), rng=rng)\narray([[-0.5       ,  0.68235186],\n       [-0.2       , -0.05382102],\n       [ 0.1       ,  0.22035987],\n       [ 0.3       , -0.18437181]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>projs</code> <code>NDArray</code> <p>Point projections on the cluster-supporting line ( \\(p \\times n\\) matrix).</p> required <code>lat_disp</code> <code>float</code> <p>Dispersion of points from their projection.</p> required <code>clu_dir</code> <code>NDArray</code> <p>Direction of the cluster-supporting line (unit vector).</p> required <code>dist_fn</code> <code>Callable[[int, float, Generator], NDArray]</code> <p>Function to place points on a second line, orthogonal to the first. 
The function accepts as parameters the number of points in the current cluster, the <code>lateral_disp</code> parameter (the same one passed to the <code>clugen()</code> function), and a random number generator, returning a vector containing the distance of each point to its projection on the cluster-supporting line.</p> required <code>rng</code> <code>Generator</code> <p>An optional pseudo-random number generator for reproducible executions.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Generated points ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def clupoints_n_1_template(\n    projs: NDArray,\n    lat_disp: float,\n    clu_dir: NDArray,\n    dist_fn: Callable[[int, float, Generator], NDArray],\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Create $p$ points from their $n$-D projections on a cluster-supporting line.\n\n    Each point is placed on a hyperplane orthogonal to that line and centered at\n    the point's projection. 
The function specified in `dist_fn` is used to perform\n    the actual placement.\n\n    This function is used internally by\n    [`clupoints_n_1()`][pyclugen.module.clupoints_n_1] and may be useful for\n    constructing user-defined final point placement strategies for the `point_dist_fn`\n    parameter of the main [`clugen()`][pyclugen.main.clugen] function.\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array, zeros\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; from pyclugen import clupoints_n_1_template, points_on_line\n        &gt;&gt;&gt; ctr = zeros(2)\n        &gt;&gt;&gt; dir = array([1, 0])\n        &gt;&gt;&gt; pdist = array([-0.5, -0.2, 0.1, 0.3])\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; proj = points_on_line(ctr, dir, pdist)\n        &gt;&gt;&gt; clupoints_n_1_template(proj, 0, dir, lambda p, l, r: r.random(p), rng=rng)\n        array([[-0.5       ,  0.68235186],\n               [-0.2       , -0.05382102],\n               [ 0.1       ,  0.22035987],\n               [ 0.3       , -0.18437181]])\n\n    Args:\n      projs: Point projections on the cluster-supporting line ( $p \\times n$ matrix).\n      lat_disp: Dispersion of points from their projection.\n      clu_dir: Direction of the cluster-supporting line (unit vector).\n      dist_fn: Function to place points on a second line, orthogonal to the first.\n        The functions accepts as parameters the number of points in the current\n        cluster, the `lateral_disp` parameter (the same passed to the\n        [`clugen()`][pyclugen.main.clugen] function), and a random number generator,\n        returning a vector containing the distance of each point to its projection\n        on the cluster-supporting line.\n      rng: An optional pseudo-random number generator for reproducible executions.\n\n    Returns:\n      Generated points ( $p \\times n$ matrix).\n    \"\"\"\n    # Number of dimensions\n    num_dims = 
clu_dir.size\n\n    # Number of points in this cluster\n    clu_num_points = projs.shape[0]\n\n    # Get distances from points to their projections on the line\n    points_dist = dist_fn(clu_num_points, lat_disp, rng)\n\n    # Get normalized vectors, orthogonal to the current line, for each point\n    orth_vecs = zeros((clu_num_points, num_dims))\n\n    for j in range(clu_num_points):\n        orth_vecs[j, :] = rand_ortho_vector(clu_dir, rng=rng).ravel()\n\n    # Set vector magnitudes\n    orth_vecs = abs(points_dist).reshape(-1, 1) * orth_vecs\n\n    # Add perpendicular vectors to point projections on the line,\n    # yielding final cluster points\n    points = projs + orth_vecs\n\n    return points\n</code></pre>"},{"location":"reference/#pyclugen.clusizes","title":"clusizes","text":"<pre><code>clusizes(\n    num_clusters: int,\n    num_points: int,\n    allow_empty: bool,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Determine cluster sizes, i.e., the number of points in each cluster.</p> <p>Cluster sizes are determined using the normal distribution ( \\(\\mu=\\)<code>num_points</code> \\(/\\)<code>num_clusters</code>, \\(\\sigma=\\mu/3\\)), and then assuring that the final cluster sizes add up to <code>num_points</code> via the <code>fix_num_points()</code> function.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; from pyclugen import clusizes\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; sizes = clusizes(4, 1000, True, rng=prng)\n&gt;&gt;&gt; sizes\narray([166, 217, 354, 263])\n&gt;&gt;&gt; sum(sizes)\n1000\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>num_points</code> <code>int</code> <p>Total number of points.</p> required <code>allow_empty</code> <code>bool</code> <p>Allow empty clusters?</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random 
number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Number of points in each cluster (vector of size <code>num_clusters</code>).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clusizes(\n    num_clusters: int,\n    num_points: int,\n    allow_empty: bool,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Determine cluster sizes, i.e., the number of points in each cluster.\n\n    Cluster sizes are determined using the normal distribution (\n    $\\mu=$`num_points` $/$`num_clusters`, $\\sigma=\\mu/3$), and then\n    assuring that the final cluster sizes add up to `num_points` via the\n    [`fix_num_points()`][pyclugen.helper.fix_num_points] function.\n\n    Examples:\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; from pyclugen import clusizes\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; sizes = clusizes(4, 1000, True, rng=prng)\n        &gt;&gt;&gt; sizes\n        array([166, 217, 354, 263])\n        &gt;&gt;&gt; sum(sizes)\n        1000\n\n    Args:\n      num_clusters: Number of clusters.\n      num_points: Total number of points.\n      allow_empty: Allow empty clusters?\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Number of points in each cluster (vector of size `num_clusters`).\n    \"\"\"\n    # Determine number of points in each cluster using the normal distribution\n\n    # Consider the mean an equal division of points between clusters\n    mean = num_points / num_clusters\n    # The standard deviation is such that the interval [0, 2 * mean] will contain\n    # \u224899.7% of cluster sizes\n    std = mean / 3\n\n    # Determine points with the normal distribution\n    clu_num_points = std * rng.normal(size=num_clusters) + mean\n\n    # Set negative values to zero\n    clu_num_points = where(clu_num_points &gt; 0, clu_num_points, 0)\n\n    # Fix imbalances, so that num_points 
is respected\n    if sum(clu_num_points) &gt; 0:  # Be careful not to divide by zero\n        clu_num_points *= num_points / sum(clu_num_points)\n\n    # Round the real values to integers since a cluster sizes is represented by\n    # an integer\n    clu_num_points = rint(clu_num_points).astype(int)\n\n    # Make sure total points is respected, which may not be the case at this time due\n    # to rounding\n    fix_num_points(clu_num_points, num_points)\n\n    # If empty clusters are not allowed, make sure there aren't any\n    if not allow_empty:\n        fix_empty(clu_num_points)\n\n    return clu_num_points\n</code></pre>"},{"location":"reference/#pyclugen.fix_empty","title":"fix_empty","text":"<pre><code>fix_empty(clu_num_points: NDArray, allow_empty: bool = False) -&gt; NDArray\n</code></pre> <p>Certifies that, given enough points, no clusters are left empty.</p> <p>This is done by removing a point from the largest cluster and adding it to an empty cluster while there are empty clusters. 
If the total number of points is smaller than the number of clusters (or if the <code>allow_empty</code> parameter is set to <code>True</code>), this function does nothing.</p> <p>This function is used internally by <code>clusizes()</code> and might be useful for custom cluster sizing implementations given as the <code>clusizes_fn</code> parameter of the main <code>clugen()</code> function.</p> <p>Note that the array is changed in-place.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array\n&gt;&gt;&gt; from pyclugen import fix_empty\n&gt;&gt;&gt; clusters = array([3, 4, 5, 0, 0])\n&gt;&gt;&gt; fix_empty(clusters)\narray([3, 3, 4, 1, 1])\n&gt;&gt;&gt; clusters # Verify that the array was changed in-place\narray([3, 3, 4, 1, 1])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>clu_num_points</code> <code>NDArray</code> <p>Number of points in each cluster (vector of size \\(c\\)), where \\(c\\) is the number of clusters.</p> required <code>allow_empty</code> <code>bool</code> <p>Allow empty clusters?</p> <code>False</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Number of points in each cluster, after being fixed by this function (vector of size \\(c\\), which is the same reference as <code>clu_num_points</code>).</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def fix_empty(clu_num_points: NDArray, allow_empty: bool = False) -&gt; NDArray:\n    r\"\"\"Certifies that, given enough points, no clusters are left empty.\n\n    This is done by removing a point from the largest cluster and adding it to an\n    empty cluster while there are empty clusters. 
If the total number of points is\n    smaller than the number of clusters (or if the `allow_empty` parameter is set\n    to `true`), this function does nothing.\n\n    This function is used internally by [`clusizes()`][pyclugen.module.clusizes]\n    and might be useful for custom cluster sizing implementations given as the\n    `clusizes_fn` parameter of the main [`clugen()`][pyclugen.main.clugen] function.\n\n    Note that the array is changed in-place.\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array\n        &gt;&gt;&gt; from pyclugen import fix_empty\n        &gt;&gt;&gt; clusters = array([3, 4, 5, 0, 0])\n        &gt;&gt;&gt; fix_empty(clusters)\n        array([3, 3, 4, 1, 1])\n        &gt;&gt;&gt; clusters # Verify that the array was changed in-place\n        array([3, 3, 4, 1, 1])\n\n    Args:\n      clu_num_points: Number of points in each cluster (vector of size $c$),\n        where $c$ is the number of clusters.\n      allow_empty: Allow empty clusters?\n\n    Returns:\n      Number of points in each cluster, after being fixed by this function (vector\n        of size $c$, which is the same reference than `clu_num_points`).\n    \"\"\"\n    # If the allow_empty parameter is set to true, don't do anything and return\n    # immediately; this is useful for quick `clusizes_fn` one-liners\n    if not allow_empty:\n        # Find empty clusters\n        empty_clusts = [idx for idx, val in enumerate(clu_num_points) if val == 0]\n\n        # If there are empty clusters and enough points for all clusters...\n        if len(empty_clusts) &gt; 0 and sum(clu_num_points) &gt;= clu_num_points.size:\n            # Go through the empty clusters...\n            for i0 in empty_clusts:\n                # ...get a point from the largest cluster and assign it to the\n                # current empty cluster\n                imax = argmax(clu_num_points)\n                clu_num_points[imax] -= 1\n                clu_num_points[i0] += 1\n\n    return 
clu_num_points\n</code></pre>"},{"location":"reference/#pyclugen.fix_num_points","title":"fix_num_points","text":"<pre><code>fix_num_points(clu_num_points: NDArray, num_points: int) -&gt; NDArray\n</code></pre> <p>Certifies that the values in the <code>clu_num_points</code> array add up to <code>num_points</code>.</p> <p>If this is not the case, the <code>clu_num_points</code> array is modified in-place, incrementing the value corresponding to the smallest cluster while <code>sum(clu_num_points) &lt; num_points</code>, or decrementing the value corresponding to the largest cluster while <code>sum(clu_num_points) &gt; num_points</code>.</p> <p>This function is used internally by <code>clusizes()</code> and might be useful for custom cluster sizing implementations given as the <code>clusizes_fn</code> parameter of the main <code>clugen()</code> function.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array\n&gt;&gt;&gt; from pyclugen import fix_num_points\n&gt;&gt;&gt; clusters = array([1, 6, 3])  # 10 total points\n&gt;&gt;&gt; fix_num_points(clusters, 12) # But we want 12 total points\narray([3, 6, 3])\n&gt;&gt;&gt; clusters # Verify that the array was changed in-place\narray([3, 6, 3])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>clu_num_points</code> <code>NDArray</code> <p>Number of points in each cluster (vector of size \\(c\\)), where \\(c\\) is the number of clusters.</p> required <code>num_points</code> <code>int</code> <p>The expected total number of points.</p> required <p>Returns:</p> Type Description <code>NDArray</code> <p>Number of points in each cluster, after being fixed by this function (vector of size \\(c\\), which is the same reference as <code>clu_num_points</code>).</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def fix_num_points(clu_num_points: NDArray, num_points: int) -&gt; NDArray:\n    r\"\"\"Certifies that the values in the `clu_num_points` array add up to `num_points`.\n\n    If 
this is not the case, the `clu_num_points` array is modified in-place,\n    incrementing the value corresponding to the smallest cluster while\n    `sum(clu_num_points) &lt; num_points`, or decrementing the value corresponding to\n    the largest cluster while `sum(clu_num_points) &gt; num_points`.\n\n    This function is used internally by [`clusizes()`][pyclugen.module.clusizes]\n    and might be useful for custom cluster sizing implementations given as the\n    `clusizes_fn` parameter of the main [`clugen()`][pyclugen.main.clugen] function.\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array\n        &gt;&gt;&gt; from pyclugen import fix_num_points\n        &gt;&gt;&gt; clusters = array([1, 6, 3])  # 10 total points\n        &gt;&gt;&gt; fix_num_points(clusters, 12) # But we want 12 total points\n        array([3, 6, 3])\n        &gt;&gt;&gt; clusters # Verify that the array was changed in-place\n        array([3, 6, 3])\n\n    Args:\n      clu_num_points: Number of points in each cluster (vector of size $c$),\n        where $c$ is the number of clusters.\n      num_points: The expected total number of points.\n\n    Returns:\n      Number of points in each cluster, after being fixed by this function (vector\n        of size $c$, which is the same reference than `clu_num_points`).\n    \"\"\"\n    while sum(clu_num_points) &lt; num_points:\n        imin = argmin(clu_num_points)\n        clu_num_points[imin] += 1\n    while sum(clu_num_points) &gt; num_points:\n        imax = argmax(clu_num_points)\n        clu_num_points[imax] -= 1\n\n    return clu_num_points\n</code></pre>"},{"location":"reference/#pyclugen.llengths","title":"llengths","text":"<pre><code>llengths(\n    num_clusters: int,\n    llength: float,\n    llength_disp: float,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Determine length of cluster-supporting lines.</p> <p>Line lengths are determined using the folded normal distribution ( 
\\(\\mu=\\)<code>llength</code>, \\(\\sigma=\\)<code>llength_disp</code>).</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy.random import Generator, MT19937\n&gt;&gt;&gt; from pyclugen import llengths\n&gt;&gt;&gt; prng = Generator(MT19937(123))\n&gt;&gt;&gt; llengths(4, 20, 3.5, rng=prng)\narray([19.50968733, 19.92482858, 25.99013804, 18.58029672])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>llength</code> <code>float</code> <p>Average line length.</p> required <code>llength_disp</code> <code>float</code> <p>Line length dispersion.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Lengths of cluster-supporting lines (vector of size <code>num_clusters</code>).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def llengths(\n    num_clusters: int,\n    llength: float,\n    llength_disp: float,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Determine length of cluster-supporting lines.\n\n    Line lengths are determined using the folded normal distribution (\n    $\\mu=$`llength`, $\\sigma=$`llength_disp`).\n\n    Examples:\n        &gt;&gt;&gt; from numpy.random import Generator, MT19937\n        &gt;&gt;&gt; from pyclugen import llengths\n        &gt;&gt;&gt; prng = Generator(MT19937(123))\n        &gt;&gt;&gt; llengths(4, 20, 3.5, rng=prng)\n        array([19.50968733, 19.92482858, 25.99013804, 18.58029672])\n\n    Args:\n      num_clusters: Number of clusters.\n      llength: Average line length.\n      llength_disp: Line length dispersion.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Lengths of cluster-supporting lines (vector of size `num_clusters`).\n    \"\"\"\n    return abs(llength + llength_disp * 
rng.normal(size=num_clusters))\n</code></pre>"},{"location":"reference/#pyclugen.points_on_line","title":"points_on_line","text":"<pre><code>points_on_line(\n    center: NDArray, direction: NDArray, dist_center: NDArray\n) -&gt; NDArray\n</code></pre> <p>Determine coordinates of points on a line.</p> <p>Determine coordinates of points on a line with <code>center</code> and <code>direction</code>, based on the distances from the center given in <code>dist_center</code>.</p> <p>This works by using the vector formulation of the line equation assuming <code>direction</code> is a \\(n\\)-dimensional unit vector. In other words, considering \\(\\mathbf{d}=\\)<code>direction.reshape(-1,1)</code> ( \\(n \\times 1\\) vector), \\(\\mathbf{c}=\\)<code>center.reshape(-1,1)</code> ( \\(n \\times 1\\) vector), and \\(\\mathbf{w}=\\) <code>dist_center.reshape(-1,1)</code> ( \\(p \\times 1\\) vector), the coordinates of points on the line are given by:</p> \\[ \\mathbf{P}=\\mathbf{1}\\,\\mathbf{c}^T + \\mathbf{w}\\mathbf{d}^T \\] <p>where \\(\\mathbf{P}\\) is the \\(p \\times n\\) matrix of point coordinates on the line, and \\(\\mathbf{1}\\) is a \\(p \\times 1\\) vector with all entries equal to 1.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import points_on_line\n&gt;&gt;&gt; from numpy import array, linspace\n&gt;&gt;&gt; points_on_line(array([5., 5.]),\n...                array([1., 0.]),\n...                linspace(-4, 4, 5)) # 2D, 5 points\narray([[1., 5.],\n       [3., 5.],\n       [5., 5.],\n       [7., 5.],\n       [9., 5.]])\n&gt;&gt;&gt; points_on_line(array([-2, 0, 0., 2]),\n...                array([0., 0, -1, 0]),\n...                
array([10, -10])) # 4D, 2 points\narray([[ -2.,   0., -10.,   2.],\n       [ -2.,   0.,  10.,   2.]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>center</code> <code>NDArray</code> <p>Center of the line ( \\(n\\)-component vector).</p> required <code>direction</code> <code>NDArray</code> <p>Line direction ( \\(n\\)-component unit vector).</p> required <code>dist_center</code> <code>NDArray</code> <p>Distance of each point to the center of the line ( \\(p\\)-component vector, where \\(p\\) is the number of points).</p> required <p>Returns:</p> Type Description <code>NDArray</code> <p>Coordinates of points on the specified line ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/core.py</code> <pre><code>def points_on_line(\n    center: NDArray, direction: NDArray, dist_center: NDArray\n) -&gt; NDArray:\n    r\"\"\"Determine coordinates of points on a line.\n\n    Determine coordinates of points on a line with `center` and `direction`,\n    based on the distances from the center given in `dist_center`.\n\n    This works by using the vector formulation of the line equation assuming\n    `direction` is a $n$-dimensional unit vector. In other words, considering\n    $\\mathbf{d}=$`direction.reshape(-1,1)` ( $n \\times 1$ vector),\n    $\\mathbf{c}=$`center.reshape(-1,1)` ( $n \\times 1$ vector), and\n    $\\mathbf{w}=$ `dist_center.reshape(-1,1)` ( $p \\times 1$ vector),\n    the coordinates of points on the line are given by:\n\n    $$\n    \\mathbf{P}=\\mathbf{1}\\,\\mathbf{c}^T + \\mathbf{w}\\mathbf{d}^T\n    $$\n\n    where $\\mathbf{P}$ is the $p \\times n$ matrix of point coordinates on the\n    line, and $\\mathbf{1}$ is a $p \\times 1$ vector with all entries equal to 1.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import points_on_line\n        &gt;&gt;&gt; from numpy import array, linspace\n        &gt;&gt;&gt; points_on_line(array([5., 5.]),\n        ...                array([1., 0.]),\n        ...                
linspace(-4, 4, 5)) # 2D, 5 points\n        array([[1., 5.],\n               [3., 5.],\n               [5., 5.],\n               [7., 5.],\n               [9., 5.]])\n        &gt;&gt;&gt; points_on_line(array([-2, 0, 0., 2]),\n        ...                array([0., 0, -1, 0]),\n        ...                array([10, -10])) # 4D, 2 points\n        array([[ -2.,   0., -10.,   2.],\n               [ -2.,   0.,  10.,   2.]])\n\n    Args:\n      center: Center of the line ( $n$-component vector).\n      direction: Line direction ( $n$-component unit vector).\n      dist_center: Distance of each point to the center of the line\n        ( $p$-component vector, where $p$ is the number of points).\n\n    Returns:\n      Coordinates of points on the specified line ( $p \\times n$ matrix).\n    \"\"\"\n    return center.reshape(1, -1) + dist_center.reshape(-1, 1) @ direction.reshape(\n        (1, -1)\n    )\n</code></pre>"},{"location":"reference/#pyclugen.rand_ortho_vector","title":"rand_ortho_vector","text":"<pre><code>rand_ortho_vector(u: NDArray, rng: Generator = _default_rng) -&gt; NDArray\n</code></pre> <p>Get a random unit vector orthogonal to <code>u</code>.</p> <p>Note that <code>u</code> is expected to be a unit vector itself.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import rand_ortho_vector\n&gt;&gt;&gt; from numpy import isclose, dot\n&gt;&gt;&gt; from numpy.linalg import norm\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; r = rng.random(3) # Get a random vector with 3 components (3D)\n&gt;&gt;&gt; r = r / norm(r) # Normalize it\n&gt;&gt;&gt; r_ort = rand_ortho_vector(r, rng=rng) # Get random unit vector orth. 
to r\n&gt;&gt;&gt; r_ort\narray([-0.1982903 , -0.61401512,  0.76398062])\n&gt;&gt;&gt; isclose(dot(r, r_ort), 0) # Check that vectors are indeed orthogonal\nTrue\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>u</code> <code>NDArray</code> <p>Unit vector with \\(n\\) components.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>A random unit vector with \\(n\\) components orthogonal to <code>u</code>.</p> Source code in <code>pyclugen/core.py</code> <pre><code>def rand_ortho_vector(u: NDArray, rng: Generator = _default_rng) -&gt; NDArray:\n    r\"\"\"Get a random unit vector orthogonal to `u`.\n\n    Note that `u` is expected to be a unit vector itself.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import rand_ortho_vector\n        &gt;&gt;&gt; from numpy import isclose, dot\n        &gt;&gt;&gt; from numpy.linalg import norm\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; r = rng.random(3) # Get a random vector with 3 components (3D)\n        &gt;&gt;&gt; r = r / norm(r) # Normalize it\n        &gt;&gt;&gt; r_ort = rand_ortho_vector(r, rng=rng) # Get random unit vector orth. 
to r\n        &gt;&gt;&gt; r_ort\n        array([-0.1982903 , -0.61401512,  0.76398062])\n        &gt;&gt;&gt; isclose(dot(r, r_ort), 0) # Check that vectors are indeed orthogonal\n        True\n\n    Args:\n      u: Unit vector with $n$ components.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      A random unit vector with $n$ components orthogonal to `u`.\n    \"\"\"\n    # If 1D, just return a random unit vector\n    if u.size == 1:\n        return rand_unit_vector(1, rng=rng)\n\n    # Find a random, non-parallel vector to u\n    while True:\n        # Find normalized random vector\n        r = rand_unit_vector(u.size, rng=rng)\n\n        # If not parallel to u we can keep it and break the loop\n        if not isclose(abs(dot(u, r)), 1):\n            break\n\n    # Get vector orthogonal to u using 1st iteration of Gram-Schmidt process\n    v = r - dot(u, r) / dot(u, u) * u\n\n    # Normalize it\n    v = v / norm(v)\n\n    # And return it\n    return v\n</code></pre>"},{"location":"reference/#pyclugen.rand_unit_vector","title":"rand_unit_vector","text":"<pre><code>rand_unit_vector(num_dims: int, rng: Generator = _default_rng) -&gt; NDArray\n</code></pre> <p>Get a random unit vector with <code>num_dims</code> components.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import rand_unit_vector\n&gt;&gt;&gt; rand_unit_vector(4)\narray([ 0.48653889,  0.50753862,  0.05711487, -0.70881757])\n</code></pre> <pre><code>&gt;&gt;&gt; from pyclugen import rand_unit_vector\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; rand_unit_vector(2, rng=rng) # Reproducible\narray([ 0.3783202 , -0.92567479])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_dims</code> <code>int</code> <p>Number of components in vector (i.e. 
vector size).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>A random unit vector with <code>num_dims</code> components.</p> Source code in <code>pyclugen/core.py</code> <pre><code>def rand_unit_vector(num_dims: int, rng: Generator = _default_rng) -&gt; NDArray:\n    r\"\"\"Get a random unit vector with `num_dims` components.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import rand_unit_vector\n        &gt;&gt;&gt; rand_unit_vector(4) # doctest: +SKIP\n        array([ 0.48653889,  0.50753862,  0.05711487, -0.70881757])\n\n        &gt;&gt;&gt; from pyclugen import rand_unit_vector\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; rand_unit_vector(2, rng=rng) # Reproducible\n        array([ 0.3783202 , -0.92567479])\n\n    Args:\n      num_dims: Number of components in vector (i.e. 
vector size).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      A random unit vector with `num_dims` components.\n    \"\"\"\n    r = rng.random(num_dims) - 0.5\n    r = r / norm(r)\n    return r\n</code></pre>"},{"location":"reference/#pyclugen.rand_vector_at_angle","title":"rand_vector_at_angle","text":"<pre><code>rand_vector_at_angle(\n    u: NDArray, angle: float, rng: Generator = _default_rng\n) -&gt; NDArray\n</code></pre> <p>Get a random unit vector which is at <code>angle</code> radians of vector <code>u</code>.</p> <p>Note that <code>u</code> is expected to be a unit vector itself.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import rand_vector_at_angle\n&gt;&gt;&gt; from numpy import arccos, array, degrees, pi, dot\n&gt;&gt;&gt; from numpy.linalg import norm\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; u = array([ 1.0, 0, 0.5, -0.5 ]) # Define a 4D vector\n&gt;&gt;&gt; u = u / norm(u) # Normalize the vector\n&gt;&gt;&gt; v = rand_vector_at_angle(u, pi/4, rng=rng) # Get a vector at 45 degrees\n&gt;&gt;&gt; v\narray([ 0.633066  , -0.50953554, -0.10693823, -0.57285705])\n&gt;&gt;&gt; degrees(arccos(dot(u, v) / norm(u) * norm(v))) # Angle between u and v\n45.0\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>u</code> <code>NDArray</code> <p>Unit vector with \\(n\\) components.</p> required <code>angle</code> <code>float</code> <p>Angle in radians.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Random unit vector with \\(n\\) components which is at <code>angle</code> radians with vector <code>u</code>.</p> Source code in <code>pyclugen/core.py</code> <pre><code>def rand_vector_at_angle(\n    u: NDArray, angle: float, rng: Generator = _default_rng\n) -&gt; NDArray:\n    r\"\"\"Get a random unit vector 
which is at `angle` radians of vector `u`.\n\n    Note that `u` is expected to be a unit vector itself.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import rand_vector_at_angle\n        &gt;&gt;&gt; from numpy import arccos, array, degrees, pi, dot\n        &gt;&gt;&gt; from numpy.linalg import norm\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; u = array([ 1.0, 0, 0.5, -0.5 ]) # Define a 4D vector\n        &gt;&gt;&gt; u = u / norm(u) # Normalize the vector\n        &gt;&gt;&gt; v = rand_vector_at_angle(u, pi/4, rng=rng) # Get a vector at 45 degrees\n        &gt;&gt;&gt; v\n        array([ 0.633066  , -0.50953554, -0.10693823, -0.57285705])\n        &gt;&gt;&gt; degrees(arccos(dot(u, v) / norm(u) * norm(v))) # Angle between u and v\n        45.0\n\n    Args:\n      u: Unit vector with $n$ components.\n      angle: Angle in radians.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Random unit vector with $n$ components which is at `angle` radians\n        with vector `u`.\n    \"\"\"\n    if isclose(abs(angle), pi / 2) and u.size &gt; 1:\n        return rand_ortho_vector(u, rng=rng)\n    elif -pi / 2 &lt; angle &lt; pi / 2 and u.size &gt; 1:\n        v = u + rand_ortho_vector(u, rng=rng) * tan(angle)\n        return v / norm(v)\n    else:\n        # For |\u03b8| &gt; \u03c0/2 or the 1D case, simply return a random vector\n        return rand_unit_vector(u.size, rng=rng)\n</code></pre>"},{"location":"theory/","title":"Theory","text":"<p>This section presents a general overview of the clugen algorithm. A complete description of the algorithm's theoretical framework is available in the article \"Generating multidimensional clusters with support lines\" (an open version is available on arXiv).</p> <p>Clugen is an algorithm for generating multidimensional clusters. 
Each cluster is supported by a line segment, the position, orientation and length of which guide where the respective points are placed. For brevity, line segments will be referred to as lines.</p> <p>Given an \\(n\\)-dimensional direction vector \\(\\mathbf{d}\\) (and a number of additional parameters, which will be discussed shortly), the clugen algorithm works as follows (\\(^*\\) means the algorithm step is stochastic):</p> <ol> <li>Normalize \\(\\mathbf{d}\\).</li> <li>\\(^*\\)Determine cluster sizes.</li> <li>\\(^*\\)Determine cluster centers.</li> <li>\\(^*\\)Determine lengths of cluster-supporting lines.</li> <li>\\(^*\\)Determine angles between \\(\\mathbf{d}\\) and cluster-supporting lines.</li> <li>For each cluster:</li> <li>\\(^*\\)Determine direction of the cluster-supporting line.</li> <li>\\(^*\\)Determine distance of point projections from the center of the       cluster-supporting line.</li> <li>Determine coordinates of point projections on the cluster-supporting line.</li> <li>\\(^*\\)Determine points from their projections on the cluster-supporting       line.</li> </ol> <p>Figure 1 provides a stylized overview of the algorithm's steps.</p> <p></p> <p>The example in Figure 1 was generated with the following parameters, the exact meaning of which will be discussed shortly:</p> Parameter values Description \\(n=2\\) Number of dimensions. \\(c=4\\) Number of clusters. \\(p=200\\) Total number of points. \\(\\mathbf{d}=\\begin{bmatrix}1 &amp; 1\\end{bmatrix}^T\\) Average direction. \\(\\theta_\\sigma=\\pi/16\\approx{}11.25^{\\circ}\\) Angle dispersion. \\(\\mathbf{s}=\\begin{bmatrix}10 &amp; 10\\end{bmatrix}^T\\) Average cluster separation. \\(l=10\\) Average line length. \\(l_\\sigma=1.5\\) Line length dispersion. \\(f_\\sigma=1\\) Cluster lateral dispersion. <p>Additionally, all optional parameters (not listed above) were left at their default values. 
The complete list of parameters is presented in the <code>clugen()</code> function documentation.</p>"},{"location":"generated/gallery/","title":"Examples","text":""},{"location":"generated/gallery/#examples","title":"Examples","text":"<p> Examples in 1D </p> <p> Examples in 2D </p> <p> Examples in 3D </p> <p> Examples in nD </p> <p> Merging and hierarchical cluster examples </p> <p> Plot functions </p> <p> Download all examples in Python source code: gallery_python.zip</p> <p> Download all examples in Jupyter notebooks: gallery_jupyter.zip</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/mg_execution_times/","title":"Computation times","text":"<p>00:26.400 total execution time for generated_gallery files:</p> <p>+----------------------------------------------------------------------------------------+-----------+--------+ | plot_2_2d_examples (docs/examples/plot_2_2d_examples.py)    | 00:10.292 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_4_nd_examples (docs/examples/plot_4_nd_examples.py)    | 00:06.376 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_3_3d_examples (docs/examples/plot_3_3d_examples.py)    | 00:05.237 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_5_mrg_examples (docs/examples/plot_5_mrg_examples.py) | 00:03.156 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_1_1d_examples (docs/examples/plot_1_1d_examples.py)    | 00:01.333 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_functions (docs/examples/plot_functions.py)                | 00:00.005 | 0.0 MB | 
+----------------------------------------------------------------------------------------+-----------+--------+</p>"},{"location":"generated/gallery/plot_1_1d_examples/","title":"Examples in 1D","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_1_1d_examples/#examples-in-1d","title":"Examples in 1D","text":"<p>This section contains several examples on how to generate 1D data with pyclugen. To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>from pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_1d</code> function:</p> <pre><code>from plot_functions import plot_examples_1d\n</code></pre> <p>Out:</p> <pre><code>/home/runner/work/pyclugen/pyclugen/docs/docs/examples/plot_functions.py:15: DeprecationWarning: \nPyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\nbut was not found to be installed on your system.\nIf this would cause problems for you,\nplease provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n\n  import pandas as pd\n</code></pre>"},{"location":"generated/gallery/plot_1_1d_examples/#basic-1d-example-with-density-plot","title":"Basic 1D example with density plot","text":"<pre><code>seed = 23456\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Weibull distribution\ndef proj_weibull(len, n, rng):\n    return len / 2 * rng.weibull(1.5, size=n)\n</code></pre> <pre><code>e082 = clugen(1, 3, 1000, [1], 0, [10], 6, 1.5, 0, rng=seed)\ne083 = clugen(1, 3, 1000, [1], 0, [10], 6, 1.5, 0, rng=seed, proj_dist_fn=\"unif\")\ne084 = clugen(1, 3, 1000, [1], 0, [10], 6, 1.5, 0, rng=seed, proj_dist_fn=proj_weibull)\n</code></pre> <pre><code>plot_examples_1d(\n    e082, \"e082: proj_dist_fn = 'norm' 
(default)\",\n    e083, \"e083: proj_dist_fn = 'unif'\",\n    e084, \"e084: custom proj_dist_fn (Weibull)\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  1.333 seconds)</p> <p> Download Python source code: plot_1_1d_examples.py</p> <p> Download Jupyter notebook: plot_1_1d_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_2_2d_examples/","title":"Examples in 2D","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_2_2d_examples/#examples-in-2d","title":"Examples in 2D","text":"<p>This section contains several examples on how to generate 2D data with pyclugen. To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_2d</code> function:</p> <pre><code>from plot_functions import plot_examples_2d\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-the-direction-of-cluster-supporting-lines","title":"Manipulating the direction of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_2_2d_examples/#using-the-direction-parameter","title":"Using the <code>direction</code> parameter","text":"<pre><code>seed = 123\n</code></pre> <pre><code>e001 = clugen(2, 4, 2000, [1, 0], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\ne002 = clugen(2, 4, 200, [1, 1], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\ne003 = clugen(2, 4, 200, [0, 1], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\n</code></pre> <pre><code>plot_examples_2d(\n    e001, \"e001: direction = [1, 0]\",\n    e002, \"e002: direction = [1, 1]\",\n    e003, \"e003: direction = [0, 1]\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#changing-the-angle_disp-parameter-and-using-a-custom-angle_deltas_fn-function","title":"Changing the <code>angle_disp</code> parameter and using a custom 
<code>angle_deltas_fn</code> function","text":"<pre><code>seed = 321\n</code></pre> <pre><code># Custom angle_deltas function: arbitrarily rotate some clusters by 90 degrees\ndef angdel_90_fn(nclu, astd, rng):\n    return rng.choice([0, np.pi / 2], size=nclu)\n</code></pre> <pre><code>e004 = clugen(2, 6, 500, [1, 0], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\ne005 = clugen(2, 6, 500, [1, 0], np.pi / 8, [10, 10], 10, 1.5, 0.5, rng=seed)\ne006 = clugen(2, 6, 500, [1, 0], 0, [10, 10], 10, 1.5, 0.5, rng=seed,\n    angle_deltas_fn=angdel_90_fn)\n</code></pre> <pre><code>plot_examples_2d(\n    e004, \"e004: angle_disp = 0\",\n    e005, \"e005: angle_disp = \u03c0/8\",\n    e006, \"e006: custom angle_deltas function\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-the-length-of-cluster-supporting-lines","title":"Manipulating the length of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_2_2d_examples/#using-the-llength-parameter","title":"Using the <code>llength</code> parameter","text":"<pre><code>seed = 567\n</code></pre> <pre><code>e007 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10],  0, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne008 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 10, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne009 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 30, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\n</code></pre> <pre><code>plot_examples_2d(\n    e007, \"e007: llength = 0\",\n    e008, \"e008: llength = 10\",\n    e009, \"e009: llength = 30\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#changing-the-llength_disp-parameter-and-using-a-custom-llengths_fn-function","title":"Changing the <code>llength_disp</code> parameter and using a custom <code>llengths_fn</code> function","text":"<pre><code>seed = 567\n</code></pre> <pre><code># Custom llengths function: line lengths grow for each new cluster\ndef llen_grow_fn(nclu, llen, llenstd, rng):\n    return llen 
* np.arange(nclu) + rng.normal(scale=llenstd, size=nclu)\n</code></pre> <pre><code>e010 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 15,  0.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne011 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 15, 10.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne012 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 10,  0.1, 0.5, rng=seed,\n    llengths_fn=llen_grow_fn, point_dist_fn=\"n\")\n</code></pre> <pre><code>plot_examples_2d(\n    e010, \"e010: llength_disp = 0.0\",\n    e011, \"e011: llength_disp = 5.0\",\n    e012, \"e012: custom llengths function\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-relative-cluster-positions","title":"Manipulating relative cluster positions","text":""},{"location":"generated/gallery/plot_2_2d_examples/#using-the-cluster_sep-parameter","title":"Using the <code>cluster_sep</code> parameter","text":"<pre><code>seed = 21\n</code></pre> <pre><code>e013 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, rng=seed)\ne014 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [30, 10], 10, 2, 2.5, rng=seed)\ne015 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 30], 10, 2, 2.5, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e013, \"e013: cluster_sep = [10, 10]\",\n    e014, \"e014: cluster_sep = [30, 10]\",\n    e015, \"e015: cluster_sep = [10, 30]\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#changing-the-cluster_offset-parameter-and-using-a-custom-clucenters_fn-function","title":"Changing the <code>cluster_offset</code> parameter and using a custom <code>clucenters_fn</code> function","text":"<pre><code>seed = 21\n</code></pre> <pre><code># Custom clucenters function: places clusters in a diagonal\ndef centers_diag_fn(nclu, csep, coff, rng):\n    return np.ones((nclu, len(csep))) * np.arange(1, nclu + 1)[:, None] * np.max(csep) + coff\n</code></pre> <pre><code>e016 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, 
rng=seed)\ne017 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, rng=seed,\n    cluster_offset=[20, -20])\ne018 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, rng=seed,\n    cluster_offset=[-50, -50], clucenters_fn=centers_diag_fn)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e016, \"e016: default\",\n    e017, \"e017: cluster_offset = [20, -20]\",\n    e018, \"e018: custom clucenters function\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#lateral-dispersion-and-placement-of-point-projections-on-the-line","title":"Lateral dispersion and placement of point projections on the line","text":""},{"location":"generated/gallery/plot_2_2d_examples/#normal-projection-placement-default-proj_dist_fn-norm","title":"Normal projection placement (default): <code>proj_dist_fn = \"norm\"</code>","text":"<pre><code>seed = 654\n</code></pre> <pre><code>e019 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 0.0, rng=seed)\ne020 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 1.0, rng=seed)\ne021 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 3.0, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e019, \"e019: lateral_disp = 0\",\n    e020, \"e020: lateral_disp = 1\",\n    e021, \"e021: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#uniform-projection-placement-proj_dist_fn-unif","title":"Uniform projection placement: <code>proj_dist_fn = \"unif\"</code>","text":"<pre><code>seed = 654\n</code></pre> <pre><code>e022 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne023 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne024 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=\"unif\")\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e022, \"e022: lateral_disp = 0\",\n    e023, \"e023: lateral_disp = 
1\",\n    e024, \"e024: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#custom-projection-placement-using-the-laplace-distribution","title":"Custom projection placement using the Laplace distribution","text":"<pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e025 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne026 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne027 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e025, \"e025: lateral_disp = 0\",\n    e026, \"e026: lateral_disp = 1\",\n    e027, \"e027: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#controlling-final-point-positions-from-their-projections-on-the-cluster-supporting-line","title":"Controlling final point positions from their projections on the cluster-supporting line","text":""},{"location":"generated/gallery/plot_2_2d_examples/#points-on-hyperplane-orthogonal-to-cluster-supporting-line-default-point_dist_fn-n-1","title":"Points on hyperplane orthogonal to cluster-supporting line (default): <code>point_dist_fn = \"n-1\"</code>","text":"<pre><code>seed = 1357\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e028 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed)\ne029 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne030 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    
proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e028, \"e028: proj_dist_fn=\\\"norm\\\" (default)\",\n    e029, \"e029: proj_dist_fn=\\\"unif\\\"\",\n    e030, \"e030: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#points-around-projection-on-cluster-supporting-line-point_dist_fn-n","title":"Points around projection on cluster-supporting line: <code>point_dist_fn = \"n\"</code>","text":"<pre><code>seed = 1357\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e031 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=\"n\")\ne032 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=\"unif\")\ne033 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e031, \"e031: proj_dist_fn=\\\"norm\\\" (default)\",\n    e032, \"e032: proj_dist_fn=\\\"unif\\\"\",\n    e033, \"e033: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#custom-point-placement-using-the-exponential-distribution","title":"Custom point placement using the exponential distribution","text":"<p>For this example we require the <code>clupoints_n_1_template()</code> helper function:</p> <pre><code>from pyclugen import clupoints_n_1_template\n</code></pre> <pre><code>seed = 1357\n</code></pre> <pre><code># Custom point_dist_fn: final points placed using the Exponential distribution\ndef clupoints_n_1_exp(projs, lat_std, len, clu_dir, clu_ctr, rng):\n    def dist_exp(npts, lstd, rg):\n        return lstd * rg.exponential(scale=2 / lstd, size=npts)\n    return clupoints_n_1_template(projs, lat_std, 
clu_dir, dist_exp, rng=rng)\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e034 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=clupoints_n_1_exp)\ne035 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=\"unif\")\ne036 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e034, \"e034: proj_dist_fn=\\\"norm\\\" (default)\",\n    e035, \"e035: proj_dist_fn=\\\"unif\\\"\",\n    e036, \"e036: custom proj_dist_fn (Laplace)\")\n</code></pre> <p></p>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-cluster-sizes","title":"Manipulating cluster sizes","text":"<pre><code>seed = 963\n</code></pre> <pre><code># Custom clusizes_fn (e038): cluster sizes determined via the uniform distribution,\n# no correction for total points\ndef clusizes_unif(nclu, npts, ae, rng):\n    return rng.integers(low=1, high=2 * npts / nclu + 1, size=nclu)\n</code></pre> <pre><code># Custom clusizes_fn (e039): clusters all have the same size, no correction for total points\ndef clusizes_equal(nclu, npts, ae, rng):\n    return (npts // nclu) * np.ones(nclu, dtype=int)\n</code></pre> <pre><code># Custom clucenters_fn (all): yields fixed positions for the clusters\ndef centers_fixed(nclu, csep, coff, rng):\n    return np.array([[-csep[0], -csep[1]], [csep[0], -csep[1]], [-csep[0], csep[1]], [csep[0], csep[1]]])\n</code></pre> <pre><code>e037 = clugen(2, 4, 1500, [1, 1], np.pi, [20, 20], 0, 0, 5, rng=seed,\n    point_dist_fn=\"n\", clucenters_fn=centers_fixed)\ne038 = clugen(2, 4, 1500, [1, 1], np.pi, [20, 20], 0, 0, 5, rng=seed,\n    point_dist_fn=\"n\", 
clucenters_fn=centers_fixed, clusizes_fn=clusizes_unif)\ne039 = clugen(2, 4, 1500, [1, 1], np.pi, [20, 20], 0, 0, 5, rng=seed,\n    point_dist_fn=\"n\", clucenters_fn=centers_fixed, clusizes_fn=clusizes_equal)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e037, \"e037: normal dist. (default)\",\n    e038, \"e038: unif. dist. (custom)\",\n    e039, \"e039: equal size (custom)\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#direct-specification-of-optional-parameters","title":"Direct specification of optional parameters","text":"<pre><code>seed = 123\n</code></pre> <pre><code>e040 = clugen(2, 4, 1000, [-1, 1], 0, [0, 0], 0, 0, 0.2, rng=seed,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\", clusizes_fn=[50, 200, 500, 2000],\n    llengths_fn=[0, 2, 4, 6], clucenters_fn=[[-5, -5], [-2.5, -2.5], [0, 0], [2.5, 2.5]])\n\ne041 = clugen(2, 5, 1000, [[1, 1], [1, 0], [1, 0], [0, 1], [0, 1]],\n    0, [0, 0], 0, 0, 0.2, rng=seed,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\",\n    clusizes_fn=[200, 500, 500, 500, 500], llengths_fn=[0, 5, 5, 5, 5],\n    clucenters_fn=[[0, 0], [0, 5], [0, -5], [5, 0], [-5, 0]])\n\ne042 = clugen(2, 5, 1000, [[0, 1], [0.25, 0.75], [0.5, 0.5], [0.75, 0.25], [1, 0]],\n    0, [0, 0], 5, 0, 0.2, rng=seed,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\", clusizes_fn=[500, 500, 500, 500, 500],\n    clucenters_fn=[[-5, 0], [-3, -0.3], [-1, -0.8], [1, -1.6], [3, -2.5]])\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e040, \"e040: direct params 1\",\n    e041, \"e041: direct params 2\",\n    e042, \"e042: direct params 3\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  10.292 seconds)</p> <p> Download Python source code: plot_2_2d_examples.py</p> <p> Download Jupyter notebook: plot_2_2d_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_3_3d_examples/","title":"Examples in 3D","text":"<p>Note</p> <p>Click here to download the full example 
code</p>"},{"location":"generated/gallery/plot_3_3d_examples/#examples-in-3d","title":"Examples in 3D","text":"<p>This section contains several examples on how to generate 3D data with pyclugen. To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_3d</code> function:</p> <pre><code>from plot_functions import plot_examples_3d\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-the-direction-of-cluster-supporting-lines","title":"Manipulating the direction of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_3_3d_examples/#using-the-direction-parameter","title":"Using the <code>direction</code> parameter","text":"<pre><code>seed = 321\n</code></pre> <pre><code>e043 = clugen(3, 4, 500, [1, 0, 0], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne044 = clugen(3, 4, 500, [1, 1, 1], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne045 = clugen(3, 4, 500, [0, 0, 1], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e043, \"e043: direction = [1, 0, 0]\",\n    e044, \"e044: direction = [1, 1, 1]\",\n    e045, \"e045: direction = [0, 0, 1]\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#changing-the-angle_disp-parameter-and-using-a-custom-angle_deltas_fn-function","title":"Changing the <code>angle_disp</code> parameter and using a custom <code>angle_deltas_fn</code> function","text":"<pre><code>seed = 321\n\n# Custom angle_deltas function: arbitrarily rotate some clusters by 90 degrees\ndef angdel_90_fn(nclu, astd, rng):\n    return rng.choice([0, np.pi / 2], size=nclu)\n</code></pre> <pre><code>e046 = clugen(3, 6, 1000, [1, 0, 0], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne047 = clugen(3, 6, 1000, [1, 0, 0], np.pi / 8, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne048 = clugen(3, 6, 1000, [1, 0, 0], 0, [10, 10, 10], 
15, 1.5, 0.5, rng=seed,\n    angle_deltas_fn=angdel_90_fn)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e046, \"e046: angle_disp = 0\",\n    e047, \"e047: angle_disp = \u03c0 / 8\",\n    e048, \"e048: custom angle_deltas function\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#specifying-a-main-direction-for-each-cluster-and-changing-angle_disp","title":"Specifying a main <code>direction</code> for each cluster and changing <code>angle_disp</code>","text":"<pre><code>seed = 123\n\n# Define a main direction for each cluster\ndirs = [[1, 1, 1], [0, 0, 1], [1, 0, 0], [0, 1, 0], [-1, 1, 1]]\n</code></pre> <pre><code>e049 = clugen(3, 5, 1000, dirs, 0, np.zeros(3), 20, 0, 0.2, proj_dist_fn=\"unif\", rng=seed)\ne050 = clugen(3, 5, 1000, dirs, np.pi / 12, np.zeros(3), 20, 0, 0.2, proj_dist_fn=\"unif\", rng=seed)\ne051 = clugen(3, 5, 1000, dirs, np.pi / 4, np.zeros(3), 20, 0, 0.2, proj_dist_fn=\"unif\", rng=seed)\n</code></pre> <pre><code>plot_examples_3d(\n    e049, \"e049: angle_disp = 0\",\n    e050, \"e050: angle_disp = \u03c0 / 12\",\n    e051, \"e051: angle_disp = \u03c0 / 4\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-the-length-of-cluster-supporting-lines","title":"Manipulating the length of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_3_3d_examples/#using-the-llength-parameter","title":"Using the <code>llength</code> parameter","text":"<pre><code>seed = 789\n</code></pre> <pre><code>e052 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 0, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne053 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 10, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne054 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 30, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e052, \"e052: llength = 0\",\n    e053, \"e053: llength = 10\",\n    e054, \"e054: llength = 
30\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#changing-the-llength_disp-parameter-and-using-a-custom-llengths_fn-function","title":"Changing the <code>llength_disp</code> parameter and using a custom <code>llengths_fn</code> function","text":"<pre><code>seed = 765\n</code></pre> <pre><code># Custom llengths function: line lengths tend to grow for each new cluster\ndef llen_grow_fn(nclu, llen, llenstd, rng):\n    return llen * np.arange(nclu) + rng.normal(scale=llenstd, size=nclu)\n\ne055 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 15,  0.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne056 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 15, 10.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne057 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 10,  0.1, 0.5, rng=seed,\n    point_dist_fn=\"n\", llengths_fn=llen_grow_fn)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e055, \"e055: llength_disp = 0.0\",\n    e056, \"e056: llength_disp = 10.0\",\n    e057, \"e057: custom llengths function\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-relative-cluster-positions","title":"Manipulating relative cluster positions","text":""},{"location":"generated/gallery/plot_3_3d_examples/#using-the-cluster_sep-parameter","title":"Using the <code>cluster_sep</code> parameter","text":"<pre><code>seed = 765\n</code></pre> <pre><code>e058 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [30, 10, 10], 25, 4, 3, rng=seed)\ne059 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 30, 10], 25, 4, 3, rng=seed)\ne060 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 30], 25, 4, 3, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e058, \"e058: cluster_sep = [30, 10, 10]\",\n    e059, \"e059: cluster_sep = [10, 30, 10]\",\n    e060, \"e060: cluster_sep = [10, 10, 
30]\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#changing-the-cluster_offset-parameter-and-using-a-custom-clucenters_fn-function","title":"Changing the <code>cluster_offset</code> parameter and using a custom <code>clucenters_fn</code> function","text":"<pre><code># Custom clucenters function: places clusters in a diagonal\ndef centers_diag_fn(nclu, csep, coff, rng):\n    return np.ones((nclu, len(csep))) * np.arange(1, nclu + 1)[:, None] * np.max(csep) + coff\n\ne061 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 10], 12, 3, 2.5, rng=seed)\ne062 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 10], 12, 3, 2.5, rng=seed,\n    cluster_offset=[30, -30, 30])\ne063 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 10], 12, 3, 2.5, rng=seed,\n    cluster_offset=[-40, -40, -40], clucenters_fn=centers_diag_fn)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e061, \"e061: default\",\n    e062, \"e062: cluster_offset=[30, -30, 30]\",\n    e063, \"e063: custom clucenters function\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#lateral-dispersion-and-placement-of-point-projections-on-the-line","title":"Lateral dispersion and placement of point projections on the line","text":""},{"location":"generated/gallery/plot_3_3d_examples/#normal-projection-placement-default-proj_dist_fnnorm","title":"Normal projection placement (default): <code>proj_dist_fn=\"norm\"</code>","text":"<pre><code>seed = 246\n</code></pre> <pre><code>e064 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 0.0, rng=seed)\ne065 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 1.0, rng=seed)\ne066 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 3.0, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e064, \"e064: lateral_disp = 0\",\n    e065, \"e065: lateral_disp = 1\",\n    e066, \"e066: lateral_disp = 
3\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#uniform-projection-placement-proj_dist_fnunif","title":"Uniform projection placement: <code>proj_dist_fn=\"unif\"</code>","text":"<pre><code>seed = 246\n</code></pre> <pre><code>e067 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne068 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne069 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=\"unif\")\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e067, \"e067: lateral_disp = 0\",\n    e068, \"e068: lateral_disp = 1\",\n    e069, \"e069: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#custom-projection-placement-using-the-laplace-distribution","title":"Custom projection placement using the Laplace distribution","text":"<pre><code>seed = 246\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e070 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne071 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne072 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e070, \"e070: lateral_disp = 0\",\n    e071, \"e071: lateral_disp = 1\",\n    e072, \"e072: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#controlling-final-point-positions-from-their-projections-on-the-cluster-supporting-line","title":"Controlling final point positions from their projections on the cluster-supporting 
line","text":""},{"location":"generated/gallery/plot_3_3d_examples/#points-on-hyperplane-orthogonal-to-cluster-supporting-line-default-point_dist_fnn-1","title":"Points on hyperplane orthogonal to cluster-supporting line (default): <code>point_dist_fn=\"n-1\"</code>","text":"<pre><code>seed = 840\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e073 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed)\ne074 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    proj_dist_fn=\"unif\")\ne075 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e073, \"e073: proj_dist_fn=\\\"norm\\\" (default)\",\n    e074, \"e074: proj_dist_fn=\\\"unif\\\"\",\n    e075, \"e075: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#points-around-projection-on-cluster-supporting-line-point_dist_fnn","title":"Points around projection on cluster-supporting line: <code>point_dist_fn=\"n\"</code>","text":"<pre><code>seed = 840\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n\ne076 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=\"n\")\ne077 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=\"unif\")\ne078 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e076, \"e076: proj_dist_fn=\\\"norm\\\" (default)\",\n    e077, \"e077: 
proj_dist_fn=\\\"unif\\\"\",\n    e078, \"e078: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#custom-point-placement-using-the-exponential-distribution","title":"Custom point placement using the exponential distribution","text":"<p>For this example we require the <code>clupoints_n_1_template()</code> helper function:</p> <pre><code>from pyclugen import clupoints_n_1_template\n</code></pre> <pre><code>seed = 840\n</code></pre> <pre><code># Custom point_dist_fn: final points placed using the Exponential distribution\ndef clupoints_n_1_exp(projs, lat_std, len, clu_dir, clu_ctr, rng):\n    def dist_exp(npts, lstd, rg):\n        return lstd * rg.exponential(scale=2 / lstd, size=npts)\n    return clupoints_n_1_template(projs, lat_std, clu_dir, dist_exp, rng=rng)\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e079 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=clupoints_n_1_exp)\ne080 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=\"unif\")\ne081 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e079, \"e079: proj_dist_fn=\\\"norm\\\" (default)\",\n    e080, \"e080: proj_dist_fn=\\\"unif\\\"\",\n    e081, \"e081: custom proj_dist_fn (Laplace)\")\n</code></pre> <p></p>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-cluster-sizes","title":"Manipulating cluster sizes","text":"<pre><code>seed = 555\n</code></pre> <pre><code># Custom clusizes_fn (e083): cluster sizes determined via the uniform distribution,\n# no correction for total points\ndef clusizes_unif(nclu, npts, 
ae, rng):\n    return rng.integers(low=1, high=2 * npts / nclu + 1, size=nclu)\n</code></pre> <pre><code># Custom clusizes_fn (e084): clusters all have the same size, no correction for total points\ndef clusizes_equal(nclu, npts, ae, rng):\n    return (npts // nclu) * np.ones(nclu, dtype=int)\n</code></pre> <pre><code># Custom clucenters_fn (all): yields fixed positions for the clusters\ndef centers_fixed(nclu, csep, coff, rng):\n    return np.array([\n        [-csep[0], -csep[1], -csep[2]],\n        [csep[0], -csep[1], -csep[2]],\n        [-csep[0], csep[1], csep[2]],\n        [csep[0], csep[1], csep[2]]])\n</code></pre> <pre><code>e082 = clugen(3, 4, 1500, [1, 1, 1], np.pi, [20, 20, 20], 0, 0, 5, rng=seed,\n    clucenters_fn=centers_fixed, point_dist_fn=\"n\")\ne083 = clugen(3, 4, 1500, [1, 1, 1], np.pi, [20, 20, 20], 0, 0, 5, rng=seed,\n    clucenters_fn=centers_fixed, clusizes_fn=clusizes_unif, point_dist_fn=\"n\")\ne084 = clugen(3, 4, 1500, [1, 1, 1], np.pi, [20, 20, 20], 0, 0, 5, rng=seed,\n    clucenters_fn=centers_fixed, clusizes_fn=clusizes_equal, point_dist_fn=\"n\")\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e082, \"e082: normal dist. (default)\",\n    e083, \"e083: unif. dist. (custom)\",\n    e084, \"e084: equal size (custom)\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  5.237 seconds)</p> <p> Download Python source code: plot_3_3d_examples.py</p> <p> Download Jupyter notebook: plot_3_3d_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_4_nd_examples/","title":"Examples in nD","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_4_nd_examples/#examples-in-nd","title":"Examples in nD","text":"<p>This section contains several examples on how to generate nD (n &gt; 3) data with pyclugen. 
To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_nd</code> function:</p> <pre><code>from plot_functions import plot_examples_nd\n</code></pre>"},{"location":"generated/gallery/plot_4_nd_examples/#5d-example-with-default-optional-arguments","title":"5D example with default optional arguments","text":"<pre><code>seed = 123\n</code></pre> <pre><code># Number of dimensions\nnd = 5\n</code></pre> <pre><code>e085 = clugen(nd, 6, 1500, [1, 1, 0.5, 0, 0], np.pi / 16, 30 * np.ones(nd), 30, 4, 3, rng=seed)\n</code></pre> <pre><code>plot_examples_nd(e085, \"e085: 5D with optional parameters set to defaults\")\n</code></pre>"},{"location":"generated/gallery/plot_4_nd_examples/#5d-example-with-proj_dist_fn-unif-and-point_dist_fn-n","title":"5D example with <code>proj_dist_fn = \"unif\"</code> and <code>point_dist_fn = \"n\"</code>","text":"<pre><code>seed = 579\n</code></pre> <pre><code># Number of dimensions\nnd = 5\n</code></pre> <pre><code>e086 = clugen(nd, 6, 1500, [0.1, 0.3, 0.5, 0.3, 0.1], np.pi / 12, 30 * np.ones(nd), 35, 5, 3.5,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\", rng=seed)\n</code></pre> <pre><code>plot_examples_nd(e086, \"e086: 5D with proj_dist_fn=\\\"unif\\\" and point_dist_fn=\\\"n\\\"\")\n</code></pre>"},{"location":"generated/gallery/plot_4_nd_examples/#4d-example-with-custom-projection-placement-using-the-beta-distribution","title":"4D example with custom projection placement using the Beta distribution","text":"<pre><code>seed = 963\n</code></pre> <pre><code># Number of dimensions\nnd = 4\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Beta distribution\ndef proj_beta(len, n, rng):\n    return len * rng.beta(0.1, 0.1, size=n) - len / 2\n</code></pre> <pre><code>e087 = clugen(nd, 5, 1500, np.ones(nd), np.pi / 6, 30 * np.ones(nd), 60, 15, 6, 
rng=seed,\n    proj_dist_fn=proj_beta)\n</code></pre> <pre><code>plot_examples_nd(e087, \"e087: 4D with custom proj_dist_fn (Beta)\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  6.376 seconds)</p> <p> Download Python source code: plot_4_nd_examples.py</p> <p> Download Jupyter notebook: plot_4_nd_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_5_mrg_examples/","title":"Merging and hierarchical cluster examples","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_5_mrg_examples/#merging-and-hierarchical-cluster-examples","title":"Merging and hierarchical cluster examples","text":"<p>This section contains several examples on how to merge cluster data, either generated with pyclugen or from other sources. To run the examples we first need to import the <code>clugen()</code> and <code>clugen()</code> functions:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen, clumerge\n</code></pre> <p>Although it is possible to merge data in any dimension, these examples will focus on merging 2D data. 
Therefore, we'll use the same <code>plot_examples_2d</code> function used for the 2D examples:</p> <pre><code>from plot_functions import plot_examples_2d\n</code></pre>"},{"location":"generated/gallery/plot_5_mrg_examples/#merging-two-data-sets-generated-with-clugen","title":"Merging two data sets generated with <code>clugen()</code>","text":"<pre><code>seed1 = 444\nseed2 = 555\n</code></pre> <pre><code>e088 = clugen(2, 5, 1000, [1, 1], np.pi / 12, [20, 20], 14, 1.2, 1.5, rng=seed1,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\")\ne089 = clugen(2, 3, 1500, [1, 0], 0.05, [20, 20], 0, 0, 4, rng=seed2,\n    point_dist_fn=\"n\", cluster_offset = [20, 0])\ne090 = clumerge(e088, e089)\n</code></pre> <pre><code>plot_examples_2d(\n    e088, \"e088: data set 1\",\n    e089, \"e089: data set 2\",\n    e090, \"e090: merged data sets\")\n</code></pre> <p>In the previous example, clusters from individual data sets remain as separate clusters in the merged data set. It's also possible to maintain the original cluster labels by setting the <code>clusters_field</code> parameter to <code>None</code>:</p> <pre><code>e091 = clumerge(e088, e089, clusters_field=None)\n</code></pre> <pre><code>plot_examples_2d(\n    e088, \"e088: data set 1\",\n    e089, \"e089: data set 2\",\n    e091, \"e091: merged data sets\")\n</code></pre> <p></p>"},{"location":"generated/gallery/plot_5_mrg_examples/#adding-noise-to-a-clugen-generated-data-set","title":"Adding noise to a <code>clugen()</code>-generated data set","text":"<pre><code>seed = 333\n</code></pre> <pre><code>prng = np.random.default_rng(seed)\ne092 = {\"points\": 120 * prng.random((500, 2)) - 60, \"clusters\": np.ones(500, dtype=np.int32)}\ne093 = clumerge(e092, e090) # clumerge(e092, e088, e089) would also work\n</code></pre> <pre><code>plot_examples_2d(\n    e090, \"e090: original merged data sets\",\n    e092, \"e092: random uniform noise\",\n    e093, \"e093: data sets with noise\",\n    
pmargin=0)\n</code></pre>"},{"location":"generated/gallery/plot_5_mrg_examples/#merging-with-data-not-generated-with-clugen","title":"Merging with data not generated with <code>clugen()</code>","text":"<p>Data generated with <code>clugen()</code> can be merged with other data sets, for example data created with one of scikit-learn's generators:</p> <pre><code>seed = 321\n</code></pre> <pre><code>from sklearn.datasets import make_moons\n\nX, y = make_moons(100, noise=0.05, random_state=seed)\n\ne094 = {\"points\": X, \"clusters\": y}\ne095 = clugen(2, 4, 200, [1, 1], np.pi / 12, [1, 1], 0.1, 0.01, 0.25, rng=seed,\n    proj_dist_fn = \"unif\", point_dist_fn = \"n\")\ne096 = clumerge(e094, e095)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e094, \"e094: generated w/ make_moons()\",\n    e095, \"e095: generated w/ clugen()\",\n    e096, \"e096: merged data\")\n</code></pre> <p></p> <p>We can also hierarchize clusters from different sources:</p> <pre><code>e097 = {**e094, \"hclusters\": np.ones(100, dtype=np.int32)}\ne098 = {**e095._asdict(), \"hclusters\": 2 * np.ones(200, np.int32)}\ne099 = clumerge(e097, e098, clusters_field=\"hclusters\")\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e097, \"e097: generated w/ make_moons()\",\n    e098, \"e098: generated w/ clugen()\",\n    e099, \"e099: merged data\",\n    clusters_field=\"hclusters\")\n</code></pre> <p></p> <p>Total running time of the script: ( 0 minutes  3.156 seconds)</p> <p> Download Python source code: plot_5_mrg_examples.py</p> <p> Download Jupyter notebook: plot_5_mrg_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_functions/","title":"Plot functions","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_functions/#plot-functions","title":"Plot functions","text":"<p>Several auxiliary functions for plotting the examples in this 
documentation.</p>"},{"location":"generated/gallery/plot_functions/#import-the-required-libraries","title":"Import the required libraries","text":"<pre><code>import os\nimport warnings\n\nimport matplotlib.pyplot as plt  # type: ignore\nimport numpy as np\nimport numpy.typing as npt\nimport pandas as pd\nimport seaborn as sns  # type: ignore\n\nfrom pyclugen import Clusters\n\n# Hide annoying warnings when building docs in CI\nif os.getenv(\"CI\") != None:\n    warnings.filterwarnings(\"ignore\")\n</code></pre>"},{"location":"generated/gallery/plot_functions/#clusters2df","title":"clusters2df","text":"<pre><code>def clusters2df(\n    *exs: Clusters | dict[str, npt.ArrayLike], clusters_field: str = \"clusters\"\n) -&gt; pd.DataFrame:\n    \"\"\"Convert a sequence of clusters to a Pandas dataframe.\"\"\"\n\n    dfs = []\n    iex = 1\n\n    for ex in exs:\n        if isinstance(ex, dict):\n            points = ex[\"points\"]\n            clusters = ex[clusters_field]\n        else:\n            points = ex.points\n            clusters = ex.clusters\n\n        df = pd.DataFrame(\n            data=points, columns=[f\"x{i}\" for i in range(np.size(points, 1))]\n        )\n        df[\"cluster\"] = clusters.tolist()\n        df[\"example\"] = [iex] * clusters.size\n        dfs.append(df)\n        iex += 1\n\n    return pd.concat(dfs, ignore_index=True)\n</code></pre>"},{"location":"generated/gallery/plot_functions/#get_plot_lims","title":"get_plot_lims","text":"<pre><code>def get_plot_lims(df: pd.DataFrame, pmargin: float = 0.1):\n    \"\"\"Determine the plot limits for the cluster data given in `df`.\"\"\"\n\n    # Get maximum and minimum points in each dimension\n    xmaxs = df.iloc[:, :-2].max()\n    xmins = df.iloc[:, :-2].min()\n\n    # Determine plot centers in each dimension\n    xcenters = (xmaxs + xmins) / 2\n\n    # Determine plots span for all dimensions\n    sidespan = (1 + pmargin) * np.max(np.abs(xmaxs - xmins)) / 2\n\n    # Determine final plots limits\n    
xmaxs = xcenters + sidespan\n    xmins = xcenters - sidespan\n\n    return xmaxs, xmins\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_1d","title":"plot_examples_1d","text":"<pre><code>def plot_examples_1d(*ets, ncols: int = 3, clusters_field: str = \"clusters\"):\n    \"\"\"Plot the 1D examples given in the ets parameter.\"\"\"\n\n    # Get examples\n    ex = ets[0::2]\n    # Get titles\n    et = ets[1::2]\n\n    df = clusters2df(*ex, clusters_field=clusters_field)\n\n    # Set seaborn's dark grid style\n    sns.set_theme(style=\"darkgrid\")\n\n    # Use seaborn to create the plots\n    g = sns.FacetGrid(df, col=\"example\", hue=\"cluster\", col_wrap=ncols)\n\n    # Plot the kernel density estimation plots\n    g.map(sns.kdeplot, \"x0\", multiple=\"layer\", fill=True)\n\n    # Get a flattened view of the axes array\n    g_axes = g.axes.reshape(-1)\n\n    # Determine the height of the rugs in the rug plot to 5% of total height\n    rug_height = g_axes[0].get_ylim()[1] * 0.05\n\n    # Plot the rug markers below the kde plots\n    g.map(sns.rugplot, \"x0\", height=rug_height)\n\n    # Set titles\n    for ax, t in zip(g_axes, et):\n        ax.set_title(t)\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_2d","title":"plot_examples_2d","text":"<pre><code>def plot_examples_2d(\n    *ets, pmargin: float = 0.1, ncols: int = 3, clusters_field: str = \"clusters\"\n):\n    \"\"\"Plot the 2D examples given in the ets parameter.\"\"\"\n\n    # Get examples\n    ex = ets[0::2]\n    # Get titles\n    et = ets[1::2]\n\n    df = clusters2df(*ex, clusters_field=clusters_field)\n\n    # Get limits in each dimension\n    xmaxs, xmins = get_plot_lims(df, pmargin=pmargin)\n\n    # Set seaborn's dark grid style\n    sns.set_theme(style=\"darkgrid\")\n\n    # Use seaborn to create the plots\n    g = sns.FacetGrid(\n        df,\n        col=\"example\",\n        hue=\"cluster\",\n        xlim=(xmins.iloc[0], xmaxs.iloc[0]),\n      
  ylim=(xmins.iloc[1], xmaxs.iloc[1]),\n        aspect=1,\n        col_wrap=ncols,\n    )\n\n    g.map(sns.scatterplot, \"x0\", \"x1\", s=10)\n\n    # Set the plot titles and x, y labels\n    for ax, t in zip(g.axes, et):\n        ax.set_title(t)\n        ax.set_xlabel(\"x\")\n        ax.set_ylabel(\"y\")\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_3d","title":"plot_examples_3d","text":"<pre><code>def plot_examples_3d(\n    *ets,\n    pmargin: float = 0.1,\n    ncols: int = 3,\n    side=350,\n    clusters_field: str = \"clusters\",\n):\n    \"\"\"Plot the 3D examples given in the ets parameter.\"\"\"\n\n    # Get examples\n    ex = ets[0::2]\n    # Get titles\n    et = ets[1::2]\n\n    # Number of plots and number of rows in combined plot\n    num_plots = len(ex)\n    nrows = max(1, int(np.ceil(num_plots / ncols)))\n    blank_plots = nrows * ncols - num_plots\n\n    df = clusters2df(*ex, clusters_field=clusters_field)\n\n    # Get limits in each dimension\n    xmaxs, xmins = get_plot_lims(df, pmargin=pmargin)\n\n    # Reset to default Matplotlib style, to avoid seaborn interference\n    sns.reset_orig()\n\n    # To convert inches to pixels afterwards\n    px = 1 / plt.rcParams[\"figure.dpi\"]  # pixel in inches\n\n    # Use Matplotlib to create the plots\n    _, axs = plt.subplots(\n        nrows,\n        ncols,\n        figsize=(side * px * ncols, side * px * nrows),\n        subplot_kw=dict(projection=\"3d\"),\n    )\n    axs = axs.reshape(-1)\n    for ax, e, t in zip(axs, ex, et):\n        ax.set_title(t, fontsize=10)\n        ax.set_xlim(xmins.iloc[0], xmaxs.iloc[0])\n        ax.set_ylim(xmins.iloc[1], xmaxs.iloc[1])\n        ax.set_zlim(xmins.iloc[2], xmaxs.iloc[2])\n        ax.set_xlabel(\"$x$\", labelpad=-2)\n        ax.set_ylabel(\"$y$\", labelpad=-2)\n        ax.set_zlabel(\"$z$\", labelpad=-2)\n        ax.tick_params(labelsize=8, pad=-2)\n        ax.scatter(\n            e.points[:, 0],\n            e.points[:, 1],\n     
       e.points[:, 2],\n            c=e.clusters,\n            depthshade=False,\n            edgecolor=\"black\",\n            linewidths=0.2,\n        )\n\n    # Remaining plots are left blank\n    for ax in axs[len(ex) : len(ex) + blank_plots]:\n        ax.set_axis_off()\n        ax.set_facecolor(color=\"white\")\n        ax.patch.set_alpha(0)\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_nd","title":"plot_examples_nd","text":"<pre><code>def plot_examples_nd(\n    ex: Clusters, t: str, pmargin: float = 0.1, clusters_field: str = \"clusters\"\n):\n    \"\"\"Plot the nD example given in the ex parameter.\"\"\"\n\n    # How many dimensions?\n    nd = ex.points.shape[1]\n\n    df = clusters2df(ex, clusters_field=clusters_field)\n\n    # Get limits in each dimension\n    xmaxs, xmins = get_plot_lims(df, pmargin=pmargin)\n\n    # Set seaborn's dark grid style\n    sns.set_theme(style=\"darkgrid\")\n\n    # Create pairwise plots with nothing on the diagonal\n    g = sns.PairGrid(df.iloc[:, :-1], hue=\"cluster\", palette=\"deep\")\n    g.map_offdiag(sns.scatterplot, s=10)\n    g.figure.suptitle(t, y=1)\n\n    # Decorate plot\n    for i in range(nd):\n        for j in range(nd):\n            if i == j:\n                # Set the x labels in the diagonal plots\n                xycoord = (xmaxs.iloc[i] + xmins.iloc[i]) / 2\n                g.axes[i, i].text(\n                    xycoord, xycoord, f\"$x{i}$\", fontsize=20, ha=\"center\", va=\"center\"\n                )\n            else:\n                # Set appropriate plot intervals and aspect ratio\n                g.axes[i, j].set_xlim([xmins.iloc[j], xmaxs.iloc[j]])\n                g.axes[i, j].set_ylim([xmins.iloc[i], xmaxs.iloc[i]])\n                g.axes[i, j].set_aspect(1)\n</code></pre> <p>Total running time of the script: ( 0 minutes  0.005 seconds)</p> <p> Download Python source code: plot_functions.py</p> <p> Download Jupyter notebook: plot_functions.ipynb</p> <p>Gallery 
generated by mkdocs-gallery</p>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"pyclugen","text":"<p>pyclugen is Python package for generating multidimensional clusters. Each cluster is supported by a line segment, the position, orientation and length of which guide where the respective points are placed. The <code>clugen()</code> function is provided for this purpose, as well as a number of auxiliary functions, used internally and modularly by <code>clugen()</code>. Users can swap these auxiliary functions by their own customized versions, fine-tuning their cluster generation strategies, or even use them as the basis for their own generation algorithms.</p>"},{"location":"#installation","title":"Installation","text":"<p>Install from PyPI:</p> <pre><code>pip install --upgrade pip\npip install pyclugen\n</code></pre> <p>Or directly from GitHub:</p> <pre><code>pip install --upgrade pip\npip install git+https://github.com/clugen/pyclugen.git#egg=pyclugen\n</code></pre>"},{"location":"#quick-start","title":"Quick start","text":"<pre><code>from pyclugen import clugen\nimport matplotlib.pyplot as plt\n</code></pre> <pre><code>out2 = clugen(2, 4, 400, [1, 0], 0.4, [50, 10], 20, 1, 2)\nplt.scatter(out2.points[:, 0], out2.points[:, 1], c=out2.clusters)\nplt.show()\n</code></pre> <pre><code>out3 = clugen(3, 5, 10000, [0.5, 0.5, 0.5], 0.2, [10, 10, 10], 10, 1, 2)\nfig = plt.figure()\nax = fig.add_subplot(projection=\"3d\")\nax.scatter(out3.points[:, 0], out3.points[:, 1], out3.points[:, 2], c=out3.clusters)\nplt.show()\n</code></pre>"},{"location":"#further-reading","title":"Further reading","text":"<p>The clugen algorithm and its several implementations are detailed in the following reference (please cite it if you use this software):</p> <ul> <li>Fachada, N. &amp; de Andrade, D. (2023). Generating multidimensional clusters   with support lines. Knowledge-Based Systems, 277, 110836.   
https://doi.org/10.1016/j.knosys.2023.110836   (arXiv preprint)</li> </ul>"},{"location":"#also-in-this-documentation","title":"Also in this documentation","text":"<ul> <li>Theory: the clugen algorithm in detail</li> <li>Detailed usage examples</li> <li>Reference</li> <li>Developing this package</li> </ul>"},{"location":"dev/","title":"Development","text":""},{"location":"dev/#installing-for-development-andor-improving-the-package","title":"Installing for development and/or improving the package","text":"<pre><code>$ git clone https://github.com/clugen/pyclugen.git\n$ cd pyclugen\n$ python -m venv env\n$ source env/bin/activate\n$ pip install -e .[dev]\n$ pre-commit install\n</code></pre> <p>On Windows replace <code>source env/bin/activate</code> with <code>. env\\Scripts\\activate</code>.</p>"},{"location":"dev/#run-tests","title":"Run tests","text":"<p>Tests can be executed with the following command:</p> <pre><code>$ pytest\n</code></pre> <p>The previous command runs the tests at <code>normal</code> level by default. This test level can also be specified explicitly:</p> <pre><code>$ pytest --test-level=normal\n</code></pre> <p>There are four test levels, from fastest to slowest (i.e., from less thorough to more exhaustive): <code>fast</code>, <code>ci</code>, <code>normal</code> and <code>full</code>. The <code>fast</code> level tests all functions using typical parameters, just to check if everything is working. The <code>ci</code> level performs the minimal amount of testing that yields complete test coverage. Beyond complete coverage, the <code>normal</code> and <code>full</code> levels also test increasing combinations of parameters and PRNG seeds, which may be important to root out rare corner cases. 
Note that the <code>full</code> level can be extremely slow.</p> <p>To generate a test coverage report, run pytest as follows:</p> <pre><code>$ pytest --cov=pyclugen --cov-report=html --test-level=ci\n</code></pre>"},{"location":"dev/#build-docs","title":"Build docs","text":"<p>Considering we're in the <code>pyclugen</code> folder, run the following commands:</p> <pre><code>$ cd docs\n$ mkdocs build\n</code></pre> <p>The generated documentation will be placed in <code>docs/site</code>. Alternatively, the documentation can be generated and served locally with:</p> <pre><code>$ mkdocs serve\n</code></pre>"},{"location":"dev/#code-style","title":"Code style","text":"<p>Code style is enforced with flake8 (and a number of plugins), black, and isort. Some highlights include, but are not limited to:</p> <ul> <li>Encoding: UTF-8</li> <li>Indentation: 4 spaces (no tabs)</li> <li>Line size limit: 88 chars</li> <li>Newlines: Unix style, i.e. LF or \\n</li> </ul>"},{"location":"reference/","title":"Reference","text":"<p>Various functions for multidimensional cluster generation in Python.</p> <p>Note that:</p> <ol> <li><code>clugen()</code> is the main function of the pyclugen    package, and possibly the only function most users will need.</li> <li>Functions which accept <code>rng</code> as the last parameter are stochastic. 
Thus, in    order to obtain the same result on separate invocations of these functions,    pass them an instance of same pseudo-random number    <code>Generator</code> initialized with the same seed.</li> </ol>"},{"location":"reference/#pyclugen.Clusters","title":"Clusters","text":"<p>             Bases: <code>NamedTuple</code></p> <p>Read-only container for results returned by <code>clugen()</code>.</p> <p>The symbols presented in the instances variable below have the following meanings:</p> <ul> <li>\\(n\\) : Number of dimensions.</li> <li>\\(p\\) : Number of points.</li> <li>\\(c\\) : Number of clusters.</li> </ul> Source code in <code>pyclugen/main.py</code> <pre><code>class Clusters(NamedTuple):\n    r\"\"\"Read-only container for results returned by [`clugen()`][pyclugen.main.clugen].\n\n    The symbols presented in the instances variable below have the following\n    meanings:\n\n    - $n$ : Number of dimensions.\n    - $p$ : Number of points.\n    - $c$ : Number of clusters.\n    \"\"\"\n\n    points: NDArray\n    r\"\"\"$p \\times n$ matrix containing the generated points for all clusters.\"\"\"\n\n    clusters: NDArray\n    r\"\"\"Vector of size $p$ indicating the cluster each point in `points`\n    belongs to.\"\"\"\n\n    projections: NDArray\n    r\"\"\"$p \\times n$ matrix with the point projections on the cluster-supporting\n    lines.\"\"\"\n\n    sizes: NDArray\n    r\"\"\"Vector of size $c$ with the number of points in each cluster.\"\"\"\n\n    centers: NDArray\n    r\"\"\"$c \\times n$ matrix with the coordinates of the cluster centers.\"\"\"\n\n    directions: NDArray\n    r\"\"\"$c \\times n$ matrix with the direction of each cluster-supporting line.\"\"\"\n\n    angles: NDArray\n    r\"\"\"Vector of size $c$ with the angles between the cluster-supporting lines and\n    the main direction.\"\"\"\n\n    lengths: NDArray\n    r\"\"\"Vector of size $c$ with the lengths of the cluster-supporting 
lines.\"\"\"\n</code></pre>"},{"location":"reference/#pyclugen.Clusters.angles","title":"angles  <code>instance-attribute</code>","text":"<pre><code>angles: NDArray\n</code></pre> <p>Vector of size \\(c\\) with the angles between the cluster-supporting lines and the main direction.</p>"},{"location":"reference/#pyclugen.Clusters.centers","title":"centers  <code>instance-attribute</code>","text":"<pre><code>centers: NDArray\n</code></pre> <p>\\(c \\times n\\) matrix with the coordinates of the cluster centers.</p>"},{"location":"reference/#pyclugen.Clusters.clusters","title":"clusters  <code>instance-attribute</code>","text":"<pre><code>clusters: NDArray\n</code></pre> <p>Vector of size \\(p\\) indicating the cluster each point in <code>points</code> belongs to.</p>"},{"location":"reference/#pyclugen.Clusters.directions","title":"directions  <code>instance-attribute</code>","text":"<pre><code>directions: NDArray\n</code></pre> <p>\\(c \\times n\\) matrix with the direction of each cluster-supporting line.</p>"},{"location":"reference/#pyclugen.Clusters.lengths","title":"lengths  <code>instance-attribute</code>","text":"<pre><code>lengths: NDArray\n</code></pre> <p>Vector of size \\(c\\) with the lengths of the cluster-supporting lines.</p>"},{"location":"reference/#pyclugen.Clusters.points","title":"points  <code>instance-attribute</code>","text":"<pre><code>points: NDArray\n</code></pre> <p>\\(p \\times n\\) matrix containing the generated points for all clusters.</p>"},{"location":"reference/#pyclugen.Clusters.projections","title":"projections  <code>instance-attribute</code>","text":"<pre><code>projections: NDArray\n</code></pre> <p>\\(p \\times n\\) matrix with the point projections on the cluster-supporting lines.</p>"},{"location":"reference/#pyclugen.Clusters.sizes","title":"sizes  <code>instance-attribute</code>","text":"<pre><code>sizes: NDArray\n</code></pre> <p>Vector of size \\(c\\) with the number of points in each 
cluster.</p>"},{"location":"reference/#pyclugen.angle_btw","title":"angle_btw","text":"<pre><code>angle_btw(v1: NDArray, v2: NDArray) -&gt; float\n</code></pre> <p>Angle between two \\(n\\)-dimensional vectors.</p> <p>Typically, the angle between two vectors <code>v1</code> and <code>v2</code> can be obtained with:</p> <pre><code>arccos(dot(u, v) / (norm(u) * norm(v)))\n</code></pre> <p>However, this approach is numerically unstable. The version provided here is numerically stable and based on the AngleBetweenVectors Julia package by Jeffrey Sarnoff (MIT license), implementing an algorithm provided by Prof. W. Kahan in these notes (see page 15).</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array, degrees\n&gt;&gt;&gt; from pyclugen import angle_btw\n&gt;&gt;&gt; v1 = array([1.0, 1.0, 1.0, 1.0])\n&gt;&gt;&gt; v2 = array([1.0, 0.0, 0.0, 0.0])\n&gt;&gt;&gt; degrees(angle_btw(v1, v2))\n60.00000000000001\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>v1</code> <code>NDArray</code> <p>First vector.</p> required <code>v2</code> <code>NDArray</code> <p>Second vector.</p> required <p>Returns:</p> Type Description <code>float</code> <p>Angle between <code>v1</code> and <code>v2</code> in radians.</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def angle_btw(v1: NDArray, v2: NDArray) -&gt; float:\n    r\"\"\"Angle between two $n$-dimensional vectors.\n\n    Typically, the angle between two vectors `v1` and `v2` can be obtained with:\n\n    ```python\n    arccos(dot(u, v) / (norm(u) * norm(v)))\n    ```\n\n    However, this approach is numerically unstable. The version provided here is\n    numerically stable and based on the\n    [AngleBetweenVectors](https://github.com/JeffreySarnoff/AngleBetweenVectors.jl)\n    Julia package by Jeffrey Sarnoff (MIT license), implementing an algorithm\n    provided by Prof. W. 
Kahan in\n    [these notes](https://people.eecs.berkeley.edu/~wkahan/MathH110/Cross.pdf)\n    (see page 15).\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array, degrees\n        &gt;&gt;&gt; from pyclugen import angle_btw\n        &gt;&gt;&gt; v1 = array([1.0, 1.0, 1.0, 1.0])\n        &gt;&gt;&gt; v2 = array([1.0, 0.0, 0.0, 0.0])\n        &gt;&gt;&gt; degrees(angle_btw(v1, v2))\n        60.00000000000001\n\n    Args:\n      v1: First vector.\n      v2: Second vector.\n\n    Returns:\n      Angle between `v1` and `v2` in radians.\n    \"\"\"\n    u1 = v1 / norm(v1)\n    u2 = v2 / norm(v2)\n\n    y = u1 - u2\n    x = u1 + u2\n\n    return 2 * arctan(norm(y) / norm(x))\n</code></pre>"},{"location":"reference/#pyclugen.angle_deltas","title":"angle_deltas","text":"<pre><code>angle_deltas(\n    num_clusters: int, angle_disp: float, rng: Generator = _default_rng\n) -&gt; NDArray\n</code></pre> <p>Get angles between average cluster direction and cluster-supporting lines.</p> <p>Determine the angles between the average cluster direction and the cluster-supporting lines. These angles are obtained from a wrapped normal distribution ( \\(\\mu=0\\), \\(\\sigma=\\)<code>angle_disp</code>) with support in the interval \\(\\left[-\\pi/2,\\pi/2\\right]\\). 
Note this is different from the standard wrapped normal distribution, the support of which is given by the interval \\(\\left[-\\pi,\\pi\\right]\\).</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import angle_deltas\n&gt;&gt;&gt; from numpy import degrees, pi\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; a_rad = angle_deltas(4, pi/8, rng=prng) # Angle dispersion of 22.5 degrees\n&gt;&gt;&gt; a_rad\narray([-0.38842705, -0.14442948,  0.50576707,  0.07617358])\n&gt;&gt;&gt; degrees(a_rad) # Show angle deltas in degrees\narray([-22.25523038,  -8.27519966,  28.97831838,   4.36442443])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>angle_disp</code> <code>float</code> <p>Angle dispersion, in radians.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Angles between the average cluster direction and the cluster-supporting lines, given in radians in the interval \\(\\left[-\\pi/2,\\pi/2\\right]\\).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def angle_deltas(\n    num_clusters: int, angle_disp: float, rng: Generator = _default_rng\n) -&gt; NDArray:\n    r\"\"\"Get angles between average cluster direction and cluster-supporting lines.\n\n    Determine the angles between the average cluster direction and the\n    cluster-supporting lines. These angles are obtained from a wrapped normal\n    distribution ( $\\mu=0$, $\\sigma=$`angle_disp`) with support in the interval\n    $\\left[-\\pi/2,\\pi/2\\right]$. 
Note this is different from the standard\n    wrapped normal distribution, the support of which is given by the interval\n    $\\left[-\\pi,\\pi\\right]$.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import angle_deltas\n        &gt;&gt;&gt; from numpy import degrees, pi\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; a_rad = angle_deltas(4, pi/8, rng=prng) # Angle dispersion of 22.5 degrees\n        &gt;&gt;&gt; a_rad\n        array([-0.38842705, -0.14442948,  0.50576707,  0.07617358])\n        &gt;&gt;&gt; degrees(a_rad) # Show angle deltas in degrees\n        array([-22.25523038,  -8.27519966,  28.97831838,   4.36442443])\n\n    Args:\n      num_clusters: Number of clusters.\n      angle_disp: Angle dispersion, in radians.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Angles between the average cluster direction and the cluster-supporting\n        lines, given in radians in the interval $\\left[-\\pi/2,\\pi/2\\right]$.\n    \"\"\"\n    # Get random angle differences using the normal distribution\n    angles = angle_disp * rng.normal(size=num_clusters)\n\n    # Reduce angle differences to the interval [-\u03c0, \u03c0]\n    angles = arctan2(sin(angles), cos(angles))\n\n    # Make sure angle differences are within interval [-\u03c0/2, \u03c0/2]\n    return where(abs(angles) &gt; pi / 2, angles - sign(angles) * pi / 2, angles)\n</code></pre>"},{"location":"reference/#pyclugen.clucenters","title":"clucenters","text":"<pre><code>clucenters(\n    num_clusters: int,\n    clu_sep: NDArray,\n    clu_offset: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Determine cluster centers using the uniform distribution.</p> <p>The number of clusters (<code>num_clusters</code>) and the average cluster separation (<code>clu_sep</code>) are taken into account.</p> <p>More specifically, let \\(c=\\)<code>num_clusters</code>, 
\\(\\mathbf{s}=\\)<code>clu_sep.reshape(-1,1)</code>, \\(\\mathbf{o}=\\)<code>clu_offset.reshape(-1,1)</code>, \\(n=\\)<code>clu_sep.size</code> (i.e., number of dimensions). Cluster centers are obtained according to the following equation:</p> \\[ \\mathbf{C}=c\\mathbf{U} \\cdot \\operatorname{diag}(\\mathbf{s}) +     \\mathbf{1}\\,\\mathbf{o}^T \\] <p>where \\(\\mathbf{C}\\) is the \\(c \\times n\\) matrix of cluster centers, \\(\\mathbf{U}\\) is an \\(c \\times n\\) matrix of random values drawn from the uniform distribution between -0.5 and 0.5, and \\(\\mathbf{1}\\) is an \\(c \\times 1\\) vector with all entries equal to 1.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clucenters\n&gt;&gt;&gt; from numpy import array\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; clucenters(3, array([30,10]), array([-50,50]), rng=prng)\narray([[-33.58833231,  36.61463056],\n       [-75.16761145,  40.53115432],\n       [-79.1684689 ,  59.3628352 ]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>clu_sep</code> <code>NDArray</code> <p>Average cluster separation ( \\(n \\times 1\\) vector).</p> required <code>clu_offset</code> <code>NDArray</code> <p>Cluster offsets ( \\(n \\times 1\\) vector).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>A \\(c \\times n\\) matrix containing the cluster centers.</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clucenters(\n    num_clusters: int,\n    clu_sep: NDArray,\n    clu_offset: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Determine cluster centers using the uniform distribution.\n\n    The number of clusters (`num_clusters`) and the average cluster separation\n    
(`clu_sep`) are taken into account.\n\n    More specifically, let $c=$`num_clusters`, $\\mathbf{s}=$`clu_sep.reshape(-1,1)`,\n    $\\mathbf{o}=$`clu_offset.reshape(-1,1)`, $n=$`clu_sep.size` (i.e., number of\n    dimensions). Cluster centers are obtained according to the following equation:\n\n    $$\n    \\mathbf{C}=c\\mathbf{U} \\cdot \\operatorname{diag}(\\mathbf{s}) +\n        \\mathbf{1}\\,\\mathbf{o}^T\n    $$\n\n    where $\\mathbf{C}$ is the $c \\times n$ matrix of cluster centers,\n    $\\mathbf{U}$ is an $c \\times n$ matrix of random values drawn from the\n    uniform distribution between -0.5 and 0.5, and $\\mathbf{1}$ is an $c \\times\n    1$ vector with all entries equal to 1.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clucenters\n        &gt;&gt;&gt; from numpy import array\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; clucenters(3, array([30,10]), array([-50,50]), rng=prng)\n        array([[-33.58833231,  36.61463056],\n               [-75.16761145,  40.53115432],\n               [-79.1684689 ,  59.3628352 ]])\n\n    Args:\n      num_clusters: Number of clusters.\n      clu_sep: Average cluster separation ( $n \\times 1$ vector).\n      clu_offset: Cluster offsets ( $n \\times 1$ vector).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n        A $c \\times n$ matrix containing the cluster centers.\n    \"\"\"\n    # Obtain a num_clusters x num_dims matrix of uniformly distributed values\n    # between -0.5 and 0.5 representing the relative cluster centers\n    ctr_rel = rng.random((num_clusters, clu_sep.size)) - 0.5\n\n    return num_clusters * (ctr_rel @ diag(clu_sep)) + clu_offset\n</code></pre>"},{"location":"reference/#pyclugen.clugen","title":"clugen","text":"<pre><code>clugen(\n    num_dims: int,\n    num_clusters: int,\n    num_points: int,\n    direction: ArrayLike,\n    angle_disp: float,\n    cluster_sep: 
ArrayLike,\n    llength: float,\n    llength_disp: float,\n    lateral_disp: float,\n    allow_empty: bool = False,\n    cluster_offset: Optional[ArrayLike] = None,\n    proj_dist_fn: str | Callable[[float, int, Generator], NDArray] = \"norm\",\n    point_dist_fn: str\n    | Callable[\n        [NDArray, float, float, NDArray, NDArray, Generator], NDArray\n    ] = \"n-1\",\n    clusizes_fn: Callable[[int, int, bool, Generator], NDArray]\n    | ArrayLike = clusizes,\n    clucenters_fn: Callable[[int, NDArray, NDArray, Generator], NDArray]\n    | ArrayLike = clucenters,\n    llengths_fn: Callable[[int, float, float, Generator], NDArray]\n    | ArrayLike = llengths,\n    angle_deltas_fn: Callable[[int, float, Generator], NDArray]\n    | ArrayLike = angle_deltas,\n    rng: int | Generator = _default_rng,\n) -&gt; Clusters\n</code></pre> <p>Generate multidimensional clusters.</p> <p>Tip</p> <p>This is the main function of the pyclugen package, and possibly the only function most users will need.</p>"},{"location":"reference/#pyclugen.clugen--examples","title":"Examples:","text":"<pre><code>&gt;&gt;&gt; import matplotlib.pyplot as plt\n&gt;&gt;&gt; from pyclugen import clugen\n&gt;&gt;&gt; from numpy import pi\n&gt;&gt;&gt; out = clugen(2, 5, 10000, [1, 0.5], pi/16, [10, 40], 10, 1, 2, rng=321)\n&gt;&gt;&gt; out.centers # What are the cluster centers?\narray([[ 20.02876212,  36.59611434],\n       [-15.60290734, -26.52169579],\n       [ 23.09775166,  91.66309916],\n       [ -5.76816015,  54.9775074 ],\n       [ -4.64224681,  78.40990876]])\n&gt;&gt;&gt; plt.scatter(out.points[:,0],\n...             out.points[:,1],\n...             c=out.clusters) # doctest: +SKIP\n&gt;&gt;&gt; plt.show() # doctest: +SKIP\n</code></pre> <p>Note</p> <p>In the descriptions below, the terms \"average\" and \"dispersion\" refer to measures of central tendency and statistical dispersion, respectively. 
Their exact meaning depends on several optional arguments.</p> <p>Parameters:</p> Name Type Description Default <code>num_dims</code> <code>int</code> <p>Number of dimensions.</p> required <code>num_clusters</code> <code>int</code> <p>Number of clusters to generate.</p> required <code>num_points</code> <code>int</code> <p>Total number of points to generate.</p> required <code>direction</code> <code>ArrayLike</code> <p>Average direction of the cluster-supporting lines. Can be a vector of length <code>num_dims</code> (same direction for all clusters) or a matrix of size <code>num_clusters</code> x <code>num_dims</code> (one direction per cluster).</p> required <code>angle_disp</code> <code>float</code> <p>Angle dispersion of cluster-supporting lines (radians).</p> required <code>cluster_sep</code> <code>ArrayLike</code> <p>Average cluster separation in each dimension (vector of size <code>num_dims</code>).</p> required <code>llength</code> <code>float</code> <p>Average length of cluster-supporting lines.</p> required <code>llength_disp</code> <code>float</code> <p>Length dispersion of cluster-supporting lines.</p> required <code>lateral_disp</code> <code>float</code> <p>Cluster lateral dispersion, i.e., dispersion of points from their projection on the cluster-supporting line.</p> required <code>allow_empty</code> <code>bool</code> <p>Allow empty clusters? <code>False</code> by default.</p> <code>False</code> <code>cluster_offset</code> <code>Optional[ArrayLike]</code> <p>Offset to add to all cluster centers (vector of size <code>num_dims</code>). 
By default the offset will be equal to <code>numpy.zeros(num_dims)</code>.</p> <code>None</code> <code>proj_dist_fn</code> <code>str | Callable[[float, int, Generator], NDArray]</code> <p>Distribution of point projections along cluster-supporting lines, with three possible values:</p> <ul> <li><code>\"norm\"</code> (default): Distribute point projections along lines using a normal   distribution (\u03bc=line center, \u03c3=<code>llength/6</code>).</li> <li><code>\"unif\"</code>: Distribute points uniformly along the line.</li> <li>User-defined function, which accepts three parameters, line length (<code>float</code>),   number of points (<code>int</code>), and an instance of   <code>Generator</code>,   and returns an array containing the distance of each point projection to   the center of the line. For example, the <code>\"norm\"</code> option roughly corresponds   to <code>lambda l, n, rg: l * rg.normal(size=n) / 6</code>.</li> </ul> <code>'norm'</code> <code>point_dist_fn</code> <code>str | Callable[[NDArray, float, float, NDArray, NDArray, Generator], NDArray]</code> <p>Controls how the final points are created from their projections on the cluster-supporting lines, with three possible values:</p> <ul> <li><code>\"n-1\"</code> (default): Final points are placed on a hyperplane orthogonal to   the cluster-supporting line, centered at each point's projection, using the   normal distribution (\u03bc=0, \u03c3=<code>lateral_disp</code>). This is done by the   <code>clupoints_n_1()</code> function.</li> <li><code>\"n\"</code>: Final points are placed around their projection on the   cluster-supporting line using the normal distribution (\u03bc=0,   \u03c3=<code>lateral_disp</code>). 
This is done by the   <code>clupoints_n()</code> function.</li> <li>User-defined function: The user can specify a custom point placement   strategy by passing a function with the same signature as   <code>clupoints_n_1()</code> and   <code>clupoints_n()</code>.</li> </ul> <code>'n-1'</code> <code>clusizes_fn</code> <code>Callable[[int, int, bool, Generator], NDArray] | ArrayLike</code> <p>Distribution of cluster sizes. By default, cluster sizes are determined by the <code>clusizes()</code> function, which uses the normal distribution (\u03bc=<code>num_points</code>/<code>num_clusters</code>, \u03c3=\u03bc/3), and assures that the final cluster sizes add up to <code>num_points</code>. This parameter allows the user to specify a custom function for this purpose, which must follow <code>clusizes()</code> signature. Note that custom functions are not required to strictly obey the <code>num_points</code> parameter. Alternatively, the user can specify an array of cluster sizes directly.</p> <code>clusizes</code> <code>clucenters_fn</code> <code>Callable[[int, NDArray, NDArray, Generator], NDArray] | ArrayLike</code> <p>Distribution of cluster centers. By default, cluster centers are determined by the <code>clucenters()</code> function, which uses the uniform distribution, and takes into account the <code>num_clusters</code> and <code>cluster_sep</code> parameters for generating well-distributed cluster centers. This parameter allows the user to specify a custom function for this purpose, which must follow <code>clucenters()</code> signature. Alternatively, the user can specify a matrix of size <code>num_clusters</code> x <code>num_dims</code> with the exact cluster centers.</p> <code>clucenters</code> <code>llengths_fn</code> <code>Callable[[int, float, float, Generator], NDArray] | ArrayLike</code> <p>Distribution of line lengths. 
By default, the lengths of cluster-supporting lines are determined by the <code>llengths()</code> function, which uses the folded normal distribution (\u03bc=<code>llength</code>, \u03c3=<code>llength_disp</code>). This parameter allows the user to specify a custom function for this purpose, which must follow <code>llengths()</code> signature. Alternatively, the user can specify an array of line lengths directly.</p> <code>llengths</code> <code>angle_deltas_fn</code> <code>Callable[[int, float, Generator], NDArray] | ArrayLike</code> <p>Distribution of line angle differences with respect to <code>direction</code>. By default, the angles between <code>direction</code> and the direction of cluster-supporting lines are determined by the <code>angle_deltas()</code> function, which uses the wrapped normal distribution (\u03bc=0, \u03c3=<code>angle_disp</code>) with support in the interval [-\u03c0/2, \u03c0/2]. This parameter allows the user to specify a custom function for this purpose, which must follow <code>angle_deltas()</code> signature. 
Alternatively, the user can specify an array of angle deltas directly.</p> <code>angle_deltas</code> <code>rng</code> <code>int | Generator</code> <p>The seed for the random number generator or an instance of <code>Generator</code> for reproducible executions.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>Clusters</code> <p>The generated clusters and associated information in the form of a <code>Clusters</code> object.</p> Source code in <code>pyclugen/main.py</code> <pre><code>def clugen(\n    num_dims: int,\n    num_clusters: int,\n    num_points: int,\n    direction: ArrayLike,\n    angle_disp: float,\n    cluster_sep: ArrayLike,\n    llength: float,\n    llength_disp: float,\n    lateral_disp: float,\n    allow_empty: bool = False,\n    cluster_offset: Optional[ArrayLike] = None,\n    proj_dist_fn: str | Callable[[float, int, Generator], NDArray] = \"norm\",\n    point_dist_fn: str\n    | Callable[[NDArray, float, float, NDArray, NDArray, Generator], NDArray] = \"n-1\",\n    clusizes_fn: Callable[[int, int, bool, Generator], NDArray] | ArrayLike = clusizes,\n    clucenters_fn: Callable[[int, NDArray, NDArray, Generator], NDArray]\n    | ArrayLike = clucenters,\n    llengths_fn: Callable[[int, float, float, Generator], NDArray]\n    | ArrayLike = llengths,\n    angle_deltas_fn: Callable[[int, float, Generator], NDArray]\n    | ArrayLike = angle_deltas,\n    rng: int | Generator = _default_rng,\n) -&gt; Clusters:\n    \"\"\"Generate multidimensional clusters.\n\n    !!! 
tip\n        This is the main function of the **pyclugen** package, and possibly the\n        only function most users will need.\n\n    ## Examples:\n\n        &gt;&gt;&gt; import matplotlib.pyplot as plt\n        &gt;&gt;&gt; from pyclugen import clugen\n        &gt;&gt;&gt; from numpy import pi\n        &gt;&gt;&gt; out = clugen(2, 5, 10000, [1, 0.5], pi/16, [10, 40], 10, 1, 2, rng=321)\n        &gt;&gt;&gt; out.centers # What are the cluster centers?\n        array([[ 20.02876212,  36.59611434],\n               [-15.60290734, -26.52169579],\n               [ 23.09775166,  91.66309916],\n               [ -5.76816015,  54.9775074 ],\n               [ -4.64224681,  78.40990876]])\n        &gt;&gt;&gt; plt.scatter(out.points[:,0],\n        ...             out.points[:,1],\n        ...             c=out.clusters) # doctest: +SKIP\n        &gt;&gt;&gt; plt.show() # doctest: +SKIP\n\n    ![clugen](https://user-images.githubusercontent.com/3018963/151056890-c83c9509-b40d-4ab2-a842-f2a4706344c6.png)\n\n    !!! Note\n        In the descriptions below, the terms \"average\" and \"dispersion\" refer to\n        measures of central tendency and statistical dispersion, respectively.\n        Their exact meaning depends on several optional arguments.\n\n    Args:\n      num_dims: Number of dimensions.\n      num_clusters: Number of clusters to generate.\n      num_points: Total number of points to generate.\n      direction: Average direction of the cluster-supporting lines. 
Can be a\n        vector of length `num_dims` (same direction for all clusters) or a\n        matrix of size `num_clusters` x `num_dims` (one direction per cluster).\n      angle_disp: Angle dispersion of cluster-supporting lines (radians).\n      cluster_sep: Average cluster separation in each dimension (vector of size\n        `num_dims`).\n      llength: Average length of cluster-supporting lines.\n      llength_disp: Length dispersion of cluster-supporting lines.\n      lateral_disp: Cluster lateral dispersion, i.e., dispersion of points from their\n        projection on the cluster-supporting line.\n      allow_empty: Allow empty clusters? `False` by default.\n      cluster_offset: Offset to add to all cluster centers (vector of size `num_dims`).\n        By default the offset will be equal to `numpy.zeros(num_dims)`.\n      proj_dist_fn: Distribution of point projections along cluster-supporting lines,\n        with three possible values:\n\n        - `\"norm\"` (default): Distribute point projections along lines using a normal\n          distribution (\u03bc=_line center_, \u03c3=`llength/6`).\n        - `\"unif\"`: Distribute points uniformly along the line.\n        - User-defined function, which accepts three parameters, line length (`float`),\n          number of points (`int`), and an instance of\n          [`Generator`](https://numpy.org/doc/stable/reference/random/generator.html?highlight=generator#numpy.random.Generator),\n          and returns an array containing the distance of each point projection to\n          the center of the line. 
For example, the `\"norm\"` option roughly corresponds\n          to `lambda l, n, rg: l * rg.normal(size=n) / 6`.\n\n      point_dist_fn: Controls how the final points are created from their projections\n        on the cluster-supporting lines, with three possible values:\n\n        - `\"n-1\"` (default): Final points are placed on a hyperplane orthogonal to\n          the cluster-supporting line, centered at each point's projection, using the\n          normal distribution (\u03bc=0, \u03c3=`lateral_disp`). This is done by the\n          [`clupoints_n_1()`][pyclugen.module.clupoints_n_1] function.\n        - `\"n\"`: Final points are placed around their projection on the\n          cluster-supporting line using the normal distribution (\u03bc=0,\n          \u03c3=`lateral_disp`). This is done by the\n          [`clupoints_n()`][pyclugen.module.clupoints_n] function.\n        - User-defined function: The user can specify a custom point placement\n          strategy by passing a function with the same signature as\n          [`clupoints_n_1()`][pyclugen.module.clupoints_n_1] and\n          [`clupoints_n()`][pyclugen.module.clupoints_n].\n\n      clusizes_fn: Distribution of cluster sizes. By default, cluster sizes are\n        determined by the [`clusizes()`][pyclugen.module.clusizes] function, which\n        uses the normal distribution (\u03bc=`num_points`/`num_clusters`, \u03c3=\u03bc/3), and\n        assures that the final cluster sizes add up to `num_points`. This parameter\n        allows the user to specify a custom function for this purpose, which must\n        follow [`clusizes()`][pyclugen.module.clusizes] signature. Note that custom\n        functions are not required to strictly obey the `num_points` parameter.\n        Alternatively, the user can specify an array of cluster sizes directly.\n      clucenters_fn: Distribution of cluster centers. 
By default, cluster centers\n        are determined by the [`clucenters()`][pyclugen.module.clucenters] function,\n        which uses the uniform distribution, and takes into account the `num_clusters`\n        and `cluster_sep` parameters for generating well-distributed cluster centers.\n        This parameter allows the user to specify a custom function for this purpose,\n        which must follow [`clucenters()`][pyclugen.module.clucenters] signature.\n        Alternatively, the user can specify a matrix of size `num_clusters` x\n        `num_dims` with the exact cluster centers.\n      llengths_fn: Distribution of line lengths. By default, the lengths of\n        cluster-supporting lines are determined by the\n        [`llengths()`][pyclugen.module.llengths] function, which uses the folded\n        normal distribution (\u03bc=`llength`, \u03c3=`llength_disp`). This parameter allows\n        the user to specify a custom function for this purpose, which must follow\n        [`llengths()`][pyclugen.module.llengths] signature. Alternatively, the user\n        can specify an array of line lengths directly.\n      angle_deltas_fn: Distribution of line angle differences with respect to\n        `direction`. By default, the angles between `direction` and the direction of\n        cluster-supporting lines are determined by the\n        [`angle_deltas()`][pyclugen.module.angle_deltas] function, which uses the\n        wrapped normal distribution (\u03bc=0, \u03c3=`angle_disp`) with support in the interval\n        [-\u03c0/2, \u03c0/2]. This parameter allows the user to specify a custom function for\n        this purpose, which must follow [`angle_deltas()`][pyclugen.module.angle_deltas]\n        signature. 
Alternatively, the user can specify an array of angle deltas\n        directly.\n      rng: The seed for the random number generator or an instance of\n        [`Generator`][numpy.random.Generator] for reproducible executions.\n\n    Returns:\n      The generated clusters and associated information in the form of a\n        [`Clusters`][pyclugen.main.Clusters] object.\n    \"\"\"\n    # ############### #\n    # Validate inputs #\n    # ############### #\n\n    # Check that number of dimensions is &gt; 0\n    if num_dims &lt; 1:\n        raise ValueError(\"Number of dimensions, `num_dims`, must be &gt; 0\")\n\n    # Check that number of clusters is &gt; 0\n    if num_clusters &lt; 1:\n        raise ValueError(\"Number of clusters, `num_clust`, must be &gt; 0\")\n\n    # Convert given direction into a NumPy array\n    arrdir: NDArray = asarray(direction)\n\n    # Get number of dimensions in `direction` array\n    dir_ndims = arrdir.ndim\n\n    # Is direction a vector or a matrix?\n    if dir_ndims == 1:\n        # It's a vector, let's convert it into a row matrix, since this will be\n        # useful down the road\n        arrdir = arrdir.reshape((1, -1))\n    elif dir_ndims == 2:\n        # If a matrix was given (i.e. 
a main direction is given for each cluster),\n        # check if the number of directions is the same as the number of clusters\n        dir_size_1 = arrdir.shape[0]\n        if dir_size_1 != num_clusters:\n            raise ValueError(\n                \"Number of rows in `direction` must be the same as the \"\n                + f\"number of clusters ({dir_size_1} != {num_clusters})\"\n            )\n    else:\n        # The `directions` array must be a vector or a matrix, so if we get here\n        # it means we have invalid arguments\n        raise ValueError(\n            \"`direction` must be a vector (1D array) or a matrix (2D array), \"\n            + f\"but is {dir_ndims}D\"\n        )\n\n    # Check that direction has num_dims dimensions\n    dir_size_2 = arrdir.shape[1]\n    if dir_size_2 != num_dims:\n        raise ValueError(\n            \"Length of directions in `direction` must be equal to \"\n            + f\"`num_dims` ({dir_size_2} != {num_dims})\"\n        )\n\n    # Check that directions have magnitude &gt; 0\n    dir_magnitudes = apply_along_axis(norm, 1, arrdir)\n    if any(isclose(dir_magnitudes, 0)):\n        raise ValueError(\"Directions in `direction` must have magnitude &gt; 0\")\n\n    # If allow_empty is false, make sure there are enough points to distribute\n    # by the clusters\n    if (not allow_empty) and num_points &lt; num_clusters:\n        raise ValueError(\n            f\"A total of {num_points} points is not enough for \"\n            + f\"{num_clusters} non-empty clusters\"\n        )\n\n    # Check that cluster_sep has num_dims dimensions\n    cluster_sep = asarray(cluster_sep)\n    if cluster_sep.size != num_dims:\n        raise ValueError(\n            \"Length of `cluster_sep` must be equal to `num_dims` \"\n            + f\"({cluster_sep.size} != {num_dims})\"\n        )\n\n    # If given, cluster_offset must have the correct number of dimensions,\n    # if not given then it will be a num_dims x 1 vector of zeros\n    
if cluster_offset is None:\n        cluster_offset = zeros(num_dims)\n    else:\n        cluster_offset = asarray(cluster_offset)\n        if cluster_offset.size != num_dims:\n            raise ValueError(\n                \"Length of `cluster_offset` must be equal to `num_dims` \"\n                + f\"({cluster_offset.size} != {num_dims})\"\n            )\n\n    # If the user specified rng as an int, create a proper rng object\n    rng_sel: Generator\n    if isinstance(rng, Generator):\n        rng_sel = cast(Generator, rng)\n    elif isinstance(rng, int):\n        rng_sel = Generator(PCG64(cast(int, rng)))\n    else:\n        raise ValueError(\n            f\"`rng` must be an instance of int or Generator, but is {type(rng)}\"\n        )\n\n    # Check that proj_dist_fn specifies a valid way for projecting points along\n    # cluster-supporting lines i.e., either \"norm\" (default), \"unif\" or a\n    # user-defined function\n    pointproj_fn: Callable[[float, int, Generator], NDArray]\n\n    if callable(proj_dist_fn):\n        # Use user-defined distribution; assume function accepts length of line\n        # and number of points, and returns a number of points x 1 vector\n        pointproj_fn = proj_dist_fn\n\n    elif proj_dist_fn == \"unif\":\n        # Point projections will be uniformly placed along cluster-supporting lines\n        def pointproj_fn(length, n, rg):\n            return length * rg.random(n) - length / 2\n\n    elif proj_dist_fn == \"norm\":\n        # Use normal distribution for placing point projections along cluster-supporting\n        # lines, mean equal to line center, standard deviation equal to 1/6 of line\n        # length such that the line length contains \u224899.73% of the points\n        def pointproj_fn(length, n, rg):\n            return (1.0 / 6.0) * length * rg.normal(size=n)\n\n    else:\n        raise ValueError(\n            \"`proj_dist_fn` has to be either 'norm', 'unif' or user-defined function\"\n        )\n\n    # 
Check that point_dist_fn specifies a valid way for generating points given\n    # their projections along cluster-supporting lines, i.e., either \"n-1\"\n    # (default), \"n\" or a user-defined function\n    pt_from_proj_fn: Callable[\n        [NDArray, float, float, NDArray, NDArray, Generator], NDArray\n    ]\n\n    if num_dims == 1:\n        # If 1D was specified, point projections are the points themselves\n        def pt_from_proj_fn(projs, lat_disp, length, clu_dir, clu_ctr, rng=rng_sel):\n            return projs\n\n    elif callable(point_dist_fn):\n        # Use user-defined distribution; assume function accepts point projections\n        # on the line, lateral disp., cluster direction and cluster center, and\n        # returns a num_points x num_dims matrix containing the final points\n        # for the current cluster\n        pt_from_proj_fn = point_dist_fn\n\n    elif point_dist_fn == \"n-1\":\n        # Points will be placed on a hyperplane orthogonal to the cluster-supporting\n        # line using a normal distribution centered at their intersection\n        pt_from_proj_fn = clupoints_n_1\n\n    elif point_dist_fn == \"n\":\n        # Points will be placed using a multivariate normal distribution\n        # centered at the point projection\n        pt_from_proj_fn = clupoints_n\n\n    else:\n        raise ValueError(\n            \"point_dist_fn has to be either 'n-1', 'n' or a user-defined function\"\n        )\n\n    # ############################ #\n    # Determine cluster properties #\n    # ############################ #\n\n    # Normalize main direction(s)\n    arrdir = apply_along_axis(lambda a: a / norm(a), 1, arrdir)\n\n    # If only one main direction was given, expand it for all clusters\n    if dir_ndims == 1:\n        arrdir = repeat(arrdir, num_clusters, axis=0)\n\n    # Determine cluster sizes\n    if callable(clusizes_fn):\n        cluster_sizes = clusizes_fn(num_clusters, num_points, allow_empty, rng_sel)\n    elif 
len(asarray(clusizes_fn)) == num_clusters:\n        cluster_sizes = asarray(clusizes_fn)\n    else:\n        raise ValueError(\n            \"clusizes_fn has to be either a function or a `num_clusters`-sized array\"\n        )\n\n    # Custom clusizes_fn's are not required to obey num_points, so we update\n    # it here just in case it's different from what the user specified\n    num_points = sum(cluster_sizes)\n\n    # Determine cluster centers\n    if callable(clucenters_fn):\n        cluster_centers = clucenters_fn(\n            num_clusters, cluster_sep, cluster_offset, rng_sel\n        )\n    elif asarray(clucenters_fn).shape == (num_clusters, num_dims):\n        cluster_centers = asarray(clucenters_fn)\n    else:\n        raise ValueError(\n            \"clucenters_fn has to be either a function or a matrix of size \"\n            + \"`num_clusters` x `num_dims`\"\n        )\n\n    # Determine length of lines supporting clusters\n    if callable(llengths_fn):\n        cluster_lengths = llengths_fn(num_clusters, llength, llength_disp, rng_sel)\n    elif len(asarray(llengths_fn)) == num_clusters:\n        cluster_lengths = asarray(llengths_fn)\n    else:\n        raise ValueError(\n            \"llengths_fn has to be either a function or a `num_clusters`-sized array\"\n        )\n\n    # Obtain angles between main direction and cluster-supporting lines\n    if callable(angle_deltas_fn):\n        cluster_angles = angle_deltas_fn(num_clusters, angle_disp, rng_sel)\n    elif len(asarray(angle_deltas_fn)) == num_clusters:\n        cluster_angles = asarray(angle_deltas_fn)\n    else:\n        raise ValueError(\n            \"angle_deltas_fn has to be either a function or a \"\n            + \"`num_clusters`-sized array\"\n        )\n\n    # Determine normalized cluster directions by applying the obtained angles\n    cluster_directions = apply_along_axis(\n        lambda v, a: rand_vector_at_angle(v, next(a), rng_sel),\n        1,\n        arrdir,\n        
iter(cluster_angles),\n    )\n\n    # ################################# #\n    # Determine points for each cluster #\n    # ################################# #\n\n    # Aux. vector with cumulative sum of number of points in each cluster\n    cumsum_points = concatenate((asarray([0]), cumsum(cluster_sizes)))\n\n    # Pre-allocate data structures for holding cluster info and points\n    point_clusters: NDArray = empty(\n        num_points, dtype=int32\n    )  # Cluster indices of each point\n    point_projections = empty((num_points, num_dims))  # Point projections on\n    #                                                  # cluster-supporting lines\n    points = empty((num_points, num_dims))  # Final points to be generated\n\n    # Loop through clusters and create points for each one\n    for i in range(num_clusters):\n        # Start and end indexes for points in current cluster\n        idx_start = cumsum_points[i]\n        idx_end = cumsum_points[i + 1]\n\n        # Update cluster indices of each point\n        point_clusters[idx_start:idx_end] = i\n\n        # Determine distance of point projections from the center of the line\n        ptproj_dist_fn_center = pointproj_fn(\n            cluster_lengths[i], cluster_sizes[i], rng_sel\n        )\n\n        # Determine coordinates of point projections on the line using the\n        # parametric line equation (this works since cluster direction is normalized)\n        point_projections[idx_start:idx_end, :] = points_on_line(\n            cluster_centers[i, :], cluster_directions[i, :], ptproj_dist_fn_center\n        )\n\n        # Determine points from their projections on the line\n        points[idx_start:idx_end, :] = pt_from_proj_fn(\n            point_projections[idx_start:idx_end, :],\n            lateral_disp,\n            cluster_lengths[i],\n            cluster_directions[i, :],\n            cluster_centers[i, :],\n            rng_sel,\n        )\n\n    return Clusters(\n        points,\n        
point_clusters,\n        point_projections,\n        cluster_sizes,\n        cluster_centers,\n        cluster_directions,\n        cluster_angles,\n        cluster_lengths,\n    )\n</code></pre>"},{"location":"reference/#pyclugen.clumerge","title":"clumerge","text":"<pre><code>clumerge(\n    *data: NamedTuple | Mapping[str, ArrayLike],\n    fields: tuple[str, ...] = (\"points\", \"clusters\"),\n    clusters_field: str | None = \"clusters\"\n) -&gt; dict[str, NDArray]\n</code></pre> <p>Merges the fields (specified in <code>fields</code>) of two or more <code>data</code> sets.</p> <p>Merges the fields (specified in <code>fields</code>) of two or more <code>data</code> sets (named tuples or dictionaries). The fields to be merged need to have the same number of columns. The corresponding merged field will contain the rows of the fields to be merged, and will have a common supertype.</p> <p>The <code>clusters_field</code> parameter specifies a field containing integers that identify the cluster to which each point belongs. If <code>clusters_field</code> is specified (by default it's specified as <code>\"clusters\"</code>), cluster assignments in individual datasets will be updated in the merged dataset so that clusters are considered separate. This parameter can be set to <code>None</code>, in which case no field will be considered as a special cluster assignments field.</p> <p>This function can be used to merge data sets generated with the <code>clugen()</code> function, by default merging the <code>points</code> and <code>clusters</code> fields in those data sets. It also works with arbitrary data by specifying alternative fields in the <code>fields</code> parameter. 
It can be used, for example, to merge third-party data with <code>clugen()</code>-generated data.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clugen, clumerge\n&gt;&gt;&gt; data1 = clugen(2, 5, 1000, [1, 1], 0.01, [20, 20], 14, 1.2, 1.5);\n&gt;&gt;&gt; data2 = clugen(2, 3, 450, [0.8, -0.3], 0, [25, 21], 6, 0.4, 3.5);\n&gt;&gt;&gt; data3 = clugen(2, 2, 600, [0, -0.7], 0.2, [15, 10], 1, 0.1, 5.2);\n&gt;&gt;&gt; data_merged = clumerge(data1, data2, data3)\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>*data</code> <code>NamedTuple | Mapping[str, ArrayLike]</code> <p>One or more cluster data sets whose <code>fields</code> are to be merged.</p> <code>()</code> <code>fields</code> <code>tuple[str, ...]</code> <p>Fields to be merged, which must exist in the data set given in <code>*data</code>.</p> <code>('points', 'clusters')</code> <code>clusters_field</code> <code>str | None</code> <p>Field containing the integer cluster labels. If specified, cluster assignments in individual datasets will be updated in the merged dataset so that clusters are considered separate.</p> <code>'clusters'</code> <p>Returns:</p> Type Description <code>dict[str, NDArray]</code> <p>A dictionary, where keys correspond to field names, and values to the merged numerical arrays.</p> Source code in <code>pyclugen/main.py</code> <pre><code>def clumerge(\n    *data: NamedTuple | Mapping[str, ArrayLike],\n    fields: tuple[str, ...] = (\"points\", \"clusters\"),\n    clusters_field: str | None = \"clusters\",\n) -&gt; dict[str, NDArray]:\n    r\"\"\"Merges the fields (specified in `fields`) of two or more `data` sets.\n\n    Merges the fields (specified in `fields`) of two or more `data` sets (named\n    tuples or dictionaries). The fields to be merged need to have the same\n    number of columns. 
The corresponding merged field will contain the rows of\n    the fields to be merged, and will have a common supertype.\n\n    The `clusters_field` parameter specifies a field containing integers that\n    identify the cluster to which each point belongs. If\n    `clusters_field` is specified (by default it's specified as `\"clusters\"`),\n    cluster assignments in individual datasets will be updated in the merged\n    dataset so that clusters are considered separate. This parameter can be set\n    to `None`, in which case no field will be considered as a special cluster\n    assignments field.\n\n    This function can be used to merge data sets generated with the\n    [`clugen()`][pyclugen.main.clugen] function, by default merging the\n    `points` and `clusters` fields in those data sets. It also works with\n    arbitrary data by specifying alternative fields in the `fields` parameter.\n    It can be used, for example, to merge third-party data with\n    [`clugen()`][pyclugen.main.clugen]-generated data.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clugen, clumerge\n        &gt;&gt;&gt; data1 = clugen(2, 5, 1000, [1, 1], 0.01, [20, 20], 14, 1.2, 1.5);\n        &gt;&gt;&gt; data2 = clugen(2, 3, 450, [0.8, -0.3], 0, [25, 21], 6, 0.4, 3.5);\n        &gt;&gt;&gt; data3 = clugen(2, 2, 600, [0, -0.7], 0.2, [15, 10], 1, 0.1, 5.2);\n        &gt;&gt;&gt; data_merged = clumerge(data1, data2, data3)\n\n    Args:\n      *data: One or more cluster data sets whose `fields` are to be merged.\n      fields: Fields to be merged, which must exist in the data set given in\n        `*data`.\n      clusters_field: Field containing the integer cluster labels. 
If specified,\n        cluster assignments in individual datasets will be updated in the merged\n        dataset so that clusters are considered separate.\n\n    Returns:\n      A dictionary, where keys correspond to field names, and values to the\n        merged numerical arrays.\n    \"\"\"\n    # Number of elements in each array the merged dataset\n    numel: int = 0\n\n    # Number of columns of values in each field\n    fields_info: dict[str, _FieldInfo] = {}\n\n    # Merged dataset to output, initially empty\n    output: dict[str, NDArray] = {}\n\n    # Create a fields set\n    fields_set: MutableSet[str] = set(fields)\n\n    # If a clusters field is given, add it\n    if clusters_field is not None:\n        fields_set.add(str(clusters_field))\n\n    # Data in dictionary format with NDArray views on data\n    ddata: MutableSequence[Mapping[str, NDArray]] = []\n    for dt in data:\n        # If dt is a named tuple, convert it into a dictionary\n        ddt: Mapping[str, ArrayLike]\n        if isinstance(dt, dict):\n            ddt = cast(dict, dt)\n        else:\n            ntdt = cast(NamedTuple, dt)\n            ddt = ntdt._asdict()\n\n        # Convert dictionary values to NDArrays\n        ddtnp: Mapping[str, NDArray] = {k: asarray(v) for k, v in ddt.items()}\n\n        # Add converted dictionary to our sequence of dictionaries\n        ddata.append(ddtnp)\n\n    # Cycle through data items\n    for dt in ddata:\n        # Number of elements in the current item\n        numel_i: int = -1\n\n        # Cycle through fields for the current item\n        for field in fields_set:\n            if field not in dt:\n                raise ValueError(f\"Data item does not contain required field `{field}`\")\n            elif field == clusters_field and not can_cast(\n                dt[clusters_field].dtype, int64\n            ):\n                raise ValueError(f\"`{clusters_field}` must contain integer types\")\n\n            # Get the field value\n            
value: NDArray = dt[field]\n\n            # Number of elements in field value\n            numel_tmp = len(value)\n\n            # Check the number of elements in the field value\n            if numel_i == -1:\n                # First field: get number of elements in value (must be the same\n                # for the remaining field values)\n                numel_i = numel_tmp\n\n            elif numel_tmp != numel_i:\n                # Fields values after the first must have the same number of\n                # elements\n                raise ValueError(\n                    \"Data item contains fields with different sizes \"\n                    + f\"({numel_tmp} != {numel_i})\"\n                )\n\n            # Get/check info about the field value type\n            if field not in fields_info:\n                # If it's the first time this field appears, just get the info\n                fields_info[field] = _FieldInfo(value.dtype, _getcols(value))\n\n            else:\n                # If this field already appeared in previous data items, get the\n                # info and check/determine its compatibility with respect to\n                # previous data items\n                if _getcols(value) != fields_info[field].ncol:\n                    # Number of columns must be the same\n                    raise ValueError(f\"Dimension mismatch in field `{field}`\")\n\n                # Get the common supertype\n                fields_info[field].dtype = promote_types(\n                    fields_info[field].dtype, value.dtype\n                )\n\n        # Update total number of elements\n        numel += numel_i\n\n    # Initialize output dictionary fields with room for all items\n    for field in fields_info:\n        if fields_info[field].ncol == 1:\n            output[field] = empty((numel,), dtype=fields_info[field].dtype)\n        else:\n            output[field] = empty(\n                (numel, fields_info[field].ncol), 
dtype=fields_info[field].dtype\n            )\n\n    # Copy items from input data to output dictionary, field-wise\n    copied: int = 0\n    last_cluster: int = 0\n\n    # Create merged output\n    for dt in ddata:\n        # How many elements to copy for the current data item?\n        tocopy: int = len(dt[fields[0]])\n\n        # Cycle through each field and its information\n        for field in fields_info:\n            # Copy elements\n            if field == clusters_field:\n                # If this is a clusters field, update the cluster IDs\n                old_clusters = unique(dt[clusters_field])\n                new_clusters = list(\n                    range(last_cluster + 1, last_cluster + len(old_clusters) + 1)\n                )\n                old2new = zip(old_clusters, new_clusters)\n                mapping = dict(old2new)\n                last_cluster = new_clusters[-1]\n\n                output[field][copied : (copied + tocopy)] = [\n                    mapping[val] for val in dt[clusters_field]\n                ]\n\n            else:\n                # Otherwise just copy the elements\n                ncol: int = fields_info[field].ncol\n                output[field].flat[copied * ncol : (copied + tocopy) * ncol] = dt[field]\n\n        # Update how many were copied so far\n        copied += tocopy\n\n    # Return result\n    return output\n</code></pre>"},{"location":"reference/#pyclugen.clupoints_n","title":"clupoints_n","text":"<pre><code>clupoints_n(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Generate points from their \\(n\\)-D projections on a cluster-supporting line.</p> <p>Each point is placed around its projection using the normal distribution ( \\(\\mu=0\\), \\(\u03c3=\\)<code>lat_disp</code>).</p> <p>This function's main intended use is by the <code>clugen()</code> function, generating the final 
points when the <code>point_dist_fn</code> parameter is set to <code>\"n\"</code>.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clupoints_n, points_on_line\n&gt;&gt;&gt; from numpy import array, linspace\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n...                        array([1,0]),     # on a 2D line\n...                        linspace(-4,4,5))\n&gt;&gt;&gt; projs\narray([[1., 5.],\n       [3., 5.],\n       [5., 5.],\n       [7., 5.],\n       [9., 5.]])\n&gt;&gt;&gt; clupoints_n(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\narray([[0.50543932, 4.81610667],\n       [3.64396263, 5.09698721],\n       [5.46011545, 5.2885519 ],\n       [6.68176818, 5.27097611],\n       [8.84170227, 4.83880544]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>projs</code> <code>NDArray</code> <p>Point projections on the cluster-supporting line ( \\(p \\times n\\) matrix).</p> required <code>lat_disp</code> <code>float</code> <p>Standard deviation for the normal distribution, i.e., cluster lateral dispersion.</p> required <code>line_len</code> <code>float</code> <p>Length of cluster-supporting line (ignored).</p> required <code>clu_dir</code> <code>NDArray</code> <p>Direction of the cluster-supporting line.</p> required <code>clu_ctr</code> <code>NDArray</code> <p>Center position of the cluster-supporting line (ignored).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Generated points ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clupoints_n(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    
r\"\"\"Generate points from their $n$-D projections on a cluster-supporting line.\n\n    Each point is placed around its projection using the normal distribution\n    ( $\\mu=0$, $\u03c3=$`lat_disp`).\n\n    This function's main intended use is by the [`clugen()`][pyclugen.main.clugen]\n    function, generating the final points when the `point_dist_fn` parameter is\n    set to `\"n\"`.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clupoints_n, points_on_line\n        &gt;&gt;&gt; from numpy import array, linspace\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n        ...                        array([1,0]),     # on a 2D line\n        ...                        linspace(-4,4,5))\n        &gt;&gt;&gt; projs\n        array([[1., 5.],\n               [3., 5.],\n               [5., 5.],\n               [7., 5.],\n               [9., 5.]])\n        &gt;&gt;&gt; clupoints_n(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\n        array([[0.50543932, 4.81610667],\n               [3.64396263, 5.09698721],\n               [5.46011545, 5.2885519 ],\n               [6.68176818, 5.27097611],\n               [8.84170227, 4.83880544]])\n\n    Args:\n      projs: Point projections on the cluster-supporting line ( $p \\times n$ matrix).\n      lat_disp: Standard deviation for the normal distribution, i.e., cluster\n        lateral dispersion.\n      line_len: Length of cluster-supporting line (ignored).\n      clu_dir: Direction of the cluster-supporting line.\n      clu_ctr: Center position of the cluster-supporting line (ignored).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Generated points ( $p \\times n$ matrix).\n    \"\"\"\n    # Number of dimensions\n    num_dims = clu_dir.size\n\n    # Number of points in this cluster\n    clu_num_points = projs.shape[0]\n\n    # 
Get random displacement vectors for each point projection\n    displ = lat_disp * rng.normal(size=(clu_num_points, num_dims))\n\n    # Add displacement vectors to each point projection\n    points = projs + displ\n\n    return points\n</code></pre>"},{"location":"reference/#pyclugen.clupoints_n_1","title":"clupoints_n_1","text":"<pre><code>clupoints_n_1(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Generate points from their \\(n\\)-D projections on a cluster-supporting line.</p> <p>Each point is placed on a hyperplane orthogonal to that line and centered at the point's projection, using the normal distribution ( \\(\\mu=0\\), \\(\u03c3=\\)<code>lat_disp</code>).</p> <p>This function's main intended use is by the <code>clugen()</code> function, generating the final points when the <code>point_dist_fn</code> parameter is set to <code>\"n-1\"</code>.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import clupoints_n_1, points_on_line\n&gt;&gt;&gt; from numpy import array, linspace\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n...                        array([1,0]),     # on a 2D line\n...                        linspace(-4,4,5))\n&gt;&gt;&gt; projs\narray([[1., 5.],\n       [3., 5.],\n       [5., 5.],\n       [7., 5.],\n       [9., 5.]])\n&gt;&gt;&gt; clupoints_n_1(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\narray([[1.        , 5.49456068],\n       [3.        , 5.18389333],\n       [5.        , 5.64396263],\n       [7.        , 5.09698721],\n       [9.        
, 5.46011545]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>projs</code> <code>NDArray</code> <p>Point projections on the cluster-supporting line ( \\(p \\times n\\) matrix).</p> required <code>lat_disp</code> <code>float</code> <p>Standard deviation for the normal distribution, i.e., cluster lateral dispersion.</p> required <code>line_len</code> <code>float</code> <p>Length of cluster-supporting line (ignored).</p> required <code>clu_dir</code> <code>NDArray</code> <p>Direction of the cluster-supporting line.</p> required <code>clu_ctr</code> <code>NDArray</code> <p>Center position of the cluster-supporting line (ignored).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Generated points ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clupoints_n_1(\n    projs: NDArray,\n    lat_disp: float,\n    line_len: float,\n    clu_dir: NDArray,\n    clu_ctr: NDArray,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Generate points from their $n$-D projections on a cluster-supporting line.\n\n    Each point is placed on a hyperplane orthogonal to that line and centered at\n    the point's projection, using the normal distribution ( $\\mu=0$,\n    $\u03c3=$`lat_disp`).\n\n    This function's main intended use is by the [`clugen()`][pyclugen.main.clugen]\n    function, generating the final points when the `point_dist_fn` parameter is\n    set to `\"n-1\"`.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import clupoints_n_1, points_on_line\n        &gt;&gt;&gt; from numpy import array, linspace\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; projs = points_on_line(array([5,5]),     # Get 5 point projections\n        ...                        
array([1,0]),     # on a 2D line\n        ...                        linspace(-4,4,5))\n        &gt;&gt;&gt; projs\n        array([[1., 5.],\n               [3., 5.],\n               [5., 5.],\n               [7., 5.],\n               [9., 5.]])\n        &gt;&gt;&gt; clupoints_n_1(projs, 0.5, 1.0, array([1,0]), array([0,0]), rng=prng)\n        array([[1.        , 5.49456068],\n               [3.        , 5.18389333],\n               [5.        , 5.64396263],\n               [7.        , 5.09698721],\n               [9.        , 5.46011545]])\n\n    Args:\n      projs: Point projections on the cluster-supporting line ( $p \\times n$ matrix).\n      lat_disp: Standard deviation for the normal distribution, i.e., cluster\n        lateral dispersion.\n      line_len: Length of cluster-supporting line (ignored).\n      clu_dir: Direction of the cluster-supporting line.\n      clu_ctr: Center position of the cluster-supporting line (ignored).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Generated points ( $p \\times n$ matrix).\n    \"\"\"\n    # No blank line allowed here\n\n    # Define function to get distances from points to their projections on the\n    # line (i.e., using the normal distribution)\n    def dist_fn(clu_num_points, ldisp, rg):\n        return ldisp * rg.normal(size=clu_num_points)\n\n    # Use clupoints_n_1_template() to do the heavy lifting\n    return clupoints_n_1_template(projs, lat_disp, clu_dir, dist_fn, rng=rng)\n</code></pre>"},{"location":"reference/#pyclugen.clupoints_n_1_template","title":"clupoints_n_1_template","text":"<pre><code>clupoints_n_1_template(\n    projs: NDArray,\n    lat_disp: float,\n    clu_dir: NDArray,\n    dist_fn: Callable[[int, float, Generator], NDArray],\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Create \\(p\\) points from their \\(n\\)-D projections on a cluster-supporting line.</p> <p>Each point is placed on a hyperplane orthogonal to that line and centered 
at the point's projection. The function specified in <code>dist_fn</code> is used to perform the actual placement.</p> <p>This function is used internally by <code>clupoints_n_1()</code> and may be useful for constructing user-defined final point placement strategies for the <code>point_dist_fn</code> parameter of the main <code>clugen()</code> function.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array, zeros\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; from pyclugen import clupoints_n_1_template, points_on_line\n&gt;&gt;&gt; ctr = zeros(2)\n&gt;&gt;&gt; dir = array([1, 0])\n&gt;&gt;&gt; pdist = array([-0.5, -0.2, 0.1, 0.3])\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; proj = points_on_line(ctr, dir, pdist)\n&gt;&gt;&gt; clupoints_n_1_template(proj, 0, dir, lambda p, l, r: r.random(p), rng=rng)\narray([[-0.5       ,  0.68235186],\n       [-0.2       , -0.05382102],\n       [ 0.1       ,  0.22035987],\n       [ 0.3       , -0.18437181]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>projs</code> <code>NDArray</code> <p>Point projections on the cluster-supporting line ( \\(p \\times n\\) matrix).</p> required <code>lat_disp</code> <code>float</code> <p>Dispersion of points from their projection.</p> required <code>clu_dir</code> <code>NDArray</code> <p>Direction of the cluster-supporting line (unit vector).</p> required <code>dist_fn</code> <code>Callable[[int, float, Generator], NDArray]</code> <p>Function to place points on a second line, orthogonal to the first. 
The function accepts as parameters the number of points in the current cluster, the <code>lateral_disp</code> parameter (the same passed to the <code>clugen()</code> function), and a random number generator, returning a vector containing the distance of each point to its projection on the cluster-supporting line.</p> required <code>rng</code> <code>Generator</code> <p>An optional pseudo-random number generator for reproducible executions.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Generated points ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def clupoints_n_1_template(\n    projs: NDArray,\n    lat_disp: float,\n    clu_dir: NDArray,\n    dist_fn: Callable[[int, float, Generator], NDArray],\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Create $p$ points from their $n$-D projections on a cluster-supporting line.\n\n    Each point is placed on a hyperplane orthogonal to that line and centered at\n    the point's projection. 
The function specified in `dist_fn` is used to perform\n    the actual placement.\n\n    This function is used internally by\n    [`clupoints_n_1()`][pyclugen.module.clupoints_n_1] and may be useful for\n    constructing user-defined final point placement strategies for the `point_dist_fn`\n    parameter of the main [`clugen()`][pyclugen.main.clugen] function.\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array, zeros\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; from pyclugen import clupoints_n_1_template, points_on_line\n        &gt;&gt;&gt; ctr = zeros(2)\n        &gt;&gt;&gt; dir = array([1, 0])\n        &gt;&gt;&gt; pdist = array([-0.5, -0.2, 0.1, 0.3])\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; proj = points_on_line(ctr, dir, pdist)\n        &gt;&gt;&gt; clupoints_n_1_template(proj, 0, dir, lambda p, l, r: r.random(p), rng=rng)\n        array([[-0.5       ,  0.68235186],\n               [-0.2       , -0.05382102],\n               [ 0.1       ,  0.22035987],\n               [ 0.3       , -0.18437181]])\n\n    Args:\n      projs: Point projections on the cluster-supporting line ( $p \\times n$ matrix).\n      lat_disp: Dispersion of points from their projection.\n      clu_dir: Direction of the cluster-supporting line (unit vector).\n      dist_fn: Function to place points on a second line, orthogonal to the first.\n        The function accepts as parameters the number of points in the current\n        cluster, the `lateral_disp` parameter (the same passed to the\n        [`clugen()`][pyclugen.main.clugen] function), and a random number generator,\n        returning a vector containing the distance of each point to its projection\n        on the cluster-supporting line.\n      rng: An optional pseudo-random number generator for reproducible executions.\n\n    Returns:\n      Generated points ( $p \\times n$ matrix).\n    \"\"\"\n    # Number of dimensions\n    num_dims = 
clu_dir.size\n\n    # Number of points in this cluster\n    clu_num_points = projs.shape[0]\n\n    # Get distances from points to their projections on the line\n    points_dist = dist_fn(clu_num_points, lat_disp, rng)\n\n    # Get normalized vectors, orthogonal to the current line, for each point\n    orth_vecs = zeros((clu_num_points, num_dims))\n\n    for j in range(clu_num_points):\n        orth_vecs[j, :] = rand_ortho_vector(clu_dir, rng=rng).ravel()\n\n    # Set vector magnitudes\n    orth_vecs = abs(points_dist).reshape(-1, 1) * orth_vecs\n\n    # Add perpendicular vectors to point projections on the line,\n    # yielding final cluster points\n    points = projs + orth_vecs\n\n    return points\n</code></pre>"},{"location":"reference/#pyclugen.clusizes","title":"clusizes","text":"<pre><code>clusizes(\n    num_clusters: int,\n    num_points: int,\n    allow_empty: bool,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Determine cluster sizes, i.e., the number of points in each cluster.</p> <p>Cluster sizes are determined using the normal distribution ( \\(\\mu=\\)<code>num_points</code> \\(/\\)<code>num_clusters</code>, \\(\\sigma=\\mu/3\\)), and then assuring that the final cluster sizes add up to <code>num_points</code> via the <code>fix_num_points()</code> function.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; from pyclugen import clusizes\n&gt;&gt;&gt; prng = Generator(PCG64(123))\n&gt;&gt;&gt; sizes = clusizes(4, 1000, True, rng=prng)\n&gt;&gt;&gt; sizes\narray([166, 217, 354, 263])\n&gt;&gt;&gt; sum(sizes)\n1000\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>num_points</code> <code>int</code> <p>Total number of points.</p> required <code>allow_empty</code> <code>bool</code> <p>Allow empty clusters?</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random 
number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Number of points in each cluster (vector of size <code>num_clusters</code>).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def clusizes(\n    num_clusters: int,\n    num_points: int,\n    allow_empty: bool,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Determine cluster sizes, i.e., the number of points in each cluster.\n\n    Cluster sizes are determined using the normal distribution (\n    $\\mu=$`num_points` $/$`num_clusters`, $\\sigma=\\mu/3$), and then\n    assuring that the final cluster sizes add up to `num_points` via the\n    [`fix_num_points()`][pyclugen.helper.fix_num_points] function.\n\n    Examples:\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; from pyclugen import clusizes\n        &gt;&gt;&gt; prng = Generator(PCG64(123))\n        &gt;&gt;&gt; sizes = clusizes(4, 1000, True, rng=prng)\n        &gt;&gt;&gt; sizes\n        array([166, 217, 354, 263])\n        &gt;&gt;&gt; sum(sizes)\n        1000\n\n    Args:\n      num_clusters: Number of clusters.\n      num_points: Total number of points.\n      allow_empty: Allow empty clusters?\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Number of points in each cluster (vector of size `num_clusters`).\n    \"\"\"\n    # Determine number of points in each cluster using the normal distribution\n\n    # Consider the mean an equal division of points between clusters\n    mean = num_points / num_clusters\n    # The standard deviation is such that the interval [0, 2 * mean] will contain\n    # \u224899.7% of cluster sizes\n    std = mean / 3\n\n    # Determine points with the normal distribution\n    clu_num_points = std * rng.normal(size=num_clusters) + mean\n\n    # Set negative values to zero\n    clu_num_points = where(clu_num_points &gt; 0, clu_num_points, 0)\n\n    # Fix imbalances, so that num_points 
is respected\n    if sum(clu_num_points) &gt; 0:  # Be careful not to divide by zero\n        clu_num_points *= num_points / sum(clu_num_points)\n\n    # Round the real values to integers since a cluster size is represented by\n    # an integer\n    clu_num_points = rint(clu_num_points).astype(int)\n\n    # Make sure total points is respected, which may not be the case at this time due\n    # to rounding\n    fix_num_points(clu_num_points, num_points)\n\n    # If empty clusters are not allowed, make sure there aren't any\n    if not allow_empty:\n        fix_empty(clu_num_points)\n\n    return clu_num_points\n</code></pre>"},{"location":"reference/#pyclugen.fix_empty","title":"fix_empty","text":"<pre><code>fix_empty(clu_num_points: NDArray, allow_empty: bool = False) -&gt; NDArray\n</code></pre> <p>Certifies that, given enough points, no clusters are left empty.</p> <p>This is done by removing a point from the largest cluster and adding it to an empty cluster while there are empty clusters. 
If the total number of points is smaller than the number of clusters (or if the <code>allow_empty</code> parameter is set to <code>True</code>), this function does nothing.</p> <p>This function is used internally by <code>clusizes()</code> and might be useful for custom cluster sizing implementations given as the <code>clusizes_fn</code> parameter of the main <code>clugen()</code> function.</p> <p>Note that the array is changed in-place.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array\n&gt;&gt;&gt; from pyclugen import fix_empty\n&gt;&gt;&gt; clusters = array([3, 4, 5, 0, 0])\n&gt;&gt;&gt; fix_empty(clusters)\narray([3, 3, 4, 1, 1])\n&gt;&gt;&gt; clusters # Verify that the array was changed in-place\narray([3, 3, 4, 1, 1])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>clu_num_points</code> <code>NDArray</code> <p>Number of points in each cluster (vector of size \\(c\\)), where \\(c\\) is the number of clusters.</p> required <code>allow_empty</code> <code>bool</code> <p>Allow empty clusters?</p> <code>False</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Number of points in each cluster, after being fixed by this function (vector of size \\(c\\), which is the same reference as <code>clu_num_points</code>).</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def fix_empty(clu_num_points: NDArray, allow_empty: bool = False) -&gt; NDArray:\n    r\"\"\"Certifies that, given enough points, no clusters are left empty.\n\n    This is done by removing a point from the largest cluster and adding it to an\n    empty cluster while there are empty clusters. 
If the total number of points is\n    smaller than the number of clusters (or if the `allow_empty` parameter is set\n    to `true`), this function does nothing.\n\n    This function is used internally by [`clusizes()`][pyclugen.module.clusizes]\n    and might be useful for custom cluster sizing implementations given as the\n    `clusizes_fn` parameter of the main [`clugen()`][pyclugen.main.clugen] function.\n\n    Note that the array is changed in-place.\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array\n        &gt;&gt;&gt; from pyclugen import fix_empty\n        &gt;&gt;&gt; clusters = array([3, 4, 5, 0, 0])\n        &gt;&gt;&gt; fix_empty(clusters)\n        array([3, 3, 4, 1, 1])\n        &gt;&gt;&gt; clusters # Verify that the array was changed in-place\n        array([3, 3, 4, 1, 1])\n\n    Args:\n      clu_num_points: Number of points in each cluster (vector of size $c$),\n        where $c$ is the number of clusters.\n      allow_empty: Allow empty clusters?\n\n    Returns:\n      Number of points in each cluster, after being fixed by this function (vector\n        of size $c$, which is the same reference than `clu_num_points`).\n    \"\"\"\n    # If the allow_empty parameter is set to true, don't do anything and return\n    # immediately; this is useful for quick `clusizes_fn` one-liners\n    if not allow_empty:\n        # Find empty clusters\n        empty_clusts = [idx for idx, val in enumerate(clu_num_points) if val == 0]\n\n        # If there are empty clusters and enough points for all clusters...\n        if len(empty_clusts) &gt; 0 and sum(clu_num_points) &gt;= clu_num_points.size:\n            # Go through the empty clusters...\n            for i0 in empty_clusts:\n                # ...get a point from the largest cluster and assign it to the\n                # current empty cluster\n                imax = argmax(clu_num_points)\n                clu_num_points[imax] -= 1\n                clu_num_points[i0] += 1\n\n    return 
clu_num_points\n</code></pre>"},{"location":"reference/#pyclugen.fix_num_points","title":"fix_num_points","text":"<pre><code>fix_num_points(clu_num_points: NDArray, num_points: int) -&gt; NDArray\n</code></pre> <p>Certifies that the values in the <code>clu_num_points</code> array add up to <code>num_points</code>.</p> <p>If this is not the case, the <code>clu_num_points</code> array is modified in-place, incrementing the value corresponding to the smallest cluster while <code>sum(clu_num_points) &lt; num_points</code>, or decrementing the value corresponding to the largest cluster while <code>sum(clu_num_points) &gt; num_points</code>.</p> <p>This function is used internally by <code>clusizes()</code> and might be useful for custom cluster sizing implementations given as the <code>clusizes_fn</code> parameter of the main <code>clugen()</code> function.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy import array\n&gt;&gt;&gt; from pyclugen import fix_num_points\n&gt;&gt;&gt; clusters = array([1, 6, 3])  # 10 total points\n&gt;&gt;&gt; fix_num_points(clusters, 12) # But we want 12 total points\narray([3, 6, 3])\n&gt;&gt;&gt; clusters # Verify that the array was changed in-place\narray([3, 6, 3])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>clu_num_points</code> <code>NDArray</code> <p>Number of points in each cluster (vector of size \\(c\\)), where \\(c\\) is the number of clusters.</p> required <code>num_points</code> <code>int</code> <p>The expected total number of points.</p> required <p>Returns:</p> Type Description <code>NDArray</code> <p>Number of points in each cluster, after being fixed by this function (vector of size \\(c\\), which is the same reference as <code>clu_num_points</code>).</p> Source code in <code>pyclugen/helper.py</code> <pre><code>def fix_num_points(clu_num_points: NDArray, num_points: int) -&gt; NDArray:\n    r\"\"\"Certifies that the values in the `clu_num_points` array add up to `num_points`.\n\n    If 
this is not the case, the `clu_num_points` array is modified in-place,\n    incrementing the value corresponding to the smallest cluster while\n    `sum(clu_num_points) &lt; num_points`, or decrementing the value corresponding to\n    the largest cluster while `sum(clu_num_points) &gt; num_points`.\n\n    This function is used internally by [`clusizes()`][pyclugen.module.clusizes]\n    and might be useful for custom cluster sizing implementations given as the\n    `clusizes_fn` parameter of the main [`clugen()`][pyclugen.main.clugen] function.\n\n    Examples:\n        &gt;&gt;&gt; from numpy import array\n        &gt;&gt;&gt; from pyclugen import fix_num_points\n        &gt;&gt;&gt; clusters = array([1, 6, 3])  # 10 total points\n        &gt;&gt;&gt; fix_num_points(clusters, 12) # But we want 12 total points\n        array([3, 6, 3])\n        &gt;&gt;&gt; clusters # Verify that the array was changed in-place\n        array([3, 6, 3])\n\n    Args:\n      clu_num_points: Number of points in each cluster (vector of size $c$),\n        where $c$ is the number of clusters.\n      num_points: The expected total number of points.\n\n    Returns:\n      Number of points in each cluster, after being fixed by this function (vector\n        of size $c$, which is the same reference than `clu_num_points`).\n    \"\"\"\n    while sum(clu_num_points) &lt; num_points:\n        imin = argmin(clu_num_points)\n        clu_num_points[imin] += 1\n    while sum(clu_num_points) &gt; num_points:\n        imax = argmax(clu_num_points)\n        clu_num_points[imax] -= 1\n\n    return clu_num_points\n</code></pre>"},{"location":"reference/#pyclugen.llengths","title":"llengths","text":"<pre><code>llengths(\n    num_clusters: int,\n    llength: float,\n    llength_disp: float,\n    rng: Generator = _default_rng,\n) -&gt; NDArray\n</code></pre> <p>Determine length of cluster-supporting lines.</p> <p>Line lengths are determined using the folded normal distribution ( 
\\(\\mu=\\)<code>llength</code>, \\(\\sigma=\\)<code>llength_disp</code>).</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from numpy.random import Generator, MT19937\n&gt;&gt;&gt; from pyclugen import llengths\n&gt;&gt;&gt; prng = Generator(MT19937(123))\n&gt;&gt;&gt; llengths(4, 20, 3.5, rng=prng)\narray([19.50968733, 19.92482858, 25.99013804, 18.58029672])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_clusters</code> <code>int</code> <p>Number of clusters.</p> required <code>llength</code> <code>float</code> <p>Average line length.</p> required <code>llength_disp</code> <code>float</code> <p>Line length dispersion.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Lengths of cluster-supporting lines (vector of size <code>num_clusters</code>).</p> Source code in <code>pyclugen/module.py</code> <pre><code>def llengths(\n    num_clusters: int,\n    llength: float,\n    llength_disp: float,\n    rng: Generator = _default_rng,\n) -&gt; NDArray:\n    r\"\"\"Determine length of cluster-supporting lines.\n\n    Line lengths are determined using the folded normal distribution (\n    $\\mu=$`llength`, $\\sigma=$`llength_disp`).\n\n    Examples:\n        &gt;&gt;&gt; from numpy.random import Generator, MT19937\n        &gt;&gt;&gt; from pyclugen import llengths\n        &gt;&gt;&gt; prng = Generator(MT19937(123))\n        &gt;&gt;&gt; llengths(4, 20, 3.5, rng=prng)\n        array([19.50968733, 19.92482858, 25.99013804, 18.58029672])\n\n    Args:\n      num_clusters: Number of clusters.\n      llength: Average line length.\n      llength_disp: Line length dispersion.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Lengths of cluster-supporting lines (vector of size `num_clusters`).\n    \"\"\"\n    return abs(llength + llength_disp * 
rng.normal(size=num_clusters))\n</code></pre>"},{"location":"reference/#pyclugen.points_on_line","title":"points_on_line","text":"<pre><code>points_on_line(\n    center: NDArray, direction: NDArray, dist_center: NDArray\n) -&gt; NDArray\n</code></pre> <p>Determine coordinates of points on a line.</p> <p>Determine coordinates of points on a line with <code>center</code> and <code>direction</code>, based on the distances from the center given in <code>dist_center</code>.</p> <p>This works by using the vector formulation of the line equation assuming <code>direction</code> is a \\(n\\)-dimensional unit vector. In other words, considering \\(\\mathbf{d}=\\)<code>direction.reshape(-1,1)</code> ( \\(n \\times 1\\) vector), \\(\\mathbf{c}=\\)<code>center.reshape(-1,1)</code> ( \\(n \\times 1\\) vector), and \\(\\mathbf{w}=\\) <code>dist_center.reshape(-1,1)</code> ( \\(p \\times 1\\) vector), the coordinates of points on the line are given by:</p> \\[ \\mathbf{P}=\\mathbf{1}\\,\\mathbf{c}^T + \\mathbf{w}\\mathbf{d}^T \\] <p>where \\(\\mathbf{P}\\) is the \\(p \\times n\\) matrix of point coordinates on the line, and \\(\\mathbf{1}\\) is a \\(p \\times 1\\) vector with all entries equal to 1.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import points_on_line\n&gt;&gt;&gt; from numpy import array, linspace\n&gt;&gt;&gt; points_on_line(array([5., 5.]),\n...                array([1., 0.]),\n...                linspace(-4, 4, 5)) # 2D, 5 points\narray([[1., 5.],\n       [3., 5.],\n       [5., 5.],\n       [7., 5.],\n       [9., 5.]])\n&gt;&gt;&gt; points_on_line(array([-2, 0, 0., 2]),\n...                array([0., 0, -1, 0]),\n...                
array([10, -10])) # 4D, 2 points\narray([[ -2.,   0., -10.,   2.],\n       [ -2.,   0.,  10.,   2.]])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>center</code> <code>NDArray</code> <p>Center of the line ( \\(n\\)-component vector).</p> required <code>direction</code> <code>NDArray</code> <p>Line direction ( \\(n\\)-component unit vector).</p> required <code>dist_center</code> <code>NDArray</code> <p>Distance of each point to the center of the line ( \\(p\\)-component vector, where \\(p\\) is the number of points).</p> required <p>Returns:</p> Type Description <code>NDArray</code> <p>Coordinates of points on the specified line ( \\(p \\times n\\) matrix).</p> Source code in <code>pyclugen/core.py</code> <pre><code>def points_on_line(\n    center: NDArray, direction: NDArray, dist_center: NDArray\n) -&gt; NDArray:\n    r\"\"\"Determine coordinates of points on a line.\n\n    Determine coordinates of points on a line with `center` and `direction`,\n    based on the distances from the center given in `dist_center`.\n\n    This works by using the vector formulation of the line equation assuming\n    `direction` is a $n$-dimensional unit vector. In other words, considering\n    $\\mathbf{d}=$`direction.reshape(-1,1)` ( $n \\times 1$ vector),\n    $\\mathbf{c}=$`center.reshape(-1,1)` ( $n \\times 1$ vector), and\n    $\\mathbf{w}=$ `dist_center.reshape(-1,1)` ( $p \\times 1$ vector),\n    the coordinates of points on the line are given by:\n\n    $$\n    \\mathbf{P}=\\mathbf{1}\\,\\mathbf{c}^T + \\mathbf{w}\\mathbf{d}^T\n    $$\n\n    where $\\mathbf{P}$ is the $p \\times n$ matrix of point coordinates on the\n    line, and $\\mathbf{1}$ is a $p \\times 1$ vector with all entries equal to 1.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import points_on_line\n        &gt;&gt;&gt; from numpy import array, linspace\n        &gt;&gt;&gt; points_on_line(array([5., 5.]),\n        ...                array([1., 0.]),\n        ...                
linspace(-4, 4, 5)) # 2D, 5 points\n        array([[1., 5.],\n               [3., 5.],\n               [5., 5.],\n               [7., 5.],\n               [9., 5.]])\n        &gt;&gt;&gt; points_on_line(array([-2, 0, 0., 2]),\n        ...                array([0., 0, -1, 0]),\n        ...                array([10, -10])) # 4D, 2 points\n        array([[ -2.,   0., -10.,   2.],\n               [ -2.,   0.,  10.,   2.]])\n\n    Args:\n      center: Center of the line ( $n$-component vector).\n      direction: Line direction ( $n$-component unit vector).\n      dist_center: Distance of each point to the center of the line\n        ( $p$-component vector, where $p$ is the number of points).\n\n    Returns:\n      Coordinates of points on the specified line ( $p \\times n$ matrix).\n    \"\"\"\n    return center.reshape(1, -1) + dist_center.reshape(-1, 1) @ direction.reshape(\n        (1, -1)\n    )\n</code></pre>"},{"location":"reference/#pyclugen.rand_ortho_vector","title":"rand_ortho_vector","text":"<pre><code>rand_ortho_vector(u: NDArray, rng: Generator = _default_rng) -&gt; NDArray\n</code></pre> <p>Get a random unit vector orthogonal to <code>u</code>.</p> <p>Note that <code>u</code> is expected to be a unit vector itself.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import rand_ortho_vector\n&gt;&gt;&gt; from numpy import isclose, dot\n&gt;&gt;&gt; from numpy.linalg import norm\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; r = rng.random(3) # Get a random vector with 3 components (3D)\n&gt;&gt;&gt; r = r / norm(r) # Normalize it\n&gt;&gt;&gt; r_ort = rand_ortho_vector(r, rng=rng) # Get random unit vector orth. 
to r\n&gt;&gt;&gt; r_ort\narray([-0.1982903 , -0.61401512,  0.76398062])\n&gt;&gt;&gt; isclose(dot(r, r_ort), 0) # Check that vectors are indeed orthogonal\nTrue\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>u</code> <code>NDArray</code> <p>Unit vector with \\(n\\) components.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>A random unit vector with \\(n\\) components orthogonal to <code>u</code>.</p> Source code in <code>pyclugen/core.py</code> <pre><code>def rand_ortho_vector(u: NDArray, rng: Generator = _default_rng) -&gt; NDArray:\n    r\"\"\"Get a random unit vector orthogonal to `u`.\n\n    Note that `u` is expected to be a unit vector itself.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import rand_ortho_vector\n        &gt;&gt;&gt; from numpy import isclose, dot\n        &gt;&gt;&gt; from numpy.linalg import norm\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; r = rng.random(3) # Get a random vector with 3 components (3D)\n        &gt;&gt;&gt; r = r / norm(r) # Normalize it\n        &gt;&gt;&gt; r_ort = rand_ortho_vector(r, rng=rng) # Get random unit vector orth. 
to r\n        &gt;&gt;&gt; r_ort\n        array([-0.1982903 , -0.61401512,  0.76398062])\n        &gt;&gt;&gt; isclose(dot(r, r_ort), 0) # Check that vectors are indeed orthogonal\n        True\n\n    Args:\n      u: Unit vector with $n$ components.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      A random unit vector with $n$ components orthogonal to `u`.\n    \"\"\"\n    # If 1D, just return a random unit vector\n    if u.size == 1:\n        return rand_unit_vector(1, rng=rng)\n\n    # Find a random, non-parallel vector to u\n    while True:\n        # Find normalized random vector\n        r = rand_unit_vector(u.size, rng=rng)\n\n        # If not parallel to u we can keep it and break the loop\n        if not isclose(abs(dot(u, r)), 1):\n            break\n\n    # Get vector orthogonal to u using 1st iteration of Gram-Schmidt process\n    v = r - dot(u, r) / dot(u, u) * u\n\n    # Normalize it\n    v = v / norm(v)\n\n    # And return it\n    return v\n</code></pre>"},{"location":"reference/#pyclugen.rand_unit_vector","title":"rand_unit_vector","text":"<pre><code>rand_unit_vector(num_dims: int, rng: Generator = _default_rng) -&gt; NDArray\n</code></pre> <p>Get a random unit vector with <code>num_dims</code> components.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import rand_unit_vector\n&gt;&gt;&gt; rand_unit_vector(4)\narray([ 0.48653889,  0.50753862,  0.05711487, -0.70881757])\n</code></pre> <pre><code>&gt;&gt;&gt; from pyclugen import rand_unit_vector\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; rand_unit_vector(2, rng=rng) # Reproducible\narray([ 0.3783202 , -0.92567479])\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>num_dims</code> <code>int</code> <p>Number of components in vector (i.e. 
vector size).</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>A random unit vector with <code>num_dims</code> components.</p> Source code in <code>pyclugen/core.py</code> <pre><code>def rand_unit_vector(num_dims: int, rng: Generator = _default_rng) -&gt; NDArray:\n    r\"\"\"Get a random unit vector with `num_dims` components.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import rand_unit_vector\n        &gt;&gt;&gt; rand_unit_vector(4) # doctest: +SKIP\n        array([ 0.48653889,  0.50753862,  0.05711487, -0.70881757])\n\n        &gt;&gt;&gt; from pyclugen import rand_unit_vector\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; rand_unit_vector(2, rng=rng) # Reproducible\n        array([ 0.3783202 , -0.92567479])\n\n    Args:\n      num_dims: Number of components in vector (i.e. 
vector size).\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      A random unit vector with `num_dims` components.\n    \"\"\"\n    r = rng.random(num_dims) - 0.5\n    r = r / norm(r)\n    return r\n</code></pre>"},{"location":"reference/#pyclugen.rand_vector_at_angle","title":"rand_vector_at_angle","text":"<pre><code>rand_vector_at_angle(\n    u: NDArray, angle: float, rng: Generator = _default_rng\n) -&gt; NDArray\n</code></pre> <p>Get a random unit vector which is at <code>angle</code> radians of vector <code>u</code>.</p> <p>Note that <code>u</code> is expected to be a unit vector itself.</p> <p>Examples:</p> <pre><code>&gt;&gt;&gt; from pyclugen import rand_vector_at_angle\n&gt;&gt;&gt; from numpy import arccos, array, degrees, pi, dot\n&gt;&gt;&gt; from numpy.linalg import norm\n&gt;&gt;&gt; from numpy.random import Generator, PCG64\n&gt;&gt;&gt; rng = Generator(PCG64(123))\n&gt;&gt;&gt; u = array([ 1.0, 0, 0.5, -0.5 ]) # Define a 4D vector\n&gt;&gt;&gt; u = u / norm(u) # Normalize the vector\n&gt;&gt;&gt; v = rand_vector_at_angle(u, pi/4, rng=rng) # Get a vector at 45 degrees\n&gt;&gt;&gt; v\narray([ 0.633066  , -0.50953554, -0.10693823, -0.57285705])\n&gt;&gt;&gt; degrees(arccos(dot(u, v) / norm(u) * norm(v))) # Angle between u and v\n45.0\n</code></pre> <p>Parameters:</p> Name Type Description Default <code>u</code> <code>NDArray</code> <p>Unit vector with \\(n\\) components.</p> required <code>angle</code> <code>float</code> <p>Angle in radians.</p> required <code>rng</code> <code>Generator</code> <p>Optional pseudo-random number generator.</p> <code>_default_rng</code> <p>Returns:</p> Type Description <code>NDArray</code> <p>Random unit vector with \\(n\\) components which is at <code>angle</code> radians with vector <code>u</code>.</p> Source code in <code>pyclugen/core.py</code> <pre><code>def rand_vector_at_angle(\n    u: NDArray, angle: float, rng: Generator = _default_rng\n) -&gt; NDArray:\n    r\"\"\"Get a random unit vector 
which is at `angle` radians of vector `u`.\n\n    Note that `u` is expected to be a unit vector itself.\n\n    Examples:\n        &gt;&gt;&gt; from pyclugen import rand_vector_at_angle\n        &gt;&gt;&gt; from numpy import arccos, array, degrees, pi, dot\n        &gt;&gt;&gt; from numpy.linalg import norm\n        &gt;&gt;&gt; from numpy.random import Generator, PCG64\n        &gt;&gt;&gt; rng = Generator(PCG64(123))\n        &gt;&gt;&gt; u = array([ 1.0, 0, 0.5, -0.5 ]) # Define a 4D vector\n        &gt;&gt;&gt; u = u / norm(u) # Normalize the vector\n        &gt;&gt;&gt; v = rand_vector_at_angle(u, pi/4, rng=rng) # Get a vector at 45 degrees\n        &gt;&gt;&gt; v\n        array([ 0.633066  , -0.50953554, -0.10693823, -0.57285705])\n        &gt;&gt;&gt; degrees(arccos(dot(u, v) / norm(u) * norm(v))) # Angle between u and v\n        45.0\n\n    Args:\n      u: Unit vector with $n$ components.\n      angle: Angle in radians.\n      rng: Optional pseudo-random number generator.\n\n    Returns:\n      Random unit vector with $n$ components which is at `angle` radians\n        with vector `u`.\n    \"\"\"\n    if isclose(abs(angle), pi / 2) and u.size &gt; 1:\n        return rand_ortho_vector(u, rng=rng)\n    elif -pi / 2 &lt; angle &lt; pi / 2 and u.size &gt; 1:\n        v = u + rand_ortho_vector(u, rng=rng) * tan(angle)\n        return v / norm(v)\n    else:\n        # For |\u03b8| &gt; \u03c0/2 or the 1D case, simply return a random vector\n        return rand_unit_vector(u.size, rng=rng)\n</code></pre>"},{"location":"theory/","title":"Theory","text":"<p>This section presents a general overview of the clugen algorithm. A complete description of the algorithm's theoretical framework is available in the article \"Generating multidimensional clusters with support lines\" (an open version is available on arXiv).</p> <p>Clugen is an algorithm for generating multidimensional clusters. 
Each cluster is supported by a line segment, the position, orientation and length of which guide where the respective points are placed. For brevity, line segments will be referred to as lines.</p> <p>Given an \\(n\\)-dimensional direction vector \\(\\mathbf{d}\\) (and a number of additional parameters, which will be discussed shortly), the clugen algorithm works as follows (\\(^*\\) means the algorithm step is stochastic):</p> <ol> <li>Normalize \\(\\mathbf{d}\\).</li> <li>\\(^*\\)Determine cluster sizes.</li> <li>\\(^*\\)Determine cluster centers.</li> <li>\\(^*\\)Determine lengths of cluster-supporting lines.</li> <li>\\(^*\\)Determine angles between \\(\\mathbf{d}\\) and cluster-supporting lines.</li> <li>For each cluster: <ol> <li>\\(^*\\)Determine direction of the cluster-supporting line.</li> <li>\\(^*\\)Determine distance of point projections from the center of the cluster-supporting line.</li> <li>Determine coordinates of point projections on the cluster-supporting line.</li> <li>\\(^*\\)Determine points from their projections on the cluster-supporting line.</li> </ol> </li> </ol> <p>Figure 1 provides a stylized overview of the algorithm's steps.</p> <p></p> <p>The example in Figure 1 was generated with the following parameters, the exact meaning of which will be discussed shortly:</p> Parameter values Description \\(n=2\\) Number of dimensions. \\(c=4\\) Number of clusters. \\(p=200\\) Total number of points. \\(\\mathbf{d}=\\begin{bmatrix}1 &amp; 1\\end{bmatrix}^T\\) Average direction. \\(\\theta_\\sigma=\\pi/16\\approx{}11.25^{\\circ}\\) Angle dispersion. \\(\\mathbf{s}=\\begin{bmatrix}10 &amp; 10\\end{bmatrix}^T\\) Average cluster separation. \\(l=10\\) Average line length. \\(l_\\sigma=1.5\\) Line length dispersion. \\(f_\\sigma=1\\) Cluster lateral dispersion. <p>Additionally, all optional parameters (not listed above) were left at their default values. 
The complete list of parameters is presented in the <code>clugen()</code> function documentation.</p>"},{"location":"generated/gallery/","title":"Examples","text":""},{"location":"generated/gallery/#examples","title":"Examples","text":"<p> Examples in 1D </p> <p> Examples in 2D </p> <p> Examples in 3D </p> <p> Examples in nD </p> <p> Merging and hierarchical cluster examples </p> <p> Plot functions </p> <p> Download all examples in Python source code: gallery_python.zip</p> <p> Download all examples in Jupyter notebooks: gallery_jupyter.zip</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/mg_execution_times/","title":"Computation times","text":"<p>00:29.296 total execution time for generated_gallery files:</p> <p>+----------------------------------------------------------------------------------------+-----------+--------+ | plot_2_2d_examples (docs/examples/plot_2_2d_examples.py)    | 00:11.459 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_4_nd_examples (docs/examples/plot_4_nd_examples.py)    | 00:07.035 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_3_3d_examples (docs/examples/plot_3_3d_examples.py)    | 00:05.724 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_5_mrg_examples (docs/examples/plot_5_mrg_examples.py) | 00:03.588 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_1_1d_examples (docs/examples/plot_1_1d_examples.py)    | 00:01.484 | 0.0 MB | +----------------------------------------------------------------------------------------+-----------+--------+ | plot_functions (docs/examples/plot_functions.py)                | 00:00.006 | 0.0 MB | 
+----------------------------------------------------------------------------------------+-----------+--------+</p>"},{"location":"generated/gallery/plot_1_1d_examples/","title":"Examples in 1D","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_1_1d_examples/#examples-in-1d","title":"Examples in 1D","text":"<p>This section contains several examples on how to generate 1D data with pyclugen. To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>from pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_1d</code> function:</p> <pre><code>from plot_functions import plot_examples_1d\n</code></pre> <p>Out:</p> <pre><code>/home/runner/work/pyclugen/pyclugen/docs/docs/examples/plot_functions.py:15: DeprecationWarning: \nPyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\nbut was not found to be installed on your system.\nIf this would cause problems for you,\nplease provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n\n  import pandas as pd\n</code></pre>"},{"location":"generated/gallery/plot_1_1d_examples/#basic-1d-example-with-density-plot","title":"Basic 1D example with density plot","text":"<pre><code>seed = 23456\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Weibull distribution\ndef proj_weibull(len, n, rng):\n    return len / 2 * rng.weibull(1.5, size=n)\n</code></pre> <pre><code>e082 = clugen(1, 3, 1000, [1], 0, [10], 6, 1.5, 0, rng=seed)\ne083 = clugen(1, 3, 1000, [1], 0, [10], 6, 1.5, 0, rng=seed, proj_dist_fn=\"unif\")\ne084 = clugen(1, 3, 1000, [1], 0, [10], 6, 1.5, 0, rng=seed, proj_dist_fn=proj_weibull)\n</code></pre> <pre><code>plot_examples_1d(\n    e082, \"e082: proj_dist_fn = 'norm' 
(default)\",\n    e083, \"e083: proj_dist_fn = 'unif'\",\n    e084, \"e084: custom proj_dist_fn (Weibull)\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  1.484 seconds)</p> <p> Download Python source code: plot_1_1d_examples.py</p> <p> Download Jupyter notebook: plot_1_1d_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_2_2d_examples/","title":"Examples in 2D","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_2_2d_examples/#examples-in-2d","title":"Examples in 2D","text":"<p>This section contains several examples on how to generate 2D data with pyclugen. To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_2d</code> function:</p> <pre><code>from plot_functions import plot_examples_2d\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-the-direction-of-cluster-supporting-lines","title":"Manipulating the direction of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_2_2d_examples/#using-the-direction-parameter","title":"Using the <code>direction</code> parameter","text":"<pre><code>seed = 123\n</code></pre> <pre><code>e001 = clugen(2, 4, 2000, [1, 0], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\ne002 = clugen(2, 4, 200, [1, 1], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\ne003 = clugen(2, 4, 200, [0, 1], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\n</code></pre> <pre><code>plot_examples_2d(\n    e001, \"e001: direction = [1, 0]\",\n    e002, \"e002: direction = [1, 1]\",\n    e003, \"e003: direction = [0, 1]\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#changing-the-angle_disp-parameter-and-using-a-custom-angle_deltas_fn-function","title":"Changing the <code>angle_disp</code> parameter and using a custom 
<code>angle_deltas_fn</code> function","text":"<pre><code>seed = 321\n</code></pre> <pre><code># Custom angle_deltas function: arbitrarily rotate some clusters by 90 degrees\ndef angdel_90_fn(nclu, astd, rng):\n    return rng.choice([0, np.pi / 2], size=nclu)\n</code></pre> <pre><code>e004 = clugen(2, 6, 500, [1, 0], 0, [10, 10], 10, 1.5, 0.5, rng=seed)\ne005 = clugen(2, 6, 500, [1, 0], np.pi / 8, [10, 10], 10, 1.5, 0.5, rng=seed)\ne006 = clugen(2, 6, 500, [1, 0], 0, [10, 10], 10, 1.5, 0.5, rng=seed,\n    angle_deltas_fn=angdel_90_fn)\n</code></pre> <pre><code>plot_examples_2d(\n    e004, \"e004: angle_disp = 0\",\n    e005, \"e005: angle_disp = \u03c0/8\",\n    e006, \"e006: custom angle_deltas function\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-the-length-of-cluster-supporting-lines","title":"Manipulating the length of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_2_2d_examples/#using-the-llength-parameter","title":"Using the <code>llength</code> parameter","text":"<pre><code>seed = 567\n</code></pre> <pre><code>e007 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10],  0, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne008 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 10, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne009 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 30, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\n</code></pre> <pre><code>plot_examples_2d(\n    e007, \"e007: llength = 0\",\n    e008, \"e008: llength = 10\",\n    e009, \"e009: llength = 30\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#changing-the-llength_disp-parameter-and-using-a-custom-llengths_fn-function","title":"Changing the <code>llength_disp</code> parameter and using a custom <code>llengths_fn</code> function","text":"<pre><code>seed = 567\n</code></pre> <pre><code># Custom llengths function: line lengths grow for each new cluster\ndef llen_grow_fn(nclu, llen, llenstd, rng):\n    return llen 
* np.arange(nclu) + rng.normal(scale=llenstd, size=nclu)\n</code></pre> <pre><code>e010 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 15,  0.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne011 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 15, 10.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne012 = clugen(2, 5, 800, [1, 0], np.pi / 10, [10, 10], 10,  0.1, 0.5, rng=seed,\n    llengths_fn=llen_grow_fn, point_dist_fn=\"n\")\n</code></pre> <pre><code>plot_examples_2d(\n    e010, \"e010: llength_disp = 0.0\",\n    e011, \"e011: llength_disp = 5.0\",\n    e012, \"e012: custom llengths function\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-relative-cluster-positions","title":"Manipulating relative cluster positions","text":""},{"location":"generated/gallery/plot_2_2d_examples/#using-the-cluster_sep-parameter","title":"Using the <code>cluster_sep</code> parameter","text":"<pre><code>seed = 21\n</code></pre> <pre><code>e013 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, rng=seed)\ne014 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [30, 10], 10, 2, 2.5, rng=seed)\ne015 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 30], 10, 2, 2.5, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e013, \"e013: cluster_sep = [10, 10]\",\n    e014, \"e014: cluster_sep = [30, 10]\",\n    e015, \"e015: cluster_sep = [10, 30]\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#changing-the-cluster_offset-parameter-and-using-a-custom-clucenters_fn-function","title":"Changing the <code>cluster_offset</code> parameter and using a custom <code>clucenters_fn</code> function","text":"<pre><code>seed = 21\n</code></pre> <pre><code># Custom clucenters function: places clusters in a diagonal\ndef centers_diag_fn(nclu, csep, coff, rng):\n    return np.ones((nclu, len(csep))) * np.arange(1, nclu + 1)[:, None] * np.max(csep) + coff\n</code></pre> <pre><code>e016 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, 
rng=seed)\ne017 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, rng=seed,\n    cluster_offset=[20, -20])\ne018 = clugen(2, 8, 1000, [1, 1], np.pi / 4, [10, 10], 10, 2, 2.5, rng=seed,\n    cluster_offset=[-50, -50], clucenters_fn=centers_diag_fn)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e016, \"e016: default\",\n    e017, \"e017: cluster_offset = [20, -20]\",\n    e018, \"e018: custom clucenters function\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#lateral-dispersion-and-placement-of-point-projections-on-the-line","title":"Lateral dispersion and placement of point projections on the line","text":""},{"location":"generated/gallery/plot_2_2d_examples/#normal-projection-placement-default-proj_dist_fn-norm","title":"Normal projection placement (default): <code>proj_dist_fn = \"norm\"</code>","text":"<pre><code>seed = 654\n</code></pre> <pre><code>e019 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 0.0, rng=seed)\ne020 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 1.0, rng=seed)\ne021 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 3.0, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e019, \"e019: lateral_disp = 0\",\n    e020, \"e020: lateral_disp = 1\",\n    e021, \"e021: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#uniform-projection-placement-proj_dist_fn-unif","title":"Uniform projection placement: <code>proj_dist_fn = \"unif\"</code>","text":"<pre><code>seed = 654\n</code></pre> <pre><code>e022 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne023 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne024 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=\"unif\")\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e022, \"e022: lateral_disp = 0\",\n    e023, \"e023: lateral_disp = 
1\",\n    e024, \"e024: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#custom-projection-placement-using-the-laplace-distribution","title":"Custom projection placement using the Laplace distribution","text":"<pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e025 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne026 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne027 = clugen(2, 4, 1000, [1, 0], np.pi / 2, [20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e025, \"e025: lateral_disp = 0\",\n    e026, \"e026: lateral_disp = 1\",\n    e027, \"e027: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#controlling-final-point-positions-from-their-projections-on-the-cluster-supporting-line","title":"Controlling final point positions from their projections on the cluster-supporting line","text":""},{"location":"generated/gallery/plot_2_2d_examples/#points-on-hyperplane-orthogonal-to-cluster-supporting-line-default-point_dist_fn-n-1","title":"Points on hyperplane orthogonal to cluster-supporting line (default): <code>point_dist_fn = \"n-1\"</code>","text":"<pre><code>seed = 1357\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e028 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed)\ne029 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne030 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    
proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e028, \"e028: proj_dist_fn=\\\"norm\\\" (default)\",\n    e029, \"e029: proj_dist_fn=\\\"unif\\\"\",\n    e030, \"e030: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#points-around-projection-on-cluster-supporting-line-point_dist_fn-n","title":"Points around projection on cluster-supporting line: <code>point_dist_fn = \"n\"</code>","text":"<pre><code>seed = 1357\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e031 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=\"n\")\ne032 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=\"unif\")\ne033 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e031, \"e031: proj_dist_fn=\\\"norm\\\" (default)\",\n    e032, \"e032: proj_dist_fn=\\\"unif\\\"\",\n    e033, \"e033: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#custom-point-placement-using-the-exponential-distribution","title":"Custom point placement using the exponential distribution","text":"<p>For this example we require the <code>clupoints_n_1_template()</code> helper function:</p> <pre><code>from pyclugen import clupoints_n_1_template\n</code></pre> <pre><code>seed = 1357\n</code></pre> <pre><code># Custom point_dist_fn: final points placed using the Exponential distribution\ndef clupoints_n_1_exp(projs, lat_std, len, clu_dir, clu_ctr, rng):\n    def dist_exp(npts, lstd, rg):\n        return lstd * rg.exponential(scale=2 / lstd, size=npts)\n    return clupoints_n_1_template(projs, lat_std, 
clu_dir, dist_exp, rng=rng)\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e034 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=clupoints_n_1_exp)\ne035 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=\"unif\")\ne036 = clugen(2, 5, 1500, [1, 0], np.pi / 3, [20, 20], 12, 3, 1.0, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e034, \"e034: proj_dist_fn=\\\"norm\\\" (default)\",\n    e035, \"e035: proj_dist_fn=\\\"unif\\\"\",\n    e036, \"e036: custom proj_dist_fn (Laplace)\")\n</code></pre> <p></p>"},{"location":"generated/gallery/plot_2_2d_examples/#manipulating-cluster-sizes","title":"Manipulating cluster sizes","text":"<pre><code>seed = 963\n</code></pre> <pre><code># Custom clusizes_fn (e038): cluster sizes determined via the uniform distribution,\n# no correction for total points\ndef clusizes_unif(nclu, npts, ae, rng):\n    return rng.integers(low=1, high=2 * npts / nclu + 1, size=nclu)\n</code></pre> <pre><code># Custom clusizes_fn (e039): clusters all have the same size, no correction for total points\ndef clusizes_equal(nclu, npts, ae, rng):\n    return (npts // nclu) * np.ones(nclu, dtype=int)\n</code></pre> <pre><code># Custom clucenters_fn (all): yields fixed positions for the clusters\ndef centers_fixed(nclu, csep, coff, rng):\n    return np.array([[-csep[0], -csep[1]], [csep[0], -csep[1]], [-csep[0], csep[1]], [csep[0], csep[1]]])\n</code></pre> <pre><code>e037 = clugen(2, 4, 1500, [1, 1], np.pi, [20, 20], 0, 0, 5, rng=seed,\n    point_dist_fn=\"n\", clucenters_fn=centers_fixed)\ne038 = clugen(2, 4, 1500, [1, 1], np.pi, [20, 20], 0, 0, 5, rng=seed,\n    point_dist_fn=\"n\", 
clucenters_fn=centers_fixed, clusizes_fn=clusizes_unif)\ne039 = clugen(2, 4, 1500, [1, 1], np.pi, [20, 20], 0, 0, 5, rng=seed,\n    point_dist_fn=\"n\", clucenters_fn=centers_fixed, clusizes_fn=clusizes_equal)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e037, \"e037: normal dist. (default)\",\n    e038, \"e038: unif. dist. (custom)\",\n    e039, \"e039: equal size (custom)\")\n</code></pre>"},{"location":"generated/gallery/plot_2_2d_examples/#direct-specification-of-optional-parameters","title":"Direct specification of optional parameters","text":"<pre><code>seed = 123\n</code></pre> <pre><code>e040 = clugen(2, 4, 1000, [-1, 1], 0, [0, 0], 0, 0, 0.2, rng=seed,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\", clusizes_fn=[50, 200, 500, 2000],\n    llengths_fn=[0, 2, 4, 6], clucenters_fn=[[-5, -5], [-2.5, -2.5], [0, 0], [2.5, 2.5]])\n\ne041 = clugen(2, 5, 1000, [[1, 1], [1, 0], [1, 0], [0, 1], [0, 1]],\n    0, [0, 0], 0, 0, 0.2, rng=seed,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\",\n    clusizes_fn=[200, 500, 500, 500, 500], llengths_fn=[0, 5, 5, 5, 5],\n    clucenters_fn=[[0, 0], [0, 5], [0, -5], [5, 0], [-5, 0]])\n\ne042 = clugen(2, 5, 1000, [[0, 1], [0.25, 0.75], [0.5, 0.5], [0.75, 0.25], [1, 0]],\n    0, [0, 0], 5, 0, 0.2, rng=seed,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\", clusizes_fn=[500, 500, 500, 500, 500],\n    clucenters_fn=[[-5, 0], [-3, -0.3], [-1, -0.8], [1, -1.6], [3, -2.5]])\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e040, \"e040: direct params 1\",\n    e041, \"e041: direct params 2\",\n    e042, \"e042: direct params 3\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  11.459 seconds)</p> <p> Download Python source code: plot_2_2d_examples.py</p> <p> Download Jupyter notebook: plot_2_2d_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_3_3d_examples/","title":"Examples in 3D","text":"<p>Note</p> <p>Click here to download the full example 
code</p>"},{"location":"generated/gallery/plot_3_3d_examples/#examples-in-3d","title":"Examples in 3D","text":"<p>This section contains several examples on how to generate 3D data with pyclugen. To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_3d</code> function:</p> <pre><code>from plot_functions import plot_examples_3d\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-the-direction-of-cluster-supporting-lines","title":"Manipulating the direction of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_3_3d_examples/#using-the-direction-parameter","title":"Using the <code>direction</code> parameter","text":"<pre><code>seed = 321\n</code></pre> <pre><code>e043 = clugen(3, 4, 500, [1, 0, 0], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne044 = clugen(3, 4, 500, [1, 1, 1], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne045 = clugen(3, 4, 500, [0, 0, 1], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e043, \"e043: direction = [1, 0, 0]\",\n    e044, \"e044: direction = [1, 1, 1]\",\n    e045, \"e045: direction = [0, 0, 1]\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#changing-the-angle_disp-parameter-and-using-a-custom-angle_deltas_fn-function","title":"Changing the <code>angle_disp</code> parameter and using a custom <code>angle_deltas_fn</code> function","text":"<pre><code>seed = 321\n\n# Custom angle_deltas function: arbitrarily rotate some clusters by 90 degrees\ndef angdel_90_fn(nclu, astd, rng):\n    return rng.choice([0, np.pi / 2], size=nclu)\n</code></pre> <pre><code>e046 = clugen(3, 6, 1000, [1, 0, 0], 0, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne047 = clugen(3, 6, 1000, [1, 0, 0], np.pi / 8, [10, 10, 10], 15, 1.5, 0.5, rng=seed)\ne048 = clugen(3, 6, 1000, [1, 0, 0], 0, [10, 10, 10], 
15, 1.5, 0.5, rng=seed,\n    angle_deltas_fn=angdel_90_fn)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e046, \"e046: angle_disp = 0\",\n    e047, \"e047: angle_disp = \u03c0 / 8\",\n    e048, \"e048: custom angle_deltas function\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#specifying-a-main-direction-for-each-cluster-and-changing-angle_disp","title":"Specifying a main <code>direction</code> for each cluster and changing <code>angle_disp</code>","text":"<pre><code>seed = 123\n\n# Define a main direction for each cluster\ndirs = [[1, 1, 1], [0, 0, 1], [1, 0, 0], [0, 1, 0], [-1, 1, 1]]\n</code></pre> <pre><code>e049 = clugen(3, 5, 1000, dirs, 0, np.zeros(3), 20, 0, 0.2, proj_dist_fn=\"unif\", rng=seed)\ne050 = clugen(3, 5, 1000, dirs, np.pi / 12, np.zeros(3), 20, 0, 0.2, proj_dist_fn=\"unif\", rng=seed)\ne051 = clugen(3, 5, 1000, dirs, np.pi / 4, np.zeros(3), 20, 0, 0.2, proj_dist_fn=\"unif\", rng=seed)\n</code></pre> <pre><code>plot_examples_3d(\n    e049, \"e049: angle_disp = 0\",\n    e050, \"e050: angle_disp = \u03c0 / 12\",\n    e051, \"e051: angle_disp = \u03c0 / 4\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-the-length-of-cluster-supporting-lines","title":"Manipulating the length of cluster-supporting lines","text":""},{"location":"generated/gallery/plot_3_3d_examples/#using-the-llength-parameter","title":"Using the <code>llength</code> parameter","text":"<pre><code>seed = 789\n</code></pre> <pre><code>e052 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 0, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne053 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 10, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne054 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 30, 0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e052, \"e052: llength = 0\",\n    e053, \"e053: llength = 10\",\n    e054, \"e054: llength = 
30\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#changing-the-llength_disp-parameter-and-using-a-custom-llengths_fn-function","title":"Changing the <code>llength_disp</code> parameter and using a custom <code>llengths_fn</code> function","text":"<pre><code>seed = 765\n</code></pre> <pre><code># Custom llengths function: line lengths tend to grow for each new cluster\ndef llen_grow_fn(nclu, llen, llenstd, rng):\n    return llen * np.arange(nclu) + rng.normal(scale=llenstd, size=nclu)\n\ne055 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 15,  0.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne056 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 15, 10.0, 0.5, rng=seed,\n    point_dist_fn=\"n\")\ne057 = clugen(3, 5, 800, [1, 0, 0], np.pi / 10, [10, 10, 10], 10,  0.1, 0.5, rng=seed,\n    point_dist_fn=\"n\", llengths_fn=llen_grow_fn)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e055, \"e055: llength_disp = 0.0\",\n    e056, \"e056: llength_disp = 10.0\",\n    e057, \"e057: custom llengths function\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-relative-cluster-positions","title":"Manipulating relative cluster positions","text":""},{"location":"generated/gallery/plot_3_3d_examples/#using-the-cluster_sep-parameter","title":"Using the <code>cluster_sep</code> parameter","text":"<pre><code>seed = 765\n</code></pre> <pre><code>e058 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [30, 10, 10], 25, 4, 3, rng=seed)\ne059 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 30, 10], 25, 4, 3, rng=seed)\ne060 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 30], 25, 4, 3, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e058, \"e058: cluster_sep = [30, 10, 10]\",\n    e059, \"e059: cluster_sep = [10, 30, 10]\",\n    e060, \"e060: cluster_sep = [10, 10, 
30]\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#changing-the-cluster_offset-parameter-and-using-a-custom-clucenters_fn-function","title":"Changing the <code>cluster_offset</code> parameter and using a custom <code>clucenters_fn</code> function","text":"<pre><code># Custom clucenters function: places clusters in a diagonal\ndef centers_diag_fn(nclu, csep, coff, rng):\n    return np.ones((nclu, len(csep))) * np.arange(1, nclu + 1)[:, None] * np.max(csep) + coff\n\ne061 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 10], 12, 3, 2.5, rng=seed)\ne062 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 10], 12, 3, 2.5, rng=seed,\n    cluster_offset=[30, -30, 30])\ne063 = clugen(3, 8, 1000, [1, 1, 1], np.pi / 4, [10, 10, 10], 12, 3, 2.5, rng=seed,\n    cluster_offset=[-40, -40, -40], clucenters_fn=centers_diag_fn)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e061, \"e061: default\",\n    e062, \"e062: cluster_offset=[30, -30, 30]\",\n    e063, \"e063: custom clucenters function\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#lateral-dispersion-and-placement-of-point-projections-on-the-line","title":"Lateral dispersion and placement of point projections on the line","text":""},{"location":"generated/gallery/plot_3_3d_examples/#normal-projection-placement-default-proj_dist_fnnorm","title":"Normal projection placement (default): <code>proj_dist_fn=\"norm\"</code>","text":"<pre><code>seed = 246\n</code></pre> <pre><code>e064 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 0.0, rng=seed)\ne065 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 1.0, rng=seed)\ne066 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 3.0, rng=seed)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e064, \"e064: lateral_disp = 0\",\n    e065, \"e065: lateral_disp = 1\",\n    e066, \"e066: lateral_disp = 
3\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#uniform-projection-placement-proj_dist_fnunif","title":"Uniform projection placement: <code>proj_dist_fn=\"unif\"</code>","text":"<pre><code>seed = 246\n</code></pre> <pre><code>e067 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne068 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=\"unif\")\ne069 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=\"unif\")\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e067, \"e067: lateral_disp = 0\",\n    e068, \"e068: lateral_disp = 1\",\n    e069, \"e069: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#custom-projection-placement-using-the-laplace-distribution","title":"Custom projection placement using the Laplace distribution","text":"<pre><code>seed = 246\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e070 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 0.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne071 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 1.0, rng=seed,\n    proj_dist_fn=proj_laplace)\ne072 = clugen(3, 4, 1000, [1, 0, 0], np.pi / 2, [20, 20, 20], 13, 2, 3.0, rng=seed,\n    proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e070, \"e070: lateral_disp = 0\",\n    e071, \"e071: lateral_disp = 1\",\n    e072, \"e072: lateral_disp = 3\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#controlling-final-point-positions-from-their-projections-on-the-cluster-supporting-line","title":"Controlling final point positions from their projections on the cluster-supporting 
line","text":""},{"location":"generated/gallery/plot_3_3d_examples/#points-on-hyperplane-orthogonal-to-cluster-supporting-line-default-point_dist_fnn-1","title":"Points on hyperplane orthogonal to cluster-supporting line (default): <code>point_dist_fn=\"n-1\"</code>","text":"<pre><code>seed = 840\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e073 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed)\ne074 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    proj_dist_fn=\"unif\")\ne075 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e073, \"e073: proj_dist_fn=\\\"norm\\\" (default)\",\n    e074, \"e074: proj_dist_fn=\\\"unif\\\"\",\n    e075, \"e075: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#points-around-projection-on-cluster-supporting-line-point_dist_fnn","title":"Points around projection on cluster-supporting line: <code>point_dist_fn=\"n\"</code>","text":"<pre><code>seed = 840\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n\ne076 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=\"n\")\ne077 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=\"unif\")\ne078 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=\"n\", proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e076, \"e076: proj_dist_fn=\\\"norm\\\" (default)\",\n    e077, \"e077: 
proj_dist_fn=\\\"unif\\\"\",\n    e078, \"e078: custom proj_dist_fn (Laplace)\")\n</code></pre>"},{"location":"generated/gallery/plot_3_3d_examples/#custom-point-placement-using-the-exponential-distribution","title":"Custom point placement using the exponential distribution","text":"<p>For this example we require the <code>clupoints_n_1_template()</code> helper function:</p> <pre><code>from pyclugen import clupoints_n_1_template\n</code></pre> <pre><code>seed = 840\n</code></pre> <pre><code># Custom point_dist_fn: final points placed using the Exponential distribution\ndef clupoints_n_1_exp(projs, lat_std, len, clu_dir, clu_ctr, rng):\n    def dist_exp(npts, lstd, rg):\n        return lstd * rg.exponential(scale=2 / lstd, size=npts)\n    return clupoints_n_1_template(projs, lat_std, clu_dir, dist_exp, rng=rng)\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Laplace distribution\ndef proj_laplace(len, n, rng):\n    return rng.laplace(scale=len / 6, size=n)\n</code></pre> <pre><code>e079 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=clupoints_n_1_exp)\ne080 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=\"unif\")\ne081 = clugen(3, 5, 1500, [1, 0, 0], np.pi / 3, [20, 20, 20], 22, 3, 2, rng=seed,\n    point_dist_fn=clupoints_n_1_exp, proj_dist_fn=proj_laplace)\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e079, \"e079: proj_dist_fn=\\\"norm\\\" (default)\",\n    e080, \"e080: proj_dist_fn=\\\"unif\\\"\",\n    e081, \"e081: custom proj_dist_fn (Laplace)\")\n</code></pre> <p></p>"},{"location":"generated/gallery/plot_3_3d_examples/#manipulating-cluster-sizes","title":"Manipulating cluster sizes","text":"<pre><code>seed = 555\n</code></pre> <pre><code># Custom clusizes_fn (e083): cluster sizes determined via the uniform distribution,\n# no correction for total points\ndef clusizes_unif(nclu, npts, 
ae, rng):\n    return rng.integers(low=1, high=2 * npts / nclu + 1, size=nclu)\n</code></pre> <pre><code># Custom clusizes_fn (e084): clusters all have the same size, no correction for total points\ndef clusizes_equal(nclu, npts, ae, rng):\n    return (npts // nclu) * np.ones(nclu, dtype=int)\n</code></pre> <pre><code># Custom clucenters_fn (all): yields fixed positions for the clusters\ndef centers_fixed(nclu, csep, coff, rng):\n    return np.array([\n        [-csep[0], -csep[1], -csep[2]],\n        [csep[0], -csep[1], -csep[2]],\n        [-csep[0], csep[1], csep[2]],\n        [csep[0], csep[1], csep[2]]])\n</code></pre> <pre><code>e082 = clugen(3, 4, 1500, [1, 1, 1], np.pi, [20, 20, 20], 0, 0, 5, rng=seed,\n    clucenters_fn=centers_fixed, point_dist_fn=\"n\")\ne083 = clugen(3, 4, 1500, [1, 1, 1], np.pi, [20, 20, 20], 0, 0, 5, rng=seed,\n    clucenters_fn=centers_fixed, clusizes_fn=clusizes_unif, point_dist_fn=\"n\")\ne084 = clugen(3, 4, 1500, [1, 1, 1], np.pi, [20, 20, 20], 0, 0, 5, rng=seed,\n    clucenters_fn=centers_fixed, clusizes_fn=clusizes_equal, point_dist_fn=\"n\")\n</code></pre> <pre><code>plt = plot_examples_3d(\n    e082, \"e082: normal dist. (default)\",\n    e083, \"e083: unif. dist. (custom)\",\n    e084, \"e084: equal size (custom)\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  5.724 seconds)</p> <p> Download Python source code: plot_3_3d_examples.py</p> <p> Download Jupyter notebook: plot_3_3d_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_4_nd_examples/","title":"Examples in nD","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_4_nd_examples/#examples-in-nd","title":"Examples in nD","text":"<p>This section contains several examples on how to generate nD (n &gt; 3) data with pyclugen. 
To run the examples we first need to import the <code>clugen()</code> function:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen\n</code></pre> <p>To plot these examples we use the <code>plot_examples_nd</code> function:</p> <pre><code>from plot_functions import plot_examples_nd\n</code></pre>"},{"location":"generated/gallery/plot_4_nd_examples/#5d-example-with-default-optional-arguments","title":"5D example with default optional arguments","text":"<pre><code>seed = 123\n</code></pre> <pre><code># Number of dimensions\nnd = 5\n</code></pre> <pre><code>e085 = clugen(nd, 6, 1500, [1, 1, 0.5, 0, 0], np.pi / 16, 30 * np.ones(nd), 30, 4, 3, rng=seed)\n</code></pre> <pre><code>plot_examples_nd(e085, \"e085: 5D with optional parameters set to defaults\")\n</code></pre>"},{"location":"generated/gallery/plot_4_nd_examples/#5d-example-with-proj_dist_fn-unif-and-point_dist_fn-n","title":"5D example with <code>proj_dist_fn = \"unif\"</code> and <code>point_dist_fn = \"n\"</code>","text":"<pre><code>seed = 579\n</code></pre> <pre><code># Number of dimensions\nnd = 5\n</code></pre> <pre><code>e086 = clugen(nd, 6, 1500, [0.1, 0.3, 0.5, 0.3, 0.1], np.pi / 12, 30 * np.ones(nd), 35, 5, 3.5,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\", rng=seed)\n</code></pre> <pre><code>plot_examples_nd(e086, \"e086: 5D with proj_dist_fn=\\\"unif\\\" and point_dist_fn=\\\"n\\\"\")\n</code></pre>"},{"location":"generated/gallery/plot_4_nd_examples/#4d-example-with-custom-projection-placement-using-the-beta-distribution","title":"4D example with custom projection placement using the Beta distribution","text":"<pre><code>seed = 963\n</code></pre> <pre><code># Number of dimensions\nnd = 4\n</code></pre> <pre><code># Custom proj_dist_fn: point projections placed using the Beta distribution\ndef proj_beta(len, n, rng):\n    return len * rng.beta(0.1, 0.1, size=n) - len / 2\n</code></pre> <pre><code>e087 = clugen(nd, 5, 1500, np.ones(nd), np.pi / 6, 30 * np.ones(nd), 60, 15, 6, 
rng=seed,\n    proj_dist_fn=proj_beta)\n</code></pre> <pre><code>plot_examples_nd(e087, \"e087: 4D with custom proj_dist_fn (Beta)\")\n</code></pre> <p>Total running time of the script: ( 0 minutes  7.035 seconds)</p> <p> Download Python source code: plot_4_nd_examples.py</p> <p> Download Jupyter notebook: plot_4_nd_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_5_mrg_examples/","title":"Merging and hierarchical cluster examples","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_5_mrg_examples/#merging-and-hierarchical-cluster-examples","title":"Merging and hierarchical cluster examples","text":"<p>This section contains several examples on how to merge cluster data, either generated with pyclugen or from other sources. To run the examples we first need to import the <code>clugen()</code> and <code>clugen()</code> functions:</p> <pre><code>import numpy as np\nfrom pyclugen import clugen, clumerge\n</code></pre> <p>Although it is possible to merge data in any dimension, these examples will focus on merging 2D data. 
Therefore, we'll use the same <code>plot_examples_2d</code> function used for the 2D examples:</p> <pre><code>from plot_functions import plot_examples_2d\n</code></pre>"},{"location":"generated/gallery/plot_5_mrg_examples/#merging-two-data-sets-generated-with-clugen","title":"Merging two data sets generated with <code>clugen()</code>","text":"<pre><code>seed1 = 444\nseed2 = 555\n</code></pre> <pre><code>e088 = clugen(2, 5, 1000, [1, 1], np.pi / 12, [20, 20], 14, 1.2, 1.5, rng=seed1,\n    proj_dist_fn=\"unif\", point_dist_fn=\"n\")\ne089 = clugen(2, 3, 1500, [1, 0], 0.05, [20, 20], 0, 0, 4, rng=seed2,\n    point_dist_fn=\"n\", cluster_offset = [20, 0])\ne090 = clumerge(e088, e089)\n</code></pre> <pre><code>plot_examples_2d(\n    e088, \"e088: data set 1\",\n    e089, \"e089: data set 2\",\n    e090, \"e090: merged data sets\")\n</code></pre> <p>In the previous example, clusters from individual data sets remain as separate clusters in the merged data set. It's also possible to maintain the original cluster labels by setting the <code>clusters_field</code> parameter to <code>None</code>:</p> <pre><code>e091 = clumerge(e088, e089, clusters_field=None)\n</code></pre> <pre><code>plot_examples_2d(\n    e088, \"e088: data set 1\",\n    e089, \"e089: data set 2\",\n    e091, \"e091: merged data sets\")\n</code></pre> <p></p>"},{"location":"generated/gallery/plot_5_mrg_examples/#adding-noise-to-a-clugen-generated-data-set","title":"Adding noise to a <code>clugen()</code>-generated data set","text":"<pre><code>seed = 333\n</code></pre> <pre><code>prng = np.random.default_rng(seed)\ne092 = {\"points\": 120 * prng.random((500, 2)) - 60, \"clusters\": np.ones(500, dtype=np.int32)}\ne093 = clumerge(e092, e090) # clumerge(e092, e088, e089) would also work\n</code></pre> <pre><code>plot_examples_2d(\n    e090, \"e090: original merged data sets\",\n    e092, \"e092: random uniform noise\",\n    e093, \"e093: data sets with noise\",\n    
pmargin=0)\n</code></pre>"},{"location":"generated/gallery/plot_5_mrg_examples/#merging-with-data-not-generated-with-clugen","title":"Merging with data not generated with <code>clugen()</code>","text":"<p>Data generated with <code>clugen()</code> can be merged with other data sets, for example data created with one of scikit-learn's generators:</p> <pre><code>seed = 321\n</code></pre> <pre><code>from sklearn.datasets import make_moons\n\nX, y = make_moons(100, noise=0.05, random_state=seed)\n\ne094 = {\"points\": X, \"clusters\": y}\ne095 = clugen(2, 4, 200, [1, 1], np.pi / 12, [1, 1], 0.1, 0.01, 0.25, rng=seed,\n    proj_dist_fn = \"unif\", point_dist_fn = \"n\")\ne096 = clumerge(e094, e095)\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e094, \"e094: generated w/ make_moons()\",\n    e095, \"e095: generated w/ clugen()\",\n    e096, \"e096: merged data\")\n</code></pre> <p></p> <p>We can also hierarchize clusters from different sources:</p> <pre><code>e097 = {**e094, \"hclusters\": np.ones(100, dtype=np.int32)}\ne098 = {**e095._asdict(), \"hclusters\": 2 * np.ones(200, np.int32)}\ne099 = clumerge(e097, e098, clusters_field=\"hclusters\")\n</code></pre> <pre><code>plt = plot_examples_2d(\n    e097, \"e097: generated w/ make_moons()\",\n    e098, \"e098: generated w/ clugen()\",\n    e099, \"e099: merged data\",\n    clusters_field=\"hclusters\")\n</code></pre> <p></p> <p>Total running time of the script: ( 0 minutes  3.588 seconds)</p> <p> Download Python source code: plot_5_mrg_examples.py</p> <p> Download Jupyter notebook: plot_5_mrg_examples.ipynb</p> <p>Gallery generated by mkdocs-gallery</p>"},{"location":"generated/gallery/plot_functions/","title":"Plot functions","text":"<p>Note</p> <p>Click here to download the full example code</p>"},{"location":"generated/gallery/plot_functions/#plot-functions","title":"Plot functions","text":"<p>Several auxiliary functions for plotting the examples in this 
documentation.</p>"},{"location":"generated/gallery/plot_functions/#import-the-required-libraries","title":"Import the required libraries","text":"<pre><code>import os\nimport warnings\n\nimport matplotlib.pyplot as plt  # type: ignore\nimport numpy as np\nimport numpy.typing as npt\nimport pandas as pd\nimport seaborn as sns  # type: ignore\n\nfrom pyclugen import Clusters\n\n# Hide annoying warnings when building docs in CI\nif os.getenv(\"CI\") != None:\n    warnings.filterwarnings(\"ignore\")\n</code></pre>"},{"location":"generated/gallery/plot_functions/#clusters2df","title":"clusters2df","text":"<pre><code>def clusters2df(\n    *exs: Clusters | dict[str, npt.ArrayLike], clusters_field: str = \"clusters\"\n) -&gt; pd.DataFrame:\n    \"\"\"Convert a sequence of clusters to a Pandas dataframe.\"\"\"\n\n    dfs = []\n    iex = 1\n\n    for ex in exs:\n        if isinstance(ex, dict):\n            points = ex[\"points\"]\n            clusters = ex[clusters_field]\n        else:\n            points = ex.points\n            clusters = ex.clusters\n\n        df = pd.DataFrame(\n            data=points, columns=[f\"x{i}\" for i in range(np.size(points, 1))]\n        )\n        df[\"cluster\"] = clusters.tolist()\n        df[\"example\"] = [iex] * clusters.size\n        dfs.append(df)\n        iex += 1\n\n    return pd.concat(dfs, ignore_index=True)\n</code></pre>"},{"location":"generated/gallery/plot_functions/#get_plot_lims","title":"get_plot_lims","text":"<pre><code>def get_plot_lims(df: pd.DataFrame, pmargin: float = 0.1):\n    \"\"\"Determine the plot limits for the cluster data given in `df`.\"\"\"\n\n    # Get maximum and minimum points in each dimension\n    xmaxs = df.iloc[:, :-2].max()\n    xmins = df.iloc[:, :-2].min()\n\n    # Determine plot centers in each dimension\n    xcenters = (xmaxs + xmins) / 2\n\n    # Determine plots span for all dimensions\n    sidespan = (1 + pmargin) * np.max(np.abs(xmaxs - xmins)) / 2\n\n    # Determine final plots limits\n    
xmaxs = xcenters + sidespan\n    xmins = xcenters - sidespan\n\n    return xmaxs, xmins\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_1d","title":"plot_examples_1d","text":"<pre><code>def plot_examples_1d(*ets, ncols: int = 3, clusters_field: str = \"clusters\"):\n    \"\"\"Plot the 1D examples given in the ets parameter.\"\"\"\n\n    # Get examples\n    ex = ets[0::2]\n    # Get titles\n    et = ets[1::2]\n\n    df = clusters2df(*ex, clusters_field=clusters_field)\n\n    # Set seaborn's dark grid style\n    sns.set_theme(style=\"darkgrid\")\n\n    # Use seaborn to create the plots\n    g = sns.FacetGrid(df, col=\"example\", hue=\"cluster\", col_wrap=ncols)\n\n    # Plot the kernel density estimation plots\n    g.map(sns.kdeplot, \"x0\", multiple=\"layer\", fill=True)\n\n    # Get a flattened view of the axes array\n    g_axes = g.axes.reshape(-1)\n\n    # Determine the height of the rugs in the rug plot to 5% of total height\n    rug_height = g_axes[0].get_ylim()[1] * 0.05\n\n    # Plot the rug markers below the kde plots\n    g.map(sns.rugplot, \"x0\", height=rug_height)\n\n    # Set titles\n    for ax, t in zip(g_axes, et):\n        ax.set_title(t)\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_2d","title":"plot_examples_2d","text":"<pre><code>def plot_examples_2d(\n    *ets, pmargin: float = 0.1, ncols: int = 3, clusters_field: str = \"clusters\"\n):\n    \"\"\"Plot the 2D examples given in the ets parameter.\"\"\"\n\n    # Get examples\n    ex = ets[0::2]\n    # Get titles\n    et = ets[1::2]\n\n    df = clusters2df(*ex, clusters_field=clusters_field)\n\n    # Get limits in each dimension\n    xmaxs, xmins = get_plot_lims(df, pmargin=pmargin)\n\n    # Set seaborn's dark grid style\n    sns.set_theme(style=\"darkgrid\")\n\n    # Use seaborn to create the plots\n    g = sns.FacetGrid(\n        df,\n        col=\"example\",\n        hue=\"cluster\",\n        xlim=(xmins.iloc[0], xmaxs.iloc[0]),\n      
  ylim=(xmins.iloc[1], xmaxs.iloc[1]),\n        aspect=1,\n        col_wrap=ncols,\n    )\n\n    g.map(sns.scatterplot, \"x0\", \"x1\", s=10)\n\n    # Set the plot titles and x, y labels\n    for ax, t in zip(g.axes, et):\n        ax.set_title(t)\n        ax.set_xlabel(\"x\")\n        ax.set_ylabel(\"y\")\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_3d","title":"plot_examples_3d","text":"<pre><code>def plot_examples_3d(\n    *ets,\n    pmargin: float = 0.1,\n    ncols: int = 3,\n    side=350,\n    clusters_field: str = \"clusters\",\n):\n    \"\"\"Plot the 3D examples given in the ets parameter.\"\"\"\n\n    # Get examples\n    ex = ets[0::2]\n    # Get titles\n    et = ets[1::2]\n\n    # Number of plots and number of rows in combined plot\n    num_plots = len(ex)\n    nrows = max(1, int(np.ceil(num_plots / ncols)))\n    blank_plots = nrows * ncols - num_plots\n\n    df = clusters2df(*ex, clusters_field=clusters_field)\n\n    # Get limits in each dimension\n    xmaxs, xmins = get_plot_lims(df, pmargin=pmargin)\n\n    # Reset to default Matplotlib style, to avoid seaborn interference\n    sns.reset_orig()\n\n    # To convert inches to pixels afterwards\n    px = 1 / plt.rcParams[\"figure.dpi\"]  # pixel in inches\n\n    # Use Matplotlib to create the plots\n    _, axs = plt.subplots(\n        nrows,\n        ncols,\n        figsize=(side * px * ncols, side * px * nrows),\n        subplot_kw=dict(projection=\"3d\"),\n    )\n    axs = axs.reshape(-1)\n    for ax, e, t in zip(axs, ex, et):\n        ax.set_title(t, fontsize=10)\n        ax.set_xlim(xmins.iloc[0], xmaxs.iloc[0])\n        ax.set_ylim(xmins.iloc[1], xmaxs.iloc[1])\n        ax.set_zlim(xmins.iloc[2], xmaxs.iloc[2])\n        ax.set_xlabel(\"$x$\", labelpad=-2)\n        ax.set_ylabel(\"$y$\", labelpad=-2)\n        ax.set_zlabel(\"$z$\", labelpad=-2)\n        ax.tick_params(labelsize=8, pad=-2)\n        ax.scatter(\n            e.points[:, 0],\n            e.points[:, 1],\n     
       e.points[:, 2],\n            c=e.clusters,\n            depthshade=False,\n            edgecolor=\"black\",\n            linewidths=0.2,\n        )\n\n    # Remaining plots are left blank\n    for ax in axs[len(ex) : len(ex) + blank_plots]:\n        ax.set_axis_off()\n        ax.set_facecolor(color=\"white\")\n        ax.patch.set_alpha(0)\n</code></pre>"},{"location":"generated/gallery/plot_functions/#plot_examples_nd","title":"plot_examples_nd","text":"<pre><code>def plot_examples_nd(\n    ex: Clusters, t: str, pmargin: float = 0.1, clusters_field: str = \"clusters\"\n):\n    \"\"\"Plot the nD example given in the ex parameter.\"\"\"\n\n    # How many dimensions?\n    nd = ex.points.shape[1]\n\n    df = clusters2df(ex, clusters_field=clusters_field)\n\n    # Get limits in each dimension\n    xmaxs, xmins = get_plot_lims(df, pmargin=pmargin)\n\n    # Set seaborn's dark grid style\n    sns.set_theme(style=\"darkgrid\")\n\n    # Create pairwise plots with nothing on the diagonal\n    g = sns.PairGrid(df.iloc[:, :-1], hue=\"cluster\", palette=\"deep\")\n    g.map_offdiag(sns.scatterplot, s=10)\n    g.figure.suptitle(t, y=1)\n\n    # Decorate plot\n    for i in range(nd):\n        for j in range(nd):\n            if i == j:\n                # Set the x labels in the diagonal plots\n                xycoord = (xmaxs.iloc[i] + xmins.iloc[i]) / 2\n                g.axes[i, i].text(\n                    xycoord, xycoord, f\"$x{i}$\", fontsize=20, ha=\"center\", va=\"center\"\n                )\n            else:\n                # Set appropriate plot intervals and aspect ratio\n                g.axes[i, j].set_xlim([xmins.iloc[j], xmaxs.iloc[j]])\n                g.axes[i, j].set_ylim([xmins.iloc[i], xmaxs.iloc[i]])\n                g.axes[i, j].set_aspect(1)\n</code></pre> <p>Total running time of the script: ( 0 minutes  0.006 seconds)</p> <p> Download Python source code: plot_functions.py</p> <p> Download Jupyter notebook: plot_functions.ipynb</p> <p>Gallery 
generated by mkdocs-gallery</p>"}]}
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 0ea5bd1036eae6ba7109d9371e5364cae492ffaf..d876657471573744cec15426c7dcc0463b473a3a 100644
GIT binary patch
delta 15
WcmdnYw3&%bzMF$XXVXSDT}A*RT?6g_

delta 15
WcmdnYw3&%bzMF%?eB(woT}A*RngjR%