From e4e7fe5e9a669bce7f40703b61cfa951d92bd085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 25 Jul 2018 17:06:13 +0200 Subject: [PATCH] first tutorial --- examples/DataFrameExample.sln | 10 ++-- examples/DataFrameExample/Program.cs | 36 -------------- .../GroupBy_Plot.csproj} | 6 +-- examples/GroupBy_Plot/Program.cs | 45 ++++++++++++++++++ .../airquality.csv | 0 examples/README.rst | 17 +++++++ examples/simple_graph.png | Bin 0 -> 5941 bytes 7 files changed, 69 insertions(+), 45 deletions(-) delete mode 100644 examples/DataFrameExample/Program.cs rename examples/{DataFrameExample/DataFrameExample.csproj => GroupBy_Plot/GroupBy_Plot.csproj} (61%) create mode 100644 examples/GroupBy_Plot/Program.cs rename examples/{DataFrameExample => GroupBy_Plot}/airquality.csv (100%) create mode 100644 examples/README.rst create mode 100644 examples/simple_graph.png diff --git a/examples/DataFrameExample.sln b/examples/DataFrameExample.sln index 09ad08a..34dfafd 100644 --- a/examples/DataFrameExample.sln +++ b/examples/DataFrameExample.sln @@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.27703.2042 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DataFrameExample", "DataFrame\DataFrameExample.csproj", "{1F3267A2-7B3D-40BC-A030-3D33D659DE74}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GroupBy_Plot", "GroupBy_Plot\GroupBy_Plot.csproj", "{81BD059C-A2D0-4CA9-A61B-27980DE92199}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -11,10 +11,10 @@ Global Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {1F3267A2-7B3D-40BC-A030-3D33D659DE74}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {1F3267A2-7B3D-40BC-A030-3D33D659DE74}.Debug|Any CPU.Build.0 = Debug|Any CPU - {1F3267A2-7B3D-40BC-A030-3D33D659DE74}.Release|Any CPU.ActiveCfg = Release|Any CPU - {1F3267A2-7B3D-40BC-A030-3D33D659DE74}.Release|Any CPU.Build.0 = Release|Any CPU + {81BD059C-A2D0-4CA9-A61B-27980DE92199}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {81BD059C-A2D0-4CA9-A61B-27980DE92199}.Debug|Any CPU.Build.0 = Debug|Any CPU + {81BD059C-A2D0-4CA9-A61B-27980DE92199}.Release|Any CPU.ActiveCfg = Release|Any CPU + {81BD059C-A2D0-4CA9-A61B-27980DE92199}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/examples/DataFrameExample/Program.cs b/examples/DataFrameExample/Program.cs deleted file mode 100644 index 15f3d7b..0000000 --- a/examples/DataFrameExample/Program.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System; -using System.Linq; -using Scikit.ML.DataFrame; -using OxyPlot; -using OxyPlot.Series; -using Microsoft.ML.Runtime.Data; - -namespace DataFrameExample -{ - class Program - { - static void Main(string[] args) - { - var df = DataFrame.ReadCsv("airquality.csv"); - Console.WriteLine("Shape: {0}", df.Shape); - Console.WriteLine("Columns: {0}", string.Join(",", df.Columns)); - - Console.WriteLine("df.iloc[0, 1] = {0}", df.iloc[0, 1]); - Console.WriteLine("df.loc[0, 'Ozone'] = {0}", df.loc[0, "Ozone"]); - - df["Quarter"] = df["Month"] / 4; - Console.WriteLine("Head\n{0}", df.Head()); - var gr = df.GroupBy(new[] { "Quarter" }).Sum(); - Console.WriteLine("Grouped by Quarter\n{0}", gr); - - var gr2 = df.Drop(new[] { "Ozone", "Solar_R" }).Copy().GroupBy(new[] { "Quarter" }).Sum(); - Console.WriteLine("Grouped by Quarter, no Ozone\n{0}", gr2); - - var plot = new PlotModel { Title = "Simple Graph" }; - var serie = new LineSeries(); - serie.Points.AddRange(Enumerable.Range(0, 2).Select(i => new DataPoint((int)(DvInt4)gr2.iloc[i, 0], (int)(DvInt4)gr2.iloc[i, 1]))); - - OxyPlot.Wpf.PngExporter.Export(plot, "graph.png", 600, 400, OxyColors.White); - } - } -} diff --git a/examples/DataFrameExample/DataFrameExample.csproj b/examples/GroupBy_Plot/GroupBy_Plot.csproj similarity index 61% rename from examples/DataFrameExample/DataFrameExample.csproj rename to examples/GroupBy_Plot/GroupBy_Plot.csproj index d2705d0..c83e59e 100644 --- a/examples/DataFrameExample/DataFrameExample.csproj +++ b/examples/GroupBy_Plot/GroupBy_Plot.csproj @@ -2,13 +2,11 @@ Exe - netcoreapp2.0 - DataFrameExample + netcoreapp2.1 - - + diff --git a/examples/GroupBy_Plot/Program.cs b/examples/GroupBy_Plot/Program.cs new file mode 100644 index 0000000..a9c5c7d --- /dev/null +++ b/examples/GroupBy_Plot/Program.cs @@ -0,0 +1,45 @@ +using System; +using System.IO; +using System.Linq; +using Scikit.ML.DataFrame; +using OxyPlot; +using OxyPlot.Axes; +using OxyPlot.Series; +using Microsoft.ML.Runtime.Data; + +namespace GroupBy_Plot +{ + class Program + { + static void Main(string[] args) + { + var df = DataFrame.ReadCsv("airquality.csv"); + Console.WriteLine("Shape: {0}", df.Shape); + Console.WriteLine("Columns: {0}", string.Join(",", df.Columns)); + + Console.WriteLine("df.iloc[0, 1] = {0}", df.iloc[0, 1]); + Console.WriteLine("df.loc[0, 'Ozone'] = {0}", df.loc[0, "Ozone"]); + + var monthColumn = df["Month"]; + var min = (double)(DvInt4)monthColumn.Aggregate(AggregatedFunction.Min).Get(0); // Syntax should be improved. + var max = (double)(DvInt4)monthColumn.Aggregate(AggregatedFunction.Max).Get(0); // Syntax should be improved. + df["MonthFrac"] = (df["Month"] - min) / (max - min); + Console.WriteLine("Head\n{0}", df.Head()); + var gr = df.GroupBy(new[] { "MonthFrac" }).Sum(); + Console.WriteLine("Grouped by MonthFrac\n{0}", gr); + + var gr2 = df.Drop(new[] { "Ozone", "Solar_R" }).Copy().GroupBy(new[] { "MonthFrac" }).Sum(); + Console.WriteLine("Grouped by MonthFrac, no Ozone\n{0}", gr2); + + var plot = new PlotModel { Title = "Simple Graph" }; + var serie = new ScatterSeries(); + serie.Points.AddRange(Enumerable.Range(0, gr.Shape.Item1).Select(i => new ScatterPoint((double)gr2.iloc[i, 0], (float)gr2.iloc[i, 2]))); + plot.Series.Add(serie); + plot.Axes.Add(new LinearAxis { Position = AxisPosition.Bottom, Title = gr2.Columns[0] }); + plot.Axes.Add(new LinearAxis { Position = AxisPosition.Left, Title = gr2.Columns[2] }); + + var plotString = SvgExporter.ExportToString(plot, 600, 400, true); + File.WriteAllText("graph2.svg", plotString); + } + } +} diff --git a/examples/DataFrameExample/airquality.csv b/examples/GroupBy_Plot/airquality.csv similarity index 100% rename from examples/DataFrameExample/airquality.csv rename to examples/GroupBy_Plot/airquality.csv diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 0000000..890798c --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,17 @@ + +Tutorial to Scikit.ML.DataFrame +=============================== + +.. contents:: + :local: + :depth: 1 + +DataFrame + GroupBy + Svg Plot +++++++++++++++++++++++++++++++ + +* `sln `_ +* `source `_ + +Output: + +.. image:: simple_graph.png diff --git a/examples/simple_graph.png b/examples/simple_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..0bde275d6a9057c2d777641ffa23ae86abcc0c60 GIT binary patch literal 5941 zcmb_gc~}$KvJW~UqX>c#k;MpzL_xwJI|w=waJj$)m3GKHDy|3=P6xpkCui&)( zk;FUDGVnc>cdC3tZ%huB#K%z#Wxz-5D32m!#AUvz9^OTRFK-uxUpelt0m~=slHUb0 zf`9gqCEvfZ)9~H)o=+l=@W-D+XF8TN*SRypCKzsI z0v(iHCB|1ZJt_6BXlG7D zc{OK(@=|&4jf=@fq*Vzfe(>{tS`(&A)t~g<8An`Y;b$Gl=+8Ktj~))8Wv_zFl9l%s z=yGm3p*EIM4hfWD;034i;6Cl%FiDHf`p;11No<2^L^f!Q|j< z>B&$ra`&*XInsLe>SmSLqgnF^3;{QHo?nnZ!A)|8rx%R8=Sza?_Vyk zaVhMH92@F*bs8nFt}v?GMKA4~waS#k#~54N8u;5^*?g$Oe`+SIYsUqPDG4XVuOnhs z&ksh^`?GcNPdf<(P~|w2Q@Ebz_EY*w{7OoviWKpEZVNuUGjfxU2Cza6Nmo)(v}<>( z7lOm_T^GcEhj1~AI5tSaedq1emN^jo%Z2hsshESYx4fL7UW+*yY_47 z9#G7maevOrE5q>?FAVGOg_xqQLRrVhN{w=2roiy>_6L}KmTgJ-_p`$Kj>74ufNfgq@Rwj-P4 zN4u6&Mi4*yX2E*k!JGC0EcQOSI_;34%VjMI!yOnuaUu;>cUu&(@nW4r9WB;hFO6x) z17(~AlNius%l;`;)Pp*T(`BJ#+Oo5qfkccfDE$cwD)y7Plu%>|(dS3!MW>0c-laNt zTYYDl6nzlGVi+Et8Vog;)f@gO(P#)agi2c zuK!JOe~(O5wU$Cr@!;GsQ0TZOzLjj!C&i_DtBiG$xsqR$!e`mB;UZRGzVWkU#B?-t z+s7!+xZzDQSJ#X?2(z&BMS!$QfRQMJjiC(C&G>uY3k&GJp$O<9blYjse^Yg~K7P`G z&ug06Rr^hBhpy|jg|hky0;ie4w?}}Rm(KjY68`yyzboqnQ)e@=)1*e+I~7o7Z!UBo zVE8T!_Y`mrpa(h1bLHMYvikcXzZK$TN$Ppr7bi6>4PB|xK9|v49n4Du>uS0=uun*E z{BXNR0C+V$F5w!HGN*Od9jo%<$JnXF&DFI72W&6=8kNFaL|yItw42>gx!{r7cBgPD zghxO4EDa?j1{b-hZ}$t>ML=sHM#c&3U6fS3p}9@_JM<>bC{^KkAsrxE2_?p}hMP<7 z5gV+vt&ZbC9!bF9+jBlz;%)J?R+*85M7H8?hlPhJ?6099oxn2)!JGh*`jNzcgioS{ zA`2WZZS?MdtkWBLp#zM1?;I<@jHnmq7jn_Un%5ClV8Ij9mq1}-YjMNl-pL~rSM=#Z zpeG8g&`2#RVjxp6Rn&dninjUV3@f0`(S34MwMh?w zP)}=Zb-TYFU%T1>=v#pgtE7N-1%2_4tVuQorj8qRJwG0Q%k#ZOGb=j{DH@k(`HRA z)%0Ee117Z(18g@93Bilo8h6e!-0h-u;#tVDjvTwCJKFJU9 zH0@9&QLO<hP_n^8_&7KT77s`bqx%$_h$76uHSbC z>2$k-dhFnQd+i%Gbw^4ayzFo$^9-d*-0CY{hA9*2YO&?gaSBlnUb|nO zo7&mn`%*2}_`#b*ALk(eVj5>)h}hgX7P3_HgR{_nCi?V-e=%SHnpb0M@VFa}1MKQ{QLXQwL|7R%d#GSQ0vUO#7MHitLY?;w@1TH%ofIr_Setm8A< zv>&1>c$!(AfV{NlszUKG4lVj(iZVduGN&~04H529h(am-9Z!V&jY&alzhIS?s?4pN zas_9)ZGU4irHZGrp`Wyr)=hgM%=-j}bLx{M8IjS0Gp zwO6)CO`F-oyg1B55)*o;^_nq)_GK$86BM;)8B2$YJ{U!n%sUhY=;)=It}v(2%H_9t z@CYMXQOCV2HRFApzj~?NBMs_>xQ;Ek*?AqmXrx-KZ3PBk>wN|hD>I0NMc(WP&S^>b z!BogtJyFIe&Q+>vYmEV-_yb8n{!tb8e*jfRvH)Nc?e#-WTDlV1a(J^7SoTmrD%=qZ1H;sOJWjPQkKFNmR zEeBqUb0_cn%xA_Z0$hMcDNgY!0$}WSAO0aB_?OlQ)ZXmy;oNkuh400YY>NAa$p(`1 zH#h-U|JU=(U(?c)3*}IvyKBGs>t7Y$q8i-I==E8$BVZ*^~zqBAiiU-YTimam_6h&Oem^uJB0v zyL(57U#7jYYDMcsU;kRgd&j*s24WODbQF>ydkC3{m36;?hYoh3h*E>)KWR$D=9auy z3H~Ck>VSC(1#mcYn;%mSkXQbB6p-=U|S=z*q z+cFBzEghV1gm6Db=_hG!KW)6l1iBoIyYF-XL{=Si{{G6n;&5PtC&3d zu=Tnc-ztnX_gfht?$s@(>~y#H0JT`ZBhgHMHG`?ApisT{bI|6LWA;9K9$XC;9wdBy z+cT_9G12T#+GmfQqwHj`X_{k(2OO54Tb^2YC#Ov+hlNgD{1EyLgCXn|)|-1{AK3p0 zm_Ae9tH0KsF!r{mj!lIP;1*l8+wZP_tC=-H2VXMv8`@utr8XLH<}<8%nFicd-USE* z)-7hDgPLst>r#~Mg8_2wJ8buDy@7#i4G9}6ZHB~i4yWj*1il`TX7yprfOTA9Az$+J zm-B&juIn#8K0szufpwZwVB{H3baS*akES6g8CnlW*6tAm`a)v%#*u(DK%_Wy1~nVP zSO-+0!xq!Y^Kodv*07Bc2*bDe1Wy1rstDN_+zHtAAR{j;__#vX3K6n74u=IJO#NiC z;HNg`8UQ)n+1HD!WrQBwZ1f%ish6s%jXUUV0bXqLuAxgVobAf#56`IBsUn;uI8$qM%vX(4w>8?jv}V zjRD?-!R4?VSDP$>xEcDG5U`%<5Nb-@QVy{B)&yjQ!R(kZsW;n_CqK29ntiK{h$T+w z;Wg8|zCZ9rMBqKV*_A>FyF_S_yC#u7Gn6YC2Q4}e?v}x$M5*dCPGG*l+yDJj>DN?= zH1d&Pd7$;g1tTLU)9)bZjSDvk#(v7F8Z8fD-pi2>Pz zJXo__PM2+cs z`Qi{vYsGSIbYll}<(I~U0`{%Q5SI@L7P=#1C}pEd(_T$s3UB3Z%#+j@5NScr+8i6Q zFDV=?v@^xaEX~3oY-(kRU8~IGPwZ`;Y-|Z{?L-jsbiRPvm9mn|-cOg+WDzFpa7lkFJ~p1Pz7co6M&{R`5k_i)y&o`=na@yIV(F^k7+ zY;*ThQww2{pEq?)x^Ww{4D5v=`h>p-RkBCD!b(X+N>KW)ek&f@m2R_`Z`zB5pS;5y zFq{~z^Hz}w{J=LzBxsrzT_~_eL?8`q$=qk3G z(*C0ESnNt2sVJky?rrnRBZv=k*B7p0IJfUa@LzvfNu6wS6Bj5}Dy)Kjp`s0g z5-*KD9y2FZHLc_~NFelT)=(D4pIYtB?F{Nh^Lmq$I}VY3a=dB>DIozhVsKC;7Wk(l zlf{PB9Xe=sty_=pxq8N)7cSdrV&3+KBFgo8_>hJ!#TyNq=UZM6KsC-*s^QyAA~5^l zl>#EvcFvR4=T|pAV9{!QL$XPWN7MGCXOn_d3i(inRy?w%Pijj&x}?(7 zl6oom7!#_j{8F6iOQSf2b1H1}O5P6#=I8mo-PU$wm_H?9*Aq*5{BmsNxo$VKRP=E^ zMR?&(OF92#PZ{~ki05OHl4)|(u5GQA$(BD|Tr`#ZMWb8ro~+#+QED2Y>#BBAaK`hh zo;C|^({j$qspaZ6)svlNkKVJpp0>8RJ=i#P?ML#>;{x61Z)pY;> literal 0 HcmV?d00001