From e13cbc423ad0367a868d2c4e06f503647b8913b5 Mon Sep 17 00:00:00 2001 From: Hao Zhuang Date: Fri, 10 Jan 2020 17:57:03 -0800 Subject: [PATCH] Internal change PiperOrigin-RevId: 289198382 Change-Id: Idc47613f838ad56b806c0b046467f19be49f9a9a --- qkeras/experimental/args.py | 97 ++++++++ qkeras/experimental/forest_gen.ipynb | 343 +++++++++++++++++++++++++++ 2 files changed, 440 insertions(+) create mode 100644 qkeras/experimental/args.py create mode 100644 qkeras/experimental/forest_gen.ipynb diff --git a/qkeras/experimental/args.py b/qkeras/experimental/args.py new file mode 100644 index 00000000..1e1e8696 --- /dev/null +++ b/qkeras/experimental/args.py @@ -0,0 +1,97 @@ +# Copyright 2019 Google LLC +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse + + +def ParserArgs(): + """Args Parser.""" + parser = argparse.ArgumentParser() + parser.add_argument("--debug", default=False, action="store_true", + help="set debug mode") + parser.add_argument("--print_debug", default=False, action="store_true", + help="print debug information") + + parser.add_argument("--model", default="", + help="which model to run (dmnist, cmnist)") + + parser.add_argument("-o", "--logic_optimize", default=False, + action="store_true", + help="optimize network.") + + parser.add_argument("-l", "--load_weight", default=False, + action="store_true", + help="load weights directly from file.") + parser.add_argument("-w", "--weight_file", default=None, + help="name of weights file") + + parser.add_argument("--output_group", type=int, default=1, + help="number of outputs to group together") + parser.add_argument("--kernel", default=None, type=int, + help="kernel if more complex layer") + parser.add_argument("--strides", default=None, type=int, + help="stride if more complex layer") + parser.add_argument("--padding", default=None, + help="padding if more complex layer") + + parser.add_argument("--conv_sample", default=None, type=int, + help="number of samples within image for conv layer") + parser.add_argument("--sample", default=None, + help="number of training samples") + + parser.add_argument("--use_pla", default=False, + action="store_true", + help="use pla table format") + parser.add_argument("--binary", default=False, + action="store_true", + help="use binary inputs") + + parser.add_argument("--i_name", default=None, + help="input layer name") + parser.add_argument("--o_name", default=None, + help="output layer name") + parser.add_argument("--qi", default="2,0,0", + help="quantized input type") + parser.add_argument("--qo", default="2,0,0", + help="quantized output type") + + parser.add_argument("--run_abc", default=False, action="store_true", + help="use abc to optimize logic") + parser.add_argument("--espresso_flags", default="-Dexpand", + help="flags to be passed to espresso") + parser.add_argument("--abc_flags", 
default="", + help="flags to be passed to abc") + + parser.add_argument("--run_rf", default=False, action="store_true", + help="use ranform forest to optimize logic") + parser.add_argument("--n_trees", default=3, type=int, + help="number of trees to optimize") + parser.add_argument("--max_bits", default=1, type=int, + help="maximum number of bits for random forest") + parser.add_argument("--is_regressor", default=False, action="store_true", + help="use regressor instead of classifier") + parser.add_argument("--n_features", default=None, + help="number of features for random forest") + parser.add_argument("--max_depth", default=None, + help="maximum depth of random tree") + parser.add_argument("--sample_size", default=None, + help="sample size of table for random tree") + + return parser.parse_args() diff --git a/qkeras/experimental/forest_gen.ipynb b/qkeras/experimental/forest_gen.ipynb new file mode 100644 index 00000000..de11bcf0 --- /dev/null +++ b/qkeras/experimental/forest_gen.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gBvgSF6c_rk9" + }, + "source": [ + "\n", + "\n", + "\n", + "Experimental code to produce a abc synthesizable netlist from a sci-kit learn random forest. \n", + "Untested. Proceed with caution.\n", + "author: schatter@google.com" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "V68GP6x8yf9E", + "outputId": "e66c913b-29db-46a9-c366-7cee945723bd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The scikit-learn version is 0.21.3.\n", + "The TF version is 2.0.0.\n" + ] + } + ], + "source": [ + "#%tensorflow_version 1.x\n", + "import os\n", + "import re\n", + "import numpy as np\n", + "# TF is used only to read MNIST data\n", + "import tensorflow as tf\n", + "import sklearn\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "print('The scikit-learn version is {}.'.format(sklearn.__version__))\n", + "print('The TF version is {}.'.format(tf.__version__))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "_m9iOo25y9xv" + }, + "outputs": [], + "source": [ + "def tree_predict(tree, node, x):\n", + " \n", + " assert node \u003e= 0\n", + " \n", + " left = tree.children_left\n", + " right = tree.children_right # -1 is sentinel for none\n", + " feats = tree.feature # -2 is sentinel for none\n", + " thresh = tree.threshold\n", + " values = tree.value\n", + " \n", + " if feats[node] == -2: # leaf node\n", + " assert left[node] == -1\n", + " assert right[node] == -1\n", + " return values[node] / values[node].sum()\n", + " else:\n", + " assert left[node] != -1\n", + " assert right[node] != -1\n", + " # note: we are int'ing the threshold since we don't think it matters\n", + " # as the features are all ints anyway\n", + " if x[feats[node]] \u003c= int(thresh[node]):\n", + " return tree_predict(tree, left[node], x)\n", + " else:\n", + " return tree_predict(tree, right[node], x)\n", + "\n", + " \n", + "def forest_predict(model, x, debug=False):\n", + "\n", + " res = tree_predict(model.estimators_[0].tree_, 0, x) \n", + " for estimator in model.estimators_[1:]:\n", + " res += tree_predict(estimator.tree_, 0, x)\n", + " \n", + " if debug:\n", + " print(res.reshape(-1).astype(np.int32))\n", + " return res.reshape(-1).argmax()\n", + "\n", + "\n", + "def 
accuracy(model, examples):\n", + " return np.array([forest_predict(model, example) for example in examples])" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + }, + "colab_type": "code", + "id": "_zaLSBNO0Izi", + "outputId": "aad1f57a-eedb-4c27-ccf6-c15329143f5b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=3, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=2,\n", + " n_jobs=None, oob_score=False, random_state=0, verbose=0,\n", + " warm_start=False)\n", + "name = real, ta = 0.4905, va = 0.5057\n", + "verified\n", + "RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=3, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=2,\n", + " n_jobs=None, oob_score=False, random_state=0, verbose=0,\n", + " warm_start=False)\n", + "name = random, ta = 0.11363333333333334, va = 0.1142\n", + "verified\n" + ] + } + ], + "source": [ + "def generate(name, randomize_labels, nverify=1000):\n", + " (tx, ty), (vx, vy) = tf.keras.datasets.mnist.load_data()\n", + "\n", + " if randomize_labels:\n", + " ty = np.roll(ty, 127) # np.random.permutation(ty)\n", + "\n", + " tx = tx.reshape(60000, -1)\n", + " vx = vx.reshape(10000, -1)\n", + " \n", + " # note we turn off bootstrap so that samples are taken without resampling\n", + " # and as a result sample weights are always 1 and so inference is simpler\n", + " # m = RandomForestClassifier(n_estimators=10, bootstrap=False, random_state=0)\n", + " # TODO: tiny tree\n", + " m = RandomForestClassifier(n_estimators=2, max_depth=3, bootstrap=False, random_state=0)\n", + " m.fit(tx, ty)\n", + " \n", + " print(m)\n", + " print(\"name = {}, ta = {}, va = {}\".format(name, m.score(tx, ty), \n", + " m.score(vx, vy))) \n", + "\n", + " nverify = min(60000, nverify)\n", + " \n", + " mine = accuracy(m, tx[:nverify])\n", + " golden = m.predict(tx[:nverify])\n", + " \n", + " assert (mine == golden).all()\n", + " # print(np.arange(nverify)[mine != golden])\n", + " print(\"verified\")\n", + "\n", + " # write_model(m, name)\n", + " # print(\"done writing {}\".format(name))\n", + " \n", + " return m\n", + " \n", + "mreal = generate('real', randomize_labels=False)\n", + "mrand = generate('random', randomize_labels=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5yDyMjua_rlJ", + "outputId": "a6b3ac06-c625-4321-ebd4-3908b27d02c7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "module forest(\r\n", + " input wire [7:0] x0,\r\n", + " input wire [7:0] x1,\r\n", + " input wire [7:0] x2,\r\n", + " input wire [7:0] x3,\r\n", + " input wire [7:0] x4,\r\n", + " input wire [7:0] x5,\r\n", + " input wire [7:0] x6,\r\n", + " input wire [7:0] x7,\r\n", + " input wire [7:0] x8,\r\n" + ] + } + ], + "source": [ + "def dump_tree(tree, node, tree_id, n_classes_y, file):\n", + " \n", + " assert node \u003e= 0\n", + " \n", + " left = tree.children_left\n", + " right = tree.children_right # -1 is sentinel for none\n", + " 
feats = tree.feature # -2 is sentinel for none\n", + " thresh = tree.threshold\n", + " values = tree.value\n", + "\n", + " for i in range(n_classes_y):\n", + " print(' wire [7:0] n_{}_{}_{};'.format(tree_id, node, i), file=file)\n", + " \n", + " if feats[node] == -2: # leaf node\n", + " assert left[node] == -1\n", + " assert right[node] == -1\n", + " #print(' wire [7:0] n{};'.format(node), file=file)\n", + " \n", + " # for some reason (multi output classes?) tree.value has an extra dimension\n", + " assert values[node].shape == (1, n_classes_y)\n", + " class_probabilities = (values[node] / values[node].sum())[0]\n", + " \n", + " for i in range(n_classes_y):\n", + " p_float = class_probabilities[i]\n", + " p_fixed = int(p_float * 255. + 0.5)\n", + " print(' assign n_{}_{}_{} = 8\\'h{:x}; // {}'.format(tree_id, node, i, p_fixed, p_float), file=file)\n", + " return\n", + " else:\n", + " assert left[node] != -1\n", + " assert right[node] != -1\n", + " # note: we are int'ing the threshold since we don't think it matters\n", + " # as the features are all ints anyway\n", + " dump_tree(tree, left[node], tree_id, n_classes_y, file=file)\n", + " dump_tree(tree, right[node], tree_id, n_classes_y, file=file)\n", + "\n", + " #for i in range(n_classes_y):\n", + " # print(' wire [7:0] n{}_{};'.format(node, i), file=file)\n", + " print(' wire c_{}_{};'.format(tree_id, node), file=file)\n", + " \n", + " assert 0. \u003c= thresh[node] \n", + " assert thresh[node] \u003c 255.\n", + " threshold = int(thresh[node])\n", + " \n", + " print(' assign c_{}_{} = x{} \u003c= 8\\'h{:x};'.format(tree_id, node, feats[node], threshold), file=file)\n", + " \n", + " for i in range(n_classes_y): \n", + " print(' assign n_{}_{}_{} = c_{}_{} ? n_{}_{}_{} : n_{}_{}_{};'.format(\n", + " tree_id, node, i, \n", + " tree_id, node, \n", + " tree_id, left[node], i, \n", + " tree_id, right[node], i), \n", + " file=file)\n", + "\n", + "\n", + "def dump_verilog(model, width_x, n_classes_y):\n", + " with open('output.v', 'w') as f:\n", + " print(\"module forest(\", file=f)\n", + " for i in range(width_x):\n", + " print(\" input wire [7:0] x{}{}\".format(i, ','), file=f) \n", + " for i in range(n_classes_y):\n", + " print(\" output wire [15:0] y{}{}\".format(i, ',' if i \u003c n_classes_y - 1 else ''), file=f) \n", + " print(\" );\", file=f)\n", + " \n", + " for i, estimator in enumerate(model.estimators_):\n", + " print(' // dumping tree {}'.format(i), file=f)\n", + " dump_tree(estimator.tree_, node=0, tree_id=i, n_classes_y=n_classes_y, file=f) \n", + "\n", + " for c in range(n_classes_y):\n", + " print(' wire [15:0] s_{}_{};'.format(i, c), file=f)\n", + " print(' wire [15:0] e_{}_{};'.format(i, c), file=f)\n", + " print(' assign e_{}_{} = {} 8\\'h0, n_{}_0_{} {};'.format(i, c, '{', i, c, '}'), file=f)\n", + " if i \u003e 0:\n", + " print(' assign s_{}_{} = s_{}_{} + e_{}_{};'.format(i, c, i - 1, c, i, c), file=f)\n", + " else:\n", + " print(' assign s_{}_{} = e_{}_{};'.format(i, c, i, c), file=f)\n", + "\n", + " for c in range(n_classes_y):\n", + " print(' assign y{} = s_{}_{};'.format(c, len(model.estimators_) - 1, c), file=f)\n", + " \n", + " print(\"endmodule\", file=f)\n", + " \n", + " \n", + "dump_verilog(mreal, width_x=784, n_classes_y=10)\n", + "!head output.v\n", + "# verilator can take 3 mins to lint the resulting Verilog file if 10 trees and unlimited depth is used!\n", + "# !verilator output.v --lint-only\n", + "# !abc/abc -c \"%read output.v; %blast; \u0026ps; \u0026put; write test_syn.v\"\n", + "#!cat test_syn.v" + ] + }, 
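+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sanity-check sketch (not part of the original flow): it mirrors in Python the fixed-point datapath that dump_verilog emits, i.e. integer thresholds, 8-bit quantized leaf probabilities, and 16-bit per-class accumulators, and compares the resulting decisions with the sklearn model. The helper names tree_predict_fixed and forest_predict_fixed are illustrative, not existing APIs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch only: re-implements the arithmetic of the generated Verilog so that\n",
+    "# output.v semantics can be checked against the sklearn model before synthesis.\n",
+    "# tree_predict_fixed / forest_predict_fixed are illustrative helper names.\n",
+    "\n",
+    "def tree_predict_fixed(tree, node, x):\n",
+    "  left = tree.children_left\n",
+    "  right = tree.children_right\n",
+    "  feats = tree.feature\n",
+    "  thresh = tree.threshold\n",
+    "  values = tree.value\n",
+    "  if feats[node] == -2:  # leaf node\n",
+    "    p = (values[node] / values[node].sum()).reshape(-1)\n",
+    "    # same rounding as dump_tree: int(p * 255. + 0.5)\n",
+    "    return np.floor(p * 255. + 0.5).astype(np.int64)\n",
+    "  if x[feats[node]] \u003c= int(thresh[node]):\n",
+    "    return tree_predict_fixed(tree, left[node], x)\n",
+    "  return tree_predict_fixed(tree, right[node], x)\n",
+    "\n",
+    "\n",
+    "def forest_predict_fixed(model, x):\n",
+    "  acc = np.zeros(model.n_classes_, dtype=np.int64)\n",
+    "  for estimator in model.estimators_:\n",
+    "    acc += tree_predict_fixed(estimator.tree_, 0, x)\n",
+    "  return acc.argmax()\n",
+    "\n",
+    "\n",
+    "(cx, cy), _ = tf.keras.datasets.mnist.load_data()\n",
+    "cx = cx.reshape(60000, -1)\n",
+    "fixed = np.array([forest_predict_fixed(mreal, ex) for ex in cx[:1000]])\n",
+    "golden = mreal.predict(cx[:1000])\n",
+    "# rounding the leaf probabilities to 8 bits may flip a few close calls,\n",
+    "# so report the agreement rate rather than asserting exact equality\n",
+    "print('fixed-point vs sklearn agreement: {}'.format((fixed == golden).mean()))"
+   ]
+  },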
+ { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "RccyGmmK_rlM" + }, + "outputs": [], + "source": [ + "# ABC limitations:\n", + "# read silently fails whereas %read works\n", + "# if a PO is not driven an assertion fails in blast\n", + "# verilator limitations:\n", + "# sometimes when the input is bad verilator may get stuck!" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "forest-tobias.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}