From e13cbc423ad0367a868d2c4e06f503647b8913b5 Mon Sep 17 00:00:00 2001 From: Hao Zhuang Date: Fri, 10 Jan 2020 17:57:03 -0800 Subject: [PATCH] Internal change PiperOrigin-RevId: 289198382 Change-Id: Idc47613f838ad56b806c0b046467f19be49f9a9a --- qkeras/experimental/args.py | 97 ++++++++ qkeras/experimental/forest_gen.ipynb | 343 +++++++++++++++++++++++++++ 2 files changed, 440 insertions(+) create mode 100644 qkeras/experimental/args.py create mode 100644 qkeras/experimental/forest_gen.ipynb diff --git a/qkeras/experimental/args.py b/qkeras/experimental/args.py new file mode 100644 index 00000000..1e1e8696 --- /dev/null +++ b/qkeras/experimental/args.py @@ -0,0 +1,97 @@ +# Copyright 2019 Google LLC +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse + + +def ParserArgs(): + """Args Parser.""" + parser = argparse.ArgumentParser() + parser.add_argument("--debug", default=False, action="store_true", + help="set debug mode") + parser.add_argument("--print_debug", default=False, action="store_true", + help="print debug information") + + parser.add_argument("--model", default="", + help="which model to run (dmnist, cmnist)") + + parser.add_argument("-o", "--logic_optimize", default=False, + action="store_true", + help="optimize network.") + + parser.add_argument("-l", "--load_weight", default=False, + action="store_true", + help="load weights directly from file.") + parser.add_argument("-w", "--weight_file", default=None, + help="name of weights file") + + parser.add_argument("--output_group", type=int, default=1, + help="number of outputs to group together") + parser.add_argument("--kernel", default=None, type=int, + help="kernel if more complex layer") + parser.add_argument("--strides", default=None, type=int, + help="stride if more complex layer") + parser.add_argument("--padding", default=None, + help="padding if more complex layer") + + parser.add_argument("--conv_sample", default=None, type=int, + help="number of samples within image for conv layer") + parser.add_argument("--sample", default=None, + help="number of training samples") + + parser.add_argument("--use_pla", default=False, + action="store_true", + help="use pla table format") + parser.add_argument("--binary", default=False, + action="store_true", + help="use binary inputs") + + parser.add_argument("--i_name", default=None, + help="input layer name") + parser.add_argument("--o_name", default=None, + help="output layer name") + parser.add_argument("--qi", default="2,0,0", + help="quantized input type") + parser.add_argument("--qo", default="2,0,0", + help="quantized output type") + + parser.add_argument("--run_abc", default=False, action="store_true", + help="use abc to optimize logic") + parser.add_argument("--espresso_flags", default="-Dexpand", + help="flags to be passed to espresso") + parser.add_argument("--abc_flags", 
default="", + help="flags to be passed to abc") + + parser.add_argument("--run_rf", default=False, action="store_true", + help="use ranform forest to optimize logic") + parser.add_argument("--n_trees", default=3, type=int, + help="number of trees to optimize") + parser.add_argument("--max_bits", default=1, type=int, + help="maximum number of bits for random forest") + parser.add_argument("--is_regressor", default=False, action="store_true", + help="use regressor instead of classifier") + parser.add_argument("--n_features", default=None, + help="number of features for random forest") + parser.add_argument("--max_depth", default=None, + help="maximum depth of random tree") + parser.add_argument("--sample_size", default=None, + help="sample size of table for random tree") + + return parser.parse_args() diff --git a/qkeras/experimental/forest_gen.ipynb b/qkeras/experimental/forest_gen.ipynb new file mode 100644 index 00000000..de11bcf0 --- /dev/null +++ b/qkeras/experimental/forest_gen.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gBvgSF6c_rk9" + }, + "source": [ + "\n", + "\n", + "\n", + "Experimental code to produce a abc synthesizable netlist from a sci-kit learn random forest. \n", + "Untested. Proceed with caution.\n", + "author: schatter@google.com" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "V68GP6x8yf9E", + "outputId": "e66c913b-29db-46a9-c366-7cee945723bd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The scikit-learn version is 0.21.3.\n", + "The TF version is 2.0.0.\n" + ] + } + ], + "source": [ + "#%tensorflow_version 1.x\n", + "import os\n", + "import re\n", + "import numpy as np\n", + "# TF is used only to read MNIST data\n", + "import tensorflow as tf\n", + "import sklearn\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "print('The scikit-learn version is {}.'.format(sklearn.__version__))\n", + "print('The TF version is {}.'.format(tf.__version__))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "_m9iOo25y9xv" + }, + "outputs": [], + "source": [ + "def tree_predict(tree, node, x):\n", + " \n", + " assert node \u003e= 0\n", + " \n", + " left = tree.children_left\n", + " right = tree.children_right # -1 is sentinel for none\n", + " feats = tree.feature # -2 is sentinel for none\n", + " thresh = tree.threshold\n", + " values = tree.value\n", + " \n", + " if feats[node] == -2: # leaf node\n", + " assert left[node] == -1\n", + " assert right[node] == -1\n", + " return values[node] / values[node].sum()\n", + " else:\n", + " assert left[node] != -1\n", + " assert right[node] != -1\n", + " # note: we are int'ing the threshold since we don't think it matters\n", + " # as the features are all ints anyway\n", + " if x[feats[node]] \u003c= int(thresh[node]):\n", + " return tree_predict(tree, left[node], x)\n", + " else:\n", + " return tree_predict(tree, right[node], x)\n", + "\n", + " \n", + "def forest_predict(model, x, debug=False):\n", + "\n", + " res = tree_predict(model.estimators_[0].tree_, 0, x) \n", + " for estimator in model.estimators_[1:]:\n", + " res += tree_predict(estimator.tree_, 0, x)\n", + " \n", + " if debug:\n", + " print(res.reshape(-1).astype(np.int32))\n", + " return res.reshape(-1).argmax()\n", + "\n", + "\n", + "def 
accuracy(model, examples):\n", + " return np.array([forest_predict(model, example) for example in examples])" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + }, + "colab_type": "code", + "id": "_zaLSBNO0Izi", + "outputId": "aad1f57a-eedb-4c27-ccf6-c15329143f5b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=3, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=2,\n", + " n_jobs=None, oob_score=False, random_state=0, verbose=0,\n", + " warm_start=False)\n", + "name = real, ta = 0.4905, va = 0.5057\n", + "verified\n", + "RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=3, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=2,\n", + " n_jobs=None, oob_score=False, random_state=0, verbose=0,\n", + " warm_start=False)\n", + "name = random, ta = 0.11363333333333334, va = 0.1142\n", + "verified\n" + ] + } + ], + "source": [ + "def generate(name, randomize_labels, nverify=1000):\n", + " (tx, ty), (vx, vy) = tf.keras.datasets.mnist.load_data()\n", + "\n", + " if randomize_labels:\n", + " ty = np.roll(ty, 127) # np.random.permutation(ty)\n", + "\n", + " tx = tx.reshape(60000, -1)\n", + " vx = vx.reshape(10000, -1)\n", + " \n", + " # note we turn off bootstrap so that samples are taken without resampling\n", + " # and as a result sample weights are always 1 and so inference is simpler\n", + " # m = RandomForestClassifier(n_estimators=10, bootstrap=False, random_state=0)\n", + " # TODO: tiny tree\n", + " m = RandomForestClassifier(n_estimators=2, max_depth=3, bootstrap=False, random_state=0)\n", + " m.fit(tx, ty)\n", + " \n", + " print(m)\n", + " print(\"name = {}, ta = {}, va = {}\".format(name, m.score(tx, ty), \n", + " m.score(vx, vy))) \n", + "\n", + " nverify = min(60000, nverify)\n", + " \n", + " mine = accuracy(m, tx[:nverify])\n", + " golden = m.predict(tx[:nverify])\n", + " \n", + " assert (mine == golden).all()\n", + " # print(np.arange(nverify)[mine != golden])\n", + " print(\"verified\")\n", + "\n", + " # write_model(m, name)\n", + " # print(\"done writing {}\".format(name))\n", + " \n", + " return m\n", + " \n", + "mreal = generate('real', randomize_labels=False)\n", + "mrand = generate('random', randomize_labels=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5yDyMjua_rlJ", + "outputId": "a6b3ac06-c625-4321-ebd4-3908b27d02c7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "module forest(\r\n", + " input wire [7:0] x0,\r\n", + " input wire [7:0] x1,\r\n", + " input wire [7:0] x2,\r\n", + " input wire [7:0] x3,\r\n", + " input wire [7:0] x4,\r\n", + " input wire [7:0] x5,\r\n", + " input wire [7:0] x6,\r\n", + " input wire [7:0] x7,\r\n", + " input wire [7:0] x8,\r\n" + ] + } + ], + "source": [ + "def dump_tree(tree, node, tree_id, n_classes_y, file):\n", + " \n", + " assert node \u003e= 0\n", + " \n", + " left = tree.children_left\n", + " right = tree.children_right # -1 is sentinel for none\n", + " 
feats = tree.feature # -2 is sentinel for none\n", + " thresh = tree.threshold\n", + " values = tree.value\n", + "\n", + " for i in range(n_classes_y):\n", + " print(' wire [7:0] n_{}_{}_{};'.format(tree_id, node, i), file=file)\n", + " \n", + " if feats[node] == -2: # leaf node\n", + " assert left[node] == -1\n", + " assert right[node] == -1\n", + " #print(' wire [7:0] n{};'.format(node), file=file)\n", + " \n", + " # for some reason (multi output classes?) tree.value has an extra dimension\n", + " assert values[node].shape == (1, n_classes_y)\n", + " class_probabilities = (values[node] / values[node].sum())[0]\n", + " \n", + " for i in range(n_classes_y):\n", + " p_float = class_probabilities[i]\n", + " p_fixed = int(p_float * 255. + 0.5)\n", + " print(' assign n_{}_{}_{} = 8\\'h{:x}; // {}'.format(tree_id, node, i, p_fixed, p_float), file=file)\n", + " return\n", + " else:\n", + " assert left[node] != -1\n", + " assert right[node] != -1\n", + " # note: we are int'ing the threshold since we don't think it matters\n", + " # as the features are all ints anyway\n", + " dump_tree(tree, left[node], tree_id, n_classes_y, file=file)\n", + " dump_tree(tree, right[node], tree_id, n_classes_y, file=file)\n", + "\n", + " #for i in range(n_classes_y):\n", + " # print(' wire [7:0] n{}_{};'.format(node, i), file=file)\n", + " print(' wire c_{}_{};'.format(tree_id, node), file=file)\n", + " \n", + " assert 0. \u003c= thresh[node] \n", + " assert thresh[node] \u003c 255.\n", + " threshold = int(thresh[node])\n", + " \n", + " print(' assign c_{}_{} = x{} \u003c= 8\\'h{:x};'.format(tree_id, node, feats[node], threshold), file=file)\n", + " \n", + " for i in range(n_classes_y): \n", + " print(' assign n_{}_{}_{} = c_{}_{} ? n_{}_{}_{} : n_{}_{}_{};'.format(\n", + " tree_id, node, i, \n", + " tree_id, node, \n", + " tree_id, left[node], i, \n", + " tree_id, right[node], i), \n", + " file=file)\n", + "\n", + "\n", + "def dump_verilog(model, width_x, n_classes_y):\n", + " with open('output.v', 'w') as f:\n", + " print(\"module forest(\", file=f)\n", + " for i in range(width_x):\n", + " print(\" input wire [7:0] x{}{}\".format(i, ','), file=f) \n", + " for i in range(n_classes_y):\n", + " print(\" output wire [15:0] y{}{}\".format(i, ',' if i \u003c n_classes_y - 1 else ''), file=f) \n", + " print(\" );\", file=f)\n", + " \n", + " for i, estimator in enumerate(model.estimators_):\n", + " print(' // dumping tree {}'.format(i), file=f)\n", + " dump_tree(estimator.tree_, node=0, tree_id=i, n_classes_y=n_classes_y, file=f) \n", + "\n", + " for c in range(n_classes_y):\n", + " print(' wire [15:0] s_{}_{};'.format(i, c), file=f)\n", + " print(' wire [15:0] e_{}_{};'.format(i, c), file=f)\n", + " print(' assign e_{}_{} = {} 8\\'h0, n_{}_0_{} {};'.format(i, c, '{', i, c, '}'), file=f)\n", + " if i \u003e 0:\n", + " print(' assign s_{}_{} = s_{}_{} + e_{}_{};'.format(i, c, i - 1, c, i, c), file=f)\n", + " else:\n", + " print(' assign s_{}_{} = e_{}_{};'.format(i, c, i, c), file=f)\n", + "\n", + " for c in range(n_classes_y):\n", + " print(' assign y{} = s_{}_{};'.format(c, len(model.estimators_) - 1, c), file=f)\n", + " \n", + " print(\"endmodule\", file=f)\n", + " \n", + " \n", + "dump_verilog(mreal, width_x=784, n_classes_y=10)\n", + "!head output.v\n", + "# verilator can take 3 mins to lint the resulting Verilog file if 10 trees and unlimited depth is used!\n", + "# !verilator output.v --lint-only\n", + "# !abc/abc -c \"%read output.v; %blast; \u0026ps; \u0026put; write test_syn.v\"\n", + "#!cat test_syn.v" + ] + }, 
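+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sanity-check sketch (not part of the original flow): it mirrors in Python the fixed-point datapath that dump_verilog emits, i.e. integer thresholds, 8-bit quantized leaf probabilities, and 16-bit per-class accumulators, and compares the resulting decisions with the sklearn model. The helper names tree_predict_fixed and forest_predict_fixed are illustrative, not existing APIs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch only: re-implements the arithmetic of the generated Verilog so that\n",
+    "# output.v semantics can be checked against the sklearn model before synthesis.\n",
+    "# tree_predict_fixed / forest_predict_fixed are illustrative helper names.\n",
+    "\n",
+    "def tree_predict_fixed(tree, node, x):\n",
+    "  left = tree.children_left\n",
+    "  right = tree.children_right\n",
+    "  feats = tree.feature\n",
+    "  thresh = tree.threshold\n",
+    "  values = tree.value\n",
+    "  if feats[node] == -2:  # leaf node\n",
+    "    p = (values[node] / values[node].sum()).reshape(-1)\n",
+    "    # same rounding as dump_tree: int(p * 255. + 0.5)\n",
+    "    return np.floor(p * 255. + 0.5).astype(np.int64)\n",
+    "  if x[feats[node]] \u003c= int(thresh[node]):\n",
+    "    return tree_predict_fixed(tree, left[node], x)\n",
+    "  return tree_predict_fixed(tree, right[node], x)\n",
+    "\n",
+    "\n",
+    "def forest_predict_fixed(model, x):\n",
+    "  acc = np.zeros(model.n_classes_, dtype=np.int64)\n",
+    "  for estimator in model.estimators_:\n",
+    "    acc += tree_predict_fixed(estimator.tree_, 0, x)\n",
+    "  return acc.argmax()\n",
+    "\n",
+    "\n",
+    "(cx, cy), _ = tf.keras.datasets.mnist.load_data()\n",
+    "cx = cx.reshape(60000, -1)\n",
+    "fixed = np.array([forest_predict_fixed(mreal, ex) for ex in cx[:1000]])\n",
+    "golden = mreal.predict(cx[:1000])\n",
+    "# rounding the leaf probabilities to 8 bits may flip a few close calls,\n",
+    "# so report the agreement rate rather than asserting exact equality\n",
+    "print('fixed-point vs sklearn agreement: {}'.format((fixed == golden).mean()))"
+   ]
+  },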
+ { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "RccyGmmK_rlM" + }, + "outputs": [], + "source": [ + "# ABC limitations:\n", + "# read silently fails whereas %read works\n", + "# if a PO is not driven an assertion fails in blast\n", + "# verilator limitations:\n", + "# sometimes when the input is bad verilator may get stuck!" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "forest-tobias.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}