Skip to content

Commit

Permalink
Merge pull request #29 from emlearn/cnn-fp32-int8
Browse files Browse the repository at this point in the history
CNN: Build both fp32 and int8 modules
  • Loading branch information
jonnor authored Jan 9, 2025
2 parents 8bf8ed1 + 4761fba commit 696fd87
Show file tree
Hide file tree
Showing 21 changed files with 1,225 additions and 293 deletions.
11 changes: 7 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ $(MODULES_PATH)/emlearn_iir.mpy:
$(MODULES_PATH)/emlearn_fft.mpy:
make -C src/emlearn_fft/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist

$(MODULES_PATH)/emlearn_cnn.mpy:
make -C src/tinymaix_cnn/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist
$(MODULES_PATH)/emlearn_cnn_int8.mpy:
make -C src/tinymaix_cnn/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 CONFIG=int8 clean dist

$(MODULES_PATH)/emlearn_cnn_fp32.mpy:
make -C src/tinymaix_cnn/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 CONFIG=fp32 clean dist

$(MODULES_PATH)/emlearn_kmeans.mpy:
make -C src/emlearn_kmeans/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist
Expand All @@ -46,7 +49,7 @@ emlearn_iir.results: $(MODULES_PATH)/emlearn_iir.mpy
emlearn_fft.results: $(MODULES_PATH)/emlearn_fft.mpy
MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_fft.py

emlearn_cnn.results: $(MODULES_PATH)/emlearn_cnn.mpy
emlearn_cnn.results: $(MODULES_PATH)/emlearn_cnn_int8.mpy $(MODULES_PATH)/emlearn_cnn_fp32.mpy
MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_cnn.py

emlearn_kmeans.results: $(MODULES_PATH)/emlearn_kmeans.mpy
Expand Down Expand Up @@ -76,6 +79,6 @@ release:

check: emlearn_trees.results emlearn_neighbors.results emlearn_iir.results emlearn_iir_q15.results emlearn_fft.results emlearn_kmeans.results emlearn_arrayutils.results emlearn_cnn.results

dist: $(MODULES_PATH)/emlearn_trees.mpy $(MODULES_PATH)/emlearn_neighbors.mpy $(MODULES_PATH)/emlearn_iir.mpy $(MODULES_PATH)/emlearn_iir_q15.mpy $(MODULES_PATH)/emlearn_fft.mpy $(MODULES_PATH)/emlearn_kmeans.mpy $(MODULES_PATH)/emlearn_arrayutils.mpy $(MODULES_PATH)/emlearn_cnn.mpy
dist: $(MODULES_PATH)/emlearn_trees.mpy $(MODULES_PATH)/emlearn_neighbors.mpy $(MODULES_PATH)/emlearn_iir.mpy $(MODULES_PATH)/emlearn_iir_q15.mpy $(MODULES_PATH)/emlearn_fft.mpy $(MODULES_PATH)/emlearn_kmeans.mpy $(MODULES_PATH)/emlearn_arrayutils.mpy $(MODULES_PATH)/emlearn_cnn_int8.mpy $(MODULES_PATH)/emlearn_cnn_fp32.mpy


2 changes: 1 addition & 1 deletion dependencies/TinyMaix
6 changes: 3 additions & 3 deletions examples/mnist_cnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ python mnist_train.py
## Running on host

```console
micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_cnn.mpy
micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_cnn_int8.mpy

micropython mnist_cnn_run.py
```
Expand All @@ -50,11 +50,11 @@ Flash your device with a standard MicroPython firmware,
from the MicroPython.org downloads page.

```console
mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_cnn.mpy
mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_cnn_int8.mpy
```

```console
mpremote cp mnist_cnn.tmdl :
mpremote cp mnist_cnn_int8.tmdl :
mpremote cp -r test_data/ :
mpremote run mnist_cnn_run.py
```
Expand Down
254 changes: 0 additions & 254 deletions examples/mnist_cnn/mnist_cnn.h

This file was deleted.

Binary file removed examples/mnist_cnn/mnist_cnn.h5
Binary file not shown.
Binary file removed examples/mnist_cnn/mnist_cnn.tflite
Binary file not shown.
Binary file removed examples/mnist_cnn/mnist_cnn.tmdl
Binary file not shown.
816 changes: 816 additions & 0 deletions examples/mnist_cnn/mnist_cnn_fp32.h

Large diffs are not rendered by default.

Binary file added examples/mnist_cnn/mnist_cnn_fp32.tflite
Binary file not shown.
Binary file added examples/mnist_cnn/mnist_cnn_fp32.tmdl
Binary file not shown.
254 changes: 254 additions & 0 deletions examples/mnist_cnn/mnist_cnn_int8.h

Large diffs are not rendered by default.

Binary file added examples/mnist_cnn/mnist_cnn_int8.tflite
Binary file not shown.
Binary file added examples/mnist_cnn/mnist_cnn_int8.tmdl
Binary file not shown.
4 changes: 2 additions & 2 deletions examples/mnist_cnn/mnist_cnn_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import time
import gc

import emlearn_cnn
import emlearn_cnn_int8

MODEL = 'mnist_cnn.tmdl'
MODEL = 'mnist_cnn_int8.tmdl'
TEST_DATA_DIR = 'test_data'

def argmax(arr):
Expand Down
35 changes: 18 additions & 17 deletions examples/mnist_cnn/mnist_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def generate_test_files(out_dir, x, y, samples_per_class=5):


def generate_tinymaix_model(h5_file,
out_file : str,
input_shape : tuple[int],
output_shape : tuple[int],
tools_dir,
Expand All @@ -99,7 +100,7 @@ def generate_tinymaix_model(h5_file,

# Convert .h5 to .tflite file
assert h5_file.endswith('.h5'), 'Keras model HDF5 file must end with .h5'
tflite_file = h5_file.replace('.h5', '.tflite')
tflite_file = out_file + '.tflite'

args = [
python_bin,
Expand Down Expand Up @@ -157,26 +158,26 @@ def main():
tinymaix_tools_dir = '../../dependencies/TinyMaix/tools'
assert os.path.exists(tinymaix_tools_dir), tinymaix_tools_dir

quantize_data = None # disables quantization
quantize_data = os.path.join(tinymaix_tools_dir, 'quant_img_mnist/')
if quantize_data is not None:
assert os.path.exists(quantize_data)
precision = 'int8' if quantize_data else 'fp32'

# Run training
train_mnist(h5_file)

#data = x_test[1]

# Export the model using TinyMaix
out = generate_tinymaix_model(h5_file,
input_shape=(28,28,1),
output_shape=(1,),
tools_dir=tinymaix_tools_dir,
precision=precision,
quantize_data=quantize_data,
)
print('Wrote model to', out)
# both with quantization and without
for config in ('int8', 'fp32'):
if config == 'int8':
quantize_data = os.path.join(tinymaix_tools_dir, 'quant_img_mnist/')
else:
quantize_data = None # disables quantization

out = generate_tinymaix_model(h5_file,
out_file=h5_file.replace('.h5', '')+f'_{config}',
input_shape=(28,28,1),
output_shape=(1,),
tools_dir=tinymaix_tools_dir,
precision=config,
quantize_data=quantize_data,
)
print('Wrote model to', out)

if __name__ == '__main__':
main()
3 changes: 2 additions & 1 deletion examples/mnist_cnn/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"readme": "github:emlearn/emlearn-micropython/blob/master/examples/mnist_cnn/README.md",
"keywords": "machinelearning,cnn,convolutionalneuralnetwork,classification,mnist,digits",
"urls": [
["fs:mnist_cnn.tmdl", "mnist_cnn.tmdl"],
["fs:mnist_cnn_int8.tmdl", "mnist_cnn_int8.tmdl"],
["fs:mnist_cnn_fp32.tmdl", "mnist_cnn_fp32.tmdl"],
["fs:data/mnist_example_0.bin", "data/mnist_example_0.bin"],
["fs:data/mnist_example_1.bin", "data/mnist_example_1.bin"],
["fs:data/mnist_example_2.bin", "data/mnist_example_2.bin"],
Expand Down
8 changes: 6 additions & 2 deletions src/tinymaix_cnn/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ MPY_DIR = ../../micropython
# Architecture to build for (x86, x64, armv6m, armv7m, xtensa, xtensawin)
ARCH = x64

CONFIG := int8

# The ABI version for .mpy files
MPY_ABI_VERSION := 6.3

Expand All @@ -12,6 +14,8 @@ TINYMAIX_DIR := ../../dependencies/TinyMaix

DIST_DIR := ../../dist/$(ARCH)_$(MPY_ABI_VERSION)

CONFIG_DIR := ./$(CONFIG)

# Name of module
MOD = emlearn_cnn

Expand Down Expand Up @@ -45,7 +49,7 @@ ifeq ($(SOFTFP_ENABLE), 1)
endif

# Releases
DIST_FILE = $(DIST_DIR)/$(MOD).mpy
DIST_FILE = $(DIST_DIR)/$(MOD)_$(CONFIG).mpy
$(DIST_DIR):
mkdir -p $@

Expand All @@ -68,6 +72,6 @@ _addsubdf3.o:
_arm_addsubdf3.o:
$(CROSS)ar -x $(LIBGCC_FILENAME) $(SOFTFP_O)

CFLAGS += -I$(TINYMAIX_DIR)/include -I$(TINYMAIX_DIR)/src -Wno-error=unused-variable -Wno-error=multichar
CFLAGS += -I$(CONFIG_DIR) -I$(TINYMAIX_DIR)/include -I$(TINYMAIX_DIR)/src -Wno-error=unused-variable -Wno-error=multichar -Wdouble-promotion

dist: $(DIST_FILE)
97 changes: 97 additions & 0 deletions src/tinymaix_cnn/fp32/tm_port.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
Configuration file for TinyMaix
Copyright 2022 Sipeed Technology Co., Ltd. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// fp32 build variant of the TinyMaix port configuration:
// selects TM_MDL_FP32 below (the int8 variant lives in a sibling directory).

#ifndef __TM_PORT_H
#define __TM_PORT_H

#define TM_ARCH_CPU (0) //default, pure cpu compute
#define TM_ARCH_ARM_SIMD (1) //ARM Cortex M4/M7, etc.
#define TM_ARCH_ARM_NEON (2) //ARM Cortex A7, etc.
#define TM_ARCH_ARM_MVEI (3) //ARMv8.1: M55, etc.
#define TM_ARCH_RV32P (4) //T-head E907, etc.
#define TM_ARCH_RV64V (5) //T-head C906,C910, etc.
#define TM_ARCH_CSKYV2 (6) //cskyv2 with dsp core
#define TM_ARCH_X86_SSE2 (7) //x86 sse2

#define TM_OPT0 (0) //default, least code and buf
#define TM_OPT1 (1) //opt for speed, need more code and buf
#define TM_OPT2 (2) //TODO

/******************************* PORT CONFIG ************************************/
#define TM_ARCH TM_ARCH_CPU
#define TM_OPT_LEVEL TM_OPT0
#define TM_MDL_TYPE TM_MDL_FP32
#define TM_FASTSCALE (0) //enable if your chip doesn't have an FPU; may speed up ~1/3, but decreases accuracy
#define TM_LOCAL_MATH (1) //use local math func (like exp()) to avoid libm
#define TM_ENABLE_STAT (1) //enable mdl stat functions
#define TM_MAX_CSIZE (1000) //max channel num //used if INT8 mdl //cost TM_MAX_CSIZE*4 Byte
#define TM_MAX_KSIZE (5*5) //max kernel_size //cost TM_MAX_KSIZE*4 Byte
#define TM_MAX_KCSIZE (3*3*256) //max kernel_size*channels //cost TM_MAX_KSIZE*sizeof(mtype_t) Byte

#define TM_INLINE __attribute__((always_inline)) static inline
#define TM_WEAK __attribute__((weak))

// Disable "static" (non-const) globals, since they are not supported by MicroPython mpy_ld.py
#define TM_STATIC

// Use MicroPython for dynamic allocation
#define tm_malloc(x) m_malloc(x)
#define tm_free(x) m_free(x)

// FIXME: set these to use MicroPython primitives

// Debug printing is disabled (macros expand to nothing); the printf calls are
// kept in comments to show what would be emitted if re-enabled.
#define TM_PRINTF(...) //printf(__VA_ARGS__);
#define TM_DBG(...) TM_PRINTF("###L%d: ",__LINE__);TM_PRINTF(__VA_ARGS__);
#define TM_DBGL() //Serial.println(__LINE__);


// FIXME: enable profiling
#define TM_GET_US() (0)
#define TM_DBGT_INIT() ;
#define TM_DBGT_START() ;
#define TM_DBGT(x) ;

/******************************* DBG PERFORMANCE CONFIG ************************************/
//need clock tick to make accurate statistics
#define TM_EN_PERF 0

#if TM_EN_PERF
#define TM_GET_TICK(x) __ASM volatile("csrr %0, mcycle" : "=r"(x)); //edit your self

#define TM_TICK_PERUS (380) //sysconf(_SC_CLK_TCK)/1000000)
#define TM_PERF_REG(x) uint64_t x=0;
#define TM_PERF_EXTREG(x) extern uint64_t x;
#define TM_PERF_INIT(x) uint64_t _##x##_t0, _##x##_t1;
#define TM_PERF_START(x) TM_GET_TICK(_##x##_t0);
#define TM_PERF_ADD(x) {TM_GET_TICK(_##x##_t1);(x)+=(_##x##_t1-_##x##_t0);TM_GET_TICK(_##x##_t0);};
#define TM_PERF_PRINT(x) TM_PRINTF("PERF "#x": %ld us\r\n", (x)/TM_TICK_PERUS)
#else
#define TM_GET_TICK(x)
#define TM_TICK_PERUS
#define TM_PERF_REG(x)
#define TM_PERF_EXTREG(x)
#define TM_PERF_INIT(x)
#define TM_PERF_START(x)
#define TM_PERF_ADD(x)
#define TM_PERF_PRINT(x)
#endif


/******************************* OPS CONFIG ************************************/




#endif
File renamed without changes.
2 changes: 1 addition & 1 deletion src/tinymaix_cnn/mod_cnn.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include <string.h>

#define DEBUG (1)
#define DEBUG (0)


// memset is used by some standard C constructs
Expand Down
26 changes: 18 additions & 8 deletions tests/test_cnn.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@

import array
import emlearn_cnn
import emlearn_cnn_int8
import emlearn_cnn_fp32

MNIST_MODEL = 'examples/mnist_cnn/mnist_cnn.tmdl'
MNIST_MODEL_INT8 = 'examples/mnist_cnn/mnist_cnn_int8.tmdl'
MNIST_MODEL_FP32 = 'examples/mnist_cnn/mnist_cnn_fp32.tmdl'
MNIST_DATA_DIR = 'examples/mnist_cnn/data/'

def test_cnn_create():

model = None
with open(MNIST_MODEL, 'rb') as f:
with open(MNIST_MODEL_FP32, 'rb') as f:
model_data = array.array('B', f.read())
model = emlearn_cnn.new(model_data)
model = emlearn_cnn_fp32.new(model_data)

out_shape = model.output_dimensions()
assert out_shape == (10,), (out_shape)
Expand Down Expand Up @@ -48,12 +50,12 @@ def argmax(arr):

return idx_max

def test_cnn_mnist():
def check_cnn_mnist(cnn_module, model_path):

model = None
with open(MNIST_MODEL, 'rb') as f:
with open(model_path, 'rb') as f:
model_data = array.array('B', f.read())
model = emlearn_cnn.new(model_data)
model = cnn_module.new(model_data)

probabilities = array.array('f', (-1 for _ in range(10)))

Expand All @@ -75,6 +77,14 @@ def test_cnn_mnist():

assert correct >= 9, correct

def test_cnn_mnist_int8():
check_cnn_mnist(emlearn_cnn_int8, MNIST_MODEL_INT8)


def test_cnn_mnist_fp32():
check_cnn_mnist(emlearn_cnn_fp32, MNIST_MODEL_FP32)


test_cnn_create()
test_cnn_mnist()
test_cnn_mnist_int8()
test_cnn_mnist_fp32()

0 comments on commit 696fd87

Please sign in to comment.