@@ -178,7 +178,17 @@ public float getObjThresh() {
178
178
// Number of threads in the java app
179
179
private static final int NUM_THREADS = 4 ;
180
180
private static boolean isNNAPI = false ;
181
- private static boolean isGPU = false ;
181
+ private static boolean isGPU = true ;
182
+
183
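+ // Whether the loaded model is YOLOv4-tiny; when true, recognizeImage uses getDetectionsForTiny instead of getDetections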
184
+ private static boolean isTiny = true ;
185
+
186
+ // Configuration constants for the YOLOv4-tiny model
187
+ private static final int [] OUTPUT_WIDTH_TINY = new int []{2535 , 2535 };
188
+ private static final int [][] MASKS_TINY = new int [][]{{3 , 4 , 5 }, {1 , 2 , 3 }};
189
+ private static final int [] ANCHORS_TINY = new int []{
190
+ 23 , 27 , 37 , 58 , 81 , 82 , 81 , 82 , 135 , 169 , 344 , 319 };
191
+ private static final float [] XYSCALE_TINY = new float []{1.05f , 1.05f };
182
192
183
193
private boolean isModelQuantized ;
184
194
@@ -294,9 +304,8 @@ protected ByteBuffer convertBitmapToByteBuffer(Bitmap bitmap) {
294
304
return byteBuffer ;
295
305
}
296
306
297
- public ArrayList <Recognition > recognizeImage (Bitmap bitmap ) {
298
- ByteBuffer byteBuffer = convertBitmapToByteBuffer (bitmap );
299
-
307
+ private ArrayList <Recognition > getDetections (ByteBuffer byteBuffer , Bitmap bitmap ) {
308
+ ArrayList <Recognition > detections = new ArrayList <Recognition >();
300
309
Map <Integer , Object > outputMap = new HashMap <>();
301
310
for (int i = 0 ; i < OUTPUT_WIDTH .length ; i ++) {
302
311
float [][][][][] out = new float [1 ][OUTPUT_WIDTH [i ]][OUTPUT_WIDTH [i ]][3 ][5 + labels .size ()];
@@ -308,8 +317,6 @@ public ArrayList<Recognition> recognizeImage(Bitmap bitmap) {
308
317
Object [] inputArray = {byteBuffer };
309
318
tfLite .runForMultipleInputsOutputs (inputArray , outputMap );
310
319
311
- ArrayList <Recognition > detections = new ArrayList <Recognition >();
312
-
313
320
for (int i = 0 ; i < OUTPUT_WIDTH .length ; i ++) {
314
321
int gridWidth = OUTPUT_WIDTH [i ];
315
322
float [][][][][] out = (float [][][][][]) outputMap .get (i );
@@ -364,9 +371,69 @@ public ArrayList<Recognition> recognizeImage(Bitmap bitmap) {
364
371
}
365
372
Log .d ("YoloV4Classifier" , "out[" + i + "] detect end" );
366
373
}
374
+ return detections ;
375
+ }
367
376
368
- final ArrayList <Recognition > recognitions = nms (detections );
377
+ /**
378
+ * For yolov4-tiny, the situation would be a little different from the yolov4, it only has two
379
+ * output. Both has three dimenstion. The first one is a tensor with dimension [1, 2535,4], containing all the bounding boxes.
380
+ * The second one is a tensor with dimension [1, 2535, class_num], containing all the classes score.
381
+ * @param byteBuffer input ByteBuffer, which contains the image information
382
+ * @param bitmap pixel disenty used to resize the output images
383
+ * @return an array list containing the recognitions
384
+ */
385
+ private ArrayList <Recognition > getDetectionsForTiny (ByteBuffer byteBuffer , Bitmap bitmap ) {
386
+ ArrayList <Recognition > detections = new ArrayList <Recognition >();
387
+ Map <Integer , Object > outputMap = new HashMap <>();
388
+ outputMap .put (0 , new float [1 ][OUTPUT_WIDTH_TINY [0 ]][4 ]);
389
+ outputMap .put (1 , new float [1 ][OUTPUT_WIDTH_TINY [1 ]][labels .size ()]);
390
+ Object [] inputArray = {byteBuffer };
391
+ tfLite .runForMultipleInputsOutputs (inputArray , outputMap );
392
+
393
+ int gridWidth = OUTPUT_WIDTH_TINY [0 ];
394
+ float [][][] bboxes = (float [][][]) outputMap .get (0 );
395
+ float [][][] out_score = (float [][][]) outputMap .get (1 );
396
+
397
+ for (int i = 0 ; i < gridWidth ;i ++){
398
+ float maxClass = 0 ;
399
+ int detectedClass = -1 ;
400
+ final float [] classes = new float [labels .size ()];
401
+ for (int c = 0 ;c < labels .size ();c ++){
402
+ classes [c ] = out_score [0 ][i ][c ];
403
+ }
404
+ for (int c = 0 ;c <labels .size ();++c ){
405
+ if (classes [c ] > maxClass ){
406
+ detectedClass = c ;
407
+ maxClass = classes [c ];
408
+ }
409
+ }
410
+ final float score = maxClass ;
411
+ if (score > getObjThresh ()){
412
+ final float xPos = bboxes [0 ][i ][0 ];
413
+ final float yPos = bboxes [0 ][i ][1 ];
414
+ final float w = bboxes [0 ][i ][2 ];
415
+ final float h = bboxes [0 ][i ][3 ];
416
+ final RectF rectF = new RectF (
417
+ Math .max (0 , xPos - w / 2 ),
418
+ Math .max (0 , yPos - h / 2 ),
419
+ Math .min (bitmap .getWidth () - 1 , xPos + w / 2 ),
420
+ Math .min (bitmap .getHeight () - 1 , yPos + h / 2 ));
421
+ detections .add (new Recognition ("" + i , labels .get (detectedClass ),score ,rectF ,detectedClass ));
422
+ }
423
+ }
424
+ return detections ;
425
+ }
369
426
427
+ public ArrayList <Recognition > recognizeImage (Bitmap bitmap ) {
428
+ ByteBuffer byteBuffer = convertBitmapToByteBuffer (bitmap );
429
+ ArrayList <Recognition > detections ;
430
+ //check whether the tiny version is specified
431
+ if (isTiny ) {
432
+ detections = getDetectionsForTiny (byteBuffer , bitmap );
433
+ } else {
434
+ detections = getDetections (byteBuffer , bitmap );
435
+ }
436
+ final ArrayList <Recognition > recognitions = nms (detections );
370
437
return recognitions ;
371
438
}
372
439
0 commit comments