Add TFLite + Transformer example

android · Jul 17, 2024 · 8033be8 · 8033be8
1 parent b556c01
commit 8033be8
Show file tree

Hide file tree

Showing 16 changed files with 673 additions and 5 deletions.
diff --git a/build.gradle.kts b/build.gradle.kts
@@ -13,6 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+buildscript { // Puts the gradle-download-task plugin on the build classpath for module scripts.
+    dependencies {
+        classpath("de.undercouch:gradle-download-task:4.1.2") // Applied by id as "de.undercouch.download".
+    }
+}
+
 @Suppress("DSL_SCOPE_VIOLATION")
 plugins {
     alias(libs.plugins.android.application) apply false
@@ -38,7 +44,7 @@ versionCatalogUpdate {
 affectedModuleDetector {
     baseDir = "${project.rootDir}"
     pathsAffectingAllModules = setOf(
-            "gradle/libs.versions.toml",
+        "gradle/libs.versions.toml",
     )
     excludedModules = setOf<String>()
 

diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
@@ -44,6 +44,9 @@ material = "1.12.0-beta01"
 constraintlayout = "2.1.4"
 glide-compose = "1.0.0-beta01"
 glance = "1.1.0-SNAPSHOT"
+tensorflowLite = "2.9.0"
+tensorflowLiteGpuDelegatePlugin = "0.4.4"
+tensorflowLiteSupport = "0.4.2"
 
 [libraries]
 
@@ -153,6 +156,11 @@ glide-compose = { group = "com.github.bumptech.glide", name = "compose", version
 appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" }
 material = { group = "com.google.android.material", name = "material", version.ref = "material" }
 constraintlayout = { group = "androidx.constraintlayout", name = "constraintlayout", version.ref = "constraintlayout" }
+tensorflow-lite = { module = "org.tensorflow:tensorflow-lite", version.ref = "tensorflowLite" }
+tensorflow-lite-gpu = { module = "org.tensorflow:tensorflow-lite-gpu", version.ref = "tensorflowLite" }
+tensorflow-lite-gpu-delegate-plugin = { module = "org.tensorflow:tensorflow-lite-gpu-delegate-plugin", version.ref = "tensorflowLiteGpuDelegatePlugin" }
+tensorflow-lite-select-tf-ops = { module = "org.tensorflow:tensorflow-lite-select-tf-ops", version.ref = "tensorflowLite" }
+tensorflow-lite-support = { module = "org.tensorflow:tensorflow-lite-support", version.ref = "tensorflowLiteSupport" }
 
 [plugins]
 affectedmoduledetector = { id = "com.dropbox.affectedmoduledetector", version = "0.2.0" }

diff --git a/samples/README.md b/samples/README.md
@@ -116,14 +116,16 @@ The sample demonstrates the importance of having proper labels for
 A sample showcasing how to handle calls with the Jetpack Telecom API
 - [TextSpan](user-interface/text/src/main/java/com/example/platform/ui/text/TextSpan.kt):
 buildSpannedString is useful for quickly building a rich text.
+- [Transformer and TFLite integration](media/video/src/main/java/com/example/platform/media/video/TransformerTFLite.kt):
+This sample demonstrates using Transformer with TFLite by applying a selected art style to a video.
 - [UltraHDR Image Capture](camera/camera2/src/main/java/com/example/platform/camera/imagecapture/Camera2UltraHDRCapture.kt):
 This sample demonstrates how to capture a 10-bit compressed still image and 
 - [UltraHDR to HDR Video](media/ultrahdr/src/main/java/com/example/platform/media/ultrahdr/video/UltraHDRToHDRVideo.kt):
 This sample demonstrates converting a series of UltraHDR images into a HDR 
 - [UltraHDR x OpenGLES SurfaceView](graphics/ultrahdr/src/main/java/com/example/platform/graphics/ultrahdr/opengl/UltraHDRWithOpenGL.kt):
 This sample demonstrates displaying an UltraHDR image via and OpenGL Pipeline 
 - [Video Composition using Media3 Transformer](media/video/src/main/java/com/example/platform/media/video/TransformerVideoComposition.kt):
-This sample demonstrates concatenation of two video assets and an image using Media3.
+This sample demonstrates concatenation of two video assets and an image using Media3 Transformer library.
 - [Visualizing an UltraHDR Gainmap](graphics/ultrahdr/src/main/java/com/example/platform/graphics/ultrahdr/display/VisualizingAnUltraHDRGainmap.kt):
 This sample demonstrates visualizing the underlying gainmap of an UltraHDR 
 - [WindowInsetsAnimation](user-interface/window-insets/src/main/java/com/example/platform/ui/insets/WindowInsetsAnimation.kt):

diff --git a/samples/media/video/build.gradle.kts b/samples/media/video/build.gradle.kts
@@ -16,11 +16,25 @@
 
 plugins {
     id("com.example.platform.sample")
+    id("de.undercouch.download")
 }
 
 android {
     namespace = "com.example.platform.media.video"
     viewBinding.isEnabled = true
+
+    androidResources {
+        noCompress += "tflite" // Store .tflite models uncompressed; the sample loads them with FileUtil.loadMappedFile.
+    }
+}
+
+// Directories exposed to download_model.gradle, which registers the model download tasks for the TFLite sample.
+project.ext.set("ASSET_DIR", "$projectDir/src/main/assets")
+project.ext.set("TEST_ASSETS_DIR", "$projectDir/src/androidTest/assets") // NOTE(review): only ASSET_DIR is read by download_model.gradle.
+// Download the default models. To use your own models instead, place them in the
+// "assets" directory and comment out the whole `apply` block below.
+apply {
+    from("download_model.gradle")
 }
 
 dependencies {
@@ -37,4 +51,11 @@ dependencies {
     implementation(libs.androidx.media3.ui)
     implementation(libs.androidx.media3.effect)
     implementation(libs.material)
+
+    // Tensorflow lite dependencies
+    implementation(libs.tensorflow.lite)
+    implementation(libs.tensorflow.lite.gpu)
+    implementation(libs.tensorflow.lite.gpu.delegate.plugin)
+    implementation(libs.tensorflow.lite.support)
+    implementation(libs.tensorflow.lite.select.tf.ops)
 }
diff --git a/samples/media/video/download_model.gradle b/samples/media/video/download_model.gradle
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Style-transfer models fetched into the main assets directory, keyed by the name of the
+// Download task that retrieves each one. Existing files are kept (overwrite is disabled), so
+// a model is only downloaded when it is missing.
+def modelDownloads = [
+    downloadModelFile1: [
+        url     : 'https://storage.googleapis.com/download.tensorflow.org/models/tflite/task_library/style_transfer/android/magenta_arbitrary-image-stylization-v1-256_fp16_prediction_1.tflite',
+        fileName: '/predict_float16.tflite',
+    ],
+    downloadModelFile2: [
+        url     : 'https://storage.googleapis.com/download.tensorflow.org/models/tflite/task_library/style_transfer/android/magenta_arbitrary-image-stylization-v1-256_fp16_transfer_1.tflite',
+        fileName: '/transfer_float16.tflite',
+    ],
+]
+
+modelDownloads.each { taskName, model ->
+    tasks.register(taskName, Download) {
+        src model.url
+        dest project.ext.ASSET_DIR + model.fileName
+        overwrite false
+    }
+}
+
+// Make sure the models are present before anything is compiled or packaged.
+preBuild.dependsOn modelDownloads.keySet()
diff --git a/samples/media/video/src/main/assets/style1.jpg b/samples/media/video/src/main/assets/style1.jpg
diff --git a/samples/media/video/src/main/assets/style2.jpg b/samples/media/video/src/main/assets/style2.jpg
diff --git a/samples/media/video/src/main/assets/style3.jpg b/samples/media/video/src/main/assets/style3.jpg
diff --git a/...edia/video/src/main/java/com/example/platform/media/video/StyleTransferShaderProgram.java b/...edia/video/src/main/java/com/example/platform/media/video/StyleTransferShaderProgram.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.platform.media.video;
+
+import android.content.Context;
+import android.graphics.Bitmap;
+import android.graphics.BitmapFactory;
+import android.opengl.GLES20;
+import android.opengl.GLUtils;
+import android.util.Log;
+
+import androidx.media3.common.VideoFrameProcessingException;
+import androidx.media3.common.util.GlProgram;
+import androidx.media3.common.util.GlUtil;
+import androidx.media3.common.util.Size;
+import androidx.media3.common.util.UnstableApi;
+import androidx.media3.effect.BaseGlShaderProgram;
+
+import com.google.common.collect.ImmutableMap;
+
+import org.tensorflow.lite.DataType;
+import org.tensorflow.lite.Interpreter;
+import org.tensorflow.lite.InterpreterApi;
+import org.tensorflow.lite.gpu.CompatibilityList;
+import org.tensorflow.lite.gpu.GpuDelegate;
+import org.tensorflow.lite.support.common.FileUtil;
+import org.tensorflow.lite.support.common.ops.DequantizeOp;
+import org.tensorflow.lite.support.common.ops.NormalizeOp;
+import org.tensorflow.lite.support.image.ImageProcessor;
+import org.tensorflow.lite.support.image.TensorImage;
+import org.tensorflow.lite.support.image.ops.ResizeOp;
+import org.tensorflow.lite.support.image.ops.ResizeWithCropOrPadOp;
+import org.tensorflow.lite.support.tensorbuffer.TensorBuffer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import javax.microedition.khronos.opengles.GL10;
+
+@UnstableApi
+final class StyleTransferShaderProgram extends BaseGlShaderProgram {
+
+    private static final String VERTEX_SHADER_PATH = "shaders/vertex_shader_transformation_es2.glsl";
+    private static final String FRAGMENT_SHADER_PATH = "shaders/fragment_shader_copy_es2.glsl";
+
+    private final GlProgram glProgram;
+    private final InterpreterApi transformInterpreter;
+    private final int inputTransformTargetHeight;
+    private final int inputTransformTargetWidth;
+    private final int[] outputTransformShape;
+
+    private final TensorBuffer predictOutput;
+
+    private int width;
+    private int height;
+
+    public StyleTransferShaderProgram(Context context, String styleAssetFileName)
+            throws VideoFrameProcessingException {
+        super(/* useHighPrecisionColorComponents= */ false, /* texturePoolCapacity= */ 1);
+
+        try {
+            glProgram = new GlProgram(context, VERTEX_SHADER_PATH, FRAGMENT_SHADER_PATH);
+
+            Interpreter.Options options = new Interpreter.Options();
+
+            CompatibilityList compatibilityList = new CompatibilityList();
+            if (compatibilityList.isDelegateSupportedOnThisDevice()) {
+                GpuDelegate.Options gpuDelegateOptions = compatibilityList.getBestOptionsForThisDevice();
+                GpuDelegate gpuDelegate = new GpuDelegate(gpuDelegateOptions);
+                options.addDelegate(gpuDelegate);
+            } else {
+                options.setNumThreads(6);
+            }
+            String predictModel = "predict_float16.tflite";
+            String transferModel = "transfer_float16.tflite";
+            Interpreter predictInterpeter =
+                    new Interpreter(FileUtil.loadMappedFile(context, predictModel), options);
+            transformInterpreter =
+                    InterpreterApi.create(FileUtil.loadMappedFile(context, transferModel), options);
+            int inputPredictTargetHeight = predictInterpeter.getInputTensor(0).shape()[1];
+            int inputPredictTargetWidth = predictInterpeter.getInputTensor(0).shape()[2];
+            int[] outputPredictShape = predictInterpeter.getOutputTensor(0).shape();
+
+            inputTransformTargetHeight = transformInterpreter.getInputTensor(0).shape()[1];
+            inputTransformTargetWidth = transformInterpreter.getInputTensor(0).shape()[2];
+            outputTransformShape = transformInterpreter.getOutputTensor(0).shape();
+
+            InputStream inputStream = context.getAssets().open(styleAssetFileName);
+            Bitmap styleImage = BitmapFactory.decodeStream(inputStream);
+            inputStream.close();
+            TensorImage styleTensorImage =
+                    getScaledTensorImage(styleImage, inputPredictTargetWidth, inputPredictTargetHeight);
+            predictOutput = TensorBuffer.createFixedSize(outputPredictShape, DataType.FLOAT32);
+            predictInterpeter.run(styleTensorImage.getBuffer(), predictOutput.getBuffer());
+        } catch (IOException | GlUtil.GlException e) {
+            Log.w("DEBUG", "Error setting up TfShaderProgram", e);
+            throw new VideoFrameProcessingException(e);
+        }
+    }
+
+    @Override
+    public Size configure(int inputWidth, int inputHeight) {
+        width = inputWidth;
+        height = inputHeight;
+        return new Size(inputWidth, inputHeight);
+    }
+
+    @Override
+    public void drawFrame(int inputTexId, long presentationTimeUs)
+            throws VideoFrameProcessingException {
+        ByteBuffer pixelBuffer = ByteBuffer.allocateDirect(width * height * 4);
+
+        Bitmap bitmap;
+        int texId;
+        try {
+            int[] boundFramebuffer = new int[1];
+            GLES20.glGetIntegerv(GLES20.GL_FRAMEBUFFER_BINDING, boundFramebuffer, /* offset= */ 0);
+
+            int fboId = GlUtil.createFboForTexture(inputTexId);
+            GlUtil.focusFramebufferUsingCurrentContext(fboId, width, height);
+            GLES20.glReadPixels(
+                    /* x= */ 0,
+                    /* y= */ 0,
+                    width,
+                    height,
+                    GLES20.GL_RGBA,
+                    GLES20.GL_UNSIGNED_BYTE,
+                    pixelBuffer);
+            GlUtil.checkGlError();
+            bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
+            bitmap.copyPixelsFromBuffer(pixelBuffer);
+
+            Log.w("DEBUG", "Process frame at " + (presentationTimeUs / 1000) + " ms");
+            long before = System.currentTimeMillis();
+            TensorImage tensorImage =
+                    getScaledTensorImage(bitmap, inputTransformTargetWidth, inputTransformTargetHeight);
+            Log.w("DEBUG", "- Scale " + (System.currentTimeMillis() - before) + " ms");
+            TensorBuffer outputImage =
+                    TensorBuffer.createFixedSize(outputTransformShape, DataType.FLOAT32);
+
+            before = System.currentTimeMillis();
+            transformInterpreter.runForMultipleInputsOutputs(
+                    new Object[] {tensorImage.getBuffer(), predictOutput.getBuffer()},
+                    ImmutableMap.<Integer, Object>builder().put(0, outputImage.getBuffer()).build());
+
+            Log.w("DEBUG", "- Run " + (System.currentTimeMillis() - before) + " ms");
+
+            before = System.currentTimeMillis();
+            ImageProcessor imagePostProcessor =
+                    new ImageProcessor.Builder()
+                            .add(new DequantizeOp(/* zeroPoint= */ 0f, /* scale= */ 255f))
+                            .build();
+            TensorImage outputTensorImage = new TensorImage(DataType.FLOAT32);
+            outputTensorImage.load(outputImage);
+            Log.w("DEBUG", "- Load output " + (System.currentTimeMillis() - before) + " ms");
+
+            before = System.currentTimeMillis();
+            Bitmap outputBitmap = imagePostProcessor.process(outputTensorImage).getBitmap();
+            Log.w("DEBUG", "- Post process output " + (System.currentTimeMillis() - before) + " ms");
+
+            texId =
+                    GlUtil.createTexture(
+                            outputBitmap.getWidth(),
+                            outputBitmap.getHeight(),
+                            /* useHighPrecisionColorComponents= */ false);
+            GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, texId);
+            GLES20.glTexParameterf(GL10.GL_TEXTURE_2D, GL10.GL_TEXTURE_MIN_FILTER, GL10.GL_NEAREST);
+            GLES20.glTexParameterf(GL10.GL_TEXTURE_2D, GL10.GL_TEXTURE_MAG_FILTER, GL10.GL_LINEAR);
+            GLES20.glTexParameterf(GL10.GL_TEXTURE_2D, GL10.GL_TEXTURE_WRAP_S, GL10.GL_REPEAT);
+            GLES20.glTexParameterf(GL10.GL_TEXTURE_2D, GL10.GL_TEXTURE_WRAP_T, GL10.GL_REPEAT);
+            GLUtils.texImage2D(GLES20.GL_TEXTURE_2D, /* level= */ 0, outputBitmap, /* border= */ 0);
+            GlUtil.checkGlError();
+
+            GlUtil.focusFramebufferUsingCurrentContext(boundFramebuffer[0], width, height);
+
+            glProgram.use();
+            glProgram.setSamplerTexIdUniform("uTexSampler", texId, /* texUnitIndex= */ 0);
+            float[] identityMatrix = GlUtil.create4x4IdentityMatrix();
+            glProgram.setFloatsUniform("uTexTransformationMatrix", identityMatrix);
+            glProgram.setFloatsUniform("uTransformationMatrix", identityMatrix);
+            // glProgram.setFloatsUniform("uRgbMatrix", identityMatrix);
+            glProgram.setBufferAttribute(
+                    "aFramePosition",
+                    GlUtil.getNormalizedCoordinateBounds(),
+                    GlUtil.HOMOGENEOUS_COORDINATE_VECTOR_SIZE);
+            glProgram.bindAttributesAndUniforms();
+
+            GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, /* first= */ 0, /* count= */ 4);
+            GlUtil.checkGlError();
+
+            GlUtil.deleteTexture(texId);
+        } catch (GlUtil.GlException e) {
+            throw VideoFrameProcessingException.from(e);
+        }
+    }
+
+    private static TensorImage getScaledTensorImage(
+            Bitmap bitmap, int targetWidth, int targetHeight) {
+        int cropSize = Math.min(bitmap.getWidth(), bitmap.getHeight());
+        ImageProcessor imageProcessor =
+                new ImageProcessor.Builder()
+                        .add(new ResizeWithCropOrPadOp(cropSize, cropSize))
+                        .add(
+                                new ResizeOp(
+                                        targetHeight,
+                                        targetWidth,
+                                        ResizeOp.ResizeMethod.BILINEAR)) // TODO: Not sure why they are swapped?
+                        .add(new NormalizeOp(/* mean= */ 0f, /* stddev= */ 255f))
+                        .build();
+        TensorImage tensorImage = new TensorImage(DataType.FLOAT32);
+        tensorImage.load(bitmap);
+        return imageProcessor.process(tensorImage);
+    }
+}