diff --git a/.gitignore b/.gitignore index 85da6b601..3ce36eb74 100644 --- a/.gitignore +++ b/.gitignore @@ -388,6 +388,12 @@ tools/ sdk/runanywhere-react-native/packages/rag/ios/.testlocal +# Python virtual environments +.venv*/ +venv*/ +__pycache__/ +*.pyc + # Node node_modules/ /tools/ diff --git a/.idea/vcs.xml b/.idea/vcs.xml index dcb6b8c4c..0410364d9 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,5 +2,17 @@ + + + + + + + + + + + + - + \ No newline at end of file diff --git a/docs/gifs/npu-model-tag-screenshot.png b/docs/gifs/npu-model-tag-screenshot.png new file mode 100644 index 000000000..9e76330cd Binary files /dev/null and b/docs/gifs/npu-model-tag-screenshot.png differ diff --git a/docs/impl/lora_adapter_support.md b/docs/impl/lora_adapter_support.md index 5c3c0a54f..c1e012ae7 100644 --- a/docs/impl/lora_adapter_support.md +++ b/docs/impl/lora_adapter_support.md @@ -512,20 +512,26 @@ This keeps `librac_commons.so` decoupled from `librac_backend_llamacpp.so`. --- -## llama.cpp LoRA API (b8011) +## llama.cpp LoRA API (b8201) The implementation uses these llama.cpp functions: | Function | Purpose | |----------|---------| | `llama_adapter_lora_init(model, path)` | Load adapter tensors from GGUF file | -| `llama_set_adapter_lora(ctx, adapter, scale)` | Apply adapter to context with scale | -| `llama_rm_adapter_lora(ctx, adapter)` | Remove specific adapter from context | -| `llama_clear_adapter_lora(ctx)` | Remove all adapters from context | +| `llama_set_adapters_lora(ctx, adapters[], n, scales[])` | Apply adapter(s) to context with scale(s) | | `llama_memory_clear(memory, true)` | Clear KV cache after adapter changes | +| `llama_adapter_meta_val_str(adapter, key, buf, size)` | Read adapter GGUF metadata by key | +| `llama_adapter_meta_count(adapter)` | Get number of metadata entries | +| `llama_adapter_meta_key_by_index(adapter, i, buf, size)` | Read metadata key by index | +| `llama_adapter_meta_val_str_by_index(adapter, i, buf, size)` | Read 
metadata value by index | -Note: `llama_adapter_lora_free()` is deprecated. Adapters are freed automatically -when the model is freed. +Note: `llama_adapter_lora_free()` is deprecated in b8201 — "adapters are now freed +together with the associated model". Do NOT call it manually. + +**Internal header dependency:** The implementation includes `llama-adapter.h` (internal +llama.cpp header) to access `adapter->ab_map.size()` for tensor match validation. +This is pinned to llama.cpp b8201 via `VERSIONS` file. Must be verified on version bumps. --- @@ -533,20 +539,28 @@ when the model is freed. ### Context Recreation -llama.cpp requires all adapters to be loaded before context creation. When a new -adapter is loaded after the model is already running (context exists), the -implementation recreates the context: +Per llama.cpp docs: "All adapters must be loaded before context creation." +When a new adapter is loaded after the model is already running, the +implementation recreates the context so the compute graph properly accounts +for LoRA operations: -1. Free old context and sampler -2. Create new context with same parameters (context_size, num_threads) -3. Rebuild sampler chain (temperature, top_p, top_k, repetition penalty) -4. Re-apply ALL loaded adapters to the new context -5. Clear KV cache +1. Free old sampler and context +2. Create new context with same parameters (context_size, batch_size, num_threads) +3. Rebuild greedy sampler chain (real sampler rebuilt on next `generate_stream()`) +4. Invalidate cached sampler params (temperature, top_p, top_k, repetition_penalty) +5. Re-apply ALL loaded adapters via `llama_set_adapters_lora()` +6. KV cache is already empty from fresh context — no explicit clear needed This is handled by `recreate_context()` + `apply_lora_adapters()` in -`llamacpp_backend.cpp`. 
The approach keeps things simple while ensuring -correctness -- adapter memory overhead is typically 1-5% of the base model, -so the cost of re-applying all adapters is negligible. +`llamacpp_backend.cpp`. + +### Pre-Generation Adapter Verification + +Before each `generate_stream()` call, the implementation checks that all loaded +adapters have `applied == true`. If any adapter is not applied (e.g., due to a +prior failure), it attempts to re-apply via `apply_lora_adapters()`. If re-apply +fails, generation is aborted with an error rather than silently ignoring the +adapter. ### KV Cache Invalidation @@ -565,10 +579,16 @@ is in progress. The lock hierarchy is: - Component layer: `std::lock_guard` on `component->mtx` - Kotlin bridge layer: `synchronized(lock)` on the CppBridgeLLM lock object -### Duplicate Detection +### Input Validation + +`load_lora_adapter()` performs multi-stage validation before touching llama.cpp: -`load_lora_adapter()` checks for duplicate adapter paths before loading. If the -same path is already loaded, it returns an error instead of loading twice. +1. **Scale validation** — must be positive and finite (`scale > 0.0f && isfinite(scale)`) +2. **Duplicate detection** — rejects if same path already loaded +3. **File existence** — opens file with `std::ifstream` to verify it exists +4. **GGUF magic check** — reads first 4 bytes and verifies `0x46554747` ("GGUF" LE) +5. **Tensor match validation** — after `llama_adapter_lora_init()`, checks `adapter->ab_map.size() > 0` to ensure the adapter actually matched model tensors (catches wrong-base-model errors) +6. **Metadata logging** — dumps adapter GGUF metadata (alpha, rank, etc.) for diagnostics ### Rollback on Failure @@ -632,6 +652,14 @@ the context and model are freed. This ordering prevents use-after-free. | `sdk/runanywhere-kotlin/src/commonMain/.../RunAnywhere+LoRA.kt` | NEW file. 
`expect` declarations for 4 public API functions | | `sdk/runanywhere-kotlin/src/jvmAndroidMain/.../RunAnywhere+LoRA.jvmAndroid.kt` | NEW file. `actual` implementations with init checks, CppBridgeLLM delegation, JSON parsing for adapter info | +### Android Example App + +| File | Changes | +|------|---------| +| `examples/android/RunAnywhereAI/.../data/ModelList.kt` | Switched LoRA adapter from `lora-adapter.gguf` (4.3MB, ineffective) to `qwen2.5-0.5b-abliterated-lora-f16.gguf` (17.6MB F16, abliterated). Updated catalog entry ID, name, filename, fileSize. | +| `examples/android/RunAnywhereAI/.../data/LoraExamplePrompts.kt` | Updated prompt filename key to match new adapter filename | +| `examples/android/RunAnywhereAI/.../presentation/chat/ChatScreen.kt` | Updated starter prompt suggestions for LoRA demo comparison | + --- ## How to Extend @@ -698,3 +726,4 @@ cd sdk/runanywhere-kotlin | 2026-02-19 | Claude | Initial implementation of LoRA adapter support across all 6 layers (C++ through Kotlin public API). C++ desktop build verified. | | 2026-02-19 | Claude | Fixed architecture: Component layer now dispatches LoRA ops through vtable (`rac_llm_service_ops_t`) instead of calling backend directly. This decouples `librac_commons.so` from `librac_backend_llamacpp.so`. Added 4 vtable entries and wrapper functions. Fixed `AttachCurrentThread` cast for Android NDK C++ build. Android native build verified. | | 2026-02-19 | Claude | Added detailed Kotlin SDK usage guide with data types, code examples, error handling, Android ViewModel pattern, and table of contents with section links. Updated "How to Extend" to include vtable step. | +| 2026-03-09 | Claude | **LoRA fix & hardening.** Fixed LoRA adapter having no effect — root cause: wrong adapter file (4.3MB generic vs 17.6MB abliterated F16). Updated Android app to use `qwen2.5-0.5b-abliterated-lora-f16.gguf`. 
Added C++ validation: scale check, GGUF magic verification, tensor match count via `ab_map` (internal header `llama-adapter.h`), adapter metadata logging, pre-generation adapter state verification. Updated API from deprecated `llama_set_adapter_lora` to `llama_set_adapters_lora` (batch API, b8201). Updated docs to reflect llama.cpp b8201 API changes. | diff --git a/docs/sdks/flutter-sdk.md b/docs/sdks/flutter-sdk.md new file mode 100644 index 000000000..032e756e0 --- /dev/null +++ b/docs/sdks/flutter-sdk.md @@ -0,0 +1,499 @@ +# RunAnywhere Flutter SDK + +Cross-platform Flutter SDK for on-device AI inference. Supports iOS and Android with native C++ backends via Dart FFI. + +## Installation + +### pubspec.yaml + +```yaml +dependencies: + # Core SDK (required) + runanywhere: ^0.17.0 + + # Backend modules (pick what you need) + runanywhere_llamacpp: ^0.16.0 # LLM text generation (GGUF models) + runanywhere_onnx: ^0.16.0 # STT, TTS, VAD (ONNX Runtime) + runanywhere_genie: ^0.1.2 # Qualcomm NPU inference +``` + +## Platform Requirements + +| Platform | Requirement | +|----------|-------------| +| Flutter | >= 3.10.0 | +| iOS | 13.0+ | +| macOS | 10.15+ | +| Android Min SDK | 24 | +| Dart | >= 3.0.0 | + +## Quick Start + +```dart +import 'package:runanywhere/runanywhere.dart'; + +// 1. Initialize SDK +await RunAnywhere.initialize(environment: SDKEnvironment.development); + +// 2. Register backends +await LlamaCpp.register(priority: 100); +await ONNX.register(priority: 100); + +// 3. Register a model +LlamaCpp.addModel( + id: 'qwen3-4b-q4_k_m', + name: 'Qwen3 4B', + url: 'https://huggingface.co/.../Qwen3-4B-Q4_K_M.gguf', + memoryRequirement: 2800000000, +); + +// 4. Download and load +await for (final progress in RunAnywhereStorage.downloadModel('qwen3-4b-q4_k_m')) { + print('${(progress.overallProgress * 100).toInt()}%'); +} +await RunAnywhere.loadModel('qwen3-4b-q4_k_m'); + +// 5. 
Generate text +final response = await RunAnywhere.chat('Hello!'); +print(response); +``` + +## Architecture + +### Package Structure + +``` +runanywhere-flutter/ +├── packages/ +│ ├── runanywhere/ # Core SDK (RACommons FFI bindings) +│ │ ├── lib/ +│ │ │ ├── core/ # Types, enums, NPU chip +│ │ │ ├── native/ # Dart FFI bridge to C++ +│ │ │ ├── public/ # RunAnywhere class + extensions +│ │ │ └── runanywhere.dart # Barrel export +│ │ ├── ios/ # XCFramework (RACommons) +│ │ └── android/ # JNI libs (librac_commons.so) +│ ├── runanywhere_llamacpp/ # llama.cpp backend +│ │ ├── ios/ # RABackendLLAMACPP.xcframework +│ │ └── android/ # librac_backend_llamacpp.so +│ └── runanywhere_onnx/ # ONNX Runtime backend +│ ├── ios/ # RABackendONNX.xcframework + onnxruntime +│ └── android/ # librac_backend_onnx.so + libonnxruntime.so +``` + +### Native Library Loading + +- **iOS**: `DynamicLibrary.executable()` — XCFrameworks statically linked +- **Android**: `DynamicLibrary.open('librac_commons.so')` — from jniLibs + +--- + +## API Reference + +### SDK Lifecycle + +```dart +// Initialize +static Future RunAnywhere.initialize({ + String? apiKey, + String? baseURL, + SDKEnvironment environment = SDKEnvironment.development, +}) + +// State +static bool get isSDKInitialized +static bool get isActive +static String get version +static SDKEnvironment? get environment +static EventBus get events +``` + +### Text Generation (LLM) + +```dart +// Simple chat +static Future RunAnywhere.chat(String prompt) + +// Full generation with metrics +static Future RunAnywhere.generate( + String prompt, { + LLMGenerationOptions? options, +}) + +// Streaming +static Future RunAnywhere.generateStream( + String prompt, { + LLMGenerationOptions? options, +}) + +// Model management +static Future RunAnywhere.loadModel(String modelId) +static Future RunAnywhere.unloadModel() +static bool get isModelLoaded +static String? 
get currentModelId +``` + +**Generation Options:** +```dart +class LLMGenerationOptions { + final int maxTokens; // default: 100 + final double temperature; // default: 0.8 + final double topP; // default: 1.0 + final List stopSequences; + final bool streamingEnabled; + final InferenceFramework? preferredFramework; + final String? systemPrompt; + final StructuredOutputConfig? structuredOutput; +} +``` + +**Generation Result:** +```dart +class LLMGenerationResult { + final String text; + final String? thinkingContent; + final int tokensUsed; + final String modelUsed; + final double latencyMs; + final double tokensPerSecond; + final double? timeToFirstTokenMs; + final int thinkingTokens; + final int responseTokens; +} +``` + +**Streaming Result:** +```dart +class LLMStreamingResult { + final Stream stream; // Token-by-token + final Future result; // Final metrics + final void Function() cancel; +} +``` + +### Speech-to-Text (STT) + +```dart +static Future RunAnywhere.transcribe(Uint8List audioData) +static Future RunAnywhere.transcribeWithResult(Uint8List audioData) +static Future RunAnywhere.loadSTTModel(String modelId) +static Future RunAnywhere.unloadSTTModel() +static bool get isSTTModelLoaded +``` + +### Text-to-Speech (TTS) + +```dart +static Future RunAnywhere.synthesize( + String text, { + double rate = 1.0, + double pitch = 1.0, + double volume = 1.0, +}) +static Future RunAnywhere.loadTTSVoice(String voiceId) +static Future RunAnywhere.unloadTTSVoice() +static bool get isTTSVoiceLoaded +``` + +**TTS Result:** +```dart +class TTSResult { + final Float32List samples; // PCM audio samples + final int sampleRate; + final int durationMs; + double get durationSeconds; + int get numSamples; +} +``` + +### Vision Language Models (VLM) + +```dart +// Simple +static Future RunAnywhere.describeImage( + VLMImage image, { + String prompt = "What's in this image?", +}) + +static Future RunAnywhere.askAboutImage( + String question, { + required VLMImage image, +}) + +// 
Full with metrics +static Future RunAnywhere.processImage( + VLMImage image, { + required String prompt, + int maxTokens = 2048, + double temperature = 0.7, +}) + +// Streaming +static Future RunAnywhere.processImageStream( + VLMImage image, { + required String prompt, +}) + +// Image construction +VLMImage.filePath(String path) +VLMImage.rgbPixels(Uint8List data, {required int width, required int height}) +VLMImage.base64(String encoded) +``` + +### Voice Agent + +```dart +// Start interactive voice session +static Future RunAnywhere.startVoiceSession({ + VoiceSessionConfig config = VoiceSessionConfig.defaultConfig, +}) + +// Session config +class VoiceSessionConfig { + final double silenceDuration; // default: 1.5s + final double speechThreshold; // default: 0.03 + final bool autoPlayTTS; // default: true + final bool continuousMode; // default: true +} + +// Session events (sealed class) +VoiceSessionStarted +VoiceSessionListening(double audioLevel) +VoiceSessionSpeechStarted +VoiceSessionProcessing +VoiceSessionTranscribed(String text) +VoiceSessionResponded(String text) +VoiceSessionSpeaking +VoiceSessionTurnCompleted(...) +VoiceSessionStopped +VoiceSessionError(String message) +``` + +### Tool Calling + +```dart +// Register tools +static void RunAnywhereToolCalling.registerTool( + ToolDefinition definition, + ToolExecutor executor, +) + +// Generate with tool use +static Future RunAnywhereToolCalling.generateWithTools( + String prompt, { + ToolCallingOptions? 
options, +}) + +// Tool definition +class ToolDefinition { + final String name; + final String description; + final List parameters; +} + +// Tool calling formats +ToolCallFormatName.defaultFormat // JSON format +ToolCallFormatName.lfm2 // Pythonic format (for LFM2-Tool) +``` + +### Model Management + +```dart +// Discovery +static Future> RunAnywhere.availableModels() + +// Download with progress +static Stream RunAnywhereStorage.downloadModel(String modelId) + +// Stages +enum ModelDownloadStage { downloading, extracting, validating, complete } +``` + +### NPU Chip Detection + +```dart +// Detect Qualcomm NPU chipset (Android only) +static Future RunAnywhereDevice.getChip() + +enum NPUChip { + snapdragon8Elite('8elite', 'Snapdragon 8 Elite', 'SM8750', '8elite'), + snapdragon8EliteGen5('8elite-gen5', 'Snapdragon 8 Elite Gen 5', 'SM8850', '8elite-gen5'); + + String downloadUrl(String modelSlug, {String quant = 'w4a16'}); + static NPUChip? fromSocModel(String socModel); +} +``` + +**Usage:** +```dart +final chip = await RunAnywhereDevice.getChip(); +if (chip != null) { + final url = chip.downloadUrl('qwen3-4b'); // default w4a16 + final url2 = chip.downloadUrl('qwen2.5-7b-instruct', quant: 'w8a16'); +} +``` + +### Frameworks Registration + +```dart +// Query available frameworks +static Future> RunAnywhereFrameworks.getRegisteredFrameworks() +static Future RunAnywhereFrameworks.isFrameworkAvailable(InferenceFramework framework) +static Future> RunAnywhereFrameworks.modelsForFramework(InferenceFramework framework) +``` + +### Event Bus + +```dart +// Subscribe to SDK events +RunAnywhere.events.events.listen((SDKEvent event) { + // handle event +}); + +// Event categories +enum EventCategory { + sdk, llm, stt, tts, vad, voice, model, device, network, storage, error, rag +} +``` + +--- + +## Core Types + +### Inference Frameworks + +```dart +enum InferenceFramework { + onnx, // ONNX Runtime — STT, TTS, VAD, embeddings + llamaCpp, // llama.cpp — LLM, VLM (GGUF models) + 
genie, // Qualcomm Genie — NPU inference + foundationModels, // Apple Foundation Models + systemTTS, // System TTS + fluidAudio, + builtIn, + none, + unknown, +} +``` + +### Model Categories + +```dart +enum ModelCategory { + language, + speechRecognition, + speechSynthesis, + vision, + imageGeneration, + multimodal, + audio, + embedding, +} +``` + +### SDK Environments + +```dart +enum SDKEnvironment { + development, // Local dev, debug logging + staging, // Testing with real services + production, // Live environment +} +``` + +### Error Handling + +```dart +class SDKError implements Exception { + final String message; + final SDKErrorType type; + final Object? underlyingError; + final ErrorContext? context; + + // 40+ factory constructors: + SDKError.notInitialized() + SDKError.modelNotFound(modelId) + SDKError.generationFailed(message) + SDKError.networkError(message) + // ... etc +} +``` + +--- + +## Build System + +### Development Setup + +```bash +cd sdk/runanywhere-flutter + +# First-time setup (builds native libs) +./scripts/build-flutter.sh --setup + +# After C++ changes +./scripts/build-flutter.sh --local --rebuild-commons + +# Switch to remote mode (use pre-built libs) +./scripts/build-flutter.sh --remote +``` + +### Running the Example App + +```bash +cd examples/flutter/RunAnywhereAI +flutter pub get +flutter run # Android +# iOS: +cd ios && pod install && cd .. 
+flutter run +``` + +### Monorepo Management + +Uses [Melos](https://melos.invertase.dev/) for workspace management: + +```bash +melos bootstrap # Install all dependencies +melos run analyze # Run dart analyze on all packages +melos run test # Run tests on all packages +``` + +### Native Library Modes + +```bash +# Local development (build from source) +RA_TEST_LOCAL=1 flutter run + +# Remote mode (download pre-built) +# Default behavior — downloads from GitHub releases +``` + +--- + +## Genie NPU Models + +Available on HuggingFace (`runanywhere/genie-npu-models`): + +| Model | Slug | Quant | Chips | Size | +|-------|------|-------|-------|------| +| Qwen3 4B | `qwen3-4b` | w4a16 | Gen 5 | 2.5 GB | +| Llama 3.2 1B | `llama3.2-1b-instruct` | w4a16 | Both | 1.3 GB | +| SEA-LION v3.5 8B | `sea-lion3.5-8b-instruct` | w4a16 | Both | 4.5 GB | +| Qwen 2.5 7B | `qwen2.5-7b-instruct` | w8a16 | 8 Elite | 3.9 GB | + +**Registering Genie models:** +```dart +if (Genie.isAvailable) { + await Genie.register(priority: 200); + final chip = await RunAnywhereDevice.getChip(); + if (chip != null) { + Genie.addModel( + id: 'qwen3-4b-npu-${chip.identifier}', + name: 'Qwen3 4B (NPU - ${chip.displayName})', + url: chip.downloadUrl('qwen3-4b'), + memoryRequirement: 2800000000, + ); + } +} +``` diff --git a/docs/sdks/kotlin-sdk.md b/docs/sdks/kotlin-sdk.md new file mode 100644 index 000000000..f83d6c2e0 --- /dev/null +++ b/docs/sdks/kotlin-sdk.md @@ -0,0 +1,556 @@ +# RunAnywhere Kotlin Multiplatform SDK + +Cross-platform SDK for on-device AI inference with intelligent routing. Supports JVM, Android, and (planned) Native targets. 
+ +## Installation + +### Gradle (Maven Central) + +```kotlin +dependencies { + // Core SDK (required) + implementation("io.github.sanchitmonga22:runanywhere-sdk:0.1.5") + + // Backend modules (pick what you need) + implementation("io.github.sanchitmonga22:runanywhere-llamacpp:0.1.5") // LLM + implementation("io.github.sanchitmonga22:runanywhere-onnx:0.1.5") // STT/TTS/VAD + implementation("io.github.sanchitmonga22:runanywhere-genie-android:0.2.1") // Qualcomm NPU +} +``` + +### JitPack + +```kotlin +repositories { + maven { url = uri("https://jitpack.io") } +} +dependencies { + implementation("com.github.RunanywhereAI.runanywhere-sdks:sdk-runanywhere-kotlin:v0.1.5") +} +``` + +## Platform Requirements + +| Platform | Requirement | +|----------|-------------| +| Kotlin | 2.1.21 | +| JVM Target | Java 17 | +| Android Min SDK | 24 | +| Android Target SDK | 36 | +| Gradle | 8.11.1+ | + +## Quick Start + +```kotlin +import com.runanywhere.sdk.public.RunAnywhere +import com.runanywhere.sdk.public.SDKEnvironment +import com.runanywhere.sdk.core.types.InferenceFramework +import com.runanywhere.sdk.public.extensions.Models.ModelCategory + +// 1. Initialize SDK +RunAnywhere.initialize(environment = SDKEnvironment.DEVELOPMENT) + +// 2. Register backends +LlamaCPP.register(priority = 100) +ONNX.register(priority = 100) +Genie.register(priority = 200) // Qualcomm NPU + +// 3. Register a model +RunAnywhere.registerModel( + id = "qwen3-4b-q4_k_m", + name = "Qwen3 4B", + url = "https://huggingface.co/.../Qwen3-4B-Q4_K_M.gguf", + framework = InferenceFramework.LLAMA_CPP, + modality = ModelCategory.LANGUAGE, + memoryRequirement = 2_800_000_000 +) + +// 4. Download and load +RunAnywhere.downloadModel("qwen3-4b-q4_k_m").collect { progress -> + println("${(progress.progress * 100).toInt()}%") +} +RunAnywhere.loadLLMModel("qwen3-4b-q4_k_m") + +// 5. 
Generate text +val response = RunAnywhere.chat("Hello!") +println(response) +``` + +## Architecture + +### Module Structure + +``` +runanywhere-kotlin/ +├── src/ +│ ├── commonMain/ # Cross-platform business logic, interfaces, types +│ ├── jvmAndroidMain/ # Shared JVM/Android: C++ bridge, JNI, HTTP +│ ├── androidMain/ # Android-specific: secure storage, device info +│ └── jvmMain/ # JVM/Desktop: IntelliJ plugin support +├── modules/ +│ ├── runanywhere-core-llamacpp/ # llama.cpp backend (LLM, VLM) +│ └── runanywhere-core-onnx/ # ONNX Runtime backend (STT, TTS, VAD) +``` + +### Two-Phase Initialization + +**Phase 1 — Core Init** (synchronous, ~1-5ms, no network): +``` +RunAnywhere.initialize() + ├─ CppBridge.initialize() + │ ├─ PlatformAdapter.register() ← File ops, logging, keychain + │ ├─ Events.register() ← Analytics callback + │ ├─ Telemetry.initialize() ← HTTP callback + │ └─ Device.register() ← Device info + └─ Mark: isInitialized = true +``` + +**Phase 2 — Services Init** (async, ~100-500ms): +``` +RunAnywhere.completeServicesInitialization() + ├─ CppBridge.initializeServices() + │ ├─ ModelAssignment.register() ← Backend model assignments + │ └─ Platform.register() ← LLM/TTS service callbacks + └─ Mark: areServicesReady = true +``` + +Phase 2 runs automatically on first API call, or can be awaited explicitly. + +--- + +## API Reference + +### SDK Lifecycle + +```kotlin +// Initialize (Phase 1) +fun RunAnywhere.initialize( + apiKey: String? = null, + baseURL: String? = null, + environment: SDKEnvironment = SDKEnvironment.DEVELOPMENT +) + +// Complete services (Phase 2) — auto-called on first API use +suspend fun RunAnywhere.completeServicesInitialization() + +// State +val RunAnywhere.isInitialized: Boolean +val RunAnywhere.areServicesReady: Boolean +val RunAnywhere.isActive: Boolean +val RunAnywhere.version: String +val RunAnywhere.environment: SDKEnvironment? 
+ +// Cleanup +suspend fun RunAnywhere.reset() +suspend fun RunAnywhere.cleanup() +``` + +### Text Generation (LLM) + +```kotlin +// Simple chat +suspend fun RunAnywhere.chat(prompt: String): String + +// Full generation with metrics +suspend fun RunAnywhere.generate( + prompt: String, + options: LLMGenerationOptions? = null +): LLMGenerationResult + +// Streaming (token-by-token) +fun RunAnywhere.generateStream( + prompt: String, + options: LLMGenerationOptions? = null +): Flow + +// Streaming with final metrics +suspend fun RunAnywhere.generateStreamWithMetrics( + prompt: String, + options: LLMGenerationOptions? = null +): LLMStreamingResult + +// Control +fun RunAnywhere.cancelGeneration() + +// Model loading +suspend fun RunAnywhere.loadLLMModel(modelId: String) +suspend fun RunAnywhere.unloadLLMModel() +suspend fun RunAnywhere.isLLMModelLoaded(): Boolean +val RunAnywhere.currentLLMModelId: String? +``` + +**Generation Options:** +```kotlin +data class LLMGenerationOptions( + val maxTokens: Int = 1000, + val temperature: Float = 0.7f, + val topP: Float = 1.0f, + val stopSequences: List = emptyList(), + val streamingEnabled: Boolean = false, + val preferredFramework: InferenceFramework? = null, + val structuredOutput: StructuredOutputConfig? = null, + val systemPrompt: String? = null +) +``` + +**Generation Result:** +```kotlin +data class LLMGenerationResult( + val text: String, + val thinkingContent: String? = null, + val tokensUsed: Int, + val modelUsed: String, + val latencyMs: Double, + val tokensPerSecond: Double = 0.0, + val timeToFirstTokenMs: Double? = null, + val thinkingTokens: Int? 
= null, + val responseTokens: Int = tokensUsed +) +``` + +### Speech-to-Text (STT) + +```kotlin +suspend fun RunAnywhere.transcribe(audioData: ByteArray): String + +suspend fun RunAnywhere.transcribeWithOptions( + audioData: ByteArray, + options: STTOptions +): STTOutput + +suspend fun RunAnywhere.loadSTTModel(modelId: String) +suspend fun RunAnywhere.unloadSTTModel() +suspend fun RunAnywhere.isSTTModelLoaded(): Boolean +``` + +### Text-to-Speech (TTS) + +```kotlin +suspend fun RunAnywhere.synthesize( + text: String, + options: TTSOptions = TTSOptions() +): TTSOutput + +suspend fun RunAnywhere.speak( + text: String, + options: TTSOptions = TTSOptions() +): TTSSpeakResult + +suspend fun RunAnywhere.loadTTSVoice(voiceId: String) +suspend fun RunAnywhere.unloadTTSVoice() +suspend fun RunAnywhere.availableTTSVoices(): List +``` + +### Voice Activity Detection (VAD) + +```kotlin +suspend fun RunAnywhere.detectVoiceActivity(audioData: ByteArray): VADResult +fun RunAnywhere.streamVAD(audioSamples: Flow): Flow +suspend fun RunAnywhere.resetVAD() +``` + +### Vision Language Models (VLM) + +```kotlin +// Simple +suspend fun RunAnywhere.describeImage( + image: VLMImage, + prompt: String = "What's in this image?" +): String + +// Full with metrics +suspend fun RunAnywhere.processImage( + image: VLMImage, + prompt: String, + options: VLMGenerationOptions? = null +): VLMResult + +// Streaming +fun RunAnywhere.processImageStream( + image: VLMImage, + prompt: String, + options: VLMGenerationOptions? 
= null +): Flow + +// Image construction +VLMImage.fromFilePath(path: String): VLMImage +VLMImage.fromBase64(data: String): VLMImage +VLMImage.fromRGBPixels(data: ByteArray, width: Int, height: Int): VLMImage +``` + +### Voice Agent (Complete Pipeline) + +```kotlin +// Configure STT + LLM + TTS pipeline +suspend fun RunAnywhere.configureVoiceAgent(configuration: VoiceAgentConfiguration) + +// Start interactive session +fun RunAnywhere.startVoiceSession( + config: VoiceSessionConfig = VoiceSessionConfig.DEFAULT +): Flow + +// Session events +sealed class VoiceSessionEvent { + object Started + data class Listening(val audioLevel: Float) + object SpeechStarted + object Processing + data class Transcribed(val text: String) + data class Responded(val text: String) + object Speaking + data class TurnCompleted(val transcription: String?, val response: String?) + object Stopped + data class Error(val message: String) +} +``` + +### Model Management + +```kotlin +// Registration +fun RunAnywhere.registerModel( + id: String? = null, + name: String, + url: String, + framework: InferenceFramework, + modality: ModelCategory = ModelCategory.LANGUAGE, + memoryRequirement: Long? = null, + supportsThinking: Boolean = false, + supportsLora: Boolean = false +): ModelInfo + +fun RunAnywhere.registerMultiFileModel( + id: String, + name: String, + files: List, + framework: InferenceFramework, + modality: ModelCategory = ModelCategory.MULTIMODAL, + memoryRequirement: Long? = null +): ModelInfo + +// Discovery +suspend fun RunAnywhere.availableModels(): List +suspend fun RunAnywhere.downloadedModels(): List +suspend fun RunAnywhere.model(modelId: String): ModelInfo? 
+ +// Download +fun RunAnywhere.downloadModel(modelId: String): Flow +suspend fun RunAnywhere.cancelDownload(modelId: String) +suspend fun RunAnywhere.isModelDownloaded(modelId: String): Boolean + +// Lifecycle +suspend fun RunAnywhere.deleteModel(modelId: String) +suspend fun RunAnywhere.deleteAllModels() +``` + +### LoRA Adapters + +```kotlin +suspend fun RunAnywhere.loadLoraAdapter(config: LoRAAdapterConfig) +suspend fun RunAnywhere.removeLoraAdapter(path: String) +suspend fun RunAnywhere.clearLoraAdapters() +suspend fun RunAnywhere.getLoadedLoraAdapters(): List +fun RunAnywhere.registerLoraAdapter(entry: LoraAdapterCatalogEntry) +fun RunAnywhere.downloadLoraAdapter(adapterId: String): Flow +``` + +### RAG (Retrieval-Augmented Generation) + +```kotlin +suspend fun RunAnywhere.ragCreatePipeline(config: RAGConfiguration) +suspend fun RunAnywhere.ragIngest(text: String, metadataJson: String? = null) +suspend fun RunAnywhere.ragQuery( + question: String, + options: RAGQueryOptions? = null +): RAGResult +suspend fun RunAnywhere.ragClearDocuments() +suspend fun RunAnywhere.ragDestroyPipeline() +``` + +### NPU Chip Detection + +```kotlin +fun RunAnywhere.getChip(): NPUChip? +``` + +Returns the detected Qualcomm NPU chipset, or `null` if unsupported. + +```kotlin +enum class NPUChip( + val identifier: String, + val displayName: String, + val socModel: String, + val npuSuffix: String +) { + SNAPDRAGON_8_ELITE("8elite", "Snapdragon 8 Elite", "SM8750", "8elite"), + SNAPDRAGON_8_ELITE_GEN5("8elite-gen5", "Snapdragon 8 Elite Gen 5", "SM8850", "8elite-gen5"); + + fun downloadUrl(modelSlug: String, quant: String = "w4a16"): String + companion object { + fun fromSocModel(socModel: String): NPUChip? + } +} +``` + +**Detection strategy (Android):** +1. `Build.SOC_MODEL` (API 31+) — e.g. "SM8750" +2. `Build.HARDWARE` — fallback codename +3. 
`/proc/cpuinfo` Hardware line — last resort + +### Event Bus + +```kotlin +val RunAnywhere.events: EventBus + +// Subscribe to events +RunAnywhere.events.llmEvents.collect { event -> /* ... */ } +RunAnywhere.events.modelEvents.collect { event -> /* ... */ } +RunAnywhere.events.errorEvents.collect { event -> /* ... */ } + +// Event categories +enum class EventCategory { + SDK, MODEL, LLM, STT, TTS, VOICE, STORAGE, DEVICE, NETWORK, ERROR, RAG +} +``` + +### Storage + +```kotlin +suspend fun RunAnywhere.storageInfo(): StorageInfo +suspend fun RunAnywhere.checkStorageAvailability(requiredBytes: Long): StorageAvailability +suspend fun RunAnywhere.cacheSize(): Long +suspend fun RunAnywhere.clearCache() +``` + +### Logging + +```kotlin +fun RunAnywhere.setLogLevel(level: LogLevel) +fun RunAnywhere.getLogLevel(): LogLevel + +enum class LogLevel { TRACE, DEBUG, INFO, WARNING, ERROR, FAULT } +``` + +--- + +## Core Types + +### Inference Frameworks + +```kotlin +enum class InferenceFramework(val rawValue: String) { + ONNX("ONNX"), // ONNX Runtime — STT, TTS, VAD, embeddings + LLAMA_CPP("LlamaCpp"), // llama.cpp — LLM, VLM (GGUF models) + GENIE("Genie"), // Qualcomm Genie — NPU inference + FOUNDATION_MODELS("FoundationModels"), + SYSTEM_TTS("SystemTTS"), + FLUID_AUDIO("FluidAudio"), + BUILT_IN("BuiltIn"), + NONE("None"), + UNKNOWN("Unknown") +} +``` + +### Model Categories + +```kotlin +enum class ModelCategory(val value: String) { + LANGUAGE("language"), + SPEECH_RECOGNITION("speech-recognition"), + SPEECH_SYNTHESIS("speech-synthesis"), + VISION("vision"), + IMAGE_GENERATION("image-generation"), + MULTIMODAL("multimodal"), + AUDIO("audio"), + EMBEDDING("embedding") +} +``` + +### Error Handling + +```kotlin +data class SDKError( + val code: ErrorCode, + val category: ErrorCategory, + override val message: String, + override val cause: Throwable? 
= null +) : Exception(message, cause) + +// 40+ factory methods: +SDKError.notInitialized() +SDKError.modelNotFound(modelId) +SDKError.modelLoadFailed(message) +SDKError.network(message) +SDKError.download(message) +// ... etc +``` + +--- + +## Build System + +### Build Commands + +```bash +cd sdk/runanywhere-kotlin/ + +# Build all platforms +./scripts/sdk.sh build + +# Individual targets +./scripts/sdk.sh jvm # JVM JAR only +./scripts/sdk.sh android # Android AAR only + +# Test +./scripts/sdk.sh test # All tests +./scripts/sdk.sh test-jvm # JVM tests only + +# Publish to Maven Local +./scripts/sdk.sh publish + +# Clean +./scripts/sdk.sh clean +./scripts/sdk.sh deep-clean # Including Gradle caches +``` + +### Native Library Modes + +Controlled by `gradle.properties`: + +```properties +# Local development (build C++ from source) +runanywhere.testLocal=true + +# CI/Release (download pre-built from GitHub releases) +runanywhere.testLocal=false +runanywhere.nativeLibVersion=0.1.4 +``` + +### Build Output + +``` +build/libs/RunAnywhereKotlinSDK-jvm-0.1.0.jar +build/outputs/aar/RunAnywhereKotlinSDK-debug.aar +~/.m2/repository/com/runanywhere/sdk/ (after publish) +``` + +--- + +## Genie NPU Models + +Available models on HuggingFace (`runanywhere/genie-npu-models`): + +| Model | Slug | Quant | Supported Chips | Size | +|-------|------|-------|-----------------|------| +| Qwen3 4B | `qwen3-4b` | w4a16 | 8 Elite Gen 5 | 2.5 GB | +| Llama 3.2 1B Instruct | `llama3.2-1b-instruct` | w4a16 | 8 Elite, 8 Elite Gen 5 | 1.3 GB | +| SEA-LION v3.5 8B Instruct | `sea-lion3.5-8b-instruct` | w4a16 | 8 Elite, 8 Elite Gen 5 | 4.5 GB | +| Qwen 2.5 7B Instruct | `qwen2.5-7b-instruct` | w8a16 | 8 Elite | 3.9 GB | + +**URL format:** `https://huggingface.co/runanywhere/genie-npu-models/resolve/main/{slug}-genie-{quant}-{chip}.tar.gz` + +```kotlin +val chip = RunAnywhere.getChip() ?: return +val url = chip.downloadUrl("qwen3-4b") // w4a16 (default) +val url = 
chip.downloadUrl("qwen2.5-7b-instruct", quant = "w8a16") +``` diff --git a/docs/sdks/react-native-sdk.md b/docs/sdks/react-native-sdk.md new file mode 100644 index 000000000..aaf74aecd --- /dev/null +++ b/docs/sdks/react-native-sdk.md @@ -0,0 +1,533 @@ +# RunAnywhere React Native SDK + +React Native SDK for on-device AI inference. Uses Nitrogen/Nitro for high-performance TypeScript-to-C++ bridging. + +## Installation + +```bash +# Core SDK (required) +yarn add @runanywhere/core + +# Backend modules (pick what you need) +yarn add @runanywhere/llamacpp # LLM text generation (GGUF models) +yarn add @runanywhere/onnx # STT, TTS, VAD (ONNX Runtime) +yarn add @runanywhere/genie # Qualcomm NPU inference +``` + +### Peer Dependencies + +```bash +yarn add react-native-nitro-modules react-native-fs +``` + +### iOS Setup + +```bash +cd ios && pod install +``` + +### Android Setup + +No extra steps — native libraries are bundled with the npm packages. Gradle downloads missing ABIs automatically. + +## Platform Requirements + +| Platform | Requirement | +|----------|-------------| +| React Native | 0.83.1+ | +| iOS | 15.1+ | +| Android Min SDK | 24 | +| Node.js | 18+ | + +## Quick Start + +```typescript +import { RunAnywhere, LLMFramework, ModelCategory } from '@runanywhere/core'; +import { LlamaCPP } from '@runanywhere/llamacpp'; +import { ONNX } from '@runanywhere/onnx'; + +// 1. Initialize SDK +await RunAnywhere.initialize({ + environment: SDKEnvironment.Development, +}); + +// 2. Register backends +LlamaCPP.register(); +await ONNX.register(); + +// 3. Register a model +await RunAnywhere.registerModel({ + id: 'qwen3-4b-q4_k_m', + name: 'Qwen3 4B', + url: 'https://huggingface.co/.../Qwen3-4B-Q4_K_M.gguf', + framework: LLMFramework.LlamaCpp, + memoryRequirement: 2_800_000_000, +}); + +// 4. 
Download and load +await RunAnywhere.downloadModel('qwen3-4b-q4_k_m', (progress) => { + console.log(`${Math.round(progress.progress * 100)}%`); +}); +await RunAnywhere.loadModel('qwen3-4b-q4_k_m'); + +// 5. Generate text +const response = await RunAnywhere.chat('Hello!'); +console.log(response); +``` + +## Architecture + +### Package Structure + +``` +runanywhere-react-native/ +├── packages/ +│ ├── core/ # Core SDK +│ │ ├── src/ # TypeScript source +│ │ │ ├── Public/ # RunAnywhere class + extensions +│ │ │ ├── services/ # FileSystem, ModelRegistry, HTTP +│ │ │ ├── types/ # All type definitions +│ │ │ └── native/ # Nitro module bindings +│ │ ├── cpp/ # C++ implementation (HybridRunAnywhereCore) +│ │ │ └── bridges/ # Feature-specific C++ bridges +│ │ ├── android/ # Kotlin + CMake + JNI libs +│ │ ├── ios/ # Swift + XCFrameworks +│ │ └── nitrogen/generated/ # Auto-generated bindings (do not edit) +│ ├── llamacpp/ # llama.cpp backend +│ │ ├── cpp/ # HybridRunAnywhereLlama.cpp +│ │ ├── android/ # librac_backend_llamacpp.so +│ │ └── ios/ # RABackendLLAMACPP.xcframework +│ └── onnx/ # ONNX Runtime backend +│ ├── cpp/ # HybridRunAnywhereONNX.cpp +│ ├── android/ # librac_backend_onnx.so + libonnxruntime.so +│ └── ios/ # RABackendONNX.xcframework +``` + +### Nitrogen/Nitro Bridge + +TypeScript ↔ C++ bridge via [Nitrogen](https://nitro.margelo.com/): + +``` +TypeScript (RunAnywhere.ts) + ↓ Nitro HybridObject +C++ (HybridRunAnywhereCore.cpp) + ↓ rac_* C API +Native Libraries (librac_commons.so / RACommons.xcframework) +``` + +--- + +## API Reference + +### SDK Lifecycle + +```typescript +// Initialize +await RunAnywhere.initialize(options: SDKInitOptions): Promise + +interface SDKInitOptions { + apiKey?: string; + baseURL?: string; + environment?: SDKEnvironment; + debug?: boolean; +} + +// State +RunAnywhere.isSDKInitialized: boolean +RunAnywhere.areServicesReady: boolean +RunAnywhere.version: string + +// Cleanup +await RunAnywhere.destroy(): Promise +await 
RunAnywhere.reset(): Promise +``` + +### Text Generation (LLM) + +```typescript +// Simple chat +await RunAnywhere.chat(prompt: string): Promise + +// Full generation +await RunAnywhere.generate(prompt: string, options?: GenerationOptions): Promise + +// Streaming +await RunAnywhere.generateStream(prompt: string, options?: GenerationOptions): Promise + +// Control +RunAnywhere.cancelGeneration(): void + +// Model management +await RunAnywhere.loadModel(modelPathOrId: string): Promise +await RunAnywhere.isModelLoaded(): Promise +await RunAnywhere.unloadModel(): Promise +``` + +**Generation Options:** +```typescript +interface GenerationOptions { + maxTokens?: number; + temperature?: number; + topP?: number; + stopSequences?: string[]; + streamingEnabled?: boolean; + preferredFramework?: LLMFramework; + systemPrompt?: string; + structuredOutput?: StructuredOutputConfig; +} +``` + +**Generation Result:** +```typescript +interface GenerationResult { + text: string; + thinkingContent?: string; + tokensUsed: number; + modelUsed: string; + latencyMs: number; + framework?: LLMFramework; + tokensPerSecond: number; + timeToFirstTokenMs?: number; + thinkingTokens?: number; + responseTokens: number; +} +``` + +**Streaming Result:** +```typescript +interface LLMStreamingResult { + stream: AsyncIterable; + result: Promise; + cancel: () => void; +} +``` + +### Speech-to-Text (STT) + +```typescript +await RunAnywhere.transcribe(audioData: string | ArrayBuffer, options?: STTOptions): Promise +await RunAnywhere.transcribeSimple(audioData: string | ArrayBuffer): Promise +await RunAnywhere.transcribeFile(filePath: string, options?: STTOptions): Promise + +// Model management +await RunAnywhere.loadSTTModel(modelPath: string): Promise +await RunAnywhere.isSTTModelLoaded(): Promise +await RunAnywhere.unloadSTTModel(): Promise +``` + +### Text-to-Speech (TTS) + +```typescript +await RunAnywhere.synthesize(text: string, options?: TTSConfiguration): Promise +await RunAnywhere.speak(text: 
string, options?: TTSConfiguration): Promise +await RunAnywhere.isSpeaking(): Promise +await RunAnywhere.stopSpeaking(): Promise + +// Voice management +await RunAnywhere.loadTTSVoice(voiceId: string): Promise +await RunAnywhere.availableTTSVoices(): Promise +``` + +### Voice Activity Detection (VAD) + +```typescript +await RunAnywhere.initializeVAD(config?: VADConfiguration): Promise +await RunAnywhere.detectSpeech(audioData: Float32Array): Promise +await RunAnywhere.startVAD(): Promise +await RunAnywhere.stopVAD(): Promise +await RunAnywhere.resetVAD(): Promise + +RunAnywhere.setVADSpeechActivityCallback(callback: (result: VADResult) => void): void +``` + +### Vision Language Models (VLM) + +```typescript +// Simple +await RunAnywhere.describeImage(image: VLMImage, options?: VLMGenerationOptions): Promise +await RunAnywhere.askAboutImage(image: VLMImage, question: string): Promise + +// Full with metrics +await RunAnywhere.processImage(image: VLMImage, prompt: string, options?: VLMGenerationOptions): Promise + +// Streaming +await RunAnywhere.processImageStream(image: VLMImage, prompt: string): Promise + +// Model management +await RunAnywhere.registerVLMBackend(): Promise +await RunAnywhere.loadVLMModel(modelPath: string, mmprojPath?: string): Promise +await RunAnywhere.loadVLMModelById(modelId: string): Promise +``` + +**VLM Image:** +```typescript +type VLMImage = + | { format: VLMImageFormat.FilePath; filePath: string } + | { format: VLMImageFormat.RGBPixels; data: Uint8Array; width: number; height: number } + | { format: VLMImageFormat.Base64; base64: string }; +``` + +### Voice Agent + +```typescript +// Initialize full voice pipeline +await RunAnywhere.initializeVoiceAgent(config: VoiceAgentConfig): Promise +await RunAnywhere.isVoiceAgentReady(): Promise + +// Process a voice turn +await RunAnywhere.processVoiceTurn(audioData: string | ArrayBuffer): Promise + +// Interactive session +await RunAnywhere.startVoiceSession(config: VoiceSessionConfig): Promise 
+ +// Cleanup +await RunAnywhere.cleanupVoiceAgent(): Promise +``` + +### Structured Output + +```typescript +await RunAnywhere.generateStructured( + prompt: string, + schema: Record, + options?: GenerationOptions +): Promise + +await RunAnywhere.extractEntities(text: string, entityTypes: string[]): Promise> +await RunAnywhere.classify(text: string, categories: string[]): Promise +``` + +### Tool Calling + +```typescript +// Register tools +RunAnywhere.registerTool(tool: ToolDefinition): void +RunAnywhere.unregisterTool(toolName: string): void +RunAnywhere.getRegisteredTools(): ToolDefinition[] + +// Generate with tools +await RunAnywhere.generateWithTools(prompt: string, options?: GenerationOptions): Promise +await RunAnywhere.continueWithToolResult(toolName: string, result: unknown): Promise +``` + +### RAG (Retrieval-Augmented Generation) + +```typescript +await RunAnywhere.ragCreatePipeline(config: RAGConfiguration): Promise +await RunAnywhere.ragIngest(pipelineId: string, documents: string[]): Promise +await RunAnywhere.ragQuery(pipelineId: string, options: RAGQueryOptions): Promise +await RunAnywhere.ragClearDocuments(pipelineId: string): Promise +await RunAnywhere.ragDestroyPipeline(pipelineId: string): Promise +``` + +### Model Management + +```typescript +// Registration +await RunAnywhere.registerModel(modelInfo: ModelRegistration): Promise +await RunAnywhere.registerMultiFileModel(modelId: string, files: ModelFileDescriptor[]): Promise + +interface ModelRegistration { + id: string; + name: string; + url: string; + framework: LLMFramework; + modality?: ModelCategory; + memoryRequirement?: number; +} + +// Discovery +await RunAnywhere.getAvailableModels(): Promise +await RunAnywhere.getModelInfo(modelId: string): Promise +await RunAnywhere.isModelDownloaded(modelId: string): Promise +await RunAnywhere.getDownloadedModels(): Promise + +// Download +await RunAnywhere.downloadModel(modelId: string, onProgress?: ProgressCallback): Promise +await 
RunAnywhere.cancelDownload(modelId: string): Promise + +// Lifecycle +await RunAnywhere.deleteModel(modelId: string): Promise +await RunAnywhere.checkCompatibility(modelId: string): Promise +``` + +### NPU Chip Detection + +```typescript +import { getChip, getNPUDownloadUrl, NPU_CHIPS } from '@runanywhere/core'; + +const chip = await getChip(); // null if no supported NPU + +if (chip) { + const url = getNPUDownloadUrl(chip, 'qwen3-4b'); // default w4a16 + const url2 = getNPUDownloadUrl(chip, 'qwen2.5-7b-instruct', 'w8a16'); +} +``` + +**NPUChip Interface:** +```typescript +interface NPUChip { + identifier: string; // '8elite' or '8elite-gen5' + displayName: string; // 'Snapdragon 8 Elite' + socModel: string; // 'SM8750' + npuSuffix: string; // URL construction +} + +// Supported chips +const NPU_CHIPS: readonly NPUChip[] = [ + { identifier: '8elite', displayName: 'Snapdragon 8 Elite', socModel: 'SM8750', npuSuffix: '8elite' }, + { identifier: '8elite-gen5', displayName: 'Snapdragon 8 Elite Gen 5', socModel: 'SM8850', npuSuffix: '8elite-gen5' }, +]; + +function getNPUDownloadUrl(chip: NPUChip, modelSlug: string, quant?: string): string +function npuChipFromSocModel(socModel: string): NPUChip | undefined +``` + +### Audio Utilities + +```typescript +// Recording +await RunAnywhere.Audio.requestPermission(): Promise +await RunAnywhere.Audio.startRecording(config?: AudioCaptureConfig): Promise +await RunAnywhere.Audio.stopRecording(): Promise + +// Playback +await RunAnywhere.Audio.playAudio(audioData: Uint8Array | string, sampleRate?: number): Promise +await RunAnywhere.Audio.stopPlayback(): Promise + +// Conversion +RunAnywhere.Audio.createWavFromPCMFloat32(samples: Float32Array, sampleRate?: number): Uint8Array + +// Constants +RunAnywhere.Audio.SAMPLE_RATE // 16000 +RunAnywhere.Audio.TTS_SAMPLE_RATE // 22050 +``` + +### Storage + +```typescript +await RunAnywhere.getStorageInfo(): Promise +await RunAnywhere.getModelsDirectory(): Promise +await 
RunAnywhere.clearCache(): Promise +``` + +### Logging + +```typescript +await RunAnywhere.setLogLevel(level: LogLevel): Promise + +enum LogLevel { NONE, ERROR, WARNING, INFO, DEBUG, VERBOSE } +``` + +### Conversation Helper + +```typescript +const conv = RunAnywhere.conversation(); +const r1 = await conv.send('What is AI?'); +const r2 = await conv.send('Tell me more'); +conv.clear(); +``` + +--- + +## Core Types + +### Enums + +```typescript +enum SDKEnvironment { Development, Staging, Production } + +enum LLMFramework { + ONNX, LlamaCpp, Genie, FoundationModels, SystemTTS, // ... +} + +enum ModelCategory { + Language, SpeechRecognition, SpeechSynthesis, Vision, + ImageGeneration, Multimodal, Audio, Embedding, +} + +enum ModelFormat { + GGUF, ONNX, MLModel, SafeTensors, Bin, Zip, Unknown, // ... +} + +enum ExecutionTarget { OnDevice, Cloud, Hybrid } +enum HardwareAcceleration { CPU, GPU, NeuralEngine, NPU } +enum AudioFormat { PCM, WAV, MP3, FLAC, OPUS, AAC } +``` + +--- + +## Build System + +### Workspace Commands + +```bash +cd sdk/runanywhere-react-native + +yarn install # Install all workspace deps +yarn build # Build all packages (TypeScript) +yarn typecheck # TypeScript type checking +yarn lint # ESLint +yarn nitrogen:all # Regenerate all Nitrogen bindings +``` + +### Running the Example App + +```bash +cd examples/react-native/RunAnywhereAI +yarn install + +# Android +yarn android + +# iOS +cd ios && pod install && cd .. +yarn ios +``` + +### Native Library Management + +Native `.so` / `.xcframework` files are bundled with the npm packages. Missing ABIs are downloaded automatically: + +- **Android**: Gradle `downloadNativeLibs` task downloads from GitHub releases +- **iOS**: XCFrameworks vendored in podspecs + +### Package Manager + +Uses **Yarn 3.6.1** (Berry). 
Enable via: +```bash +corepack enable +``` + +--- + +## Genie NPU Models + +Available on HuggingFace (`runanywhere/genie-npu-models`): + +| Model | Slug | Quant | Chips | Size | +|-------|------|-------|-------|------| +| Qwen3 4B | `qwen3-4b` | w4a16 | Gen 5 | 2.5 GB | +| Llama 3.2 1B | `llama3.2-1b-instruct` | w4a16 | Both | 1.3 GB | +| SEA-LION v3.5 8B | `sea-lion3.5-8b-instruct` | w4a16 | Both | 4.5 GB | +| Qwen 2.5 7B | `qwen2.5-7b-instruct` | w8a16 | 8 Elite | 3.9 GB | + +**Registering Genie models:** +```typescript +import { Genie } from '@runanywhere/genie'; +import { getChip, getNPUDownloadUrl, LLMFramework } from '@runanywhere/core'; + +if (Platform.OS === 'android' && Genie?.isAvailable) { + Genie.register(); + const chip = await getChip(); + if (chip) { + await RunAnywhere.registerModel({ + id: `qwen3-4b-npu-${chip.identifier}`, + name: `Qwen3 4B (NPU - ${chip.displayName})`, + url: getNPUDownloadUrl(chip, 'qwen3-4b'), + framework: LLMFramework.Genie, + memoryRequirement: 2_800_000_000, + }); + } +} +``` diff --git a/examples/android/RunAnywhereAI/app/build.gradle.kts b/examples/android/RunAnywhereAI/app/build.gradle.kts index 6a73d6b46..5f99b32e9 100644 --- a/examples/android/RunAnywhereAI/app/build.gradle.kts +++ b/examples/android/RunAnywhereAI/app/build.gradle.kts @@ -220,6 +220,8 @@ dependencies { implementation(project(":runanywhere-core-llamacpp")) // ~45MB - LLM text generation implementation(project(":runanywhere-core-onnx")) // ~30MB - STT, TTS, VAD // RAG pipeline is now part of the core SDK (not a separate module) + // Genie: closed-source AAR from Maven Central (Qualcomm NPU backend) + implementation("io.github.sanchitmonga22:runanywhere-genie-android:0.2.1") // AndroidX Core & Lifecycle implementation(libs.androidx.core.ktx) @@ -339,4 +341,4 @@ dependencies { detekt { config.setFrom("${project.rootDir}/detekt.yml") -} \ No newline at end of file +} diff --git a/examples/android/RunAnywhereAI/app/src/main/AndroidManifest.xml 
b/examples/android/RunAnywhereAI/app/src/main/AndroidManifest.xml index f7cd9f794..a0affd834 100644 --- a/examples/android/RunAnywhereAI/app/src/main/AndroidManifest.xml +++ b/examples/android/RunAnywhereAI/app/src/main/AndroidManifest.xml @@ -27,9 +27,16 @@ android:supportsRtl="true" android:theme="@style/Theme.RunAnywhereAI" android:largeHeap="true" + android:extractNativeLibs="true" android:usesCleartextTraffic="false" tools:targetApi="35"> + + + + ) +#import +#else +@import camera_avfoundation; +#endif + #if __has_include() #import #else @import device_info_plus; #endif +#if __has_include() +#import +#else +@import file_picker; +#endif + #if __has_include() #import #else @@ -30,6 +42,12 @@ @import flutter_tts; #endif +#if __has_include() +#import +#else +@import image_picker_ios; +#endif + #if __has_include() #import #else @@ -60,16 +78,16 @@ @import runanywhere; #endif -#if __has_include() -#import +#if __has_include() +#import #else -@import runanywhere_llamacpp; +@import runanywhere_genie; #endif -#if __has_include() -#import +#if __has_include() +#import #else -@import runanywhere_onnx; +@import runanywhere_llamacpp; #endif #if __has_include() @@ -94,16 +112,19 @@ @implementation GeneratedPluginRegistrant + (void)registerWithRegistry:(NSObject*)registry { [AudioplayersDarwinPlugin registerWithRegistrar:[registry registrarForPlugin:@"AudioplayersDarwinPlugin"]]; + [CameraPlugin registerWithRegistrar:[registry registrarForPlugin:@"CameraPlugin"]]; [FPPDeviceInfoPlusPlugin registerWithRegistrar:[registry registrarForPlugin:@"FPPDeviceInfoPlusPlugin"]]; + [FilePickerPlugin registerWithRegistrar:[registry registrarForPlugin:@"FilePickerPlugin"]]; [FlutterSecureStoragePlugin registerWithRegistrar:[registry registrarForPlugin:@"FlutterSecureStoragePlugin"]]; [FlutterTtsPlugin registerWithRegistrar:[registry registrarForPlugin:@"FlutterTtsPlugin"]]; + [FLTImagePickerPlugin registerWithRegistrar:[registry registrarForPlugin:@"FLTImagePickerPlugin"]]; 
[FPPPackageInfoPlusPlugin registerWithRegistrar:[registry registrarForPlugin:@"FPPPackageInfoPlusPlugin"]]; [PathProviderPlugin registerWithRegistrar:[registry registrarForPlugin:@"PathProviderPlugin"]]; [PermissionHandlerPlugin registerWithRegistrar:[registry registrarForPlugin:@"PermissionHandlerPlugin"]]; [RecordIosPlugin registerWithRegistrar:[registry registrarForPlugin:@"RecordIosPlugin"]]; [RunAnywherePlugin registerWithRegistrar:[registry registrarForPlugin:@"RunAnywherePlugin"]]; + [GeniePlugin registerWithRegistrar:[registry registrarForPlugin:@"GeniePlugin"]]; [LlamaCppPlugin registerWithRegistrar:[registry registrarForPlugin:@"LlamaCppPlugin"]]; - [OnnxPlugin registerWithRegistrar:[registry registrarForPlugin:@"OnnxPlugin"]]; [SharedPreferencesPlugin registerWithRegistrar:[registry registrarForPlugin:@"SharedPreferencesPlugin"]]; [SqflitePlugin registerWithRegistrar:[registry registrarForPlugin:@"SqflitePlugin"]]; [URLLauncherPlugin registerWithRegistrar:[registry registrarForPlugin:@"URLLauncherPlugin"]]; diff --git a/examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart b/examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart index 6794172c4..0befd3df3 100644 --- a/examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart +++ b/examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart @@ -10,8 +10,11 @@ import 'package:runanywhere_ai/core/design_system/app_spacing.dart'; import 'package:runanywhere_ai/core/services/model_manager.dart'; import 'package:runanywhere_ai/core/utilities/constants.dart'; import 'package:runanywhere_ai/core/utilities/keychain_helper.dart'; +import 'package:runanywhere/core/types/npu_chip.dart'; +import 'package:runanywhere/public/extensions/runanywhere_device.dart'; import 'package:runanywhere/public/extensions/rag_module.dart'; import 'package:runanywhere_llamacpp/runanywhere_llamacpp.dart'; +import 'package:runanywhere_genie/runanywhere_genie.dart'; /// RunAnywhereAIApp (mirroring iOS 
RunAnywhereAIApp.swift) /// @@ -204,6 +207,41 @@ class _RunAnywhereAIAppState extends State { debugPrint('✅ LlamaCPP module registered'); await Future.delayed(Duration.zero); + // --- GENIE NPU MODULE (Android/Snapdragon only) --- + if (Genie.isAvailable) { + await Genie.register(priority: 200); + final chip = await RunAnywhereDevice.getChip(); + if (chip != null) { + // Models with per-chip availability + const genieModels = [ + // Qwen3 4B — Gen 5 only + (slug: 'qwen3-4b', name: 'Qwen3 4B', mem: 2800000000, quant: 'w4a16', chips: {NPUChip.snapdragon8EliteGen5}), + // Llama 3.2 1B Instruct — both chips + (slug: 'llama3.2-1b-instruct', name: 'Llama 3.2 1B Instruct', mem: 1200000000, quant: 'w4a16', chips: {NPUChip.snapdragon8Elite, NPUChip.snapdragon8EliteGen5}), + // SEA-LION v3.5 8B Instruct — both chips + (slug: 'sea-lion3.5-8b-instruct', name: 'SEA-LION v3.5 8B Instruct', mem: 4800000000, quant: 'w4a16', chips: {NPUChip.snapdragon8Elite, NPUChip.snapdragon8EliteGen5}), + // Qwen 2.5 7B Instruct — 8elite only, w8a16 quant + (slug: 'qwen2.5-7b-instruct', name: 'Qwen 2.5 7B Instruct', mem: 4200000000, quant: 'w8a16', chips: {NPUChip.snapdragon8Elite}), + ]; + for (final m in genieModels) { + if (m.chips.contains(chip)) { + Genie.addModel( + id: '${m.slug}-npu-${chip.identifier}', + name: '${m.name} (NPU - ${chip.displayName})', + url: chip.downloadUrl(m.slug, quant: m.quant), + memoryRequirement: m.mem, + ); + } + } + debugPrint('✅ Genie NPU module registered (chip: ${chip.displayName})'); + } else { + debugPrint('ℹ️ Genie available but no supported NPU chip detected'); + } + } else { + debugPrint('ℹ️ Genie NPU not available (non-Snapdragon device)'); + } + await Future.delayed(Duration.zero); + // --- VLM MODULE --- RunAnywhere.registerModel( id: 'smolvlm-500m-instruct-q8_0', diff --git a/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart b/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart index 698fc339d..532118ac5 
100644 --- a/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart @@ -530,7 +530,7 @@ class _ChatInterfaceViewState extends State { /// Map SDK InferenceFramework enum to app framework enum LLMFramework _mapInferenceFramework(sdk.InferenceFramework? framework) { - if (framework == null) return LLMFramework.llamaCpp; + if (framework == null) return LLMFramework.unknown; switch (framework) { case sdk.InferenceFramework.llamaCpp: return LLMFramework.llamaCpp; @@ -540,8 +540,10 @@ class _ChatInterfaceViewState extends State { return LLMFramework.onnxRuntime; case sdk.InferenceFramework.systemTTS: return LLMFramework.systemTTS; + case sdk.InferenceFramework.genie: + return LLMFramework.genie; default: - return LLMFramework.llamaCpp; + return LLMFramework.unknown; } } @@ -769,7 +771,6 @@ class _MessageBubble extends StatefulWidget { class _MessageBubbleState extends State<_MessageBubble> { bool _showThinking = false; - bool _showToolCallSheet = false; @override Widget build(BuildContext context) { diff --git a/examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart b/examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart index a0abc14cd..14cade5f8 100644 --- a/examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart @@ -137,6 +137,8 @@ class ModelListViewModel extends ChangeNotifier { return LLMFramework.onnxRuntime; case sdk.InferenceFramework.systemTTS: return LLMFramework.systemTTS; + case sdk.InferenceFramework.genie: + return LLMFramework.genie; default: return LLMFramework.unknown; } @@ -153,6 +155,8 @@ class ModelListViewModel extends ChangeNotifier { return sdk.InferenceFramework.onnx; case LLMFramework.systemTTS: return sdk.InferenceFramework.systemTTS; + case LLMFramework.genie: + return 
sdk.InferenceFramework.genie; case LLMFramework.mediaPipe: case LLMFramework.whisperKit: case LLMFramework.unknown: diff --git a/examples/flutter/RunAnywhereAI/lib/features/models/model_types.dart b/examples/flutter/RunAnywhereAI/lib/features/models/model_types.dart index 1f8c51e27..f60d809e0 100644 --- a/examples/flutter/RunAnywhereAI/lib/features/models/model_types.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/models/model_types.dart @@ -10,6 +10,7 @@ enum LLMFramework { onnxRuntime, systemTTS, whisperKit, + genie, unknown; String get displayName { @@ -26,6 +27,8 @@ enum LLMFramework { return 'System TTS'; case LLMFramework.whisperKit: return 'WhisperKit'; + case LLMFramework.genie: + return 'Genie NPU'; case LLMFramework.unknown: return 'Unknown'; } @@ -45,6 +48,8 @@ enum LLMFramework { return 'system_tts'; case LLMFramework.whisperKit: return 'whisperkit'; + case LLMFramework.genie: + return 'genie'; case LLMFramework.unknown: return 'unknown'; } diff --git a/examples/flutter/RunAnywhereAI/pubspec.yaml b/examples/flutter/RunAnywhereAI/pubspec.yaml index 5c6891395..15d55be84 100644 --- a/examples/flutter/RunAnywhereAI/pubspec.yaml +++ b/examples/flutter/RunAnywhereAI/pubspec.yaml @@ -17,6 +17,10 @@ dependencies: # RunAnywhere SDK - LlamaCpp Backend (LLM) runanywhere_llamacpp: path: ../../../sdk/runanywhere-flutter/packages/runanywhere_llamacpp + + # RunAnywhere SDK - Genie NPU Backend (Android/Snapdragon only) + runanywhere_genie: + path: ../../../sdk/runanywhere-flutter/packages/runanywhere_genie provider: ^6.1.0 flutter_markdown: ^0.6.18 record: ^6.1.0 @@ -57,6 +61,8 @@ dev_dependencies: dependency_overrides: runanywhere: path: ../../../sdk/runanywhere-flutter/packages/runanywhere + runanywhere_genie: + path: ../../../sdk/runanywhere-flutter/packages/runanywhere_genie flutter: uses-material-design: true diff --git a/examples/react-native/RunAnywhereAI/App.tsx b/examples/react-native/RunAnywhereAI/App.tsx index a2cda561e..9e8cfa329 100644 --- 
a/examples/react-native/RunAnywhereAI/App.tsx +++ b/examples/react-native/RunAnywhereAI/App.tsx @@ -19,6 +19,7 @@ import { StyleSheet, ActivityIndicator, TouchableOpacity, + Platform, } from 'react-native'; import { NavigationContainer } from '@react-navigation/native'; import Icon from 'react-native-vector-icons/Ionicons'; @@ -41,7 +42,11 @@ import { LLMFramework, ModelArtifactType, initializeNitroModulesGlobally, + getChip, + getNPUDownloadUrl, + NPU_CHIPS, } from '@runanywhere/core'; +import type { NPUChip } from '@runanywhere/core'; // Make LlamaCPP optional for ONNX-only builds let LlamaCPP: any = null; @@ -50,6 +55,15 @@ try { } catch (e) { console.warn('[App] LlamaCPP backend not available - some features disabled'); } + +// Make Genie optional (Android/Snapdragon only) +let Genie: any = null; +try { + Genie = require('@runanywhere/genie').Genie; +} catch (e) { + console.warn('[App] Genie NPU backend not available'); +} + import { ONNX } from '@runanywhere/onnx'; import { getStoredApiKey, getStoredBaseURL, hasCustomConfiguration } from './src/screens/SettingsScreen'; @@ -228,6 +242,50 @@ async function registerModulesAndModels(): Promise { ]); } + // ========================================================================= + // Genie NPU backend + models (Android/Snapdragon only) + // ========================================================================= + if (Platform.OS === 'android' && Genie && Genie.isAvailable) { + Genie.register(); + + const chip = await getChip(); + if (chip) { + // Models with per-chip availability + const genieModels: Array<{ + slug: string; + name: string; + mem: number; + chips: string[]; + quant?: string; + }> = [ + // Qwen3 4B — Gen 5 only + { slug: 'qwen3-4b', name: 'Qwen3 4B', mem: 2_800_000_000, chips: ['8elite-gen5'] }, + // Llama 3.2 1B Instruct — both chips + { slug: 'llama3.2-1b-instruct', name: 'Llama 3.2 1B Instruct', mem: 1_200_000_000, chips: ['8elite', '8elite-gen5'] }, + // SEA-LION v3.5 8B Instruct — both chips 
+ { slug: 'sea-lion3.5-8b-instruct', name: 'SEA-LION v3.5 8B Instruct', mem: 4_800_000_000, chips: ['8elite', '8elite-gen5'] }, + // Qwen 2.5 7B Instruct — 8elite only, w8a16 quant + { slug: 'qwen2.5-7b-instruct', name: 'Qwen 2.5 7B Instruct', mem: 4_200_000_000, chips: ['8elite'], quant: 'w8a16' }, + ]; + + const registrations = genieModels + .filter((m) => m.chips.includes(chip.identifier)) + .map((m) => + RunAnywhere.registerModel({ + id: `${m.slug}-npu-${chip.identifier}`, + name: `${m.name} (NPU - ${chip.displayName})`, + url: getNPUDownloadUrl(chip, m.slug, m.quant), + framework: LLMFramework.Genie, + memoryRequirement: m.mem, + }), + ); + await Promise.all(registrations); + console.log(`✅ Genie NPU models registered (chip: ${chip.displayName})`); + } else { + console.log('ℹ️ Genie available but no supported NPU chip detected'); + } + } + // ========================================================================= // ONNX backend + STT/TTS models // ========================================================================= diff --git a/examples/react-native/RunAnywhereAI/android/app/build.gradle b/examples/react-native/RunAnywhereAI/android/app/build.gradle index d8d53b5a8..87e2b2a1c 100644 --- a/examples/react-native/RunAnywhereAI/android/app/build.gradle +++ b/examples/react-native/RunAnywhereAI/android/app/build.gradle @@ -171,10 +171,14 @@ android { "**/libfbjni.so", "**/libfolly_runtime.so", "**/libreactnative.so", - "**/libNitroModules.so", // Nitro modules shared by @runanywhere/onnx and react-native-nitro-modules - "**/librac_backend_onnx.so", // ONNX backend duplicated via rag + onnx modules + "**/libNitroModules.so", + "**/librac_backend_onnx.so", "**/librac_commons.so", - "**/libomp.so" + "**/libomp.so", + "**/libcdsprpc.so", + "**/libGenie.so", + "**/libQnnHtpV81Skel.so", + "**/libQnnHtpV81Stub.so" ] } @@ -199,7 +203,7 @@ dependencies { implementation project(':runanywhere_core') implementation project(':runanywhere_llamacpp') implementation 
project(':runanywhere_onnx') - implementation project(':runanywhere_rag') + implementation project(':runanywhere_genie') def isHermesEnabled = project.hasProperty("hermesEnabled") ? project.hermesEnabled.toBoolean() : true // Expose hermesEnabled for other modules (like react-native-worklets) diff --git a/examples/react-native/RunAnywhereAI/android/app/src/main/AndroidManifest.xml b/examples/react-native/RunAnywhereAI/android/app/src/main/AndroidManifest.xml index a4f2c9ac4..35b0835e3 100644 --- a/examples/react-native/RunAnywhereAI/android/app/src/main/AndroidManifest.xml +++ b/examples/react-native/RunAnywhereAI/android/app/src/main/AndroidManifest.xml @@ -10,7 +10,10 @@ android:icon="@mipmap/ic_launcher" android:roundIcon="@mipmap/ic_launcher_round" android:allowBackup="false" + android:extractNativeLibs="true" android:theme="@style/AppTheme"> + + =10" } }, + "node_modules/@react-native-documents/picker": { + "version": "12.0.1", + "resolved": "https://registry.npmjs.org/@react-native-documents/picker/-/picker-12.0.1.tgz", + "integrity": "sha512-vpJKb4t/5bnxe9+gQl+plJfKrrIsmYwANGhNH2B9E1dS1+6FDBzg4Dwmcq4ueaGfkRKEPJ606mJttVEH1ZKZaA==", + "license": "MIT", + "funding": { + "url": "https://github.com/react-native-documents/document-picker?sponsor=1" + }, + "peerDependencies": { + "react": "*", + "react-native": ">=0.79.0" + } + }, "node_modules/@react-native/assets-registry": { "version": "0.83.1", "resolved": "https://registry.npmjs.org/@react-native/assets-registry/-/assets-registry-0.83.1.tgz", @@ -3349,6 +3363,18 @@ "resolved": "../../../sdk/runanywhere-react-native/packages/core", "link": true }, + "node_modules/@runanywhere/genie": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/@runanywhere/genie/-/genie-0.1.1.tgz", + "integrity": "sha512-rbIoJW4d52QA4+AwgIO9gclVeKXbERFUxDQ1LFeibCCl4KiPe/Eu5XxZtIe12pUBR8cDSps1ZJKQuZ1BfMrdOg==", + "license": "MIT", + "peerDependencies": { + "@runanywhere/core": ">=0.16.0", + "react": ">=18.0.0", + 
"react-native": ">=0.74.0", + "react-native-nitro-modules": ">=0.31.3" + } + }, "node_modules/@runanywhere/llamacpp": { "resolved": "../../../sdk/runanywhere-react-native/packages/llamacpp", "link": true @@ -9748,26 +9774,6 @@ } } }, - "node_modules/react-native-document-picker": { - "version": "9.3.1", - "resolved": "https://registry.npmjs.org/react-native-document-picker/-/react-native-document-picker-9.3.1.tgz", - "integrity": "sha512-Vcofv9wfB0j67zawFjfq9WQPMMzXxOZL9kBmvWDpjVuEcVK73ndRmlXHlkeFl5ZHVsv4Zb6oZYhqm9u5omJOPA==", - "deprecated": "the package was renamed, follow migration instructions at https://shorturl.at/QYT4t", - "license": "MIT", - "dependencies": { - "invariant": "^2.2.4" - }, - "peerDependencies": { - "react": "*", - "react-native": "*", - "react-native-windows": "*" - }, - "peerDependenciesMeta": { - "react-native-windows": { - "optional": true - } - } - }, "node_modules/react-native-fs": { "version": "2.20.0", "resolved": "https://registry.npmjs.org/react-native-fs/-/react-native-fs-2.20.0.tgz", diff --git a/examples/react-native/RunAnywhereAI/package.json b/examples/react-native/RunAnywhereAI/package.json index 445201c9e..65c14d62c 100644 --- a/examples/react-native/RunAnywhereAI/package.json +++ b/examples/react-native/RunAnywhereAI/package.json @@ -14,21 +14,22 @@ "format:fix": "prettier \"src/**/*.{ts,tsx}\" \"App.tsx\" --write", "unused": "knip", "pod-install": "cd ios && pod install", - "clean": "watchman watch-del-all && rm -rf node_modules && rm -rf ios/Pods && npm install && cd ios && pod install", + "clean": "watchman watch-del-all && rm -rf node_modules && rm -rf ios/Pods && npm install --ignore-scripts && patch-package && cd ios && pod install", "postinstall": "patch-package" }, "dependencies": { "@react-native-async-storage/async-storage": "^2.2.0", "@react-native-clipboard/clipboard": "^1.16.3", + "@react-native-documents/picker": "^12.0.1", "@react-navigation/bottom-tabs": "^7.12.0", "@react-navigation/native": "^7.1.28", 
"@react-navigation/native-stack": "^7.12.0", "@runanywhere/core": "file:../../../sdk/runanywhere-react-native/packages/core", + "@runanywhere/genie": "^0.1.1", "@runanywhere/llamacpp": "file:../../../sdk/runanywhere-react-native/packages/llamacpp", "@runanywhere/onnx": "file:../../../sdk/runanywhere-react-native/packages/onnx", "react": "19.2.0", "react-native": "0.83.1", - "react-native-document-picker": "^9.3.1", "react-native-fs": "^2.20.0", "react-native-image-picker": "^8.2.1", "react-native-live-audio-stream": "^1.1.1", diff --git a/examples/react-native/RunAnywhereAI/src/components/model/ModelSelectionSheet.tsx b/examples/react-native/RunAnywhereAI/src/components/model/ModelSelectionSheet.tsx index d4620fbca..9e9c0baca 100644 --- a/examples/react-native/RunAnywhereAI/src/components/model/ModelSelectionSheet.tsx +++ b/examples/react-native/RunAnywhereAI/src/components/model/ModelSelectionSheet.tsx @@ -189,6 +189,7 @@ const getFrameworkInfo = ( [LLMFramework.MLC]: Colors.primaryBlue, [LLMFramework.MediaPipe]: Colors.primaryOrange, [LLMFramework.OpenAIWhisper]: Colors.primaryGreen, + [LLMFramework.Genie]: Colors.primaryPurple, }; const iconMap: Record = { @@ -207,6 +208,7 @@ const getFrameworkInfo = ( [LLMFramework.MLC]: 'git-branch-outline', [LLMFramework.MediaPipe]: 'videocam-outline', [LLMFramework.OpenAIWhisper]: 'ear-outline', + [LLMFramework.Genie]: 'hardware-chip-outline', }; return { @@ -266,15 +268,6 @@ export const ModelSelectionSheet: React.FC = ({ const allModels = await RunAnywhere.getAvailableModels(); const categoryFilter = getCategoryForContext(context); - console.warn('[ModelSelectionSheet] All models count:', allModels.length); - console.warn('[ModelSelectionSheet] Category filter:', categoryFilter); - if (allModels.length > 0) { - console.warn( - '[ModelSelectionSheet] First model:', - JSON.stringify(allModels[0], null, 2) - ); - } - // Filter models based on context (using category field) const allowedFrameworks = 
getAllowedFrameworksForContext(context); @@ -297,11 +290,6 @@ export const ModelSelectionSheet: React.FC = ({ }) : allModels; - console.warn( - '[ModelSelectionSheet] Filtered models count:', - filteredModels.length - ); - // Fallback: if no models found after filtering for LLM, show models with LlamaCpp framework if ( filteredModels.length === 0 && @@ -431,22 +419,11 @@ export const ModelSelectionSheet: React.FC = ({ const getFrameworks = useCallback((): FrameworkDisplayInfo[] => { const frameworkCounts = new Map(); - console.warn( - '[ModelSelectionSheet] getFrameworks called, availableModels count:', - availableModels.length - ); - availableModels.forEach((model: SDKModelInfo, index: number) => { // Determine framework from model - use preferredFramework or first compatibleFramework const frameworkValue = model.preferredFramework || model.compatibleFrameworks?.[0]; - if (index < 3) { - console.warn( - `[ModelSelectionSheet] Model ${index}: preferredFramework=${model.preferredFramework}, compatibleFrameworks=${JSON.stringify(model.compatibleFrameworks)}` - ); - } - // Map string to enum if needed let framework: LLMFramework; if ( @@ -469,11 +446,6 @@ export const ModelSelectionSheet: React.FC = ({ frameworkCounts.set(LLMFramework.SystemTTS, 1); } - console.warn( - '[ModelSelectionSheet] Framework counts:', - Array.from(frameworkCounts.entries()) - ); - return Array.from(frameworkCounts.entries()) .map(([framework, count]) => getFrameworkInfo(framework, count)) .sort((a, b) => b.modelCount - a.modelCount); diff --git a/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx b/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx index e9dfab79a..986adf673 100644 --- a/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx +++ b/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx @@ -374,13 +374,14 @@ export const ChatScreen: React.FC = () => { const success = await RunAnywhere.loadModel(model.localPath); if (success) { - // Set 
the model info with actual ID and name for format detection + // Set the model info preserving the actual framework from the SDK model + const fw = (model.preferredFramework as unknown as LLMFramework) ?? LLMFramework.LlamaCpp; const modelInfo = { id: model.id, name: model.name, category: ModelCategory.Language, - compatibleFrameworks: [LLMFramework.LlamaCpp], - preferredFramework: LLMFramework.LlamaCpp, + compatibleFrameworks: model.compatibleFrameworks as unknown as LLMFramework[] ?? [fw], + preferredFramework: fw, isDownloaded: true, isAvailable: true, supportsThinking: false, @@ -506,8 +507,8 @@ export const ChatScreen: React.FC = () => { modelInfo: { modelId: currentModel?.id || 'unknown', modelName: currentModel?.name || 'Unknown Model', - framework: 'llama.cpp', - frameworkDisplayName: 'llama.cpp', + framework: currentModel?.preferredFramework || 'unknown', + frameworkDisplayName: currentModel?.preferredFramework || 'unknown', }, analytics: { totalGenerationTime: 0, diff --git a/examples/react-native/RunAnywhereAI/src/screens/RAGScreen.tsx b/examples/react-native/RunAnywhereAI/src/screens/RAGScreen.tsx index fff25464c..a34351e82 100644 --- a/examples/react-native/RunAnywhereAI/src/screens/RAGScreen.tsx +++ b/examples/react-native/RunAnywhereAI/src/screens/RAGScreen.tsx @@ -28,7 +28,7 @@ import { } from 'react-native'; import { NativeModules } from 'react-native'; import Icon from 'react-native-vector-icons/Ionicons'; -import DocumentPicker from 'react-native-document-picker'; +import { pick as documentPick } from '@react-native-documents/picker'; import { Colors } from '../theme/colors'; import { Typography, FontWeight } from '../theme/typography'; import { Spacing, Padding, BorderRadius } from '../theme/spacing'; @@ -167,12 +167,11 @@ export const RAGScreen: React.FC = () => { if (!areModelsReady || !isNitroReady) return; try { - const result = await DocumentPicker.pickSingle({ - type: [DocumentPicker.types.pdf, DocumentPicker.types.plainText, 
DocumentPicker.types.json], - copyTo: 'cachesDirectory', + const [result] = await documentPick({ + type: ['application/pdf', 'text/plain', 'application/json'], }); - const fileUri = result.fileCopyUri || result.uri; + const fileUri = result.uri; if (!fileUri) return; setIsLoadingDocument(true); @@ -205,8 +204,8 @@ export const RAGScreen: React.FC = () => { setDocumentName(result.name || 'Document'); setIsDocumentLoaded(true); - } catch (err) { - if (DocumentPicker.isCancel(err)) { + } catch (err: any) { + if (err?.code === 'OPERATION_CANCELED') { return; // User cancelled } const msg = err instanceof Error ? err.message : 'Failed to load document'; diff --git a/examples/react-native/RunAnywhereAI/src/types/model.ts b/examples/react-native/RunAnywhereAI/src/types/model.ts index 1b18dd686..48d631753 100644 --- a/examples/react-native/RunAnywhereAI/src/types/model.ts +++ b/examples/react-native/RunAnywhereAI/src/types/model.ts @@ -23,6 +23,7 @@ export enum LLMFramework { OpenAIWhisper = 'OpenAIWhisper', SystemTTS = 'SystemTTS', PiperTTS = 'PiperTTS', + Genie = 'Genie', } /** @@ -236,6 +237,7 @@ export const FrameworkDisplayNames: Record = { [LLMFramework.OpenAIWhisper]: 'OpenAI Whisper', [LLMFramework.SystemTTS]: 'System TTS', [LLMFramework.PiperTTS]: 'Piper TTS', + [LLMFramework.Genie]: 'Genie NPU', }; /** diff --git a/examples/react-native/RunAnywhereAI/yarn.lock b/examples/react-native/RunAnywhereAI/yarn.lock index 31f1866cb..7c1237ded 100644 --- a/examples/react-native/RunAnywhereAI/yarn.lock +++ b/examples/react-native/RunAnywhereAI/yarn.lock @@ -1215,6 +1215,11 @@ prompts "^2.4.2" semver "^7.5.2" +"@react-native-documents/picker@^12.0.1": + version "12.0.1" + resolved "https://registry.npmjs.org/@react-native-documents/picker/-/picker-12.0.1.tgz" + integrity sha512-vpJKb4t/5bnxe9+gQl+plJfKrrIsmYwANGhNH2B9E1dS1+6FDBzg4Dwmcq4ueaGfkRKEPJ606mJttVEH1ZKZaA== + "@react-native/assets-registry@0.83.1": version "0.83.1" resolved 
"https://registry.npmjs.org/@react-native/assets-registry/-/assets-registry-0.83.1.tgz" @@ -1467,10 +1472,15 @@ dependencies: nanoid "^3.3.11" -"@runanywhere/core@file:../../../sdk/runanywhere-react-native/packages/core": +"@runanywhere/core@>=0.16.0", "@runanywhere/core@file:../../../sdk/runanywhere-react-native/packages/core": version "0.18.1" resolved "file:../../../sdk/runanywhere-react-native/packages/core" +"@runanywhere/genie@^0.1.1": + version "0.1.1" + resolved "https://registry.npmjs.org/@runanywhere/genie/-/genie-0.1.1.tgz" + integrity sha512-rbIoJW4d52QA4+AwgIO9gclVeKXbERFUxDQ1LFeibCCl4KiPe/Eu5XxZtIe12pUBR8cDSps1ZJKQuZ1BfMrdOg== + "@runanywhere/llamacpp@file:../../../sdk/runanywhere-react-native/packages/llamacpp": version "0.18.1" resolved "file:../../../sdk/runanywhere-react-native/packages/llamacpp" @@ -5120,13 +5130,6 @@ react-is@^19.1.0: resolved "https://registry.npmjs.org/react-is/-/react-is-19.2.4.tgz" integrity sha512-W+EWGn2v0ApPKgKKCy/7s7WHXkboGcsrXE+2joLyVxkbyVQfO3MUEaUQDHoSmb8TFFrSKYa9mw64WZHNHSDzYA== -react-native-document-picker@^9.3.1: - version "9.3.1" - resolved "https://registry.npmjs.org/react-native-document-picker/-/react-native-document-picker-9.3.1.tgz" - integrity sha512-Vcofv9wfB0j67zawFjfq9WQPMMzXxOZL9kBmvWDpjVuEcVK73ndRmlXHlkeFl5ZHVsv4Zb6oZYhqm9u5omJOPA== - dependencies: - invariant "^2.2.4" - react-native-fs@^2.20.0: version "2.20.0" resolved "https://registry.npmjs.org/react-native-fs/-/react-native-fs-2.20.0.tgz" @@ -5153,7 +5156,7 @@ react-native-monorepo-config@^0.3.0: escape-string-regexp "^5.0.0" fast-glob "^3.3.3" -react-native-nitro-modules@^0.33.7: +react-native-nitro-modules@^0.33.7, react-native-nitro-modules@>=0.31.3: version "0.33.7" resolved "https://registry.npmjs.org/react-native-nitro-modules/-/react-native-nitro-modules-0.33.7.tgz" integrity sha512-WepMobWe4j1Ae5GQ5RxYGBdBpJBwzP6zaOxJ7r6nhbY5iyl01DL3Gsh4gk8edzNFRuAh1rvXDAHIipq8SahxeQ== @@ -5189,7 +5192,7 @@ react-native-vision-camera@^4.7.3: resolved 
"https://registry.npmjs.org/react-native-vision-camera/-/react-native-vision-camera-4.7.3.tgz" integrity sha512-g1/neOyjSqn1kaAa2FxI/qp5KzNvPcF0bnQw6NntfbxH6tm0+8WFZszlgb5OV+iYlB6lFUztCbDtyz5IpL47OA== -react-native@*, "react-native@^0.0.0-0 || >=0.65 <1.0", "react-native@>= 0.61.5", react-native@>=0.70.0, react-native@0.83.1: +react-native@*, "react-native@^0.0.0-0 || >=0.65 <1.0", "react-native@>= 0.61.5", react-native@>=0.70.0, react-native@>=0.74.0, react-native@>=0.79.0, react-native@0.83.1: version "0.83.1" resolved "https://registry.npmjs.org/react-native/-/react-native-0.83.1.tgz" integrity sha512-mL1q5HPq5cWseVhWRLl+Fwvi5z1UO+3vGOpjr+sHFwcUletPRZ5Kv+d0tUfqHmvi73/53NjlQqX1Pyn4GguUfA== diff --git a/gradle.properties b/gradle.properties index 2c1655bfa..86fa7f243 100644 --- a/gradle.properties +++ b/gradle.properties @@ -10,4 +10,4 @@ org.gradle.configureondemand=true kotlin.code.style=official # KMP configuration kotlin.mpp.applyDefaultHierarchyTemplate=false -runanywhere.testLocal=true +runanywhere.testLocal=false diff --git a/lefthook.yml b/lefthook.yml new file mode 100644 index 000000000..3ac5730a0 --- /dev/null +++ b/lefthook.yml @@ -0,0 +1,42 @@ +# EXAMPLE USAGE: +# +# Refer for explanation to following link: +# https://lefthook.dev/configuration/ +# +# pre-push: +# jobs: +# - name: packages audit +# tags: +# - frontend +# - security +# run: yarn audit +# +# - name: gems audit +# tags: +# - backend +# - security +# run: bundle audit +# +# pre-commit: +# parallel: true +# jobs: +# - run: yarn eslint {staged_files} +# glob: "*.{js,ts,jsx,tsx}" +# +# - name: rubocop +# glob: "*.rb" +# exclude: +# - config/application.rb +# - config/routes.rb +# run: bundle exec rubocop --force-exclusion {all_files} +# +# - name: govet +# files: git ls-files -m +# glob: "*.go" +# run: go vet {files} +# +# - script: "hello.js" +# runner: node +# +# - script: "hello.go" +# runner: go run diff --git 
a/sdk/runanywhere-commons/include/rac/infrastructure/model_management/rac_model_types.h b/sdk/runanywhere-commons/include/rac/infrastructure/model_management/rac_model_types.h index ffb2b4531..60c7261bd 100644 --- a/sdk/runanywhere-commons/include/rac/infrastructure/model_management/rac_model_types.h +++ b/sdk/runanywhere-commons/include/rac/infrastructure/model_management/rac_model_types.h @@ -164,8 +164,9 @@ typedef enum rac_model_format { RAC_MODEL_FORMAT_ORT = 1, /**< ONNX Runtime format */ RAC_MODEL_FORMAT_GGUF = 2, /**< GGUF format (llama.cpp) */ RAC_MODEL_FORMAT_BIN = 3, /**< Binary format */ - RAC_MODEL_FORMAT_COREML = 4, /**< Core ML format (.mlmodelc, .mlpackage) */ - RAC_MODEL_FORMAT_UNKNOWN = 99 /**< Unknown format */ + RAC_MODEL_FORMAT_COREML = 4, /**< Core ML format (.mlmodelc, .mlpackage) */ + RAC_MODEL_FORMAT_QNN_CONTEXT = 5, /**< QNN context binary (Qualcomm Genie) */ + RAC_MODEL_FORMAT_UNKNOWN = 99 /**< Unknown format */ } rac_model_format_t; // ============================================================================= @@ -186,7 +187,8 @@ typedef enum rac_inference_framework { RAC_FRAMEWORK_NONE = 6, /**< No framework needed */ RAC_FRAMEWORK_MLX = 7, /**< MLX C++ (Apple Silicon VLM) */ RAC_FRAMEWORK_COREML = 8, /**< Core ML (Apple Neural Engine) */ - RAC_FRAMEWORK_WHISPERKIT_COREML = 9, /**< WhisperKit CoreML (Apple Neural Engine STT) */ + RAC_FRAMEWORK_WHISPERKIT_COREML = 9, /**< WhisperKit CoreML (Apple Neural Engine STT) */ + RAC_FRAMEWORK_GENIE = 10, /**< Qualcomm Genie (Hexagon NPU LLM) */ RAC_FRAMEWORK_UNKNOWN = 99 /**< Unknown framework */ } rac_inference_framework_t; diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/CMakeLists.txt b/sdk/runanywhere-commons/src/backends/llamacpp/CMakeLists.txt index fcca88adb..6c299d08c 100644 --- a/sdk/runanywhere-commons/src/backends/llamacpp/CMakeLists.txt +++ b/sdk/runanywhere-commons/src/backends/llamacpp/CMakeLists.txt @@ -178,6 +178,7 @@ target_include_directories(rac_backend_llamacpp 
PUBLIC ${RAC_COMMONS_ROOT_DIR}/include ${RAC_COMMONS_ROOT_DIR}/include/rac/backends ${llamacpp_SOURCE_DIR}/include + ${llamacpp_SOURCE_DIR}/src # Internal headers (llama-adapter.h for LoRA introspection) ${llamacpp_SOURCE_DIR}/common ${llamacpp_SOURCE_DIR}/ggml/include ${llamacpp_SOURCE_DIR}/vendor # nlohmann/json.hpp diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp index 76218a7ae..43bd773c5 100644 --- a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp +++ b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp @@ -2,9 +2,14 @@ #include "common.h" +// Internal llama.cpp header for LoRA adapter introspection (ab_map tensor count) +#include "llama-adapter.h" + #include #include +#include #include +#include #include #include @@ -582,6 +587,26 @@ bool LlamaCppTextGeneration::generate_stream(const TextGenerationRequest& reques cancel_requested_.store(false); decode_failed_ = false; + // Verify LoRA adapters are applied before generation + if (!lora_adapters_.empty()) { + RAC_LOG_INFO("LLM.LlamaCpp","[LORA] %zu adapter(s) loaded for generation:", lora_adapters_.size()); + bool all_applied = true; + for (const auto& entry : lora_adapters_) { + RAC_LOG_INFO("LLM.LlamaCpp","[LORA] %s: applied=%d, adapter_scale=%.2f", + entry.path.c_str(), entry.applied ? 
1 : 0, entry.scale); + if (!entry.applied) { + all_applied = false; + } + } + if (!all_applied) { + RAC_LOG_ERROR("LLM.LlamaCpp","[LORA] Some adapters not applied, attempting re-apply"); + if (!apply_lora_adapters()) { + RAC_LOG_ERROR("LLM.LlamaCpp","[LORA] Failed to re-apply adapters before generation"); + return false; + } + } + } + std::string prompt = build_prompt(request); LOGI("Generating with prompt length: %zu", prompt.length()); @@ -1189,7 +1214,7 @@ bool LlamaCppTextGeneration::apply_lora_adapters() { for (auto& entry : lora_adapters_) { entry.applied = true; - LOGI("Applied LoRA adapter: %s (scale=%.2f)", entry.path.c_str(), entry.scale); + LOGI("Applied LoRA adapter: %s (adapter_scale=%.2f)", entry.path.c_str(), entry.scale); } return true; } @@ -1202,6 +1227,12 @@ bool LlamaCppTextGeneration::load_lora_adapter(const std::string& adapter_path, return false; } + // Validate scale + if (scale <= 0.0f || !std::isfinite(scale)) { + LOGE("Invalid LoRA scale: %.4f (must be positive and finite)", scale); + return false; + } + // Check if adapter already loaded for (const auto& entry : lora_adapters_) { if (entry.path == adapter_path) { @@ -1210,15 +1241,58 @@ bool LlamaCppTextGeneration::load_lora_adapter(const std::string& adapter_path, } } + // Validate file exists and is a valid GGUF before passing to llama.cpp + { + std::ifstream file(adapter_path, std::ios::binary); + if (!file.is_open()) { + LOGE("LoRA adapter file not found: %s", adapter_path.c_str()); + return false; + } + uint32_t magic = 0; + file.read(reinterpret_cast(&magic), sizeof(magic)); + if (!file || magic != 0x46554747u) { // "GGUF" in little-endian + LOGE("LoRA adapter is not a valid GGUF file: %s (magic=0x%08X)", + adapter_path.c_str(), magic); + return false; + } + } + LOGI("Loading LoRA adapter: %s (scale=%.2f)", adapter_path.c_str(), scale); // Load adapter against model llama_adapter_lora* adapter = llama_adapter_lora_init(model_, adapter_path.c_str()); if (!adapter) { - LOGE("Failed 
to load LoRA adapter from: %s", adapter_path.c_str()); + LOGE("Failed to load LoRA adapter: %s " + "(possible architecture mismatch with loaded model)", adapter_path.c_str()); return false; } + // Verify the adapter actually matched tensors in the model + size_t matched_tensors = adapter->ab_map.size(); + if (matched_tensors == 0) { + LOGE("LoRA adapter matched 0 tensors in model — " + "adapter has no effect (wrong base model?): %s", adapter_path.c_str()); + return false; + } + LOGI("LoRA adapter matched %zu tensor pairs", matched_tensors); + + // Log adapter metadata for diagnostics + { + char alpha_buf[64] = {0}; + if (llama_adapter_meta_val_str(adapter, "general.lora.alpha", alpha_buf, sizeof(alpha_buf)) > 0) { + LOGI("LoRA adapter metadata: alpha=%s", alpha_buf); + } + int n_meta = llama_adapter_meta_count(adapter); + LOGI("LoRA adapter has %d metadata entries", n_meta); + for (int i = 0; i < n_meta && i < 20; i++) { + char key_buf[128] = {0}; + char val_buf[128] = {0}; + llama_adapter_meta_key_by_index(adapter, i, key_buf, sizeof(key_buf)); + llama_adapter_meta_val_str_by_index(adapter, i, val_buf, sizeof(val_buf)); + LOGI(" [%d] %s = %s", i, key_buf, val_buf); + } + } + // Store adapter entry LoraAdapterEntry entry; entry.adapter = adapter; @@ -1227,24 +1301,24 @@ bool LlamaCppTextGeneration::load_lora_adapter(const std::string& adapter_path, entry.applied = false; lora_adapters_.push_back(std::move(entry)); - // Recreate context so the new adapter is visible + // Per llama.cpp docs: "All adapters must be loaded before context creation." + // Recreate context so it properly accounts for LoRA operations in the compute graph. 
if (!recreate_context()) { - // Remove the adapter entry we just added on failure + LOGE("Failed to recreate context after LoRA adapter load"); lora_adapters_.pop_back(); return false; } - // Apply all loaded adapters to the new context + // Apply all loaded adapters to the fresh context if (!apply_lora_adapters()) { lora_adapters_.pop_back(); return false; } - // Clear KV cache after adapter changes - llama_memory_clear(llama_get_memory(context_), true); + // KV cache is already empty from context recreation — no need to clear - LOGI("LoRA adapter loaded and applied: %s (%zu total adapters)", - adapter_path.c_str(), lora_adapters_.size()); + LOGI("LoRA adapter loaded and applied: %s (%zu total adapters, %zu matched tensors)", + adapter_path.c_str(), lora_adapters_.size(), matched_tensors); return true; } diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp b/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp index 85982adf0..1ea352980 100644 --- a/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp +++ b/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp @@ -346,7 +346,8 @@ rac_result_t rac_llm_llamacpp_load_lora(rac_handle_t handle, } if (!h->text_gen->load_lora_adapter(adapter_path, scale)) { - rac_error_set_details("Failed to load LoRA adapter"); + std::string detail = std::string("Failed to load LoRA adapter: ") + adapter_path; + rac_error_set_details(detail.c_str()); return RAC_ERROR_MODEL_LOAD_FAILED; } diff --git a/sdk/runanywhere-commons/src/features/diffusion/diffusion_json.cpp b/sdk/runanywhere-commons/src/features/diffusion/diffusion_json.cpp index f42403817..5b2a333ae 100644 --- a/sdk/runanywhere-commons/src/features/diffusion/diffusion_json.cpp +++ b/sdk/runanywhere-commons/src/features/diffusion/diffusion_json.cpp @@ -216,6 +216,7 @@ static rac_inference_framework_t parse_preferred_framework( if (val == "none") return RAC_FRAMEWORK_NONE; if (val == "mlx") return 
RAC_FRAMEWORK_MLX; if (val == "coreml" || val == "core_ml") return RAC_FRAMEWORK_COREML; + if (val == "genie" || val == "qnn_genie") return RAC_FRAMEWORK_GENIE; if (val == "unknown") return RAC_FRAMEWORK_UNKNOWN; return fallback; diff --git a/sdk/runanywhere-commons/src/features/llm/llm_component.cpp b/sdk/runanywhere-commons/src/features/llm/llm_component.cpp index d09f739ea..f0b3739a7 100644 --- a/sdk/runanywhere-commons/src/features/llm/llm_component.cpp +++ b/sdk/runanywhere-commons/src/features/llm/llm_component.cpp @@ -10,8 +10,10 @@ */ #include +#include #include #include +#include #include #include @@ -888,14 +890,28 @@ extern "C" rac_result_t rac_llm_component_check_lora_compat(rac_handle_t handle, return RAC_ERROR_INVALID_ARGUMENT; } - // Basic pre-check: verify the backend supports LoRA at all + // Verify file exists and is a valid GGUF + { + std::ifstream file(adapter_path, std::ios::binary); + if (!file.is_open()) { + *out_error = rac_strdup("Adapter file not found"); + return RAC_ERROR_INVALID_ARGUMENT; + } + uint32_t magic = 0; + file.read(reinterpret_cast(&magic), sizeof(magic)); + if (!file || magic != 0x46554747u) { // "GGUF" in little-endian + *out_error = rac_strdup("Adapter file is not a valid GGUF file"); + return RAC_ERROR_INVALID_ARGUMENT; + } + } + + // Verify the backend supports LoRA auto* llm_service = reinterpret_cast(service); if (!llm_service->ops || !llm_service->ops->load_lora) { *out_error = rac_strdup("Backend does not support LoRA adapters"); return RAC_ERROR_NOT_SUPPORTED; } - // Adapter path and backend both valid - considered compatible return RAC_SUCCESS; } diff --git a/sdk/runanywhere-commons/src/features/llm/rac_llm_service.cpp b/sdk/runanywhere-commons/src/features/llm/rac_llm_service.cpp index 6705f13a4..e5cd060c7 100644 --- a/sdk/runanywhere-commons/src/features/llm/rac_llm_service.cpp +++ b/sdk/runanywhere-commons/src/features/llm/rac_llm_service.cpp @@ -13,6 +13,13 @@ #include #include +#ifdef __ANDROID__ +#include 
+#define ALOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "RAC_LLM_SVC", __VA_ARGS__) +#else +#define ALOGD(...) fprintf(stderr, __VA_ARGS__) +#endif + #include "rac/core/rac_core.h" #include "rac/core/rac_logger.h" #include "rac/infrastructure/model_management/rac_model_registry.h" @@ -32,16 +39,20 @@ rac_result_t rac_llm_create(const char* model_id, rac_handle_t* out_handle) { *out_handle = nullptr; + ALOGD("rac_llm_create: model_id=%s", model_id); RAC_LOG_INFO(LOG_CAT, "Creating LLM service for: %s", model_id); // Query model registry to get framework rac_model_info_t* model_info = nullptr; rac_result_t result = rac_get_model(model_id, &model_info); + ALOGD("rac_get_model result=%d", result); // If not found by model_id, try looking up by path (model_id might be a path) if (result != RAC_SUCCESS) { + ALOGD("Trying path lookup: %s", model_id); RAC_LOG_DEBUG(LOG_CAT, "Model not found by ID, trying path lookup: %s", model_id); result = rac_get_model_by_path(model_id, &model_info); + ALOGD("rac_get_model_by_path result=%d", result); } rac_inference_framework_t framework = RAC_FRAMEWORK_LLAMACPP; @@ -57,10 +68,15 @@ rac_result_t rac_llm_create(const char* model_id, rac_handle_t* out_handle) { } else { model_path = reg_path; } + ALOGD("Found in registry: id=%s, framework=%d, local_path=%s", + model_info->id ? model_info->id : "NULL", + static_cast(framework), model_path ? model_path : "NULL"); RAC_LOG_INFO(LOG_CAT, "Found model in registry: id=%s, framework=%d, local_path=%s", model_info->id ? model_info->id : "NULL", static_cast(framework), model_path ? 
model_path : "NULL"); } else { + ALOGD("NOT found in registry (result=%d), default framework=%d", + result, static_cast(framework)); RAC_LOG_WARNING(LOG_CAT, "Model NOT found in registry (result=%d), using default framework=%d", result, static_cast(framework)); @@ -73,22 +89,28 @@ rac_result_t rac_llm_create(const char* model_id, rac_handle_t* out_handle) { request.framework = framework; request.model_path = model_path; + ALOGD("Service request: framework=%d, model_path=%s", + static_cast(request.framework), + request.model_path ? request.model_path : "NULL"); RAC_LOG_INFO(LOG_CAT, "Service request: framework=%d, model_path=%s", static_cast(request.framework), request.model_path ? request.model_path : "NULL"); // Service registry returns an rac_llm_service_t* with vtable already set result = rac_service_create(RAC_CAPABILITY_TEXT_GENERATION, &request, out_handle); + ALOGD("rac_service_create result=%d", result); if (model_info) { rac_model_info_free(model_info); } if (result != RAC_SUCCESS) { + ALOGD("Failed to create service: %d", result); RAC_LOG_ERROR(LOG_CAT, "Failed to create service via registry: %d", result); return result; } + ALOGD("LLM service created successfully"); RAC_LOG_INFO(LOG_CAT, "LLM service created"); return RAC_SUCCESS; } diff --git a/sdk/runanywhere-commons/src/infrastructure/model_management/model_assignment.cpp b/sdk/runanywhere-commons/src/infrastructure/model_management/model_assignment.cpp index ad575705a..dd468be26 100644 --- a/sdk/runanywhere-commons/src/infrastructure/model_management/model_assignment.cpp +++ b/sdk/runanywhere-commons/src/infrastructure/model_management/model_assignment.cpp @@ -238,6 +238,8 @@ static std::vector parse_models_json(const char* json_str, si model->framework = RAC_FRAMEWORK_MLX; else if (framework == "fluid_audio" || framework == "FluidAudio") model->framework = RAC_FRAMEWORK_FLUID_AUDIO; + else if (framework == "genie" || framework == "qnn_genie" || framework == "Genie") + model->framework = 
RAC_FRAMEWORK_GENIE; else model->framework = RAC_FRAMEWORK_UNKNOWN; diff --git a/sdk/runanywhere-commons/src/infrastructure/model_management/model_paths.cpp b/sdk/runanywhere-commons/src/infrastructure/model_management/model_paths.cpp index 4c7a081a7..ee1779a96 100644 --- a/sdk/runanywhere-commons/src/infrastructure/model_management/model_paths.cpp +++ b/sdk/runanywhere-commons/src/infrastructure/model_management/model_paths.cpp @@ -112,6 +112,8 @@ const char* rac_framework_raw_value(rac_inference_framework_t framework) { return "FluidAudio"; case RAC_FRAMEWORK_WHISPERKIT_COREML: return "WhisperKitCoreML"; + case RAC_FRAMEWORK_GENIE: + return "Genie"; case RAC_FRAMEWORK_BUILTIN: return "BuiltIn"; case RAC_FRAMEWORK_NONE: @@ -341,9 +343,10 @@ rac_result_t rac_model_paths_extract_model_id(const char* path, char* out_model_ // Check if next component is a framework name bool isFramework = false; - const char* frameworks[] = {"ONNX", "LlamaCpp", "FoundationModels", - "SystemTTS", "FluidAudio", "BuiltIn", - "None", "Unknown"}; + const char* frameworks[] = {"ONNX", "LlamaCpp", "FoundationModels", + "SystemTTS", "FluidAudio", "BuiltIn", + "None", "Unknown", "CoreML", + "WhisperKitCoreML", "MLX", "Genie"}; for (const char* fw : frameworks) { if (nextComponent == fw) { isFramework = true; @@ -417,6 +420,9 @@ rac_result_t rac_model_paths_extract_framework(const char* path, } else if (nextComponent == "None") { *out_framework = RAC_FRAMEWORK_NONE; return RAC_SUCCESS; + } else if (nextComponent == "Genie") { + *out_framework = RAC_FRAMEWORK_GENIE; + return RAC_SUCCESS; } return RAC_ERROR_NOT_FOUND; diff --git a/sdk/runanywhere-commons/src/infrastructure/model_management/model_registry.cpp b/sdk/runanywhere-commons/src/infrastructure/model_management/model_registry.cpp index d866f4f4a..983f21de9 100644 --- a/sdk/runanywhere-commons/src/infrastructure/model_management/model_registry.cpp +++ b/sdk/runanywhere-commons/src/infrastructure/model_management/model_registry.cpp @@ 
-612,11 +612,11 @@ rac_result_t rac_model_registry_discover_downloaded(rac_model_registry_handle_t // Frameworks to scan - include all frameworks that can have downloaded models // Note: RAC_FRAMEWORK_UNKNOWN is included to recover models that were incorrectly // stored in the "Unknown" directory due to missing framework mappings - rac_inference_framework_t frameworks[] = {RAC_FRAMEWORK_LLAMACPP, RAC_FRAMEWORK_ONNX, - RAC_FRAMEWORK_COREML, RAC_FRAMEWORK_MLX, + rac_inference_framework_t frameworks[] = {RAC_FRAMEWORK_LLAMACPP, RAC_FRAMEWORK_ONNX, + RAC_FRAMEWORK_COREML, RAC_FRAMEWORK_MLX, RAC_FRAMEWORK_FLUID_AUDIO, RAC_FRAMEWORK_FOUNDATION_MODELS, - RAC_FRAMEWORK_SYSTEM_TTS, RAC_FRAMEWORK_WHISPERKIT_COREML, - RAC_FRAMEWORK_UNKNOWN}; + RAC_FRAMEWORK_SYSTEM_TTS, RAC_FRAMEWORK_WHISPERKIT_COREML, + RAC_FRAMEWORK_GENIE, RAC_FRAMEWORK_UNKNOWN}; size_t framework_count = sizeof(frameworks) / sizeof(frameworks[0]); // Collect discovered models diff --git a/sdk/runanywhere-commons/src/infrastructure/model_management/model_types.cpp b/sdk/runanywhere-commons/src/infrastructure/model_management/model_types.cpp index 61472d5ec..381004c40 100644 --- a/sdk/runanywhere-commons/src/infrastructure/model_management/model_types.cpp +++ b/sdk/runanywhere-commons/src/infrastructure/model_management/model_types.cpp @@ -98,6 +98,7 @@ rac_model_category_t rac_model_category_from_framework(rac_inference_framework_t // Mirrors Swift's ModelCategory.from(framework:) switch (framework) { case RAC_FRAMEWORK_LLAMACPP: + case RAC_FRAMEWORK_GENIE: case RAC_FRAMEWORK_FOUNDATION_MODELS: return RAC_MODEL_CATEGORY_LANGUAGE; case RAC_FRAMEWORK_ONNX: @@ -149,6 +150,14 @@ rac_result_t rac_framework_get_supported_formats(rac_inference_framework_t frame (*out_formats)[0] = RAC_MODEL_FORMAT_BIN; return RAC_SUCCESS; } + case RAC_FRAMEWORK_GENIE: { + *out_count = 1; + *out_formats = (rac_model_format_t*)malloc(sizeof(rac_model_format_t)); + if (!*out_formats) + return RAC_ERROR_OUT_OF_MEMORY; + 
(*out_formats)[0] = RAC_MODEL_FORMAT_QNN_CONTEXT; + return RAC_SUCCESS; + } default: *out_count = 0; *out_formats = nullptr; @@ -165,6 +174,8 @@ rac_bool_t rac_framework_supports_format(rac_inference_framework_t framework, : RAC_FALSE; case RAC_FRAMEWORK_LLAMACPP: return (format == RAC_MODEL_FORMAT_GGUF) ? RAC_TRUE : RAC_FALSE; + case RAC_FRAMEWORK_GENIE: + return (format == RAC_MODEL_FORMAT_QNN_CONTEXT) ? RAC_TRUE : RAC_FALSE; case RAC_FRAMEWORK_COREML: return (format == RAC_MODEL_FORMAT_COREML) ? RAC_TRUE : RAC_FALSE; case RAC_FRAMEWORK_FLUID_AUDIO: @@ -178,8 +189,9 @@ rac_bool_t rac_framework_uses_directory_based_models(rac_inference_framework_t f // Mirrors Swift's InferenceFramework.usesDirectoryBasedModels switch (framework) { case RAC_FRAMEWORK_ONNX: - case RAC_FRAMEWORK_COREML: // CoreML compiled models (.mlmodelc) are directories - case RAC_FRAMEWORK_WHISPERKIT_COREML: // WhisperKit models are directories of .mlmodelc files + case RAC_FRAMEWORK_COREML: + case RAC_FRAMEWORK_WHISPERKIT_COREML: + case RAC_FRAMEWORK_GENIE: return RAC_TRUE; default: return RAC_FALSE; @@ -190,6 +202,7 @@ rac_bool_t rac_framework_supports_llm(rac_inference_framework_t framework) { // Mirrors Swift's InferenceFramework.supportsLLM switch (framework) { case RAC_FRAMEWORK_LLAMACPP: + case RAC_FRAMEWORK_GENIE: case RAC_FRAMEWORK_ONNX: case RAC_FRAMEWORK_FOUNDATION_MODELS: return RAC_TRUE; @@ -237,6 +250,8 @@ const char* rac_framework_display_name(rac_inference_framework_t framework) { return "FluidAudio"; case RAC_FRAMEWORK_WHISPERKIT_COREML: return "WhisperKit CoreML"; + case RAC_FRAMEWORK_GENIE: + return "Qualcomm Genie"; case RAC_FRAMEWORK_BUILTIN: return "Built-in"; case RAC_FRAMEWORK_NONE: @@ -265,6 +280,8 @@ const char* rac_framework_analytics_key(rac_inference_framework_t framework) { return "fluid_audio"; case RAC_FRAMEWORK_WHISPERKIT_COREML: return "whisperkit_coreml"; + case RAC_FRAMEWORK_GENIE: + return "genie"; case RAC_FRAMEWORK_BUILTIN: return "built_in"; case 
RAC_FRAMEWORK_NONE: @@ -403,7 +420,9 @@ const char* rac_model_format_extension(rac_model_format_t format) { case RAC_MODEL_FORMAT_BIN: return "bin"; case RAC_MODEL_FORMAT_COREML: - return "mlmodelc"; // CoreML compiled model directory + return "mlmodelc"; + case RAC_MODEL_FORMAT_QNN_CONTEXT: + return "bin"; default: return nullptr; } diff --git a/sdk/runanywhere-commons/src/infrastructure/registry/service_registry.cpp b/sdk/runanywhere-commons/src/infrastructure/registry/service_registry.cpp index 0613eddae..5bba1ad93 100644 --- a/sdk/runanywhere-commons/src/infrastructure/registry/service_registry.cpp +++ b/sdk/runanywhere-commons/src/infrastructure/registry/service_registry.cpp @@ -19,6 +19,13 @@ #include #include +#ifdef __ANDROID__ +#include +#define ALOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "RAC_SVC_REG", __VA_ARGS__) +#else +#define ALOGD(...) fprintf(stderr, __VA_ARGS__) +#endif + #include "rac/core/rac_core.h" #include "rac/core/rac_error.h" #include "rac/core/rac_logger.h" @@ -177,30 +184,37 @@ rac_result_t rac_service_create(rac_capability_t capability, const rac_service_r // Find first provider that can handle the request (already sorted by priority) // This matches Swift's pattern: registrations.sorted(by:).first(where: canHandle) for (const auto& provider : it->second) { + ALOGD("Checking provider '%s' (priority=%d)", provider.name.c_str(), provider.priority); RAC_LOG_INFO(LOG_CAT, "rac_service_create: Checking provider '%s' (priority=%d)", provider.name.c_str(), provider.priority); bool can_handle = provider.can_handle(request, provider.user_data); + ALOGD("Provider '%s' can_handle=%s", provider.name.c_str(), can_handle ? "TRUE" : "FALSE"); RAC_LOG_INFO(LOG_CAT, "rac_service_create: Provider '%s' can_handle=%s", provider.name.c_str(), can_handle ? 
"TRUE" : "FALSE"); if (can_handle) { + ALOGD("Calling create for provider '%s'", provider.name.c_str()); RAC_LOG_INFO(LOG_CAT, "rac_service_create: Calling create for provider '%s'", provider.name.c_str()); rac_handle_t handle = provider.create(request, provider.user_data); + ALOGD("Provider '%s' create returned handle=%p", provider.name.c_str(), handle); if (handle != nullptr) { *out_handle = handle; + ALOGD("Service created by provider '%s'", provider.name.c_str()); RAC_LOG_INFO(LOG_CAT, "rac_service_create: Service created by provider '%s', handle=%p", provider.name.c_str(), handle); return RAC_SUCCESS; } else { + ALOGD("Provider '%s' create returned nullptr!", provider.name.c_str()); RAC_LOG_ERROR(LOG_CAT, "rac_service_create: Provider '%s' create returned nullptr", provider.name.c_str()); } } } + ALOGD("No provider could handle the request"); RAC_LOG_ERROR(LOG_CAT, "rac_service_create: No provider could handle the request"); rac_error_set_details("No provider could handle the request"); return RAC_ERROR_NO_CAPABLE_PROVIDER; diff --git a/sdk/runanywhere-commons/src/infrastructure/telemetry/telemetry_manager.cpp b/sdk/runanywhere-commons/src/infrastructure/telemetry/telemetry_manager.cpp index 2a98737ff..a32a79c73 100644 --- a/sdk/runanywhere-commons/src/infrastructure/telemetry/telemetry_manager.cpp +++ b/sdk/runanywhere-commons/src/infrastructure/telemetry/telemetry_manager.cpp @@ -318,6 +318,8 @@ const char* framework_to_string(rac_inference_framework_t framework) { return "mlx"; case RAC_FRAMEWORK_WHISPERKIT_COREML: return "whisperkit_coreml"; + case RAC_FRAMEWORK_GENIE: + return "genie"; case RAC_FRAMEWORK_UNKNOWN: default: return "unknown"; diff --git a/sdk/runanywhere-flutter/packages/runanywhere/CHANGELOG.md b/sdk/runanywhere-flutter/packages/runanywhere/CHANGELOG.md index f361bee06..a2f6d5be2 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/CHANGELOG.md +++ b/sdk/runanywhere-flutter/packages/runanywhere/CHANGELOG.md @@ -5,6 +5,15 @@ All notable 
changes to the RunAnywhere Flutter SDK will be documented in this fi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.17.0] - 2026-03-09 + +### Added +- **Genie NPU Backend**: Added `InferenceFramework.genie` enum value for Qualcomm Genie NPU support +- **RAG Types**: Extended RAG type definitions for enhanced retrieval-augmented generation + +### Changed +- Updated model type definitions to support Genie NPU framework registration + ## [0.16.0] - 2026-02-14 ### Added diff --git a/sdk/runanywhere-flutter/packages/runanywhere/android/src/main/jniLibs/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere/android/src/main/jniLibs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-flutter/packages/runanywhere/android/src/main/kotlin/ai/runanywhere/sdk/RunAnywherePlugin.kt b/sdk/runanywhere-flutter/packages/runanywhere/android/src/main/kotlin/ai/runanywhere/sdk/RunAnywherePlugin.kt index 33e848d4e..f9b1f4641 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/android/src/main/kotlin/ai/runanywhere/sdk/RunAnywherePlugin.kt +++ b/sdk/runanywhere-flutter/packages/runanywhere/android/src/main/kotlin/ai/runanywhere/sdk/RunAnywherePlugin.kt @@ -48,6 +48,9 @@ class RunAnywherePlugin : FlutterPlugin, MethodCallHandler { "getCommonsVersion" -> { result.success(COMMONS_VERSION) } + "getSocModel" -> { + result.success(getSocModel()) + } else -> { result.notImplemented() } @@ -57,4 +60,18 @@ class RunAnywherePlugin : FlutterPlugin, MethodCallHandler { override fun onDetachedFromEngine(binding: FlutterPlugin.FlutterPluginBinding) { channel.setMethodCallHandler(null) } + + /** + * Get the SoC model string for NPU chip detection. + * Uses Build.SOC_MODEL (API 31+) with Build.HARDWARE fallback. 
+ */ + private fun getSocModel(): String { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + val socModel = Build.SOC_MODEL + if (!socModel.isNullOrEmpty() && socModel != "unknown") { + return socModel + } + } + return Build.HARDWARE ?: "" + } } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/ios/Frameworks/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere/ios/Frameworks/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/model_types.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/model_types.dart index 6f4e2b956..b03c03fe3 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/model_types.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/model_types.dart @@ -111,6 +111,7 @@ enum InferenceFramework { 'FoundationModels', 'Foundation Models', 'foundation_models'), systemTTS('SystemTTS', 'System TTS', 'system_tts'), fluidAudio('FluidAudio', 'FluidAudio', 'fluid_audio'), + genie('Genie', 'Qualcomm Genie', 'genie'), // Special cases builtIn('BuiltIn', 'Built-in', 'built_in'), diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/npu_chip.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/npu_chip.dart new file mode 100644 index 000000000..fc6e1c6f4 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/core/types/npu_chip.dart @@ -0,0 +1,45 @@ +/// Supported NPU chipsets for on-device Genie model inference. +/// +/// Each chip has an [identifier] used in model IDs and an [npuSuffix] used +/// to construct download URLs from the HuggingFace model repository. 
+/// +/// Example: +/// ```dart +/// final chip = RunAnywhere.getChip(); +/// if (chip != null) { +/// final url = chip.downloadUrl('qwen3-4b'); +/// // → https://huggingface.co/runanywhere/genie-npu-models/resolve/main/qwen3-4b-genie-w4a16-8elite-gen5.tar.gz +/// } +/// ``` +enum NPUChip { + snapdragon8Elite('8elite', 'Snapdragon 8 Elite', 'SM8750', '8elite'), + snapdragon8EliteGen5('8elite-gen5', 'Snapdragon 8 Elite Gen 5', 'SM8850', '8elite-gen5'); + + final String identifier; + final String displayName; + final String socModel; + final String npuSuffix; + + const NPUChip(this.identifier, this.displayName, this.socModel, this.npuSuffix); + + /// Base URL for NPU model downloads on HuggingFace. + static const baseUrl = + 'https://huggingface.co/runanywhere/genie-npu-models/resolve/main/'; + + /// Build a HuggingFace download URL for this chip. + /// [modelSlug] is the model slug (e.g. "qwen3-4b") → produces + /// "qwen3-4b-genie-w4a16-8elite-gen5.tar.gz" + /// [quant] is the quantization format (e.g. "w4a16", "w8a16"). Defaults to "w4a16". + String downloadUrl(String modelSlug, {String quant = 'w4a16'}) => + '$baseUrl$modelSlug-genie-$quant-$npuSuffix.tar.gz'; + + /// Match an NPU chip from a SoC model string (e.g. "SM8750"). + /// Returns null if the SoC is not a supported NPU chipset. + static NPUChip? 
fromSocModel(String socModel) { + final upper = socModel.toUpperCase(); + for (final chip in NPUChip.values) { + if (upper.contains(chip.socModel)) return chip; + } + return null; + } +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/infrastructure/download/download_service.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/infrastructure/download/download_service.dart index a28143b4d..05b91421d 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/infrastructure/download/download_service.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/infrastructure/download/download_service.dart @@ -1,7 +1,6 @@ import 'dart:async'; import 'dart:io'; -import 'package:archive/archive.dart'; import 'package:http/http.dart' as http; import 'package:path/path.dart' as p; import 'package:runanywhere/core/types/model_types.dart'; @@ -260,12 +259,14 @@ class ModelDownloadService { if (requiresExtraction) { yield ModelDownloadProgress.extracting(modelId); + // Snapshot items before extraction to detect new entries + final itemsBefore = await destDir.list().map((e) => e.path).toSet(); + final extractedPath = await _extractArchive( downloadPath, destDir.path, model.artifactType, ); - finalModelPath = extractedPath; // Clean up archive file after extraction try { @@ -273,6 +274,14 @@ class ModelDownloadService { } catch (e) { _logger.warning('Failed to delete archive: $e'); } + + // Resolve the extracted model path using the snapshot + finalModelPath = await _resolveExtractedModelPath( + destDir.path, + modelId, + itemsBefore, + extractedPath, + ); } // Update model's local path @@ -323,7 +332,10 @@ class ModelDownloadService { return Directory(modelPath); } - /// Extract an archive to the destination + /// Extract an archive using the system `tar`/`unzip` command. + /// This runs in a separate process (non-blocking) and is orders of magnitude + /// faster than the pure-Dart `archive` package for large model files (1GB+). 
+ /// Android has `tar` via toybox since API 23 (min SDK 24). Future _extractArchive( String archivePath, String destDir, @@ -331,60 +343,108 @@ class ModelDownloadService { ) async { _logger.info('Extracting archive: $archivePath'); - final archiveFile = File(archivePath); - final bytes = await archiveFile.readAsBytes(); - - Archive? archive; + final List args; - // Determine archive type if (archivePath.endsWith('.tar.gz') || archivePath.endsWith('.tgz')) { - final gzDecoded = GZipDecoder().decodeBytes(bytes); - archive = TarDecoder().decodeBytes(gzDecoded); + args = ['-xzf', archivePath, '-C', destDir]; } else if (archivePath.endsWith('.tar.bz2') || archivePath.endsWith('.tbz2')) { - final bz2Decoded = BZip2Decoder().decodeBytes(bytes); - archive = TarDecoder().decodeBytes(bz2Decoded); - } else if (archivePath.endsWith('.zip')) { - archive = ZipDecoder().decodeBytes(bytes); + args = ['-xjf', archivePath, '-C', destDir]; } else if (archivePath.endsWith('.tar')) { - archive = TarDecoder().decodeBytes(bytes); + args = ['-xf', archivePath, '-C', destDir]; + } else if (archivePath.endsWith('.zip')) { + // Use unzip for .zip files + final result = await Process.run('unzip', ['-o', archivePath, '-d', destDir]); + if (result.exitCode != 0) { + throw Exception('unzip failed (exit ${result.exitCode}): ${result.stderr}'); + } + _logger.info('Extraction complete: $destDir'); + return destDir; } else { _logger.warning('Unknown archive format: $archivePath'); return archivePath; } - // Extract files - String? 
rootDir; - for (final file in archive) { - final filePath = p.join(destDir, file.name); - - if (file.isFile) { - final outFile = File(filePath); - await outFile.create(recursive: true); - await outFile.writeAsBytes(file.content as List); - _logger.debug('Extracted: ${file.name}'); - - // Track root directory - final parts = file.name.split('/'); - if (parts.isNotEmpty && rootDir == null) { - rootDir = parts.first; - } - } else { - await Directory(filePath).create(recursive: true); - } + // Run tar extraction — runs as a separate OS process, does not block the Dart event loop + final result = await Process.run('tar', args); + if (result.exitCode != 0) { + throw Exception('tar failed (exit ${result.exitCode}): ${result.stderr}'); } _logger.info('Extraction complete: $destDir'); + return destDir; + } - // Return the model directory (could be a nested directory) - if (rootDir != null) { - final nestedPath = p.join(destDir, rootDir); - if (await Directory(nestedPath).exists()) { - return nestedPath; + /// Resolve the final model directory after archive extraction. + /// + /// The download service already creates a per-model directory (destDir) named + /// after the modelId. Archives may contain a single root folder whose name + /// differs from modelId (e.g. Genie NPU tar.gz). We flatten that away so + /// model files always live directly inside destDir. + /// + /// Cases handled: + /// 1. Model files extracted directly into destDir → nothing to do. + /// 2. Single new subdirectory created by extraction → move its contents up + /// into destDir and delete the now-empty subdirectory. + /// 3. Multiple new items → already flat, nothing to do. 
+ Future _resolveExtractedModelPath( + String destDir, + String modelId, + Set itemsBefore, + String fallbackPath, + ) async { + final destDirectory = Directory(destDir); + + // Find new items created by extraction + final currentItems = await destDirectory.list().toList(); + final newItems = currentItems + .where((e) => !itemsBefore.contains(e.path)) + .toList(); + final newDirs = newItems.whereType().toList(); + final newFiles = newItems.whereType().toList(); + + // Case: single new directory (e.g. Genie NPU archive root like + // "llama_v3_2_1b_instruct-genie-w4-qualcomm_snapdragon_8_elite/"). + // Move its contents up into destDir so files are discoverable directly. + if (newDirs.length == 1 && newFiles.isEmpty) { + final extractedDir = newDirs.first; + _logger.info( + 'Flattening extracted dir ' + "'${p.basename(extractedDir.path)}' into destDir", + ); + try { + final innerItems = await extractedDir.list().toList(); + for (final item in innerItems) { + final target = p.join(destDir, p.basename(item.path)); + try { + await (item as FileSystemEntity).rename(target); + } catch (e) { + if (item is File) { + await item.copy(target); + await item.delete(); + } else { + _logger.warning('Failed to move ${item.path}: $e'); + } + } + } + await extractedDir.delete(recursive: true); + _logger.info( + 'Flattened ${innerItems.length} items from ' + "'${p.basename(extractedDir.path)}' into: $destDir", + ); + } catch (e) { + _logger.warning('Error flattening extracted dir: $e'); } + return destDir; } - return destDir; + // Files already at destDir root (flat archive or direct match) — use as-is + if (newItems.isNotEmpty) { + _logger.info('Extracted ${newItems.length} items directly into: $destDir'); + return destDir; + } + + return fallbackPath; } /// Update model's local path after download diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_paths.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_paths.dart 
index 5bba19572..6bafd9eee 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_paths.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_paths.dart @@ -322,6 +322,8 @@ int _frameworkToCValue(InferenceFramework framework) { return 5; // RAC_FRAMEWORK_BUILTIN case InferenceFramework.none: return 6; // RAC_FRAMEWORK_NONE + case InferenceFramework.genie: + return 10; // RAC_FRAMEWORK_GENIE case InferenceFramework.unknown: return 99; } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_registry.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_registry.dart index 14e454cdf..8aaf2cb9a 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_registry.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_model_registry.dart @@ -272,6 +272,8 @@ class DartBridgeModelRegistry { return 5; // RAC_FRAMEWORK_BUILTIN case public_types.InferenceFramework.none: return 6; // RAC_FRAMEWORK_NONE + case public_types.InferenceFramework.genie: + return 10; // RAC_FRAMEWORK_GENIE case public_types.InferenceFramework.unknown: return 99; // RAC_FRAMEWORK_UNKNOWN } @@ -973,6 +975,10 @@ base class RacModelInfoCStruct extends Struct { @Int32() external int supportsThinking; + // rac_bool_t supports_lora (int32_t) + @Int32() + external int supportsLora; + // char** tags external Pointer> tags; diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_rag.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_rag.dart index a90f20a42..d8854d394 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_rag.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_rag.dart @@ -2,9 +2,6 @@ /// /// RAG pipeline bridge - manages C++ RAG pipeline lifecycle. /// Mirrors Swift's CppBridge+RAG.swift pattern. 
-/// -/// The RAG pipeline is a feature (like Voice Agent) that orchestrates -/// LLM and Embeddings services for Retrieval-Augmented Generation. library dart_bridge_rag; import 'dart:ffi'; @@ -16,86 +13,24 @@ import 'package:runanywhere/native/ffi_types.dart'; import 'package:runanywhere/native/platform_loader.dart'; // ============================================================================= -// RAG Types (mirrors Swift RAGTypes.swift / Kotlin RAGTypes.kt) +// Bridge-Level Result Types +// +// These are the low-level types returned from C++ via FFI. +// Public-facing types with richer semantics live in rag_types.dart. +// They are intentionally named differently to avoid import conflicts. // ============================================================================= -/// Configuration for creating a RAG pipeline. -class RAGConfiguration { - /// Path to the ONNX embedding model - final String embeddingModelPath; - - /// Path to the GGUF LLM model - final String llmModelPath; - - /// Embedding vector dimension (default: 384 for all-MiniLM-L6-v2) - final int embeddingDimension; - - /// Number of top chunks to retrieve per query - final int topK; - - /// Minimum cosine similarity threshold 0.0-1.0 - final double similarityThreshold; - - /// Maximum tokens for context sent to the LLM - final int maxContextTokens; - - /// Tokens per chunk when splitting documents - final int chunkSize; - - /// Overlap tokens between consecutive chunks - final int chunkOverlap; - - /// Prompt template with {context} and {query} placeholders - final String? promptTemplate; - - /// Optional configuration JSON for the embedding model - final String? embeddingConfigJson; - - /// Optional configuration JSON for the LLM model - final String? 
llmConfigJson; - - const RAGConfiguration({ - required this.embeddingModelPath, - required this.llmModelPath, - this.embeddingDimension = 384, - this.topK = 10, - this.similarityThreshold = 0.15, - this.maxContextTokens = 2048, - this.chunkSize = 512, - this.chunkOverlap = 50, - this.promptTemplate, - this.embeddingConfigJson, - this.llmConfigJson, - }); -} - -/// Options for querying the RAG pipeline. -class RAGQueryOptions { - final String question; - final String? systemPrompt; - final int maxTokens; - final double temperature; - final double topP; - final int topK; - - const RAGQueryOptions({ - required this.question, - this.systemPrompt, - this.maxTokens = 512, - this.temperature = 0.7, - this.topP = 0.9, - this.topK = 40, - }); -} - -/// A single retrieved document chunk. -class RAGSearchResult { +/// A single retrieved document chunk from C++ (bridge-level). +/// Field names match C struct field names exactly. +class _RAGBridgeSearchResult { final String chunkId; final String text; final double similarityScore; + + /// Null if the C string was null or empty. final String? metadataJson; - const RAGSearchResult({ + const _RAGBridgeSearchResult({ required this.chunkId, required this.text, required this.similarityScore, @@ -103,117 +38,29 @@ class RAGSearchResult { }); } -/// Result of a RAG query. -class RAGResult { +/// RAG query result from C++ (bridge-level). +/// [contextUsed] is an empty string if no context was sent to the LLM. +class _RAGBridgeResult { final String answer; - final List retrievedChunks; - final String? 
contextUsed; + final List<_RAGBridgeSearchResult> retrievedChunks; + final String contextUsed; final double retrievalTimeMs; final double generationTimeMs; final double totalTimeMs; - const RAGResult({ + const _RAGBridgeResult({ required this.answer, required this.retrievedChunks, - this.contextUsed, + required this.contextUsed, required this.retrievalTimeMs, required this.generationTimeMs, required this.totalTimeMs, }); } -// ============================================================================= -// FFI Struct for rac_rag_config_t (legacy standalone config) -// ============================================================================= - -final class _RacRagConfig extends Struct { - external Pointer embeddingModelPath; - external Pointer llmModelPath; - @Size() - external int embeddingDimension; - @Size() - external int topK; - @Float() - external double similarityThreshold; - @Size() - external int maxContextTokens; - @Size() - external int chunkSize; - @Size() - external int chunkOverlap; - external Pointer promptTemplate; - external Pointer embeddingConfigJson; - external Pointer llmConfigJson; -} - -final class _RacRagQuery extends Struct { - external Pointer question; - external Pointer systemPrompt; - @Int32() - external int maxTokens; - @Float() - external double temperature; - @Float() - external double topP; - @Int32() - external int topK; -} - -final class _RacSearchResult extends Struct { - external Pointer chunkId; - external Pointer text; - @Float() - external double similarityScore; - external Pointer metadataJson; -} - -final class _RacRagResult extends Struct { - external Pointer answer; - external Pointer<_RacSearchResult> retrievedChunks; - @Size() - external int numChunks; - external Pointer contextUsed; - @Double() - external double retrievalTimeMs; - @Double() - external double generationTimeMs; - @Double() - external double totalTimeMs; -} - -// ============================================================================= -// FFI 
Function Typedefs -// ============================================================================= - -typedef _RagRegisterNative = Int32 Function(); -typedef _RagRegisterDart = int Function(); - -typedef _RagCreateStandaloneNative = Int32 Function( - Pointer<_RacRagConfig> config, Pointer> outPipeline); -typedef _RagCreateStandaloneDart = int Function( - Pointer<_RacRagConfig> config, Pointer> outPipeline); - -typedef _RagDestroyNative = Void Function(Pointer pipeline); -typedef _RagDestroyDart = void Function(Pointer pipeline); - -typedef _RagAddDocumentNative = Int32 Function( - Pointer pipeline, Pointer text, Pointer metadata); -typedef _RagAddDocumentDart = int Function( - Pointer pipeline, Pointer text, Pointer metadata); - -typedef _RagQueryNative = Int32 Function( - Pointer pipeline, Pointer<_RacRagQuery> query, Pointer<_RacRagResult> result); -typedef _RagQueryDart = int Function( - Pointer pipeline, Pointer<_RacRagQuery> query, Pointer<_RacRagResult> result); - -typedef _RagClearNative = Int32 Function(Pointer pipeline); -typedef _RagClearDart = int Function(Pointer pipeline); - -typedef _RagCountNative = Size Function(Pointer pipeline); -typedef _RagCountDart = int Function(Pointer pipeline); - -typedef _RagResultFreeNative = Void Function(Pointer<_RacRagResult> result); -typedef _RagResultFreeDart = void Function(Pointer<_RacRagResult> result); +/// Public type aliases — used by [rag_types.dart] factory constructors. +typedef RAGBridgeSearchResult = _RAGBridgeSearchResult; +typedef RAGBridgeResult = _RAGBridgeResult; // ============================================================================= // DartBridgeRAG — FFI bridge to rac_rag_pipeline_* C API @@ -229,111 +76,94 @@ class DartBridgeRAG { final _logger = SDKLogger('DartBridge.RAG'); Pointer? _pipeline; - bool _registered = false; bool get isCreated => _pipeline != null; - /// Register the RAG module (call once before using RAG). 
- void register() { - if (_registered) return; - - final lib = PlatformLoader.loadCommons(); - final fn = lib.lookupFunction<_RagRegisterNative, _RagRegisterDart>( - 'rac_backend_rag_register'); + // MARK: - Static Registration - final result = fn(); - if (result != RAC_SUCCESS && result != -401) { - _logger.error('Failed to register RAG module: $result'); - return; + /// Register the RAG backend with the C++ service registry. + /// + /// Returns the C++ result code (0 = success, -401 = already registered). + static int registerBackend() { + try { + final lib = PlatformLoader.loadCommons(); + final fn = lib.lookupFunction('rac_backend_rag_register'); + return fn(); + } catch (e) { + return -1; } + } - _registered = true; - _logger.debug('RAG module registered'); + /// Unregister the RAG backend from the C++ service registry. + static void unregisterBackend() { + try { + final lib = PlatformLoader.loadCommons(); + final fn = lib.lookupFunction('rac_backend_rag_unregister'); + fn(); + } catch (_) { + // Silently ignore — unregister is best-effort + } } - /// Create a RAG pipeline with the given configuration. - void createPipeline(RAGConfiguration config) { - if (!_registered) register(); + // MARK: - Pipeline Lifecycle + /// Create a RAG pipeline from a pre-populated [RacRagConfigStruct] pointer. + /// + /// [config] must be valid for the duration of this call. + /// The caller is responsible for freeing [config] after this returns. 
+ void createPipeline({required Pointer config}) { final lib = PlatformLoader.loadCommons(); - final fn = - lib.lookupFunction<_RagCreateStandaloneNative, _RagCreateStandaloneDart>( - 'rac_rag_pipeline_create_standalone'); + final fn = lib.lookupFunction('rac_rag_pipeline_create'); - final cConfig = calloc<_RacRagConfig>(); final outPipeline = calloc>(); - try { - cConfig.ref.embeddingModelPath = - config.embeddingModelPath.toNativeUtf8(); - cConfig.ref.llmModelPath = config.llmModelPath.toNativeUtf8(); - cConfig.ref.embeddingDimension = config.embeddingDimension; - cConfig.ref.topK = config.topK; - cConfig.ref.similarityThreshold = config.similarityThreshold; - cConfig.ref.maxContextTokens = config.maxContextTokens; - cConfig.ref.chunkSize = config.chunkSize; - cConfig.ref.chunkOverlap = config.chunkOverlap; - cConfig.ref.promptTemplate = config.promptTemplate != null - ? config.promptTemplate!.toNativeUtf8() - : nullptr; - cConfig.ref.embeddingConfigJson = config.embeddingConfigJson != null - ? config.embeddingConfigJson!.toNativeUtf8() - : nullptr; - cConfig.ref.llmConfigJson = config.llmConfigJson != null - ? 
config.llmConfigJson!.toNativeUtf8() - : nullptr; - - final result = fn(cConfig, outPipeline); + final result = fn(config, outPipeline); if (result != RAC_SUCCESS || outPipeline.value == nullptr) { throw Exception('Failed to create RAG pipeline: error $result'); } if (_pipeline != null) { - destroyPipeline(); + destroy(); } _pipeline = outPipeline.value; _logger.debug('RAG pipeline created'); } finally { - calloc.free(cConfig.ref.embeddingModelPath); - calloc.free(cConfig.ref.llmModelPath); - if (cConfig.ref.promptTemplate != nullptr) { - calloc.free(cConfig.ref.promptTemplate); - } - if (cConfig.ref.embeddingConfigJson != nullptr) { - calloc.free(cConfig.ref.embeddingConfigJson); - } - if (cConfig.ref.llmConfigJson != nullptr) { - calloc.free(cConfig.ref.llmConfigJson); - } - calloc.free(cConfig); calloc.free(outPipeline); } } - /// Destroy the RAG pipeline. - void destroyPipeline() { + /// Destroy the RAG pipeline and release native resources. + void destroy() { if (_pipeline == null) return; final lib = PlatformLoader.loadCommons(); - final fn = lib.lookupFunction<_RagDestroyNative, _RagDestroyDart>( - 'rac_rag_pipeline_destroy'); + final fn = lib.lookupFunction('rac_rag_pipeline_destroy'); fn(_pipeline!); _pipeline = null; _logger.debug('RAG pipeline destroyed'); } + // MARK: - Document Management + /// Add a document to the pipeline. - void addDocument(String text, {String? metadataJson}) { + /// + /// [metadataJSON] is optional JSON metadata to associate with the document. + void addDocument(String text, {String? metadataJSON}) { _ensurePipeline(); final lib = PlatformLoader.loadCommons(); - final fn = lib.lookupFunction<_RagAddDocumentNative, _RagAddDocumentDart>( - 'rac_rag_add_document'); + final fn = lib.lookupFunction('rac_rag_add_document'); final cText = text.toNativeUtf8(); - final cMeta = metadataJson != null ? metadataJson.toNativeUtf8() : nullptr; + final cMeta = + metadataJSON != null ? 
metadataJSON.toNativeUtf8() : nullptr; try { final result = fn(_pipeline!, cText, cMeta); @@ -351,8 +181,8 @@ class DartBridgeRAG { _ensurePipeline(); final lib = PlatformLoader.loadCommons(); - final fn = lib.lookupFunction<_RagClearNative, _RagClearDart>( - 'rac_rag_clear_documents'); + final fn = lib.lookupFunction('rac_rag_clear_documents'); fn(_pipeline!); } @@ -362,34 +192,45 @@ class DartBridgeRAG { if (_pipeline == null) return 0; final lib = PlatformLoader.loadCommons(); - final fn = lib.lookupFunction<_RagCountNative, _RagCountDart>( - 'rac_rag_get_document_count'); + final fn = lib.lookupFunction('rac_rag_get_document_count'); return fn(_pipeline!); } - /// Query the RAG pipeline. - RAGResult query(RAGQueryOptions options) { + // MARK: - Query + + /// Query the RAG pipeline with named parameters. + /// + /// Returns a [RAGBridgeResult]. Use [RAGResult.fromBridge] in [rag_types.dart] + /// to convert to the public [RAGResult] type. + RAGBridgeResult query( + String question, { + String? systemPrompt, + int maxTokens = 512, + double temperature = 0.7, + double topP = 0.9, + int topK = 40, + }) { _ensurePipeline(); final lib = PlatformLoader.loadCommons(); - final queryFn = lib.lookupFunction<_RagQueryNative, _RagQueryDart>( + final queryFn = lib.lookupFunction( 'rac_rag_query'); - final freeFn = lib.lookupFunction<_RagResultFreeNative, _RagResultFreeDart>( - 'rac_rag_result_free'); + final freeFn = lib.lookupFunction('rac_rag_result_free'); - final cQuery = calloc<_RacRagQuery>(); - final cResult = calloc<_RacRagResult>(); + final cQuery = calloc(); + final cResult = calloc(); try { - cQuery.ref.question = options.question.toNativeUtf8(); - cQuery.ref.systemPrompt = options.systemPrompt != null - ? 
options.systemPrompt!.toNativeUtf8() - : nullptr; - cQuery.ref.maxTokens = options.maxTokens; - cQuery.ref.temperature = options.temperature; - cQuery.ref.topP = options.topP; - cQuery.ref.topK = options.topK; + cQuery.ref.question = question.toNativeUtf8(); + cQuery.ref.systemPrompt = + systemPrompt != null ? systemPrompt.toNativeUtf8() : nullptr; + cQuery.ref.maxTokens = maxTokens; + cQuery.ref.temperature = temperature; + cQuery.ref.topP = topP; + cQuery.ref.topK = topK; final status = queryFn(_pipeline!, cQuery, cResult); if (status != RAC_SUCCESS) { @@ -401,21 +242,22 @@ class DartBridgeRAG { : ''; final contextUsed = cResult.ref.contextUsed != nullptr ? cResult.ref.contextUsed.toDartString() - : null; + : ''; - final chunks = []; + final chunks = []; for (int i = 0; i < cResult.ref.numChunks; i++) { final c = cResult.ref.retrievedChunks[i]; - chunks.add(RAGSearchResult( + final meta = + c.metadataJson != nullptr ? c.metadataJson.toDartString() : null; + chunks.add(RAGBridgeSearchResult( chunkId: c.chunkId != nullptr ? c.chunkId.toDartString() : '', text: c.text != nullptr ? c.text.toDartString() : '', similarityScore: c.similarityScore, - metadataJson: - c.metadataJson != nullptr ? c.metadataJson.toDartString() : null, + metadataJson: meta?.isEmpty == true ? null : meta, )); } - final result = RAGResult( + final result = RAGBridgeResult( answer: answer, retrievedChunks: chunks, contextUsed: contextUsed, @@ -438,7 +280,8 @@ class DartBridgeRAG { void _ensurePipeline() { if (_pipeline == null) { - throw StateError('RAG pipeline not created. Call createPipeline() first.'); + throw StateError( + 'RAG pipeline not created. 
Call createPipeline() first.'); } } } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/type_conversions/model_types_cpp_bridge.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/type_conversions/model_types_cpp_bridge.dart index a0af9505a..bf4ae125d 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/type_conversions/model_types_cpp_bridge.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/type_conversions/model_types_cpp_bridge.dart @@ -43,6 +43,7 @@ abstract class RacInferenceFramework { static const int fluidAudio = 4; static const int builtIn = 5; static const int none = 6; + static const int genie = 10; // RAC_FRAMEWORK_GENIE static const int unknown = 99; } @@ -191,6 +192,8 @@ extension InferenceFrameworkCppBridge on InferenceFramework { return RacInferenceFramework.builtIn; case InferenceFramework.none: return RacInferenceFramework.none; + case InferenceFramework.genie: + return RacInferenceFramework.genie; case InferenceFramework.unknown: return RacInferenceFramework.unknown; } @@ -213,6 +216,8 @@ extension InferenceFrameworkCppBridge on InferenceFramework { return InferenceFramework.builtIn; case RacInferenceFramework.none: return InferenceFramework.none; + case RacInferenceFramework.genie: + return InferenceFramework.genie; default: return InferenceFramework.unknown; } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/extensions/runanywhere_device.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/extensions/runanywhere_device.dart new file mode 100644 index 000000000..1e58a2c35 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/extensions/runanywhere_device.dart @@ -0,0 +1,45 @@ +/// RunAnywhere + Device +/// +/// Public API for NPU chip detection. +/// Android only — returns null on iOS and other platforms. 
+library runanywhere_device; + +import 'dart:io' show Platform; + +import 'package:flutter/services.dart'; +import 'package:runanywhere/core/types/npu_chip.dart'; +import 'package:runanywhere/public/runanywhere.dart'; + +// ============================================================================= +// NPU Chip Detection +// ============================================================================= + +/// Extension methods for NPU chip detection +extension RunAnywhereDevice on RunAnywhere { + static final _channel = MethodChannel('runanywhere'); + + /// Detect the device's NPU chipset for Genie model compatibility. + /// + /// Returns the [NPUChip] if the device has a supported Qualcomm SoC, + /// or null if the device is not Android or does not support NPU inference. + /// + /// Example: + /// ```dart + /// final chip = await RunAnywhereDevice.getChip(); + /// if (chip != null) { + /// final url = chip.downloadUrl('qwen3-4b'); + /// RunAnywhere.registerModel(id: 'qwen3-4b-npu', name: 'Qwen3 4B NPU', url: url, ...); + /// } + /// ``` + static Future getChip() async { + if (!Platform.isAndroid) return null; + + try { + final socModel = await _channel.invokeMethod('getSocModel'); + if (socModel == null || socModel.isEmpty) return null; + return NPUChip.fromSocModel(socModel); + } on PlatformException { + return null; + } + } +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart index ca43c6a3b..2554c6acf 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart @@ -24,7 +24,6 @@ import 'package:runanywhere/native/dart_bridge_model_registry.dart' import 'package:runanywhere/native/dart_bridge_vlm.dart'; import 'package:runanywhere/native/ffi_types.dart' show RacVlmImageFormat; import 'package:runanywhere/native/dart_bridge_structured_output.dart'; 
-import 'package:runanywhere/native/dart_bridge_rag.dart'; import 'package:runanywhere/public/configuration/sdk_environment.dart'; import 'package:runanywhere/public/events/event_bus.dart'; import 'package:runanywhere/public/events/sdk_event.dart'; @@ -2545,49 +2544,6 @@ class RunAnywhere { return null; } - // ============================================================================ - // MARK: - RAG (Retrieval-Augmented Generation) - // ============================================================================ - - /// Create a RAG pipeline with the given configuration. - /// - /// Must be called before ingesting documents or running queries. - static Future ragCreatePipeline(RAGConfiguration config) async { - if (!_isInitialized) throw SDKError.notInitialized(); - DartBridgeRAG.shared.createPipeline(config); - } - - /// Destroy the RAG pipeline and release resources. - static Future ragDestroyPipeline() async { - DartBridgeRAG.shared.destroyPipeline(); - } - - /// Ingest a document into the RAG pipeline. - /// - /// The document is split into chunks, embedded, and indexed. - static Future ragIngest(String text, {String? metadataJson}) async { - if (!_isInitialized) throw SDKError.notInitialized(); - DartBridgeRAG.shared.addDocument(text, metadataJson: metadataJson); - } - - /// Clear all documents from the RAG pipeline. - static Future ragClearDocuments() async { - if (!_isInitialized) throw SDKError.notInitialized(); - DartBridgeRAG.shared.clearDocuments(); - } - - /// Get the number of indexed document chunks. - static int get ragDocumentCount => DartBridgeRAG.shared.documentCount; - - /// Query the RAG pipeline with a question. - /// - /// Returns a [RAGResult] with the generated answer and retrieved chunks. - static Future ragQuery( - String question, { - RAGQueryOptions? options, - }) async { - if (!_isInitialized) throw SDKError.notInitialized(); - final queryOptions = options ?? 
RAGQueryOptions(question: question); - return DartBridgeRAG.shared.query(queryOptions); - } + // RAG pipeline methods are provided by the RunAnywhereRAG extension + // in lib/public/extensions/runanywhere_rag.dart. } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/types/rag_types.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/types/rag_types.dart index 58d7381b9..c2c4a6909 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/types/rag_types.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/types/rag_types.dart @@ -143,7 +143,7 @@ class RAGSearchResult { chunkId: bridge.chunkId, text: bridge.text, similarityScore: bridge.similarityScore, - metadataJSON: bridge.metadataJson.isEmpty ? null : bridge.metadataJson, + metadataJSON: bridge.metadataJson?.isEmpty == false ? bridge.metadataJson : null, ); } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/runanywhere.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/runanywhere.dart index 6f3504ae2..65a158e9e 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/runanywhere.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/runanywhere.dart @@ -24,11 +24,12 @@ export 'public/configuration/sdk_environment.dart'; export 'public/errors/errors.dart'; export 'public/events/event_bus.dart'; export 'public/events/sdk_event.dart'; +export 'core/types/npu_chip.dart'; +export 'public/extensions/runanywhere_device.dart'; export 'public/extensions/runanywhere_frameworks.dart'; export 'public/extensions/runanywhere_logging.dart'; export 'public/extensions/runanywhere_storage.dart'; -export 'native/dart_bridge_rag.dart' - show RAGConfiguration, RAGQueryOptions, RAGSearchResult, RAGResult; +export 'native/dart_bridge_rag.dart' show DartBridgeRAG; export 'public/runanywhere.dart'; export 'public/runanywhere_tool_calling.dart'; export 'public/types/tool_calling_types.dart'; diff --git 
a/sdk/runanywhere-flutter/packages/runanywhere/pubspec.yaml b/sdk/runanywhere-flutter/packages/runanywhere/pubspec.yaml index a2f8cfc15..fb9c2ae1e 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/pubspec.yaml +++ b/sdk/runanywhere-flutter/packages/runanywhere/pubspec.yaml @@ -1,6 +1,6 @@ name: runanywhere description: Privacy-first, on-device AI SDK for Flutter. Run LLMs, STT, TTS, and VAD directly on device with no data leaving the device. -version: 0.16.0 +version: 0.17.0 homepage: https://runanywhere.ai repository: https://github.com/RunanywhereAI/runanywhere-sdks issue_tracker: https://github.com/RunanywhereAI/runanywhere-sdks/issues @@ -36,8 +36,6 @@ dependencies: collection: ^1.18.0 json_annotation: ^4.9.0 path: ^1.9.0 - # Archive extraction (tar.bz2, zip) - archive: ^3.6.1 # TTS fallback (system TTS) flutter_tts: ^3.8.0 # Audio recording for voice sessions diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/CHANGELOG.md b/sdk/runanywhere-flutter/packages/runanywhere_genie/CHANGELOG.md new file mode 100644 index 000000000..203b684b4 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/CHANGELOG.md @@ -0,0 +1,8 @@ +## 0.16.0 + +### Added +- Initial release of Qualcomm Genie NPU backend for RunAnywhere Flutter SDK +- `Genie.register()` / `Genie.unregister()` for C++ backend registration +- `Genie.addModel()` convenience method for NPU model registration +- `Genie.isAvailable` platform check (Android/Snapdragon only) +- `Genie.canHandle()` model compatibility check diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/LICENSE b/sdk/runanywhere-flutter/packages/runanywhere_genie/LICENSE new file mode 100644 index 000000000..18f33e302 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 RunAnywhere AI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 
"Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/android/binary_config.gradle b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/binary_config.gradle new file mode 100644 index 000000000..2dddbfcc7 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/binary_config.gradle @@ -0,0 +1,51 @@ +// ============================================================================= +// BINARY CONFIGURATION FOR RUNANYWHERE FLUTTER SDK - ANDROID (Genie Package) +// ============================================================================= +// This file controls whether to use local or remote native libraries (.so files). +// Similar to Swift Package.swift's testLocal flag. 
+// +// Set to `true` to use local binaries from android/src/main/jniLibs/ +// Set to `false` to download binaries from GitHub releases (production mode) +// ============================================================================= + +ext { + // Set this to true for local development/testing + // Set to false for production builds (downloads from GitHub releases) + testLocal = false + + // ============================================================================= + // Version Configuration (MUST match Swift Package.swift and Kotlin build.gradle.kts) + // ============================================================================= + genieVersion = "0.3.0" + + // ============================================================================= + // Remote binary URLs + // RABackendGenie hosted on the public runanywhere-sdks repo + // (runanywhere-genie is private, so binaries are published here) + // ============================================================================= + binariesGitHubOrg = "RunanywhereAI" + binariesRepo = "runanywhere-sdks" + binariesBaseUrl = "https://github.com/${binariesGitHubOrg}/${binariesRepo}/releases/download" + + // Android native libraries package + genieAndroidUrl = "${binariesBaseUrl}/genie-v${genieVersion}/RABackendGENIE-android-arm64-v8a-v${genieVersion}.zip" + + // Helper method to check if we should download + shouldDownloadAndroidLibs = { -> + return !testLocal + } + + // Helper method to check if local libs exist + checkLocalLibsExist = { -> + def jniLibsDir = project.file('src/main/jniLibs') + def arm64Dir = new File(jniLibsDir, 'arm64-v8a') + + if (!arm64Dir.exists() || !arm64Dir.isDirectory()) { + return false + } + + // Check for Genie backend library + def genieLib = new File(arm64Dir, 'librac_backend_genie_jni.so') + return genieLib.exists() + } +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/android/build.gradle b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/build.gradle new file 
mode 100644 index 000000000..789108549 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/build.gradle @@ -0,0 +1,170 @@ +// RunAnywhere Genie Backend - Android +// +// This plugin bundles RABackendGenie native libraries (.so files) for Android. +// RABackendGenie provides LLM text generation capabilities using Qualcomm Genie NPU. +// +// Binary Configuration: +// Edit binary_config.gradle to toggle between local and remote binaries: +// - testLocal = true: Use local .so files from android/src/main/jniLibs/ (for development) +// - testLocal = false: Download from GitHub releases (for production) +// +// Version: Must match Swift SDK's Package.swift and Kotlin SDK's build.gradle.kts + +group 'ai.runanywhere.sdk.genie' +version '0.16.0' + +// Load binary configuration +apply from: 'binary_config.gradle' + +buildscript { + ext.kotlin_version = '1.9.10' + repositories { + google() + mavenCentral() + } + dependencies { + classpath 'com.android.tools.build:gradle:8.1.0' + classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" + } +} + +apply plugin: 'com.android.library' +apply plugin: 'kotlin-android' + +android { + namespace 'ai.runanywhere.sdk.genie' + compileSdk 34 + + // Use NDK for native library support + ndkVersion "25.2.9519653" + + defaultConfig { + minSdk 24 + targetSdk 34 + + // ABI filters for native libraries + ndk { + abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86_64' + } + + // Consumer proguard rules + consumerProguardFiles 'proguard-rules.pro' + } + + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + + kotlinOptions { + jvmTarget = '1.8' + } + + sourceSets { + main { + // Native libraries location - use downloaded libs or local libs based on config + jniLibs.srcDirs = [testLocal ? 
'src/main/jniLibs' : 'build/jniLibs'] + } + } + + buildTypes { + release { + minifyEnabled false + } + } +} + +dependencies { + implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version" +} + +// ============================================================================= +// Binary Download Task (runs when testLocal = false) +// ============================================================================= +task downloadNativeLibs { + group = 'runanywhere' + description = 'Download RABackendGenie native libraries from GitHub releases' + + doLast { + if (shouldDownloadAndroidLibs()) { + println "📦 Remote mode: Downloading RABackendGenie Android native libraries..." + + def jniLibsDir = file('build/jniLibs') + if (jniLibsDir.exists()) { + delete(jniLibsDir) + } + jniLibsDir.mkdirs() + + // Ensure build directory exists + buildDir.mkdirs() + + def downloadUrl = genieAndroidUrl + def zipFile = file("${buildDir}/genie-android.zip") + + println "Downloading from: ${downloadUrl}" + + // Download the zip file + ant.get(src: downloadUrl, dest: zipFile) + + println "✅ Downloaded successfully" + + // Extract to temp directory first + def tempDir = file("${buildDir}/genie-temp") + if (tempDir.exists()) { + delete(tempDir) + } + tempDir.mkdirs() + + copy { + from zipTree(zipFile) + into tempDir + } + + // Common libs that should NOT be duplicated (they're in the core SDK) + def commonLibs = ['libc++_shared.so', 'librac_commons.so', 'librac_commons_jni.so'] + + // Copy .so files from jniLibs structure (excluding common libs) + tempDir.eachFileRecurse { file -> + if (file.isDirectory() && file.name in ['arm64-v8a', 'armeabi-v7a', 'x86_64', 'x86']) { + def targetAbiDir = new File(jniLibsDir, file.name) + targetAbiDir.mkdirs() + file.eachFile { soFile -> + if (soFile.name.endsWith('.so') && !(soFile.name in commonLibs)) { + copy { + from soFile + into targetAbiDir + } + println " ✓ ${file.name}/${soFile.name}" + } + } + } + } + + // Clean up + zipFile.delete() + if 
(tempDir.exists()) { + delete(tempDir) + } + + println "✅ RABackendGenie native libraries downloaded successfully" + } else { + println "🔧 Local mode: Using native libraries from src/main/jniLibs/" + + if (!checkLocalLibsExist()) { + throw new GradleException(""" + ⚠️ Native libraries not found in src/main/jniLibs/! + For local mode, please build and copy the libraries: + 1. cd runanywhere-core && ./scripts/build-android.sh --genie + 2. Copy the .so files to packages/runanywhere_genie/android/src/main/jniLibs/ + Or switch to remote mode by editing binary_config.gradle: + testLocal = false + """) + } else { + println "✅ Using local native libraries" + } + } + } +} + +// Run downloadNativeLibs before preBuild +preBuild.dependsOn downloadNativeLibs diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/AndroidManifest.xml b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/AndroidManifest.xml new file mode 100644 index 000000000..8d438796e --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/AndroidManifest.xml @@ -0,0 +1,4 @@ + + + diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/jniLibs/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/jniLibs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/kotlin/ai/runanywhere/sdk/genie/GeniePlugin.kt b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/kotlin/ai/runanywhere/sdk/genie/GeniePlugin.kt new file mode 100644 index 000000000..f43d5d894 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/android/src/main/kotlin/ai/runanywhere/sdk/genie/GeniePlugin.kt @@ -0,0 +1,60 @@ +package ai.runanywhere.sdk.genie + +import android.os.Build +import io.flutter.embedding.engine.plugins.FlutterPlugin +import io.flutter.plugin.common.MethodCall +import 
io.flutter.plugin.common.MethodChannel +import io.flutter.plugin.common.MethodChannel.MethodCallHandler +import io.flutter.plugin.common.MethodChannel.Result + +/** + * RunAnywhere Genie Flutter Plugin - Android Implementation + * + * This plugin provides the native bridge for the Genie NPU backend on Android. + * The actual LLM functionality is provided by RABackendGenie native libraries (.so files). + */ +class GeniePlugin : FlutterPlugin, MethodCallHandler { + private lateinit var channel: MethodChannel + + companion object { + private const val CHANNEL_NAME = "runanywhere_genie" + private const val BACKEND_VERSION = "0.1.6" + private const val BACKEND_NAME = "Genie" + + init { + // Load Genie backend native libraries + try { + System.loadLibrary("rac_backend_genie_jni") + } catch (e: UnsatisfiedLinkError) { + // Library may not be available in all configurations + android.util.Log.w("Genie", "Failed to load rac_backend_genie_jni: ${e.message}") + } + } + } + + override fun onAttachedToEngine(flutterPluginBinding: FlutterPlugin.FlutterPluginBinding) { + channel = MethodChannel(flutterPluginBinding.binaryMessenger, CHANNEL_NAME) + channel.setMethodCallHandler(this) + } + + override fun onMethodCall(call: MethodCall, result: Result) { + when (call.method) { + "getPlatformVersion" -> { + result.success("Android ${Build.VERSION.RELEASE}") + } + "getBackendVersion" -> { + result.success(BACKEND_VERSION) + } + "getBackendName" -> { + result.success(BACKEND_NAME) + } + else -> { + result.notImplemented() + } + } + } + + override fun onDetachedFromEngine(binding: FlutterPlugin.FlutterPluginBinding) { + channel.setMethodCallHandler(null) + } +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/ios/Classes/GeniePlugin.swift b/sdk/runanywhere-flutter/packages/runanywhere_genie/ios/Classes/GeniePlugin.swift new file mode 100644 index 000000000..404706b60 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/ios/Classes/GeniePlugin.swift @@ 
-0,0 +1,32 @@ +import Flutter +import UIKit + +/// RunAnywhere Genie Flutter Plugin - iOS Implementation +/// +/// This is a stub plugin for the Flutter plugin system. +/// Genie NPU backend is Android/Snapdragon only - this plugin provides +/// platform channel compatibility but no actual NPU functionality on iOS. +public class GeniePlugin: NSObject, FlutterPlugin { + + public static func register(with registrar: FlutterPluginRegistrar) { + let channel = FlutterMethodChannel( + name: "runanywhere_genie", + binaryMessenger: registrar.messenger() + ) + let instance = GeniePlugin() + registrar.addMethodCallDelegate(instance, channel: channel) + } + + public func handle(_ call: FlutterMethodCall, result: @escaping FlutterResult) { + switch call.method { + case "getPlatformVersion": + result("iOS " + UIDevice.current.systemVersion) + case "getBackendVersion": + result("0.1.6") + case "getBackendName": + result("Genie") + default: + result(FlutterMethodNotImplemented) + } + } +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/ios/runanywhere_genie.podspec b/sdk/runanywhere-flutter/packages/runanywhere_genie/ios/runanywhere_genie.podspec new file mode 100644 index 000000000..245fa9578 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/ios/runanywhere_genie.podspec @@ -0,0 +1,42 @@ +# +# RunAnywhere Genie Backend - iOS +# +# This is a stub podspec for the Flutter plugin system. +# Genie NPU backend is Android/Snapdragon only - no iOS binary is provided. +# This podspec exists solely to satisfy Flutter's iOS plugin registration requirements. +# + +Pod::Spec.new do |s| + s.name = 'runanywhere_genie' + s.version = '0.16.0' + s.summary = 'RunAnywhere Genie: NPU LLM inference for Flutter (Android/Snapdragon only)' + s.description = <<-DESC +Qualcomm Genie NPU backend for RunAnywhere Flutter SDK. Provides LLM text generation +on Snapdragon NPU hardware. 
This is an Android-only backend; the iOS pod is a stub +for Flutter plugin system compatibility. + DESC + s.homepage = 'https://runanywhere.ai' + s.license = { :type => 'MIT' } + s.author = { 'RunAnywhere' => 'team@runanywhere.ai' } + s.source = { :path => '.' } + + s.ios.deployment_target = '14.0' + s.swift_version = '5.0' + + # Source files (minimal stub - Genie is Android-only) + s.source_files = 'Classes/**/*' + + # Flutter dependency + s.dependency 'Flutter' + + # No vendored_frameworks - Genie has no iOS binary (Android/Snapdragon only) + + # Build settings + s.pod_target_xcconfig = { + 'DEFINES_MODULE' => 'YES', + 'EXCLUDED_ARCHS[sdk=iphonesimulator*]' => 'i386', + } + + # Mark static framework for proper linking + s.static_framework = true +end diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/genie.dart b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/genie.dart new file mode 100644 index 000000000..e73f2acba --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/genie.dart @@ -0,0 +1,244 @@ +/// Qualcomm Genie NPU backend for RunAnywhere Flutter SDK. +/// +/// This module provides LLM (Language Model) capabilities via Qualcomm Genie NPU. +/// It is a **thin wrapper** that registers the C++ backend with the service registry. +/// +/// ## Architecture (matches Swift/Kotlin) +/// +/// The C++ backend (RABackendGenie) handles all business logic: +/// - Service provider registration +/// - Model loading and inference on Snapdragon NPU +/// - Streaming generation +/// +/// This Dart module just: +/// 1. Calls `rac_backend_genie_register()` to register the backend +/// 2. 
The core SDK handles all LLM operations via `rac_llm_component_*` +/// +/// ## Quick Start +/// +/// ```dart +/// import 'package:runanywhere_genie/runanywhere_genie.dart'; +/// +/// // Register the module (matches Swift: Genie.register()) +/// await Genie.register(); +/// +/// // Add models +/// Genie.addModel( +/// name: 'Qwen3 4B NPU', +/// url: 'https://huggingface.co/.../model.zip', +/// memoryRequirement: 4000000000, +/// ); +/// ``` +library runanywhere_genie; + +import 'dart:async' show unawaited; + +import 'package:runanywhere/core/module/runanywhere_module.dart'; +import 'package:runanywhere/core/types/model_types.dart'; +import 'package:runanywhere/core/types/sdk_component.dart'; +import 'package:runanywhere/foundation/logging/sdk_logger.dart'; +import 'package:runanywhere/native/ffi_types.dart'; +import 'package:runanywhere/public/runanywhere.dart' show RunAnywhere; +import 'package:runanywhere_genie/native/genie_bindings.dart'; + +// Re-export for backward compatibility +export 'genie_error.dart'; + +/// Qualcomm Genie NPU module for LLM text generation. +/// +/// Provides large language model capabilities using Qualcomm Genie +/// on Snapdragon NPU hardware. +/// +/// Matches the Swift/Kotlin Genie module pattern. 
+class Genie implements RunAnywhereModule { + // ============================================================================ + // Singleton Pattern (matches Swift enum pattern) + // ============================================================================ + + static final Genie _instance = Genie._internal(); + static Genie get module => _instance; + Genie._internal(); + + // ============================================================================ + // Module Info (matches Swift exactly) + // ============================================================================ + + /// Current version of the Genie Runtime module + static const String version = '1.0.0'; + + // ============================================================================ + // RunAnywhereModule Conformance (matches Swift exactly) + // ============================================================================ + + @override + String get moduleId => 'genie'; + + @override + String get moduleName => 'Genie'; + + @override + Set get capabilities => {SDKComponent.llm}; + + @override + int get defaultPriority => 200; + + @override + InferenceFramework get inferenceFramework => InferenceFramework.genie; + + // ============================================================================ + // Registration State + // ============================================================================ + + static bool _isRegistered = false; + static GenieBindings? _bindings; + static final _logger = SDKLogger('Genie'); + + /// Internal model registry for models added via addModel + static final List _registeredModels = []; + + // ============================================================================ + // Registration (matches Swift Genie.register() exactly) + // ============================================================================ + + /// Register Genie backend with the C++ service registry. 
+ /// + /// This calls `rac_backend_genie_register()` to register the + /// Genie service provider with the C++ commons layer. + /// + /// Safe to call multiple times - subsequent calls are no-ops. + static Future register({int priority = 200}) async { + if (_isRegistered) { + _logger.debug('Genie already registered'); + return; + } + + // Check native library availability + if (!isAvailable) { + _logger.error('Genie native library not available'); + return; + } + + _logger.info('Registering Genie backend with C++ registry...'); + + try { + _bindings = GenieBindings(); + _logger.debug( + 'GenieBindings created, isAvailable: ${_bindings!.isAvailable}'); + + final result = _bindings!.register(); + _logger.info( + 'rac_backend_genie_register() returned: $result (${RacResultCode.getMessage(result)})'); + + // RAC_SUCCESS = 0, RAC_ERROR_MODULE_ALREADY_REGISTERED = specific code + if (result != RacResultCode.success && + result != RacResultCode.errorModuleAlreadyRegistered) { + _logger.error('C++ backend registration FAILED with code: $result'); + return; + } + + // No Dart-level provider needed - all LLM operations go through + // DartBridgeLLM -> rac_llm_component_* (just like Swift CppBridge.LLM) + + _isRegistered = true; + _logger.info('Genie LLM backend registered successfully'); + } catch (e) { + _logger.error('GenieBindings not available: $e'); + } + } + + /// Unregister the Genie backend from C++ registry. + static void unregister() { + if (_isRegistered) { + _bindings?.unregister(); + _isRegistered = false; + _logger.info('Genie LLM backend unregistered'); + } + } + + // ============================================================================ + // Model Handling (matches Swift exactly) + // ============================================================================ + + /// Check if the native backend is available on this platform. 
+ /// + /// Genie is Android/Snapdragon only: + /// - On Android: Checks if librac_backend_genie_jni.so can be loaded + /// - On iOS/other: Always returns false + static bool get isAvailable => GenieBindings.checkAvailability(); + + /// Check if Genie can handle a given model. + /// Checks if the model ID contains "genie" or "npu" identifiers. + static bool canHandle(String? modelId) { + if (modelId == null) return false; + final lowered = modelId.toLowerCase(); + return lowered.contains('genie') || lowered.contains('npu'); + } + + // ============================================================================ + // Model Registration (convenience API) + // ============================================================================ + + /// Add a LLM model to the registry. + /// + /// This is a convenience method that registers a model with the SDK. + /// The model will be associated with the Genie NPU backend. + /// + /// Matches Swift pattern - models are registered globally via RunAnywhere. + static void addModel({ + String? id, + required String name, + required String url, + int? memoryRequirement, + bool supportsThinking = false, + }) { + final uri = Uri.tryParse(url); + if (uri == null) { + _logger.error('Invalid URL for model: $name'); + return; + } + + final modelId = + id ?? 
name.toLowerCase().replaceAll(RegExp(r'[^a-z0-9]'), '-'); + + // Register with the global SDK registry (matches Swift pattern) + final model = RunAnywhere.registerModel( + id: modelId, + name: name, + url: uri, + framework: InferenceFramework.genie, + modality: ModelCategory.language, + memoryRequirement: memoryRequirement, + supportsThinking: supportsThinking, + ); + + // Keep local reference for convenience + _registeredModels.add(model); + _logger.info('Added Genie model: $name ($modelId)'); + } + + /// Get all models registered with this module + static List get registeredModels => + List.unmodifiable(_registeredModels); + + // ============================================================================ + // Cleanup + // ============================================================================ + + /// Dispose of resources + static void dispose() { + _bindings = null; + _registeredModels.clear(); + _isRegistered = false; + _logger.info('Genie disposed'); + } + + // ============================================================================ + // Auto-Registration (matches Swift exactly) + // ============================================================================ + + /// Enable auto-registration for this module. + /// Call this method to trigger C++ backend registration. + static void autoRegister() { + unawaited(register()); + } +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/genie_error.dart b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/genie_error.dart new file mode 100644 index 000000000..319aba870 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/genie_error.dart @@ -0,0 +1,71 @@ +/// Genie specific errors. +/// +/// This is the Flutter equivalent of Genie-specific error handling. +class GenieError implements Exception { + final String message; + final GenieErrorType type; + + const GenieError._(this.message, this.type); + + /// Model failed to load. 
+ factory GenieError.modelLoadFailed([String? details]) { + return GenieError._( + details ?? 'Failed to load the NPU model', + GenieErrorType.modelLoadFailed, + ); + } + + /// Service not initialized. + factory GenieError.notInitialized() { + return const GenieError._( + 'Genie service not initialized', + GenieErrorType.notInitialized, + ); + } + + /// Generation failed. + factory GenieError.generationFailed(String reason) { + return GenieError._( + 'Generation failed: $reason', + GenieErrorType.generationFailed, + ); + } + + /// Model not found. + factory GenieError.modelNotFound(String path) { + return GenieError._( + 'Model not found at: $path', + GenieErrorType.modelNotFound, + ); + } + + /// Timeout error. + factory GenieError.timeout(Duration duration) { + return GenieError._( + 'Generation timed out after ${duration.inSeconds} seconds', + GenieErrorType.timeout, + ); + } + + /// Platform not supported (Genie is Android/Snapdragon only). + factory GenieError.platformNotSupported([String? platform]) { + return GenieError._( + 'Genie NPU is not supported on ${platform ?? 'this platform'}. ' + 'Genie requires Android with Snapdragon NPU.', + GenieErrorType.platformNotSupported, + ); + } + + @override + String toString() => 'GenieError: $message'; +} + +/// Types of Genie errors. +enum GenieErrorType { + modelLoadFailed, + notInitialized, + generationFailed, + modelNotFound, + timeout, + platformNotSupported, +} diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/native/genie_bindings.dart b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/native/genie_bindings.dart new file mode 100644 index 000000000..3aab66606 --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/native/genie_bindings.dart @@ -0,0 +1,168 @@ +import 'dart:ffi'; +import 'dart:io'; + +import 'package:runanywhere/native/ffi_types.dart'; +import 'package:runanywhere/native/platform_loader.dart'; + +/// Minimal Genie NPU backend FFI bindings. 
+/// +/// This is a **thin wrapper** that only provides: +/// - `register()` - calls `rac_backend_genie_register()` +/// - `unregister()` - calls `rac_backend_genie_unregister()` +/// +/// All other LLM operations (create, load, generate, etc.) are handled by +/// the core SDK via `rac_llm_component_*` functions in RACommons. +/// +/// ## Architecture (matches Swift/Kotlin) +/// +/// The C++ backend (RABackendGenie) handles all business logic: +/// - Service provider registration with the C++ service registry +/// - Model loading and inference on Snapdragon NPU +/// - Streaming generation +/// +/// This Dart code just: +/// 1. Calls `rac_backend_genie_register()` to register the backend +/// 2. The core SDK's `NativeBackend` handles all LLM operations via `rac_llm_component_*` +/// +/// ## Platform Support +/// +/// Genie is Android/Snapdragon only. On iOS and other platforms, +/// `checkAvailability()` always returns false. +class GenieBindings { + final DynamicLibrary _lib; + + // Function pointers - only registration functions + late final RacBackendGenieRegisterDart? _register; + late final RacBackendGenieUnregisterDart? _unregister; + + /// Create bindings using the appropriate library for each platform. + /// + /// - Android: Loads librac_backend_genie_jni.so separately + /// - iOS/other: Falls back to the RACommons library handle; Genie symbols won't be found + GenieBindings() : _lib = _loadLibrary() { + _bindFunctions(); + } + + /// Create bindings with a specific library (for testing). + GenieBindings.withLibrary(this._lib) { + _bindFunctions(); + } + + /// Load the correct library for the current platform. + static DynamicLibrary _loadLibrary() { + return loadBackendLibrary(); + } + + /// Load the Genie backend library. 
+ /// + /// On Android: Loads librac_backend_genie_jni.so or librunanywhere_genie.so + /// On iOS/other: Returns the RACommons library handle (Genie symbols won't be available) + /// + /// This is exposed as a static method so it can be used by [Genie.isAvailable]. + static DynamicLibrary loadBackendLibrary() { + if (Platform.isAndroid) { + // On Android, the Genie backend is in a separate .so file. + // We need to ensure librac_commons.so is loaded first (dependency). + try { + PlatformLoader.loadCommons(); + } catch (_) { + // Ignore - continue trying to load backend + } + + // Try different naming conventions for the backend library + final libraryNames = [ + 'librac_backend_genie_jni.so', + 'librunanywhere_genie.so', + ]; + + for (final name in libraryNames) { + try { + return DynamicLibrary.open(name); + } catch (_) { + // Try next name + } + } + + // If backend library not found, throw an error + throw ArgumentError( + 'Could not load Genie backend library on Android. ' + 'Tried: ${libraryNames.join(", ")}', + ); + } + + // On iOS/macOS, Genie is not supported; return the commons library + // handle for Flutter plugin system compatibility + return PlatformLoader.loadCommons(); + } + + /// Check if the Genie backend library can be loaded on this platform. + /// + /// Always returns false on non-Android platforms since Genie + /// is a Snapdragon NPU-only backend. + static bool checkAvailability() { + if (!Platform.isAndroid) { + return false; + } + + try { + final lib = loadBackendLibrary(); + lib.lookup>( + 'rac_backend_genie_register'); + return true; + } catch (_) { + return false; + } + } + + void _bindFunctions() { + // Backend registration - from RABackendGenie + try { + _register = _lib.lookupFunction('rac_backend_genie_register'); + } catch (_) { + _register = null; + } + + try { + _unregister = _lib.lookupFunction('rac_backend_genie_unregister'); + } catch (_) { + _unregister = null; + } + } + + /// Check if bindings are available. 
+ bool get isAvailable => _register != null; + + /// Register the Genie backend with the C++ service registry. + /// + /// Returns RAC_SUCCESS (0) on success, or an error code. + /// Safe to call multiple times - returns RAC_ERROR_MODULE_ALREADY_REGISTERED + /// if already registered. + int register() { + if (_register == null) { + return RacResultCode.errorNotSupported; + } + return _register!(); + } + + /// Unregister the Genie backend from C++ registry. + int unregister() { + if (_unregister == null) { + return RacResultCode.errorNotSupported; + } + return _unregister!(); + } +} + +// ============================================================================= +// FFI Type Definitions for Genie Backend +// ============================================================================= + +/// rac_result_t rac_backend_genie_register(void) +typedef RacBackendGenieRegisterNative = Int32 Function(); +typedef RacBackendGenieRegisterDart = int Function(); + +/// rac_result_t rac_backend_genie_unregister(void) +typedef RacBackendGenieUnregisterNative = Int32 Function(); +typedef RacBackendGenieUnregisterDart = int Function(); diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/runanywhere_genie.dart b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/runanywhere_genie.dart new file mode 100644 index 000000000..12f27458d --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/lib/runanywhere_genie.dart @@ -0,0 +1,42 @@ +/// Qualcomm Genie NPU backend for RunAnywhere Flutter SDK. +/// +/// This package provides LLM (Language Model) capabilities via Qualcomm Genie NPU. +/// It is a **thin wrapper** that registers the C++ backend with the service registry. +/// +/// ## Architecture (matches Swift/Kotlin exactly) +/// +/// The C++ backend (RABackendGenie) handles all business logic: +/// - Service provider registration +/// - Model loading and inference on Snapdragon NPU +/// - Streaming generation +/// +/// This Dart module just: +/// 1. 
Calls `rac_backend_genie_register()` to register the backend +/// 2. The core SDK handles all LLM operations via `rac_llm_component_*` +/// +/// ## Quick Start +/// +/// ```dart +/// import 'package:runanywhere/runanywhere.dart'; +/// import 'package:runanywhere_genie/runanywhere_genie.dart'; +/// +/// // Initialize SDK +/// await RunAnywhere.initialize(); +/// +/// // Register Genie module (Android/Snapdragon only) +/// await Genie.register(); +/// ``` +/// +/// ## Capabilities +/// +/// - **LLM (Language Model)**: Text generation on Snapdragon NPU +/// - **Streaming**: Token-by-token streaming generation +/// +/// ## Platform Support +/// +/// - **Android**: Snapdragon devices with NPU support +/// - **iOS**: Not supported (Genie is Android/Snapdragon only) +library runanywhere_genie; + +export 'genie.dart'; +export 'genie_error.dart'; diff --git a/sdk/runanywhere-flutter/packages/runanywhere_genie/pubspec.yaml b/sdk/runanywhere-flutter/packages/runanywhere_genie/pubspec.yaml new file mode 100644 index 000000000..1427ba0ef --- /dev/null +++ b/sdk/runanywhere-flutter/packages/runanywhere_genie/pubspec.yaml @@ -0,0 +1,41 @@ +name: runanywhere_genie +description: Qualcomm Genie NPU backend for RunAnywhere Flutter SDK. On-device LLM inference on Snapdragon NPU. 
+version: 0.16.0 +homepage: https://runanywhere.ai +repository: https://github.com/RunanywhereAI/runanywhere-sdks +issue_tracker: https://github.com/RunanywhereAI/runanywhere-sdks/issues +topics: + - ai + - llm + - npu + - qualcomm + - on-device + +environment: + sdk: '>=3.0.0 <4.0.0' + flutter: '>=3.10.0' + +dependencies: + flutter: + sdk: flutter + # Core SDK dependency (provides RACommons) + runanywhere: ^0.16.0 + ffi: ^2.1.0 + +dev_dependencies: + flutter_test: + sdk: flutter + flutter_lints: ^3.0.0 + +flutter: + uses-material-design: true + + # Native plugin configuration + # RABackendGenie binaries are bundled in android/ directory (Android/Snapdragon only) + plugin: + platforms: + android: + package: ai.runanywhere.sdk.genie + pluginClass: GeniePlugin + ios: + pluginClass: GeniePlugin diff --git a/sdk/runanywhere-flutter/packages/runanywhere_llamacpp/android/src/main/jniLibs/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere_llamacpp/android/src/main/jniLibs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-flutter/packages/runanywhere_llamacpp/ios/Frameworks/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere_llamacpp/ios/Frameworks/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-flutter/packages/runanywhere_onnx/android/src/main/jniLibs/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere_onnx/android/src/main/jniLibs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-flutter/packages/runanywhere_onnx/ios/Frameworks/.gitkeep b/sdk/runanywhere-flutter/packages/runanywhere_onnx/ios/Frameworks/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/sdk/runanywhere-kotlin/gradle.properties b/sdk/runanywhere-kotlin/gradle.properties index 8f049d904..0f68ccd58 100644 --- a/sdk/runanywhere-kotlin/gradle.properties +++ b/sdk/runanywhere-kotlin/gradle.properties @@ -36,7 +36,7 @@ kotlin.mpp.applyDefaultHierarchyTemplate=false # 
./gradlew setupLocalDevelopment # First-time setup (downloads + builds + copies) # ./gradlew rebuildCommons # Rebuild C++ after changes # ============================================================================= -runanywhere.testLocal=true +runanywhere.testLocal=false # Force rebuild of runanywhere-commons C++ code (default: false) # Set to true when you've made changes to C++ source files diff --git a/sdk/runanywhere-kotlin/settings.gradle.kts b/sdk/runanywhere-kotlin/settings.gradle.kts index d460176d2..33d4f17ee 100644 --- a/sdk/runanywhere-kotlin/settings.gradle.kts +++ b/sdk/runanywhere-kotlin/settings.gradle.kts @@ -49,3 +49,7 @@ include(":modules:runanywhere-core-onnx") // RAG pipeline — NOT a separate module. RAG is an orchestration pipeline (like Voice Agent) // that uses existing LLM + Embeddings services. Registration is handled by the core SDK // when ragCreatePipeline() is called. See: RunAnywhere+RAG.jvmAndroid.kt + +// Genie module - now distributed as a closed-source AAR from private repo: +// com.runanywhere.sdk:runanywhere-genie-android: +// See: https://github.com/RunanywhereAI/runanywhere-genie diff --git a/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt b/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt index 2df860afa..82294eccd 100644 --- a/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt +++ b/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt @@ -33,18 +33,26 @@ actual class DeviceInfoService { } } - actual fun getChipName(): String? = - try { - // Get primary ABI (architecture) - val abis = Build.SUPPORTED_ABIS - if (abis.isNotEmpty()) { - abis[0] + actual fun getChipName(): String? 
{ + return try { + // Try Build.SOC_MODEL first (API 31+) — returns actual SoC like "SM8750" + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + val socModel = Build.SOC_MODEL + if (!socModel.isNullOrEmpty() && socModel != "unknown") { + return socModel + } + } + // Fallback to Build.HARDWARE + val hardware = Build.HARDWARE + if (!hardware.isNullOrEmpty() && hardware != "unknown") { + hardware } else { null } } catch (e: Exception) { null } + } actual fun getTotalMemoryGB(): Double? { return try { diff --git a/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt b/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt new file mode 100644 index 000000000..6054a288e --- /dev/null +++ b/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt @@ -0,0 +1,59 @@ +package com.runanywhere.sdk.public.extensions + +import android.os.Build +import com.runanywhere.sdk.core.types.NPUChip +import com.runanywhere.sdk.foundation.SDKLogger +import com.runanywhere.sdk.public.RunAnywhere + +/** + * Android implementation of NPU chip detection. + * + * Detection strategy (ordered): + * 1. [Build.SOC_MODEL] (API 31+) — e.g. "SM8750" + * 2. [Build.HARDWARE] — fallback codename + * 3. /proc/cpuinfo Hardware line — last resort + */ +actual fun RunAnywhere.getChip(): NPUChip? { + val logger = SDKLogger("NPUChip") + + // 1. Try Build.SOC_MODEL (API 31+) + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + val socModel = Build.SOC_MODEL + if (!socModel.isNullOrEmpty() && socModel != "unknown") { + val chip = NPUChip.fromSocModel(socModel) + if (chip != null) { + logger.info("Detected NPU chip: ${chip.displayName} (SOC_MODEL=$socModel)") + return chip + } + } + } + + // 2. 
Try Build.HARDWARE + val hardware = Build.HARDWARE + if (!hardware.isNullOrEmpty() && hardware != "unknown") { + val chip = NPUChip.fromSocModel(hardware) + if (chip != null) { + logger.info("Detected NPU chip: ${chip.displayName} (HARDWARE=$hardware)") + return chip + } + } + + // 3. Try /proc/cpuinfo + try { + val cpuInfo = java.io.File("/proc/cpuinfo").readText() + val hardwareLine = cpuInfo.lines().find { it.startsWith("Hardware", ignoreCase = true) } + if (hardwareLine != null) { + val cpuHardware = hardwareLine.substringAfter(":").trim() + val chip = NPUChip.fromSocModel(cpuHardware) + if (chip != null) { + logger.info("Detected NPU chip: ${chip.displayName} (cpuinfo=$cpuHardware)") + return chip + } + } + } catch (_: Exception) { + // Fall through + } + + logger.debug("No supported NPU chip detected") + return null +} diff --git a/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/security/SecureStorage.kt b/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/security/SecureStorage.kt index 7f13b1056..1d8a46c9e 100644 --- a/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/security/SecureStorage.kt +++ b/sdk/runanywhere-kotlin/src/androidMain/kotlin/com/runanywhere/sdk/security/SecureStorage.kt @@ -39,6 +39,7 @@ class AndroidSecureStorage private constructor( /** * Create secure storage instance for Android */ + @Synchronized fun create(identifier: String): AndroidSecureStorage { val appContext = context diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/ComponentTypes.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/ComponentTypes.kt index a6726ed57..7e5112a1d 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/ComponentTypes.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/ComponentTypes.kt @@ -128,6 +128,7 @@ enum class InferenceFramework( 
FOUNDATION_MODELS("FoundationModels"), SYSTEM_TTS("SystemTTS"), FLUID_AUDIO("FluidAudio"), + GENIE("Genie"), // Special cases BUILT_IN("BuiltIn"), // For simple services (e.g., energy-based VAD) @@ -144,6 +145,7 @@ enum class InferenceFramework( FOUNDATION_MODELS -> "Foundation Models" SYSTEM_TTS -> "System TTS" FLUID_AUDIO -> "FluidAudio" + GENIE -> "Qualcomm Genie" BUILT_IN -> "Built-in" NONE -> "None" UNKNOWN -> "Unknown" @@ -158,6 +160,7 @@ enum class InferenceFramework( FOUNDATION_MODELS -> "foundation_models" SYSTEM_TTS -> "system_tts" FLUID_AUDIO -> "fluid_audio" + GENIE -> "genie" BUILT_IN -> "built_in" NONE -> "none" UNKNOWN -> "unknown" diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/NPUChip.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/NPUChip.kt new file mode 100644 index 000000000..0711905db --- /dev/null +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/NPUChip.kt @@ -0,0 +1,48 @@ +package com.runanywhere.sdk.core.types + +/** + * Supported NPU chipsets for on-device Genie model inference. + * + * Each chip has an [identifier] used in model IDs and an [npuSuffix] used + * to construct download URLs from the HuggingFace model repository. + * + * Example URL construction: + * ``` + * val chip = RunAnywhere.getChip() + * val url = chip.downloadUrl("qwen3-4b") + * // → "https://huggingface.co/runanywhere/genie-npu-models/resolve/main/qwen3-4b-genie-w4a16-8elite-gen5.tar.gz" + * ``` + */ +enum class NPUChip( + val identifier: String, + val displayName: String, + val socModel: String, + val npuSuffix: String, +) { + SNAPDRAGON_8_ELITE("8elite", "Snapdragon 8 Elite", "SM8750", "8elite"), + SNAPDRAGON_8_ELITE_GEN5("8elite-gen5", "Snapdragon 8 Elite Gen 5", "SM8850", "8elite-gen5"), + ; + + /** + * Build a HuggingFace download URL for this chip. + * @param modelSlug Model slug (e.g. 
"qwen3-4b") → produces + * "qwen3-4b-genie-w4a16-8elite-gen5.tar.gz" + * @param quant Quantization format (e.g. "w4a16", "w8a16"). Defaults to "w4a16". + */ + fun downloadUrl(modelSlug: String, quant: String = "w4a16"): String = + "${BASE_URL}${modelSlug}-genie-${quant}-${npuSuffix}.tar.gz" + + companion object { + /** Base URL for NPU model downloads on HuggingFace. */ + const val BASE_URL = "https://huggingface.co/runanywhere/genie-npu-models/resolve/main/" + + /** + * Match an NPU chip from a SoC model string (e.g. "SM8750"). + * Returns null if the SoC is not a supported NPU chipset. + */ + fun fromSocModel(socModel: String): NPUChip? { + val upper = socModel.uppercase() + return entries.firstOrNull { upper.contains(it.socModel) } + } + } +} diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/SDKLogger.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/SDKLogger.kt index 39c2ef9f5..f74a30802 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/SDKLogger.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/SDKLogger.kt @@ -332,8 +332,8 @@ object Logging { // Forward to runanywhere-commons bridge if set commonsLogBridge?.invoke(entry) - // Write to all registered destinations - for (destination in _destinations) { + // Write to all registered destinations (snapshot to avoid concurrent modification) + for (destination in _destinations.toList()) { if (destination.isAvailable) { destination.write(entry) } @@ -358,6 +358,7 @@ object Logging { /** * Add a log destination (non-suspending version). 
*/ + @Synchronized fun addDestinationSync(destination: LogDestination) { if (_destinations.none { it.identifier == destination.identifier }) { _destinations.add(destination) @@ -762,6 +763,9 @@ class SDKLogger( /** Logger for RAG (Retrieval-Augmented Generation) operations */ val rag = SDKLogger("RAG") + /** Logger for Qualcomm Genie (NPU LLM) operations */ + val genie = SDKLogger("Genie") + /** Logger for VoiceAgent operations */ val voiceAgent = SDKLogger("VoiceAgent") diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt index ddf0d290a..529ea7953 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/foundation/device/DeviceInfoService.kt @@ -23,7 +23,7 @@ expect class DeviceInfoService() { fun getDeviceModel(): String /** - * Get chip/CPU name (e.g., "ARM64", "x86_64") + * Get SoC model name for NPU chip detection (e.g., "SM8750", "kalama") * Returns null if unable to determine */ fun getChipName(): String? 
diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/RunAnywhere.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/RunAnywhere.kt index 8060e8679..286dca7cf 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/RunAnywhere.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/RunAnywhere.kt @@ -14,6 +14,8 @@ import com.runanywhere.sdk.foundation.LogLevel import com.runanywhere.sdk.foundation.SDKLogger import com.runanywhere.sdk.public.events.EventBus import com.runanywhere.sdk.utils.SDKConstants +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock // ═══════════════════════════════════════════════════════════════════════════ // SDK INITIALIZATION FLOW (Two-Phase Pattern) @@ -91,6 +93,7 @@ object RunAnywhere { private var _areServicesReady: Boolean = false private val lock = Any() + private val servicesMutex = Mutex() // ═══════════════════════════════════════════════════════════════════════════ // MARK: - Public Properties @@ -240,7 +243,7 @@ object RunAnywhere { return } - synchronized(lock) { + servicesMutex.withLock { if (_areServicesReady) { return } @@ -334,7 +337,7 @@ object RunAnywhere { * Initialize CppBridge services (Phase 2) * Implementation is in jvmAndroidMain via expect/actual */ - private fun initializeCppBridgeServices() { + private suspend fun initializeCppBridgeServices() { logger.debug("CppBridge services initialization requested") initializePlatformBridgeServices() } @@ -367,7 +370,7 @@ internal expect fun initializePlatformBridge(environment: SDKEnvironment, apiKey * Initialize platform-specific bridge services (Phase 2). * On JVM/Android, this calls CppBridge.initializeServices(). */ -internal expect fun initializePlatformBridgeServices() +internal expect suspend fun initializePlatformBridgeServices() /** * Shutdown platform-specific bridge. 
diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt index 053e0d155..a09cf6b1d 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt @@ -206,6 +206,7 @@ data class LoRAAdapterConfig( ) { init { require(path.isNotBlank()) { "LoRA adapter path cannot be blank" } + require(scale.isFinite() && scale > 0f) { "LoRA adapter scale must be positive and finite (got $scale)" } } } diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/Models/ModelTypes.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/Models/ModelTypes.kt index db78834ca..3a402ca3e 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/Models/ModelTypes.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/Models/ModelTypes.kt @@ -45,6 +45,7 @@ enum class ModelFormat( ORT("ort"), GGUF("gguf"), BIN("bin"), + QNN_CONTEXT("qnn_context"), UNKNOWN("unknown"), } @@ -115,6 +116,7 @@ enum class ModelSelectionContext( when (this) { LLM -> framework == com.runanywhere.sdk.core.types.InferenceFramework.LLAMA_CPP || + framework == com.runanywhere.sdk.core.types.InferenceFramework.GENIE || framework == com.runanywhere.sdk.core.types.InferenceFramework.FOUNDATION_MODELS STT -> framework == com.runanywhere.sdk.core.types.InferenceFramework.ONNX diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt new file mode 100644 index 000000000..c514e5d31 --- /dev/null +++ 
b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt @@ -0,0 +1,21 @@ +package com.runanywhere.sdk.public.extensions + +import com.runanywhere.sdk.core.types.NPUChip +import com.runanywhere.sdk.public.RunAnywhere + +/** + * Detect the device's NPU chipset for Genie model compatibility. + * + * Returns the [NPUChip] if the device has a supported Qualcomm SoC, + * or null if the device does not support NPU inference. + * + * Use [NPUChip.downloadUrl] to construct chipset-specific download URLs: + * ```kotlin + * val chip = RunAnywhere.getChip() + * if (chip != null) { + * val url = chip.downloadUrl("qwen3-4b") + * RunAnywhere.registerModel(id = "qwen3-4b-npu", name = "Qwen3 4B NPU", url = url, ...) + * } + * ``` + */ +expect fun RunAnywhere.getChip(): NPUChip? diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/CppBridge.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/CppBridge.kt index d273c86ab..b384250a2 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/CppBridge.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/CppBridge.kt @@ -558,8 +558,12 @@ object CppBridge { * @return true if rac_is_initialized() returns true */ fun isNativeInitialized(): Boolean { - // TODO: Call rac_is_initialized() - return _isInitialized + if (!_isInitialized || !isNativeLibraryLoaded) return false + return try { + RunAnywhereBridge.racIsInitialized() + } catch (_: Exception) { + _isInitialized + } } // ============================================================================= diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeEvents.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeEvents.kt index dab3443a7..266702b92 
100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeEvents.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeEvents.kt @@ -804,6 +804,7 @@ object CppBridgeEvents { const val COREML = 4 const val FOUNDATION = 5 const val SYSTEM = 6 + const val GENIE = 10 } /** diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelPaths.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelPaths.kt index d06c39c65..14e25a7ba 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelPaths.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelPaths.kt @@ -908,9 +908,21 @@ object CppBridgeModelPaths { val finalPath = getModelPathByTypeCallback(modelId, modelType) val finalFile = File(finalPath) - // Delete existing file if present + // Delete existing file/directory if present if (finalFile.exists()) { - finalFile.delete() + val deleted = if (finalFile.isDirectory) { + finalFile.deleteRecursively() + } else { + finalFile.delete() + } + if (!deleted) { + CppBridgePlatformAdapter.logCallback( + CppBridgePlatformAdapter.LogLevel.ERROR, + TAG, + "Failed to delete existing destination: ${finalFile.absolutePath} (isDir=${finalFile.isDirectory})", + ) + return false + } } // Move file diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelRegistry.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelRegistry.kt index 74596ac8f..410022e07 100644 --- 
a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelRegistry.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeModelRegistry.kt @@ -14,6 +14,7 @@ package com.runanywhere.sdk.foundation.bridge.extensions import com.runanywhere.sdk.native.bridge.RunAnywhereBridge +import java.io.File /** * Model registry bridge that provides direct access to the C++ model registry. @@ -73,13 +74,13 @@ object CppBridgeModelRegistry { * Model format constants matching C++ RAC_MODEL_FORMAT_* values. */ object ModelFormat { - const val UNKNOWN = 0 // RAC_MODEL_FORMAT_UNKNOWN - const val GGUF = 1 // RAC_MODEL_FORMAT_GGUF - const val ONNX = 2 // RAC_MODEL_FORMAT_ONNX - const val ORT = 3 // RAC_MODEL_FORMAT_ORT - const val BIN = 4 // RAC_MODEL_FORMAT_BIN - const val COREML = 5 // RAC_MODEL_FORMAT_COREML - const val TFLITE = 6 // RAC_MODEL_FORMAT_TFLITE + const val ONNX = 0 // RAC_MODEL_FORMAT_ONNX + const val ORT = 1 // RAC_MODEL_FORMAT_ORT + const val GGUF = 2 // RAC_MODEL_FORMAT_GGUF + const val BIN = 3 // RAC_MODEL_FORMAT_BIN + const val COREML = 4 // RAC_MODEL_FORMAT_COREML + const val QNN_CONTEXT = 5 // RAC_MODEL_FORMAT_QNN_CONTEXT + const val UNKNOWN = 99 // RAC_MODEL_FORMAT_UNKNOWN } /** @@ -94,6 +95,7 @@ object CppBridgeModelRegistry { const val FLUID_AUDIO = 4 // RAC_FRAMEWORK_FLUID_AUDIO const val BUILTIN = 5 // RAC_FRAMEWORK_BUILTIN const val NONE = 6 // RAC_FRAMEWORK_NONE + const val GENIE = 10 // RAC_FRAMEWORK_GENIE const val UNKNOWN = 99 // RAC_FRAMEWORK_UNKNOWN } @@ -214,7 +216,7 @@ object CppBridgeModelRegistry { } /** - * Update download status in C++ registry. + * Update download status in C++ registry (in-memory only). * * @param modelId The model ID * @param localPath The local path (or null to clear download) @@ -258,71 +260,43 @@ object CppBridgeModelRegistry { } /** - * Scan filesystem and restore downloaded models. 
+ * Scan filesystem and restore downloaded models whose filename matches their model ID. + * This handles single-file models (GGUF, ONNX) and archive models that extracted into + * a named directory matching the model ID. * - * This is called during SDK initialization to detect previously - * downloaded models and update their status in the C++ registry. + * For archive models with flat extraction (e.g. Genie), see + * [RunAnywhere.restorePersistedDownloadPaths] in RunAnywhere+ModelManagement.jvmAndroid.kt. */ fun scanAndRestoreDownloadedModels() { - log(LogLevel.DEBUG, "Scanning for previously downloaded models...") - val baseDir = CppBridgeModelPaths.getBaseDirectory() - val modelsDir = java.io.File(baseDir, "models") + val modelsDir = File(baseDir, "models") if (!modelsDir.exists()) { log(LogLevel.DEBUG, "Models directory does not exist: ${modelsDir.absolutePath}") return } - val typeDirectories = - mapOf( - "llm" to ModelCategory.LANGUAGE, - "stt" to ModelCategory.SPEECH_RECOGNITION, - "tts" to ModelCategory.SPEECH_SYNTHESIS, - "vad" to ModelCategory.AUDIO, - - // RAG - "embedding" to ModelType.EMBEDDING, - - // Vision / VLM - "vision" to ModelCategory.VISION, - "multimodal" to ModelCategory.MULTIMODAL, - - // Backward compatibility - "other" to -1, - ) - + log(LogLevel.DEBUG, "Scanning for previously downloaded models...") var restoredCount = 0 - for ((dirName, _) in typeDirectories) { - val typeDir = java.io.File(modelsDir, dirName) + val typeDirectories = listOf("llm", "stt", "tts", "vad", "embedding", "vision", "multimodal", "other") + for (dirName in typeDirectories) { + val typeDir = File(modelsDir, dirName) if (!typeDir.exists() || !typeDir.isDirectory) continue - log(LogLevel.DEBUG, "Scanning type directory: ${typeDir.absolutePath}") - - // Scan each model file or folder in this type directory typeDir.listFiles()?.forEach { modelPath -> - // Model can be stored as: - // 1. A directory containing the model (e.g., models/llm/model-name/) - // 2. 
A file directly (e.g., models/llm/model-name) val modelId = modelPath.name - log(LogLevel.DEBUG, "Found: $modelId (isDir=${modelPath.isDirectory}, isFile=${modelPath.isFile})") - - // Check if this model exists in registry val existingModel = get(modelId) - if (existingModel != null) { - // Update with local path + if (existingModel != null && existingModel.localPath == null) { if (updateDownloadStatus(modelId, modelPath.absolutePath)) { restoredCount++ - log(LogLevel.DEBUG, "Restored downloaded model: $modelId at ${modelPath.absolutePath}") + log(LogLevel.DEBUG, "Restored $modelId at ${modelPath.absolutePath}") } - } else { - log(LogLevel.DEBUG, "Model $modelId not found in registry, skipping") } } } - log(LogLevel.INFO, "Scan complete: Restored $restoredCount previously downloaded models") + log(LogLevel.INFO, "Filesystem scan complete: restored $restoredCount models") } // ======================================================================== diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/PlatformBridge.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/PlatformBridge.kt index 610e4459d..a7c95737d 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/PlatformBridge.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/PlatformBridge.kt @@ -11,7 +11,6 @@ package com.runanywhere.sdk.public import com.runanywhere.sdk.foundation.SDKLogger import com.runanywhere.sdk.foundation.bridge.CppBridge import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeTelemetry -import kotlinx.coroutines.runBlocking private const val TAG = "PlatformBridge" private val logger = SDKLogger(TAG) @@ -49,15 +48,9 @@ internal actual fun initializePlatformBridge(environment: SDKEnvironment, apiKey * Initialize CppBridge services (Phase 2). * This includes model assignment, platform services, and device registration. 
*/ -internal actual fun initializePlatformBridgeServices() { +internal actual suspend fun initializePlatformBridgeServices() { logger.info("Initializing CppBridge services...") - - // Use runBlocking to call the suspend function - // This is safe because services initialization is typically called once - runBlocking { - CppBridge.initializeServices() - } - + CppBridge.initializeServices() logger.info("CppBridge services initialization complete") } diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+LoRA.jvmAndroid.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+LoRA.jvmAndroid.kt index 4df98117e..eb15ae516 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+LoRA.jvmAndroid.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+LoRA.jvmAndroid.kt @@ -47,7 +47,7 @@ actual suspend fun RunAnywhere.loadLoraAdapter(config: LoRAAdapterConfig) { val result = CppBridgeLLM.loadLoraAdapter(config.path, config.scale) if (result != 0) { - throw SDKError.llm("Failed to load LoRA adapter: error $result") + throw SDKError.llm("Failed to load LoRA adapter '${config.path}': error code $result") } } @@ -250,6 +250,21 @@ actual fun RunAnywhere.downloadLoraAdapter(adapterId: String): Flow + val bytes = ByteArray(4) + if (stream.read(bytes) != 4) return@use false + val magic = (bytes[0].toUInt() and 0xFFu) or + ((bytes[1].toUInt() and 0xFFu) shl 8) or + ((bytes[2].toUInt() and 0xFFu) shl 16) or + ((bytes[3].toUInt() and 0xFFu) shl 24) + magic == 0x46554747u // "GGUF" in little-endian + } + if (!isValidGguf) { + destFile.delete() + throw SDKError.download("Downloaded LoRA adapter is not a valid GGUF file: ${entry.filename}") + } + loraLogger.info("LoRA download completed: ${destFile.absolutePath}") emit(DownloadProgress( modelId = adapterId, progress = 1f, diff 
--git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+ModelManagement.jvmAndroid.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+ModelManagement.jvmAndroid.kt index 15661c1cd..71e838614 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+ModelManagement.jvmAndroid.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+ModelManagement.jvmAndroid.kt @@ -15,6 +15,7 @@ import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeLLM import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeModelPaths import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeModelRegistry import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeSTT +import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeStorage import com.runanywhere.sdk.foundation.errors.SDKError import com.runanywhere.sdk.public.RunAnywhere import com.runanywhere.sdk.public.extensions.Models.DownloadProgress @@ -92,6 +93,8 @@ internal actual fun registerModelInternal(modelInfo: ModelInfo) { ModelFormat.GGUF -> CppBridgeModelRegistry.ModelFormat.GGUF ModelFormat.ONNX -> CppBridgeModelRegistry.ModelFormat.ONNX ModelFormat.ORT -> CppBridgeModelRegistry.ModelFormat.ORT + ModelFormat.BIN -> CppBridgeModelRegistry.ModelFormat.BIN + ModelFormat.QNN_CONTEXT -> CppBridgeModelRegistry.ModelFormat.QNN_CONTEXT else -> CppBridgeModelRegistry.ModelFormat.UNKNOWN }, // CRITICAL: Map InferenceFramework to C++ framework constant @@ -104,6 +107,7 @@ internal actual fun registerModelInternal(modelInfo: ModelInfo) { InferenceFramework.FLUID_AUDIO -> CppBridgeModelRegistry.Framework.FLUID_AUDIO InferenceFramework.BUILT_IN -> CppBridgeModelRegistry.Framework.BUILTIN InferenceFramework.NONE -> CppBridgeModelRegistry.Framework.NONE + InferenceFramework.GENIE -> 
CppBridgeModelRegistry.Framework.GENIE InferenceFramework.UNKNOWN -> CppBridgeModelRegistry.Framework.UNKNOWN }, downloadUrl = modelInfo.downloadURL, @@ -294,6 +298,8 @@ private fun bridgeModelToPublic(bridge: CppBridgeModelRegistry.ModelInfo): Model CppBridgeModelRegistry.ModelFormat.GGUF -> ModelFormat.GGUF CppBridgeModelRegistry.ModelFormat.ONNX -> ModelFormat.ONNX CppBridgeModelRegistry.ModelFormat.ORT -> ModelFormat.ORT + CppBridgeModelRegistry.ModelFormat.BIN -> ModelFormat.BIN + CppBridgeModelRegistry.ModelFormat.QNN_CONTEXT -> ModelFormat.QNN_CONTEXT else -> ModelFormat.UNKNOWN }, framework = @@ -302,6 +308,7 @@ private fun bridgeModelToPublic(bridge: CppBridgeModelRegistry.ModelInfo): Model CppBridgeModelRegistry.Framework.ONNX -> InferenceFramework.ONNX CppBridgeModelRegistry.Framework.FOUNDATION_MODELS -> InferenceFramework.FOUNDATION_MODELS CppBridgeModelRegistry.Framework.SYSTEM_TTS -> InferenceFramework.SYSTEM_TTS + CppBridgeModelRegistry.Framework.GENIE -> InferenceFramework.GENIE else -> InferenceFramework.UNKNOWN }, downloadURL = bridge.downloadUrl, @@ -506,6 +513,7 @@ actual fun RunAnywhere.downloadModel(modelId: String): Flow { val updatedModelInfo = modelInfo.copy(localPath = finalPath) addToModelCache(updatedModelInfo) CppBridgeModelRegistry.updateDownloadStatus(modelId, finalPath) + CppBridgeStorage.storeString(CppBridgeStorage.StorageNamespace.DOWNLOADS, modelId, finalPath) downloadLogger.info("Multi-file model ready at: $finalPath") @@ -722,6 +730,7 @@ actual fun RunAnywhere.downloadModel(modelId: String): Flow { val updatedModelInfo = modelInfo.copy(localPath = finalModelPath) addToModelCache(updatedModelInfo) CppBridgeModelRegistry.updateDownloadStatus(modelId, finalModelPath) + CppBridgeStorage.storeString(CppBridgeStorage.StorageNamespace.DOWNLOADS, modelId, finalModelPath) downloadLogger.info("Model ready at: $finalModelPath") @@ -811,6 +820,9 @@ private suspend fun extractArchive( // Use the URL to determine archive type (file may be 
saved without extension) val lowercaseUrl = originalUrl.lowercase() + // Snapshot existing items BEFORE extraction to detect newly extracted flat files + val itemsBeforeExtraction = parentDir.listFiles()?.map { it.name }?.toSet() ?: emptySet() + // IMPORTANT: The archive file name might conflict with the folder inside the archive // (e.g., file "sherpa-onnx-whisper-tiny.en" and archive contains folder "sherpa-onnx-whisper-tiny.en/") // We need to rename/move the archive before extracting to avoid ENOTDIR errors @@ -860,19 +872,52 @@ private suspend fun extractArchive( } } - // Find the extracted model directory - // The archive should have created a folder with the model ID name + // Find the extracted model directory. + // Compute new items by comparing current contents against the pre-extraction snapshot. + // Explicitly exclude the temp archive name in case cleanup failed (delete() returns false + // without throwing), to avoid moving a multi-GB archive file into the model directory. + val tempArchiveName = tempArchiveFile.name val expectedModelDir = File(parentDir, modelId) + val newItems = parentDir.listFiles() + ?.filter { it.name !in itemsBeforeExtraction && it.name != tempArchiveName } + ?: emptyList() + val newDirs = newItems.filter { it.isDirectory } + val newFiles = newItems.filter { it.isFile } + val finalPath = if (expectedModelDir.exists() && expectedModelDir.isDirectory) { + // Standard case: archive root folder name matches modelId expectedModelDir.absolutePath + } else if (newDirs.size == 1 && newFiles.isEmpty()) { + // Archive had a single root directory with a different name (e.g. Genie NPU + // tar.gz containing "llama_v3_2_1b_instruct-genie-w4-qualcomm_snapdragon_8_elite/"). + // Rename it to the expected modelId so the SDK can discover it consistently. 
+ val extractedDir = newDirs.first() + if (extractedDir.renameTo(expectedModelDir)) { + logger.info("Renamed extracted dir '${extractedDir.name}' -> '$modelId'") + expectedModelDir.absolutePath + } else { + logger.warn("Could not rename '${extractedDir.name}' -> '$modelId', using as-is") + extractedDir.absolutePath + } } else { - // Fallback: look for any new directory created - parentDir - .listFiles() - ?.firstOrNull { - it.isDirectory && it.name.contains(modelId.substringBefore("-")) - }?.absolutePath ?: parentDir.absolutePath + // Flat archive: files extracted directly into parentDir. + // Move them into a per-model subdirectory so the SDK filesystem + // scan can discover this model by its ID across app restarts. + expectedModelDir.mkdirs() + val itemsToMove = newItems.filter { it != expectedModelDir } + var movedCount = 0 + itemsToMove.forEach { file -> + val dest = File(expectedModelDir, file.name) + if (!file.renameTo(dest)) { + logger.warn("Failed to move '${file.name}' into model dir, trying copy") + file.copyTo(dest, overwrite = true) + file.delete() + } + movedCount++ + } + logger.info("Moved $movedCount flat-extracted files into: ${expectedModelDir.absolutePath}") + expectedModelDir.absolutePath } logger.info("Model extracted to: $finalPath") @@ -1063,6 +1108,7 @@ private suspend fun downloadEmbeddingModelFiles( } } CppBridgeModelRegistry.updateDownloadStatus(modelId, dirPath) + CppBridgeStorage.storeString(CppBridgeStorage.StorageNamespace.DOWNLOADS, modelId, dirPath) CppBridgeEvents.emitDownloadCompleted(modelId, 0.0, 0) logger.info("Embedding model ready at: $dirPath") @@ -1141,6 +1187,7 @@ actual suspend fun RunAnywhere.deleteModel(modelId: String) { if (!isInitialized) { throw SDKError.notInitialized("SDK not initialized") } + CppBridgeStorage.delete(CppBridgeStorage.StorageNamespace.DOWNLOADS, modelId) CppBridgeModelRegistry.remove(modelId) } @@ -1341,17 +1388,20 @@ private fun parseModelAssignmentsJson(json: String): List { }, format = when 
(formatInt) { - 1 -> ModelFormat.GGUF - 2 -> ModelFormat.ONNX - 3 -> ModelFormat.ORT + CppBridgeModelRegistry.ModelFormat.ONNX -> ModelFormat.ONNX + CppBridgeModelRegistry.ModelFormat.ORT -> ModelFormat.ORT + CppBridgeModelRegistry.ModelFormat.GGUF -> ModelFormat.GGUF + CppBridgeModelRegistry.ModelFormat.BIN -> ModelFormat.BIN + CppBridgeModelRegistry.ModelFormat.QNN_CONTEXT -> ModelFormat.QNN_CONTEXT else -> ModelFormat.UNKNOWN }, framework = when (frameworkInt) { - 1 -> InferenceFramework.LLAMA_CPP - 2 -> InferenceFramework.ONNX - 3 -> InferenceFramework.FOUNDATION_MODELS - 4 -> InferenceFramework.SYSTEM_TTS + CppBridgeModelRegistry.Framework.LLAMACPP -> InferenceFramework.LLAMA_CPP + CppBridgeModelRegistry.Framework.ONNX -> InferenceFramework.ONNX + CppBridgeModelRegistry.Framework.FOUNDATION_MODELS -> InferenceFramework.FOUNDATION_MODELS + CppBridgeModelRegistry.Framework.SYSTEM_TTS -> InferenceFramework.SYSTEM_TTS + CppBridgeModelRegistry.Framework.GENIE -> InferenceFramework.GENIE else -> InferenceFramework.UNKNOWN }, downloadURL = downloadUrl, diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Storage.jvmAndroid.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Storage.jvmAndroid.kt index 622ac9f39..c2bbd256b 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Storage.jvmAndroid.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Storage.jvmAndroid.kt @@ -206,6 +206,7 @@ private fun convertToModelStorageMetrics( CppBridgeModelRegistry.Framework.FLUID_AUDIO -> InferenceFramework.FLUID_AUDIO CppBridgeModelRegistry.Framework.BUILTIN -> InferenceFramework.BUILT_IN CppBridgeModelRegistry.Framework.NONE -> InferenceFramework.NONE + CppBridgeModelRegistry.Framework.GENIE -> InferenceFramework.GENIE else -> InferenceFramework.UNKNOWN } @@ 
-230,6 +231,7 @@ private fun convertToModelStorageMetrics( CppBridgeModelRegistry.ModelFormat.ONNX -> ModelFormat.ONNX CppBridgeModelRegistry.ModelFormat.ORT -> ModelFormat.ORT CppBridgeModelRegistry.ModelFormat.BIN -> ModelFormat.BIN + CppBridgeModelRegistry.ModelFormat.QNN_CONTEXT -> ModelFormat.QNN_CONTEXT else -> ModelFormat.UNKNOWN } diff --git a/sdk/runanywhere-kotlin/src/jvmMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt b/sdk/runanywhere-kotlin/src/jvmMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt new file mode 100644 index 000000000..da85a2394 --- /dev/null +++ b/sdk/runanywhere-kotlin/src/jvmMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt @@ -0,0 +1,9 @@ +package com.runanywhere.sdk.public.extensions + +import com.runanywhere.sdk.core.types.NPUChip +import com.runanywhere.sdk.public.RunAnywhere + +/** + * JVM stub — NPU chip detection is not applicable on desktop. + */ +actual fun RunAnywhere.getChip(): NPUChip? 
= null diff --git a/sdk/runanywhere-react-native/packages/core/android/CMakeLists.txt b/sdk/runanywhere-react-native/packages/core/android/CMakeLists.txt index 7b56bf009..96c0e67f9 100644 --- a/sdk/runanywhere-react-native/packages/core/android/CMakeLists.txt +++ b/sdk/runanywhere-react-native/packages/core/android/CMakeLists.txt @@ -27,8 +27,8 @@ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,max-page-size # Path to pre-built native libraries (downloaded from runanywhere-sdks) set(JNILIB_DIR ${CMAKE_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}) -# Path to RAC headers (downloaded with native libraries) -set(RAC_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/src/main/include) +# Path to RAC headers (downloaded with native libraries into jniLibs) +set(RAC_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/src/main/jniLibs/include) # ============================================================================= # RACommons - Core SDK functionality (rac_* API) @@ -60,6 +60,10 @@ file(GLOB BRIDGE_SOURCES "../cpp/bridges/*.cpp") # TODO: Re-enable ToolCallingBridge when commons library includes rac_tool_call_* functions # Currently disabled because these functions aren't in commons v0.1.4 list(FILTER BRIDGE_SOURCES EXCLUDE REGEX ".*ToolCallingBridge\\.cpp$") +# TODO: Re-enable RAGBridge when commons library includes rac_rag_* functions +list(FILTER BRIDGE_SOURCES EXCLUDE REGEX ".*RAGBridge\\.cpp$") +# TODO: Re-enable CompatibilityBridge when commons library includes rac_model_check_compatibility +list(FILTER BRIDGE_SOURCES EXCLUDE REGEX ".*CompatibilityBridge\\.cpp$") add_library(${PACKAGE_NAME} SHARED src/main/cpp/cpp-adapter.cpp @@ -109,6 +113,7 @@ include_directories( "${RAC_INCLUDE_DIR}/rac/features/voice_agent" "${RAC_INCLUDE_DIR}/rac/features/rag" "${RAC_INCLUDE_DIR}/rac/features/platform" + "${RAC_INCLUDE_DIR}/rac/backends" "${RAC_INCLUDE_DIR}/rac/infrastructure" "${RAC_INCLUDE_DIR}/rac/infrastructure/device" "${RAC_INCLUDE_DIR}/rac/infrastructure/download" diff --git 
a/sdk/runanywhere-react-native/packages/core/android/src/main/java/com/margelo/nitro/runanywhere/PlatformAdapterBridge.kt b/sdk/runanywhere-react-native/packages/core/android/src/main/java/com/margelo/nitro/runanywhere/PlatformAdapterBridge.kt index 60e645e6f..b69bc2967 100644 --- a/sdk/runanywhere-react-native/packages/core/android/src/main/java/com/margelo/nitro/runanywhere/PlatformAdapterBridge.kt +++ b/sdk/runanywhere-react-native/packages/core/android/src/main/java/com/margelo/nitro/runanywhere/PlatformAdapterBridge.kt @@ -379,10 +379,16 @@ object PlatformAdapterBridge { } /** - * Get chip name (e.g., "Tensor G3") + * Get chip name (e.g., "SM8750") */ @JvmStatic fun getChipName(): String { + if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.S) { + val socModel = android.os.Build.SOC_MODEL + if (!socModel.isNullOrEmpty() && socModel != "unknown") { + return socModel + } + } return android.os.Build.HARDWARE } diff --git a/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp b/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp index 12506abf1..8c9c60f2e 100644 --- a/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp +++ b/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp @@ -271,6 +271,7 @@ rac_inference_framework_t frameworkFromString(const std::string& framework) { #endif if (framework == "FoundationModels") return RAC_FRAMEWORK_FOUNDATION_MODELS; if (framework == "SystemTTS") return RAC_FRAMEWORK_SYSTEM_TTS; + if (framework == "Genie" || framework == "genie") return (rac_inference_framework_t)10; // RAC_FRAMEWORK_GENIE return RAC_FRAMEWORK_UNKNOWN; } @@ -905,6 +906,7 @@ std::shared_ptr> HybridRunAnywhereCore::getAvailableModels( #endif case RAC_FRAMEWORK_FOUNDATION_MODELS: frameworkStr = "FoundationModels"; break; case RAC_FRAMEWORK_SYSTEM_TTS: frameworkStr = "SystemTTS"; break; + case 10: frameworkStr = "Genie"; break; // RAC_FRAMEWORK_GENIE default: 
frameworkStr = "unknown"; break; } @@ -978,6 +980,7 @@ std::shared_ptr> HybridRunAnywhereCore::getModelInfo( #endif case RAC_FRAMEWORK_FOUNDATION_MODELS: frameworkStr = "FoundationModels"; break; case RAC_FRAMEWORK_SYSTEM_TTS: frameworkStr = "SystemTTS"; break; + case 10: frameworkStr = "Genie"; break; // RAC_FRAMEWORK_GENIE default: frameworkStr = "unknown"; break; } @@ -1307,7 +1310,10 @@ std::shared_ptr> HybridRunAnywhereCore::httpGet( // ============================================================================ std::shared_ptr> HybridRunAnywhereCore::getLastError() { - return Promise::async([this]() { return lastError_; }); + return Promise::async([this]() { + std::lock_guard lock(errorMutex_); + return lastError_; + }); } // Forward declaration for platform-specific archive extraction @@ -1432,7 +1438,10 @@ std::shared_ptr> HybridRunAnywhereCore::getMemoryUsage() { // ============================================================================ void HybridRunAnywhereCore::setLastError(const std::string& error) { - lastError_ = error; + { + std::lock_guard lock(errorMutex_); + lastError_ = error; + } LOGE("%s", error.c_str()); } @@ -1549,12 +1558,15 @@ std::shared_ptr> HybridRunAnywhereCore::generate( std::string text = llmResult.text ? 
llmResult.text : ""; int tokensUsed = llmResult.completion_tokens; + double latencyMs = llmResult.total_time_ms; + + rac_llm_result_free(&llmResult); return buildJsonObject({ {"text", jsonString(text)}, {"tokensUsed", std::to_string(tokensUsed)}, {"modelUsed", jsonString("llm")}, - {"latencyMs", std::to_string(llmResult.total_time_ms)} + {"latencyMs", std::to_string(latencyMs)} }); }); } @@ -1981,11 +1993,12 @@ std::shared_ptr> HybridRunAnywhereCore::transcribeFile( while (pos + 8 < dataSize) { char chunkId[5] = {0}; memcpy(chunkId, &data[pos], 4); - uint32_t chunkSize = *reinterpret_cast(&data[pos + 4]); + uint32_t chunkSize; + memcpy(&chunkSize, &data[pos + 4], sizeof(chunkSize)); if (strcmp(chunkId, "fmt ") == 0) { if (pos + 8 + chunkSize <= dataSize && chunkSize >= 16) { - sampleRate = *reinterpret_cast(&data[pos + 12]); + memcpy(&sampleRate, &data[pos + 12], sizeof(sampleRate)); if (sampleRate <= 0 || sampleRate > 48000) sampleRate = 16000; LOGI("WAV sample rate: %d Hz", sampleRate); } @@ -2039,11 +2052,11 @@ std::shared_ptr> HybridRunAnywhereCore::transcribeFile( rac_stt_result_free(&result); LOGI("Transcription result: %s", transcribedText.c_str()); - return transcribedText; + return "{\"text\":" + jsonString(transcribedText) + ",\"confidence\":0}"; } catch (const std::exception& e) { std::string msg = e.what(); LOGI("TranscribeFile exception: %s", msg.c_str()); - return "{\"error\":\"" + msg + "\"}"; + return "{\"error\":" + jsonString(msg) + "}"; } catch (...) 
{ return "{\"error\":\"Transcription failed (unknown error)\"}"; } diff --git a/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.hpp b/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.hpp index 880458e5a..c7fa08019 100644 --- a/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.hpp +++ b/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.hpp @@ -292,6 +292,7 @@ class HybridRunAnywhereCore : public HybridRunAnywhereCoreSpec { private: // Thread safety std::mutex initMutex_; + std::mutex errorMutex_; // State tracking std::string lastError_; diff --git a/sdk/runanywhere-react-native/packages/core/cpp/bridges/CompatibilityBridge.hpp b/sdk/runanywhere-react-native/packages/core/cpp/bridges/CompatibilityBridge.hpp index aba0f11c7..3087af02b 100644 --- a/sdk/runanywhere-react-native/packages/core/cpp/bridges/CompatibilityBridge.hpp +++ b/sdk/runanywhere-react-native/packages/core/cpp/bridges/CompatibilityBridge.hpp @@ -2,8 +2,8 @@ * @file CompatibilityBridge.hpp * @brief C++ bridge for model compatibility checks. * - * Uses DeviceBridge and StorageBridge to query device capabilities, - * then calls rac_model_check_compatibility() from runanywhere-commons. + * NOTE: Stub implementation — rac_model_check_compatibility() not yet in librac_commons.so. + * Returns permissive result (always compatible) until the library is updated. */ #pragma once @@ -11,9 +11,15 @@ #include #include -#include "rac_types.h" -#include "rac_model_compatibility.h" -#include "rac_model_registry.h" +// rac_model_registry_handle_t is defined in rac_model_registry.h. +// In a stub context where the header may not be on the search path, +// fall back to void* — the real type is struct rac_model_registry*. +// (ModelRegistryBridge.hpp includes rac_model_registry.h first in practice.) 
+#ifdef RAC_MODEL_REGISTRY_H +// Already included via ModelRegistryBridge.hpp — type is already defined +#else +typedef void* rac_model_registry_handle_t; +#endif namespace runanywhere { namespace bridges { @@ -22,9 +28,9 @@ namespace bridges { * Compatibility result wrapper */ struct CompatibilityResult { - bool isCompatible = false; - bool canRun = false; - bool canFit = false; + bool isCompatible = true; // Default permissive — function not yet in librac_commons + bool canRun = true; + bool canFit = true; int64_t requiredMemory = 0; int64_t availableMemory = 0; int64_t requiredStorage = 0; @@ -32,26 +38,17 @@ struct CompatibilityResult { }; /** - * CompatibilityBridge - Model compatibility checks - * - * Queries device capabilities via DeviceBridge and StorageBridge, - * then delegates to rac_model_check_compatibility() in runanywhere-commons. + * CompatibilityBridge - Model compatibility checks (stub) */ class CompatibilityBridge { public: - /** - * Check model compatibility against current device resources - * - * Automatically queries available RAM and storage via existing bridges. 
- * - * @param modelId Model identifier - * @param registryHandle Model registry handle - * @return CompatibilityResult with canRun, canFit, isCompatible - */ static CompatibilityResult checkCompatibility( - const std::string& modelId, - rac_model_registry_handle_t registryHandle - ); + const std::string& /*modelId*/, + rac_model_registry_handle_t /*registryHandle*/ + ) { + // Stub: rac_model_check_compatibility not yet available in librac_commons.so + return CompatibilityResult{}; + } }; } // namespace bridges diff --git a/sdk/runanywhere-react-native/packages/core/cpp/bridges/RAGBridge.hpp b/sdk/runanywhere-react-native/packages/core/cpp/bridges/RAGBridge.hpp index 004ba48b0..3b81e2fe8 100644 --- a/sdk/runanywhere-react-native/packages/core/cpp/bridges/RAGBridge.hpp +++ b/sdk/runanywhere-react-native/packages/core/cpp/bridges/RAGBridge.hpp @@ -4,6 +4,9 @@ * * Wraps rac_rag_pipeline_* C APIs for JSI access. * RAG is a pipeline (like Voice Agent), not a backend. + * + * NOTE: Stub implementation — rac_rag_* functions not yet in librac_commons.so. + * Returns safe defaults until the library is updated. 
*/ #pragma once @@ -11,29 +14,27 @@ #include #include -// Forward declare opaque pipeline handle -struct rac_rag_pipeline; -typedef struct rac_rag_pipeline rac_rag_pipeline_t; - namespace runanywhere { namespace bridges { class RAGBridge { public: - static RAGBridge& shared(); - - bool createPipeline(const std::string& configJson); - bool destroyPipeline(); - bool addDocument(const std::string& text, const std::string& metadataJson); - bool addDocumentsBatch(const std::string& documentsJson); - std::string query(const std::string& queryJson); - bool clearDocuments(); - double getDocumentCount(); - std::string getStatistics(); + static RAGBridge& shared() { + static RAGBridge instance; + return instance; + } + + bool createPipeline(const std::string& /*configJson*/) { return false; } + bool destroyPipeline() { return false; } + bool addDocument(const std::string& /*text*/, const std::string& /*metadataJson*/) { return false; } + bool addDocumentsBatch(const std::string& /*documentsJson*/) { return false; } + std::string query(const std::string& /*queryJson*/) { return "{}"; } + bool clearDocuments() { return false; } + double getDocumentCount() { return 0.0; } + std::string getStatistics() { return "{}"; } private: RAGBridge() = default; - rac_rag_pipeline_t* pipeline_ = nullptr; std::mutex mutex_; }; diff --git a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Device.ts b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Device.ts new file mode 100644 index 000000000..c3e9a5b42 --- /dev/null +++ b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Device.ts @@ -0,0 +1,59 @@ +/** + * RunAnywhere+Device.ts + * + * NPU chip detection extension. Android only. + * Returns null on iOS and other platforms. 
+ */ + +import { Platform } from 'react-native'; +import { requireDeviceInfoModule } from '../../native/NativeRunAnywhereCore'; +import { npuChipFromSocModel } from '../../types/NPUChip'; +import { SDKLogger } from '../../Foundation/Logging/Logger/SDKLogger'; +import type { NPUChip } from '../../types/NPUChip'; + +const logger = new SDKLogger('RunAnywhere.Device'); + +/** + * Detect the device's NPU chipset for Genie model compatibility. + * + * Returns the NPUChip if the device has a supported Qualcomm SoC, + * or null if the device is not Android or does not support NPU inference. + * + * @example + * ```typescript + * const chip = await getChip(); + * if (chip) { + * const url = getNPUDownloadUrl(chip, 'qwen'); + * await RunAnywhere.registerModel({ id: 'qwen-npu', name: 'Qwen NPU', url, ... }); + * } + * ``` + */ +export async function getChip(): Promise { + if (Platform.OS !== 'android') { + return null; + } + + try { + const deviceInfo = requireDeviceInfoModule(); + const chipName = await deviceInfo.getChipName(); + + if (!chipName || chipName === 'Unknown') { + logger.debug('No chip name available from device info'); + return null; + } + + const chip = npuChipFromSocModel(chipName); + if (chip) { + logger.info( + `Detected NPU chip: ${chip.displayName} (chipName=${chipName})` + ); + } else { + logger.debug(`No supported NPU chip for: ${chipName}`); + } + + return chip ?? 
null; + } catch (error) { + logger.debug('Failed to detect NPU chip'); + return null; + } +} diff --git a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Models.ts b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Models.ts index 035df0632..efb57d4db 100644 --- a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Models.ts +++ b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+Models.ts @@ -14,7 +14,7 @@ import { FileSystem, MultiFileModelCache } from '../../services/FileSystem'; import type { ModelFileDescriptor } from '../../services/FileSystem'; import { SDKLogger } from '../../Foundation/Logging/Logger/SDKLogger'; import type { ModelInfo, LLMFramework, ModelCompatibilityResult } from '../../types'; -import { ModelCategory, ModelArtifactType } from '../../types'; +import { ModelCategory, ModelArtifactType, ModelFormat, ConfigurationSource } from '../../types'; const logger = new SDKLogger('RunAnywhere.Models'); @@ -85,7 +85,10 @@ export async function getModelInfo(modelId: string): Promise { const infoJson = await native.getModelInfo(modelId); try { const result = JSON.parse(infoJson); - return result === 'null' ? null : result; + if (result === null || result === 'null' || (typeof result === 'object' && Object.keys(result).length === 0)) { + return null; + } + return result; } catch { return null; } @@ -227,7 +230,6 @@ export async function registerModel(options: { memoryRequirement?: number; supportsThinking?: boolean; }): Promise { - const { ModelFormat, ConfigurationSource } = await import('../../types/enums'); const now = new Date().toISOString(); const modelId = options.id ?? 
generateModelId(options.url); @@ -291,7 +293,6 @@ export async function registerMultiFileModel(options: { modality?: ModelCategory; memoryRequirement?: number; }): Promise { - const { ModelFormat, ConfigurationSource } = await import('../../types/enums'); const now = new Date().toISOString(); MultiFileModelCache.set(options.id, options.files); @@ -356,8 +357,7 @@ function inferFrameworkDir(framework: LLMFramework): string { } } -function inferFormat(url: string, framework?: LLMFramework): import('../../types/enums').ModelFormat { - const { ModelFormat } = require('../../types/enums'); +function inferFormat(url: string, framework?: LLMFramework): ModelFormat { const lower = url.toLowerCase(); if (lower.includes('.gguf')) return ModelFormat.GGUF; if (lower.includes('.onnx')) return ModelFormat.ONNX; @@ -511,8 +511,10 @@ export async function downloadModel( */ export async function cancelDownload(modelId: string): Promise { if (activeDownloads.has(modelId)) { + // Stop the native RNFS download job + FileSystem.cancelDownload(modelId); activeDownloads.delete(modelId); - logger.info(`Marked download as cancelled: ${modelId}`); + logger.info(`Cancelled download: ${modelId}`); return true; } return false; @@ -524,9 +526,19 @@ export async function cancelDownload(modelId: string): Promise { export async function deleteModel(modelId: string): Promise { try { const modelInfo = await ModelRegistry.getModel(modelId); - const extension = modelInfo?.downloadURL?.includes('.gguf') - ? '.gguf' - : ''; + const url = modelInfo?.downloadURL ?? 
''; + let extension = ''; + if (url.includes('.gguf')) { + extension = '.gguf'; + } else if (url.includes('.onnx')) { + extension = '.onnx'; + } else if (url.includes('.tar.bz2')) { + extension = '.tar.bz2'; + } else if (url.includes('.tar.gz')) { + extension = '.tar.gz'; + } else if (url.includes('.zip')) { + extension = '.zip'; + } const fileName = `${modelId}${extension}`; // Delete using FileSystem service diff --git a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+VLM.ts b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+VLM.ts index 3164a04f5..5eae9f858 100644 --- a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+VLM.ts +++ b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/RunAnywhere+VLM.ts @@ -20,11 +20,10 @@ import type { const logger = new SDKLogger('RunAnywhere.VLM'); -type VLMModule = typeof import('../../../llamacpp/src/RunAnywhere+VLM'); +// eslint-disable-next-line @typescript-eslint/no-explicit-any +let _vlmModule: any = null; -let _vlmModule: VLMModule | null = null; - -async function getVLMModule(): Promise { +async function getVLMModule(): Promise { if (_vlmModule) return _vlmModule; try { _vlmModule = require('@runanywhere/llamacpp'); diff --git a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/index.ts b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/index.ts index edd16ac24..b34fd7a10 100644 --- a/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/index.ts +++ b/sdk/runanywhere-react-native/packages/core/src/Public/Extensions/index.ts @@ -109,6 +109,9 @@ export type { StructuredOutputStreamResult } from './RunAnywhere+StructuredOutput'; +// Device (NPU Chip Detection) +export { getChip } from './RunAnywhere+Device'; + // Logging export { setLogLevel } from './RunAnywhere+Logging'; diff --git a/sdk/runanywhere-react-native/packages/core/src/Public/RunAnywhere.ts 
b/sdk/runanywhere-react-native/packages/core/src/Public/RunAnywhere.ts index 08786e1ad..9da6635a8 100644 --- a/sdk/runanywhere-react-native/packages/core/src/Public/RunAnywhere.ts +++ b/sdk/runanywhere-react-native/packages/core/src/Public/RunAnywhere.ts @@ -16,6 +16,7 @@ import { ServiceContainer } from '../Foundation/DependencyInjection/ServiceConta import { SDKLogger } from '../Foundation/Logging/Logger/SDKLogger'; import { SDKConstants } from '../Foundation/Constants'; import { FileSystem } from '../services/FileSystem'; +import { SecureStorageService } from '../Foundation/Security/SecureStorageService'; import { HTTPService, SDKEnvironment as NetworkSDKEnvironment, @@ -49,6 +50,7 @@ import * as StructuredOutput from './Extensions/RunAnywhere+StructuredOutput'; import * as Audio from './Extensions/RunAnywhere+Audio'; import * as ToolCalling from './Extensions/RunAnywhere+ToolCalling'; import * as RAG from './Extensions/RunAnywhere+RAG'; +import * as Device from './Extensions/RunAnywhere+Device'; import * as VLM from './Extensions/RunAnywhere+VLM'; const logger = new SDKLogger('RunAnywhere'); @@ -346,7 +348,6 @@ export const RunAnywhere = { // Store tokens in secure storage for persistence try { - const { SecureStorageService } = await import('../Foundation/Security/SecureStorageService'); await SecureStorageService.storeAuthTokens( authResponse.access_token, authResponse.refresh_token, @@ -501,6 +502,12 @@ export const RunAnywhere = { } }, + // ============================================================================ + // Device / NPU Chip Detection (Delegated to Extension) + // ============================================================================ + + getChip: Device.getChip, + // ============================================================================ // Logging (Delegated to Extension) // ============================================================================ diff --git a/sdk/runanywhere-react-native/packages/core/src/index.ts 
b/sdk/runanywhere-react-native/packages/core/src/index.ts index 744ab5fa4..3a07d8691 100644 --- a/sdk/runanywhere-react-native/packages/core/src/index.ts +++ b/sdk/runanywhere-react-native/packages/core/src/index.ts @@ -256,6 +256,19 @@ export { registerMultiFileModel, } from './Public/Extensions/RunAnywhere+Models'; +// ============================================================================= +// Device / NPU Chip Detection +// ============================================================================= + +export { getChip } from './Public/Extensions/RunAnywhere+Device'; +export type { NPUChip } from './types/NPUChip'; +export { + NPU_CHIPS, + NPU_BASE_URL, + getNPUDownloadUrl, + npuChipFromSocModel, +} from './types/NPUChip'; + // ============================================================================= // RAG Pipeline // ============================================================================= diff --git a/sdk/runanywhere-react-native/packages/core/src/services/FileSystem.ts b/sdk/runanywhere-react-native/packages/core/src/services/FileSystem.ts index 5fe867b65..25b23c6e1 100644 --- a/sdk/runanywhere-react-native/packages/core/src/services/FileSystem.ts +++ b/sdk/runanywhere-react-native/packages/core/src/services/FileSystem.ts @@ -121,6 +121,9 @@ try { const RUN_ANYWHERE_DIR = 'RunAnywhere'; const MODELS_DIR = 'Models'; +/** Tracks active RNFS download jobIds by modelId for cancellation support. */ +const activeDownloadJobs = new Map(); + /** * Describes a single file within a multi-file model. * Mirrors Swift SDK's ModelFileDescriptor. 
@@ -503,7 +506,15 @@ if (fw === 'LlamaCpp' && archiveType === null) { }, }); - const result = await downloadResult.promise; + // Track jobId for cancellation support + activeDownloadJobs.set(modelId, downloadResult.jobId); + + let result; + try { + result = await downloadResult.promise; + } finally { + activeDownloadJobs.delete(modelId); + } if (result.statusCode !== 200) { throw new Error(`Download failed with status: ${result.statusCode}`); @@ -867,6 +878,21 @@ if (fw === 'LlamaCpp' && archiveType === null) { } }, + /** + * Cancel an active download by modelId. + * Calls RNFS.stopDownload to abort the underlying HTTP request. + */ + cancelDownload(modelId: string): boolean { + const jobId = activeDownloadJobs.get(modelId); + if (jobId != null && RNFS) { + RNFS.stopDownload(jobId); + activeDownloadJobs.delete(modelId); + logger.info(`Cancelled download for: ${modelId} (jobId=${jobId})`); + return true; + } + return false; + }, + /** * Get available disk space in bytes */ diff --git a/sdk/runanywhere-react-native/packages/core/src/types/NPUChip.ts b/sdk/runanywhere-react-native/packages/core/src/types/NPUChip.ts new file mode 100644 index 000000000..73c99127c --- /dev/null +++ b/sdk/runanywhere-react-native/packages/core/src/types/NPUChip.ts @@ -0,0 +1,62 @@ +/** + * Supported NPU chipsets for on-device Genie model inference. + * + * Each chip has an `identifier` used in model IDs and an `npuSuffix` used + * to construct download URLs from the HuggingFace model repository. + * + * @example + * ```typescript + * const chip = await RunAnywhere.getChip(); + * if (chip) { + * const url = getNPUDownloadUrl(chip, 'qwen3-4b'); + * // → https://huggingface.co/runanywhere/genie-npu-models/resolve/main/qwen3-4b-genie-w4a16-8elite-gen5.tar.gz + * } + * ``` + */ + +export interface NPUChip { + identifier: string; + displayName: string; + socModel: string; + npuSuffix: string; +} + +/** Base URL for NPU model downloads on HuggingFace. 
*/ +export const NPU_BASE_URL = + 'https://huggingface.co/runanywhere/genie-npu-models/resolve/main/'; + +/** All supported NPU chipsets. */ +export const NPU_CHIPS: readonly NPUChip[] = [ + { + identifier: '8elite', + displayName: 'Snapdragon 8 Elite', + socModel: 'SM8750', + npuSuffix: '8elite', + }, + { + identifier: '8elite-gen5', + displayName: 'Snapdragon 8 Elite Gen 5', + socModel: 'SM8850', + npuSuffix: '8elite-gen5', + }, +] as const; + +/** + * Build a HuggingFace download URL for a chip. + * @param chip - The detected NPU chip + * @param modelSlug - Model slug (e.g. "qwen3-4b") → produces + * "qwen3-4b-genie-w4a16-8elite-gen5.tar.gz" + * @param quant - Quantization format (e.g. "w4a16", "w8a16"). Defaults to "w4a16". + */ +export function getNPUDownloadUrl(chip: NPUChip, modelSlug: string, quant = 'w4a16'): string { + return `${NPU_BASE_URL}${modelSlug}-genie-${quant}-${chip.npuSuffix}.tar.gz`; +} + +/** + * Match an NPU chip from a SoC model string (e.g. "SM8750"). + * Returns undefined if the SoC is not a supported NPU chipset. 
+ */ +export function npuChipFromSocModel(socModel: string): NPUChip | undefined { + const upper = socModel.toUpperCase(); + return NPU_CHIPS.find((chip) => upper.includes(chip.socModel)); +} diff --git a/sdk/runanywhere-react-native/packages/core/src/types/enums.ts b/sdk/runanywhere-react-native/packages/core/src/types/enums.ts index ebfa4efef..83029bb55 100644 --- a/sdk/runanywhere-react-native/packages/core/src/types/enums.ts +++ b/sdk/runanywhere-react-native/packages/core/src/types/enums.ts @@ -43,6 +43,7 @@ export enum LLMFramework { OpenAIWhisper = 'OpenAIWhisper', SystemTTS = 'SystemTTS', PiperTTS = 'PiperTTS', + Genie = 'Genie', } /** @@ -64,6 +65,7 @@ export const LLMFrameworkDisplayNames: Record = { [LLMFramework.OpenAIWhisper]: 'OpenAI Whisper', [LLMFramework.SystemTTS]: 'System TTS', [LLMFramework.PiperTTS]: 'Piper TTS', + [LLMFramework.Genie]: 'Qualcomm Genie', }; /** diff --git a/sdk/runanywhere-react-native/packages/core/src/types/index.ts b/sdk/runanywhere-react-native/packages/core/src/types/index.ts index 5bb71aca2..98a95c619 100644 --- a/sdk/runanywhere-react-native/packages/core/src/types/index.ts +++ b/sdk/runanywhere-react-native/packages/core/src/types/index.ts @@ -160,6 +160,15 @@ export type { ToolCallingResult, } from './ToolCallingTypes'; +// NPU Chip Types +export type { NPUChip } from './NPUChip'; +export { + NPU_CHIPS, + NPU_BASE_URL, + getNPUDownloadUrl, + npuChipFromSocModel, +} from './NPUChip'; + // VLM Types export type { VLMResult, diff --git a/sdk/runanywhere-react-native/packages/llamacpp/android/CMakeLists.txt b/sdk/runanywhere-react-native/packages/llamacpp/android/CMakeLists.txt index 4dace7942..53567b796 100644 --- a/sdk/runanywhere-react-native/packages/llamacpp/android/CMakeLists.txt +++ b/sdk/runanywhere-react-native/packages/llamacpp/android/CMakeLists.txt @@ -70,7 +70,7 @@ include(${CMAKE_SOURCE_DIR}/../nitrogen/generated/android/runanywherellama+autol # 
============================================================================= # Get core package include dir (for rac/*.h headers) get_filename_component(RN_NODE_MODULES "${CMAKE_SOURCE_DIR}/../../.." ABSOLUTE) -set(CORE_INCLUDE_DIR "${RN_NODE_MODULES}/@runanywhere/core/android/src/main/include") +set(CORE_INCLUDE_DIR "${RN_NODE_MODULES}/@runanywhere/core/android/src/main/jniLibs/include") set(CORE_JNILIB_DIR "${RN_NODE_MODULES}/@runanywhere/core/android/src/main/jniLibs/${ANDROID_ABI}") include_directories( @@ -89,7 +89,9 @@ include_directories( "${CORE_INCLUDE_DIR}/rac/features/tts" "${CORE_INCLUDE_DIR}/rac/features/vad" "${CORE_INCLUDE_DIR}/rac/features/voice_agent" + "${CORE_INCLUDE_DIR}/rac/features/vlm" "${CORE_INCLUDE_DIR}/rac/features/platform" + "${CORE_INCLUDE_DIR}/rac/backends" "${CORE_INCLUDE_DIR}/rac/infrastructure" "${CORE_INCLUDE_DIR}/rac/infrastructure/device" "${CORE_INCLUDE_DIR}/rac/infrastructure/download" diff --git a/sdk/runanywhere-react-native/packages/llamacpp/cpp/HybridRunAnywhereLlama.cpp b/sdk/runanywhere-react-native/packages/llamacpp/cpp/HybridRunAnywhereLlama.cpp index 8aa9c6505..54358c7e2 100644 --- a/sdk/runanywhere-react-native/packages/llamacpp/cpp/HybridRunAnywhereLlama.cpp +++ b/sdk/runanywhere-react-native/packages/llamacpp/cpp/HybridRunAnywhereLlama.cpp @@ -50,7 +50,11 @@ int extractIntValue(const std::string& json, const std::string& key, int default pos += searchKey.length(); while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++; if (pos >= json.size()) return defaultValue; - return std::stoi(json.substr(pos)); + try { + return std::stoi(json.substr(pos)); + } catch (...) 
{ + return defaultValue; + } } float extractFloatValue(const std::string& json, const std::string& key, float defaultValue) { @@ -60,7 +64,11 @@ float extractFloatValue(const std::string& json, const std::string& key, float d pos += searchKey.length(); while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++; if (pos >= json.size()) return defaultValue; - return std::stof(json.substr(pos)); + try { + return std::stof(json.substr(pos)); + } catch (...) { + return defaultValue; + } } std::string extractStringValue(const std::string& json, const std::string& key, const std::string& defaultValue = "") { diff --git a/sdk/runanywhere-react-native/packages/onnx/android/CMakeLists.txt b/sdk/runanywhere-react-native/packages/onnx/android/CMakeLists.txt index bb939c058..7d03bd7ea 100644 --- a/sdk/runanywhere-react-native/packages/onnx/android/CMakeLists.txt +++ b/sdk/runanywhere-react-native/packages/onnx/android/CMakeLists.txt @@ -110,7 +110,7 @@ include(${CMAKE_SOURCE_DIR}/../nitrogen/generated/android/runanywhereonnx+autoli # ============================================================================= # Get core package include dir (for rac/*.h headers) get_filename_component(RN_NODE_MODULES "${CMAKE_SOURCE_DIR}/../../.." ABSOLUTE) -set(CORE_INCLUDE_DIR "${RN_NODE_MODULES}/@runanywhere/core/android/src/main/include") +set(CORE_INCLUDE_DIR "${RN_NODE_MODULES}/@runanywhere/core/android/src/main/jniLibs/include") set(CORE_JNILIB_DIR "${RN_NODE_MODULES}/@runanywhere/core/android/src/main/jniLibs/${ANDROID_ABI}") include_directories(