diff --git a/.github/workflows/gradle-extraction-check.yml b/.github/workflows/gradle-extraction-check.yml new file mode 100644 index 00000000000..2b1a24c4cce --- /dev/null +++ b/.github/workflows/gradle-extraction-check.yml @@ -0,0 +1,25 @@ +name: Gradle Extraction Check + +on: + pull_request: + branches: + - '*' + +jobs: + test: + name: gradle extraction test + + runs-on: ubuntu-latest + timeout-minutes: 15 + + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.SOLR_DEVELOCITY_ACCESS_KEY }} + + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - uses: ./.github/actions/prepare-for-build + + - name: Run extraction module tests + run: ./gradlew --no-daemon solr:modules:extraction:check diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index a3783feaac1..d9525057058 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -194,6 +194,7 @@ squareup-okhttp3-okhttp = "4.12.0" stephenc-jcip = "1.0-1" swagger3 = "2.2.22" tdunning-tdigest = "3.3" +testcontainers = "1.20.4" thetaphi-forbiddenapis = "3.9" thisptr-jacksonjq = "0.0.13" threeten-bp = "1.6.8" @@ -512,6 +513,7 @@ stephenc-jcip-annotations = { module = "com.github.stephenc.jcip:jcip-annotation swagger3-annotations-jakarta = { module = "io.swagger.core.v3:swagger-annotations-jakarta", version.ref = "swagger3" } swagger3-jaxrs2-jakarta = { module = "io.swagger.core.v3:swagger-jaxrs2-jakarta", version.ref = "swagger3" } tdunning-tdigest = { module = "com.tdunning:t-digest", version.ref = "tdunning-tdigest" } +testcontainers = { module = "org.testcontainers:testcontainers", version.ref = "testcontainers" } thisptr-jacksonjq = { module = "net.thisptr:jackson-jq", version.ref = "thisptr-jacksonjq" } threeten-bp = { module = "org.threeten:threetenbp", version.ref = "threeten-bp" } xerces-impl = { module = "xerces:xercesImpl", version.ref = "xerces" } diff --git a/gradle/testing/randomization/policies/solr-tests.policy b/gradle/testing/randomization/policies/solr-tests.policy index 
2d3246c6d9b..7eb635db831 100644 --- a/gradle/testing/randomization/policies/solr-tests.policy +++ b/gradle/testing/randomization/policies/solr-tests.policy @@ -31,6 +31,9 @@ grant { permission java.io.FilePermission "${java.io.tmpdir}", "read,write"; permission java.io.FilePermission "${java.io.tmpdir}${/}-", "read,write,delete"; + // Allow Testcontainers to read user-level configuration + permission java.io.FilePermission "${user.home}${/}.testcontainers.properties", "read"; + permission java.io.FilePermission "${tests.linedocsfile}", "read"; // DirectoryFactoryTest messes with these (wtf?) permission java.io.FilePermission "/tmp/inst1/conf/solrcore.properties", "read"; @@ -130,11 +133,11 @@ grant { permission javax.management.MBeanServerPermission "findMBeanServer"; permission javax.management.MBeanServerPermission "releaseMBeanServer"; permission javax.management.MBeanTrustPermission "register"; - + // needed by crossdc permission javax.security.auth.AuthPermission "getLoginConfiguration"; permission javax.security.auth.AuthPermission "setLoginConfiguration"; - + // needed by benchmark permission java.security.SecurityPermission "insertProvider"; @@ -206,7 +209,7 @@ grant { // additional permissions based on system properties set by /bin/solr // NOTE: if the property is not set, the permission entry is ignored. -grant { +grant { permission java.io.FilePermission "${solr.jetty.keystore}", "read,write,delete,readlink"; permission java.io.FilePermission "${solr.jetty.keystore}${/}-", "read,write,delete,readlink"; diff --git a/solr/licenses/docker-java-LICENSE-ASL.txt b/solr/licenses/docker-java-LICENSE-ASL.txt new file mode 100644 index 00000000000..492933f08c2 --- /dev/null +++ b/solr/licenses/docker-java-LICENSE-ASL.txt @@ -0,0 +1,176 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ +"License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + +"Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ +"Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/solr/licenses/docker-java-NOTICE.txt b/solr/licenses/docker-java-NOTICE.txt new file mode 100644 index 00000000000..49a9e022cce --- /dev/null +++ b/solr/licenses/docker-java-NOTICE.txt @@ -0,0 +1,7 @@ +This product includes software developed by the docker-java project. + +Copyright (c) 2013-2025, docker-java project contributors + +Project: https://github.com/docker-java/docker-java + +Licensed under the Apache License, Version 2.0. diff --git a/solr/licenses/docker-java-api-3.4.0.jar.sha1 b/solr/licenses/docker-java-api-3.4.0.jar.sha1 new file mode 100644 index 00000000000..bf5ca0d6db4 --- /dev/null +++ b/solr/licenses/docker-java-api-3.4.0.jar.sha1 @@ -0,0 +1 @@ +9ef23dcc93693f15e69b64632be096c38e31bc44 diff --git a/solr/licenses/docker-java-transport-3.4.0.jar.sha1 b/solr/licenses/docker-java-transport-3.4.0.jar.sha1 new file mode 100644 index 00000000000..c1232d24a6b --- /dev/null +++ b/solr/licenses/docker-java-transport-3.4.0.jar.sha1 @@ -0,0 +1 @@ +c058705684d782effc4b2edfdef1a87544ba4af8 diff --git a/solr/licenses/docker-java-transport-zerodep-3.4.0.jar.sha1 b/solr/licenses/docker-java-transport-zerodep-3.4.0.jar.sha1 new file mode 100644 index 00000000000..b658f8f0810 --- /dev/null +++ b/solr/licenses/docker-java-transport-zerodep-3.4.0.jar.sha1 @@ -0,0 +1 @@ +c4ce6d8695cfdb0027872f99cc20f8f679f8a969 diff --git a/solr/licenses/duct-tape-1.0.8.jar.sha1 b/solr/licenses/duct-tape-1.0.8.jar.sha1 new file mode 100644 index 00000000000..8ccb86d64ea --- /dev/null +++ b/solr/licenses/duct-tape-1.0.8.jar.sha1 @@ -0,0 +1 @@ +92edc22a9ab2f3e17c9bf700aaee377d50e8b530 diff --git a/solr/licenses/duct-tape-LICENSE-MIT.txt b/solr/licenses/duct-tape-LICENSE-MIT.txt new file mode 100644 index 00000000000..9cf106272ac --- /dev/null +++ b/solr/licenses/duct-tape-LICENSE-MIT.txt @@ -0,0 +1,19 @@ +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation 
files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/solr/licenses/jna-5.13.0.jar.sha1 b/solr/licenses/jna-5.13.0.jar.sha1 new file mode 100644 index 00000000000..93b456b9293 --- /dev/null +++ b/solr/licenses/jna-5.13.0.jar.sha1 @@ -0,0 +1 @@ +1200e7ebeedbe0d10062093f32925a912020e747 diff --git a/solr/licenses/testcontainers-1.20.4.jar.sha1 b/solr/licenses/testcontainers-1.20.4.jar.sha1 new file mode 100644 index 00000000000..29746a98e88 --- /dev/null +++ b/solr/licenses/testcontainers-1.20.4.jar.sha1 @@ -0,0 +1 @@ +ee2fe3afc9fa6cb2e6a43233998f3633f761692f diff --git a/solr/licenses/testcontainers-LICENSE-MIT.txt b/solr/licenses/testcontainers-LICENSE-MIT.txt new file mode 100644 index 00000000000..9cf106272ac --- /dev/null +++ b/solr/licenses/testcontainers-LICENSE-MIT.txt @@ -0,0 +1,19 @@ +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/solr/modules/extraction/build.gradle b/solr/modules/extraction/build.gradle index da6ebaccd68..66f3c2d0c00 100644 --- a/solr/modules/extraction/build.gradle +++ b/solr/modules/extraction/build.gradle @@ -19,6 +19,11 @@ apply plugin: 'java-library' description = 'Solr Integration with Tika for extracting content from binary file formats such as Microsoft Word and Adobe PDF' +ext { + // Disable security manager for extraction module tests + useSecurityManager = false +} + dependencies { implementation platform(project(':platform')) implementation project(':solr:core') @@ -35,11 +40,10 @@ dependencies { exclude group: 'org.quartz-scheduler', module: 'quartz' exclude group: 'xml-apis', module: 'xml-apis' }) - implementation (libs.xerces.impl, { - exclude group: 'xml-apis', module: 'xml-apis' - }) testImplementation project(':solr:test-framework') testImplementation libs.apache.lucene.testframework testImplementation libs.junit.junit + testImplementation libs.testcontainers + testImplementation libs.carrotsearch.randomizedtesting.runner } diff --git a/solr/modules/extraction/gradle.lockfile b/solr/modules/extraction/gradle.lockfile index abff70b0d7c..ef5da19b320 100644 --- a/solr/modules/extraction/gradle.lockfile +++ 
b/solr/modules/extraction/gradle.lockfile @@ -15,6 +15,9 @@ com.fasterxml.jackson.module:jackson-module-jakarta-xmlbind-annotations:2.20.0=j com.fasterxml.jackson:jackson-bom:2.20.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath com.fasterxml.woodstox:woodstox-core:7.0.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath com.github.ben-manes.caffeine:caffeine:3.2.2=annotationProcessor,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testRuntimeClasspath +com.github.docker-java:docker-java-api:3.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport-zerodep:3.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport:3.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath com.github.jai-imageio:jai-imageio-core:1.4.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath com.github.junrar:junrar:7.5.3=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath com.github.kevinstern:software-and-algorithms:1.0=annotationProcessor,errorprone,testAnnotationProcessor @@ -99,7 +102,8 @@ javax.inject:javax.inject:1=annotationProcessor,errorprone,testAnnotationProcess javax.measure:unit-api:1.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath joda-time:joda-time:2.14.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath junit:junit:4.13.2=jarValidation,testCompileClasspath,testRuntimeClasspath -net.java.dev.jna:jna:5.12.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +net.java.dev.jna:jna:5.12.1=compileClasspath,runtimeClasspath,runtimeLibs 
+net.java.dev.jna:jna:5.13.0=jarValidation,testCompileClasspath,testRuntimeClasspath net.sf.ehcache:ehcache-core:2.6.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.antlr:antlr4-runtime:4.13.2=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.apache.commons:commons-collections4:4.5.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath @@ -215,6 +219,7 @@ org.hamcrest:hamcrest:3.0=jarValidation,testCompileClasspath,testRuntimeClasspat org.itadaki:bzip2:0.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.javassist:javassist:3.30.2-GA=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.jdom:jdom2:2.0.6.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.jetbrains:annotations:26.0.2=jarValidation,testCompileClasspath,testRuntimeClasspath org.jspecify:jspecify:1.0.0=annotationProcessor,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath org.junit.jupiter:junit-jupiter-api:5.6.2=jarValidation,testRuntimeClasspath org.junit.platform:junit-platform-commons:1.6.2=jarValidation,testRuntimeClasspath @@ -226,6 +231,7 @@ org.ow2.asm:asm-commons:9.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatf org.ow2.asm:asm-tree:9.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.ow2.asm:asm:9.8=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.pcollections:pcollections:4.0.1=annotationProcessor,errorprone,testAnnotationProcessor +org.rnorth.duct-tape:duct-tape:1.0.8=jarValidation,testCompileClasspath,testRuntimeClasspath 
org.semver4j:semver4j:6.0.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath org.slf4j:jcl-over-slf4j:2.0.17=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath org.slf4j:jul-to-slf4j:2.0.17=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath @@ -234,6 +240,7 @@ org.tallison.xmp:xmpcore-shaded:6.1.10=compileClasspath,jarValidation,runtimeCla org.tallison:isoparser:1.9.41.7=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.tallison:jmatio:1.5=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.tallison:metadata-extractor:2.17.1.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.testcontainers:testcontainers:1.20.4=jarValidation,testCompileClasspath,testRuntimeClasspath org.tukaani:xz:1.9=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath org.xerial.snappy:snappy-java:1.1.10.8=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath xerces:xercesImpl:2.12.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/DummyExtractionBackend.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/DummyExtractionBackend.java new file mode 100644 index 00000000000..cf42e72453b --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/DummyExtractionBackend.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.io.InputStream; +import org.xml.sax.helpers.DefaultHandler; + +/** Dummy backend that emits predictable test data without actually parsing input content. */ +public class DummyExtractionBackend implements ExtractionBackend { + public static final String NAME = "dummy"; + private final String text = "This is dummy extracted content"; + + @Override + public String name() { + return NAME; + } + + @Override + public ExtractionResult extract(InputStream inputStream, ExtractionRequest request) { + ExtractionMetadata metadata = buildMetadataFromRequest(request); + metadata.add("Dummy-Backend", "true"); + metadata.add( + "Content-Type", + request.contentType != null ? 
request.contentType : "application/octet-stream"); + if (request.resourceName != null) { + metadata.add("resourcename", request.resourceName); + } + return new ExtractionResult(text, metadata); + } + + @Override + public void extractWithSaxHandler( + InputStream inputStream, + ExtractionRequest request, + ExtractionMetadata md, + DefaultHandler saxContentHandler) + throws Exception { + + ExtractionResult res = extract(inputStream, request); + md.putAll(res.getMetadata().asMap()); + // Append the content to the SAX handler + saxContentHandler.characters(res.getContent().toCharArray(), 0, res.getContent().length()); + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java index 014d56caae4..d07f25f3a0c 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java @@ -18,9 +18,11 @@ import java.io.IOException; import java.io.InputStream; -import java.io.StringWriter; import java.lang.invoke.MethodHandles; -import java.util.Locale; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.regex.Pattern; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.UpdateParams; @@ -33,38 +35,11 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.processor.UpdateRequestProcessor; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.HttpHeaders; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaMetadataKeys; -import org.apache.tika.mime.MediaType; -import 
org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.DefaultParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.PasswordProvider; -import org.apache.tika.parser.html.HtmlMapper; -import org.apache.tika.sax.XHTMLContentHandler; -import org.apache.tika.sax.xpath.Matcher; -import org.apache.tika.sax.xpath.MatchingContentHandler; -import org.apache.tika.sax.xpath.XPathParser; -import org.apache.xml.serialize.BaseMarkupSerializer; -import org.apache.xml.serialize.OutputFormat; -import org.apache.xml.serialize.TextSerializer; -import org.apache.xml.serialize.XMLSerializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; -/** - * The class responsible for loading extracted content into Solr. - * - * @deprecated Will be replaced with something similar that calls out to a separate Tika Server - * process running in its own JVM. - */ -@Deprecated(since = "9.10.0") +/** The class responsible for loading extracted content into Solr. */ public class ExtractingDocumentLoader extends ContentStreamLoader { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -75,40 +50,34 @@ public class ExtractingDocumentLoader extends ContentStreamLoader { /** Extract Only supported format. Default */ public static final String XML_FORMAT = "xml"; - /** XHTML XPath parser. 
*/ - private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML); - final SolrCore core; final SolrParams params; final UpdateRequestProcessor processor; final boolean ignoreTikaException; - protected AutoDetectParser autoDetectParser; + final boolean backCompat; private final AddUpdateCommand templateAdd; - protected TikaConfig config; - protected ParseContextConfig parseContextConfig; protected SolrContentHandlerFactory factory; + protected ExtractionBackend backend; public ExtractingDocumentLoader( SolrQueryRequest req, UpdateRequestProcessor processor, - TikaConfig config, - ParseContextConfig parseContextConfig, - SolrContentHandlerFactory factory) { + SolrContentHandlerFactory factory, + ExtractionBackend backend) { this.params = req.getParams(); this.core = req.getCore(); - this.config = config; - this.parseContextConfig = parseContextConfig; this.processor = processor; + this.backCompat = params.getBool(ExtractingParams.BACK_COMPATIBILITY, true); templateAdd = new AddUpdateCommand(req); templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true); templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1); + templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true); - // this is lightweight - autoDetectParser = new AutoDetectParser(config); this.factory = factory; + this.backend = backend; ignoreTikaException = params.getBool(ExtractingParams.IGNORE_TIKA_EXCEPTION, false); } @@ -131,169 +100,199 @@ public void load( ContentStream stream, UpdateRequestProcessor processor) throws Exception { - Parser parser = null; String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null); - if (streamType != null) { - // Cache? 
Parsers are lightweight to construct and thread-safe, so I'm told - MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ROOT)); - parser = new DefaultParser(config.getMediaTypeRegistry()).getParsers().get(mt); - } else { - parser = autoDetectParser; - } - if (parser != null) { - Metadata metadata = new Metadata(); - - // If you specify the resource name (the filename, roughly) with this parameter, - // then Tika can make use of it in guessing the appropriate MIME type: - String resourceName = req.getParams().get(ExtractingParams.RESOURCE_NAME, null); - if (resourceName != null) { - metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceName); - } - // Provide stream's content type as hint for auto detection - if (stream.getContentType() != null) { - metadata.add(HttpHeaders.CONTENT_TYPE, stream.getContentType()); - } + String resourceName = req.getParams().get(ExtractingParams.RESOURCE_NAME, null); + + try (InputStream inputStream = stream.getStream()) { + String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType()); + + String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION); + boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false); + boolean recursive = params.getBool(ExtractingParams.RECURSIVE, false); + String extractFormat = + params.get(ExtractingParams.EXTRACT_FORMAT, extractOnly ? 
XML_FORMAT : TEXT_FORMAT); - try (InputStream inputStream = stream.getStream()) { - metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName()); - metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo()); - metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize())); - metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType()); - // HtmlParser and TXTParser regard Metadata.CONTENT_ENCODING in metadata - String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType()); - if (charset != null) { - metadata.add(HttpHeaders.CONTENT_ENCODING, charset); + // Parse optional passwords file into a map (keeps Tika usages out of this class) + LinkedHashMap pwMap = null; + String passwordsFile = params.get("passwordsFile"); + if (passwordsFile != null) { + try (java.io.InputStream is = core.getResourceLoader().openResource(passwordsFile)) { + pwMap = RegexRulesPasswordProvider.parseRulesFile(is); } + } - String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION); - boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false); - SolrContentHandler handler = - factory.createSolrContentHandler(metadata, params, req.getSchema()); - ContentHandler parsingHandler = handler; - - StringWriter writer = null; - BaseMarkupSerializer serializer = null; - if (extractOnly == true) { - String extractFormat = params.get(ExtractingParams.EXTRACT_FORMAT, "xml"); - writer = new StringWriter(); - if (extractFormat.equals(TEXT_FORMAT)) { - serializer = new TextSerializer(); - serializer.setOutputCharStream(writer); - serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true)); - } else { - serializer = new XMLSerializer(writer, new OutputFormat("XML", "UTF-8", true)); - } - if (xpathExpr != null) { - Matcher matcher = PARSER.parse(xpathExpr); - serializer - .startDocument(); // The MatchingContentHandler does not invoke startDocument. 
See - // https://lists.apache.org/thread.html/5ec63e104e564a2363e45f74d5aced6520b7d32b4b625762ef56cb86%401226775505%40%3Cdev.tika.apache.org%3E - parsingHandler = new MatchingContentHandler(serializer, matcher); - } else { - parsingHandler = serializer; - } - } else if (xpathExpr != null) { - Matcher matcher = PARSER.parse(xpathExpr); - parsingHandler = new MatchingContentHandler(handler, matcher); - } // else leave it as is + ExtractionRequest extractionRequest = + new ExtractionRequest( + streamType, + resourceName, + stream.getContentType(), + charset, + stream.getName(), + stream.getSourceInfo(), + stream.getSize(), + params.get(ExtractingParams.RESOURCE_PASSWORD, null), + pwMap, + extractFormat, + recursive, + Collections.emptyMap()); + + boolean captureAttr = params.getBool(ExtractingParams.CAPTURE_ATTRIBUTES, false); + String[] captureElems = params.getParams(ExtractingParams.CAPTURE_ELEMENTS); + boolean needLegacySax = + extractOnly + || xpathExpr != null + || captureAttr + || (captureElems != null && captureElems.length > 0) + || (params.get(ExtractingParams.RESOURCE_PASSWORD) != null) + || (passwordsFile != null); + if (extractOnly) { try { - // potentially use a wrapper handler for parsing, but we still need the SolrContentHandler - // for getting the document. 
- ParseContext context = parseContextConfig.create(); - - context.set(Parser.class, parser); - context.set(HtmlMapper.class, MostlyPassthroughHtmlMapper.INSTANCE); - - // Password handling - RegexRulesPasswordProvider epp = new RegexRulesPasswordProvider(); - String pwMapFile = params.get(ExtractingParams.PASSWORD_MAP_FILE); - if (pwMapFile != null && pwMapFile.length() > 0) { - InputStream is = req.getCore().getResourceLoader().openResource(pwMapFile); - if (is != null) { - log.debug("Password file supplied: {}", pwMapFile); - epp.parse(is); + ExtractionMetadata md = backend.buildMetadataFromRequest(extractionRequest); + String content; + if (ExtractingDocumentLoader.TEXT_FORMAT.equals(extractionRequest.extractFormat) + || xpathExpr != null) { + org.apache.tika.sax.ToTextContentHandler textHandler = + new org.apache.tika.sax.ToTextContentHandler(); + DefaultHandler ch = textHandler; + if (xpathExpr != null) { + org.apache.tika.sax.xpath.XPathParser xparser = + new org.apache.tika.sax.xpath.XPathParser( + "xhtml", org.apache.tika.sax.XHTMLContentHandler.XHTML); + org.apache.tika.sax.xpath.Matcher matcher = xparser.parse(xpathExpr); + ch = new org.apache.tika.sax.xpath.MatchingContentHandler(textHandler, matcher); + } + backend.extractWithSaxHandler(inputStream, extractionRequest, md, ch); + content = textHandler.toString(); + } else { // XML format + org.apache.tika.sax.ToXMLContentHandler toXml = + new org.apache.tika.sax.ToXMLContentHandler(); + DefaultHandler ch = toXml; + if (xpathExpr != null) { + org.apache.tika.sax.xpath.XPathParser xparser = + new org.apache.tika.sax.xpath.XPathParser( + "xhtml", org.apache.tika.sax.XHTMLContentHandler.XHTML); + org.apache.tika.sax.xpath.Matcher matcher = xparser.parse(xpathExpr); + ch = new org.apache.tika.sax.xpath.MatchingContentHandler(toXml, matcher); + } + backend.extractWithSaxHandler(inputStream, extractionRequest, md, ch); + content = toXml.toString(); + if (!content.startsWith("\n" + content; } } - 
context.set(PasswordProvider.class, epp); - String resourcePassword = params.get(ExtractingParams.RESOURCE_PASSWORD); - if (resourcePassword != null) { - epp.setExplicitPassword(resourcePassword); - log.debug("Literal password supplied for file {}", resourceName); + + appendBackCompatTikaMetadata(md); + + // Write content + rsp.add(stream.getName(), content); + // Write metadata + NamedList metadataNL = new NamedList<>(); + for (String name : md.names()) { + metadataNL.add(name, md.getValues(name)); } - parser.parse(inputStream, parsingHandler, metadata, context); - } catch (TikaException e) { + rsp.add(stream.getName() + "_metadata", metadataNL); + } catch (UnsupportedOperationException uoe) { + // For backends that don't support xpath + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "XPath filtering is not supported by backend '" + backend.name() + "'."); + } catch (Exception e) { if (ignoreTikaException) { - if (log.isWarnEnabled()) { - log.warn( - "skip extracting text due to {}. 
metadata={}", - e.getLocalizedMessage(), - metadata, - e); - } - } else { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + if (log.isWarnEnabled()) + log.warn("skip extracting text due to {}.", e.getLocalizedMessage(), e); + return; } + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } - if (extractOnly == false) { - addDoc(handler); - } else { - // serializer is not null, so we need to call endDoc on it if using xpath - if (xpathExpr != null) { - serializer.endDocument(); + return; + } + + if (needLegacySax) { + // Indexing with capture/xpath/etc: delegate SAX parse to backend + ExtractionMetadata metadata = backend.buildMetadataFromRequest(extractionRequest); + SolrContentHandler handler = + factory.createSolrContentHandler(metadata, params, req.getSchema()); + try { + backend.extractWithSaxHandler(inputStream, extractionRequest, metadata, handler); + } catch (UnsupportedOperationException uoe) { + // For backends that don't support parseToSolrContentHandler + if (log.isWarnEnabled()) { + log.warn("skip extracting text since tika backend does not yet support this option"); } - rsp.add(stream.getName(), writer.toString()); - writer.close(); - String[] names = metadata.names(); - NamedList metadataNL = new NamedList<>(); - for (int i = 0; i < names.length; i++) { - String[] vals = metadata.getValues(names[i]); - metadataNL.add(names[i], vals); + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "The requested operation is not supported by backend '" + backend.name() + "'."); + } catch (Exception e) { + if (ignoreTikaException) { + if (log.isWarnEnabled()) { + log.warn("skip extracting text due to {}.", e.getLocalizedMessage(), e); + return; + } } - rsp.add(stream.getName() + "_metadata", metadataNL); + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + appendBackCompatTikaMetadata(handler.metadata); + + addDoc(handler); + return; + } + + // Default simple backend-neutral path + 
ExtractionResult result; + try { + result = backend.extract(inputStream, extractionRequest); + } catch (Exception e) { + if (ignoreTikaException) { + if (log.isWarnEnabled()) + log.warn("skip extracting text due to {}.", e.getLocalizedMessage(), e); + return; } - } catch (SAXException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } - } else { - throw new SolrException( - SolrException.ErrorCode.BAD_REQUEST, - "Stream type of " - + streamType - + " didn't match any known parsers. Please supply the " - + ExtractingParams.STREAM_TYPE - + " parameter."); + + ExtractionMetadata metadata = result.getMetadata(); + + appendBackCompatTikaMetadata(metadata); + + String content = result.getContent(); + + SolrContentHandler handler = + factory.createSolrContentHandler(metadata, params, req.getSchema()); + handler.appendToContent(content); + addDoc(handler); } } - public static class MostlyPassthroughHtmlMapper implements HtmlMapper { - public static final HtmlMapper INSTANCE = new MostlyPassthroughHtmlMapper(); - - /** - * Keep all elements and their content. - * - *

Apparently <SCRIPT> and <STYLE> elements are blocked elsewhere - */ - @Override - public boolean isDiscardElement(String name) { - return false; - } + private final Map fieldMappings = new LinkedHashMap<>(); - /** Lowercases the attribute name */ - @Override - public String mapSafeAttribute(String elementName, String attributeName) { - return attributeName.toLowerCase(Locale.ENGLISH); + { + fieldMappings.put("dc:title", "title"); + fieldMappings.put("dc:creator", "author"); + fieldMappings.put("dc:description", "description"); + fieldMappings.put("dc:subject", "subject"); + fieldMappings.put("dc:language", "language"); + fieldMappings.put("dc:publisher", "publisher"); + fieldMappings.put("dcterms:created", "created"); + fieldMappings.put("dcterms:modified", "modified"); + fieldMappings.put("meta:author", "Author"); + fieldMappings.put("meta:creation-date", "Creation-Date"); + fieldMappings.put("meta:save-date", "Last-Save-Date"); + fieldMappings.put("meta:keyword", "Keywords"); + fieldMappings.put("pdf:docinfo:keywords", "Keywords"); + } + + private void appendBackCompatTikaMetadata(ExtractionMetadata md) { + if (!backCompat) { + return; } - /** - * Lowercases the element name, but returns null for <BR>, which suppresses the - * start-element event for lt;BR> tags. This also suppresses the <BODY> tags because - * those are handled internally by Tika's XHTMLContentHandler. - */ - @Override - public String mapSafeElement(String name) { - String lowerName = name.toLowerCase(Locale.ROOT); - return (lowerName.equals("br") || lowerName.equals("body")) ? 
null : lowerName; + for (Map.Entry mapping : fieldMappings.entrySet()) { + String sourceField = mapping.getKey(); + String targetField = mapping.getValue(); + if (md.get(sourceField) != null && md.get(targetField) == null) { + md.addValues(targetField, md.getValues(sourceField)); + } } } } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java index a7d159678f1..eb70d5b6f6d 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java @@ -136,4 +136,13 @@ public interface ExtractingParams { * .*=<defaultmypassword> at the end */ public static final String PASSWORD_MAP_FILE = "passwordsFile"; + + /** Backend selection parameter and */ + public static final String EXTRACTION_BACKEND = "extraction.backend"; + + /** Fix metadata to match Tika 1.x */ + public static final String BACK_COMPATIBILITY = "backCompatibility"; + + /** Enable recursive parsing of embedded documents */ + String RECURSIVE = "recursive"; } diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java index c9a319bc0bb..09e2dddb0e0 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java @@ -16,8 +16,6 @@ */ package org.apache.solr.handler.extraction; -import java.io.InputStream; -import java.nio.file.Path; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.core.SolrCore; @@ -28,26 +26,24 @@ import org.apache.solr.security.PermissionNameProvider; import 
org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.util.plugin.SolrCoreAware; -import org.apache.tika.config.TikaConfig; /** * Handler for rich documents like PDF or Word or any other file format that Tika handles that need * the text to be extracted first from the document. - * - * @deprecated Will be replaced with something similar that calls out to a separate Tika Server - * process running in its own JVM. */ -@Deprecated(since = "9.10.0") public class ExtractingRequestHandler extends ContentStreamHandlerBase implements SolrCoreAware, PermissionNameProvider { public static final String PARSE_CONTEXT_CONFIG = "parseContext.config"; public static final String CONFIG_LOCATION = "tika.config"; + public static final String TIKASERVER_URL = "tikaserver.url"; - protected TikaConfig config; + protected String tikaConfigLoc; protected ParseContextConfig parseContextConfig; protected SolrContentHandlerFactory factory; + protected ExtractionBackendFactory backendFactory; + protected String defaultBackendName; @Override public PermissionNameProvider.Name getPermissionName(AuthorizationContext request) { @@ -57,22 +53,8 @@ public PermissionNameProvider.Name getPermissionName(AuthorizationContext reques @Override public void inform(SolrCore core) { try { - String tikaConfigLoc = (String) initArgs.get(CONFIG_LOCATION); - if (tikaConfigLoc == null) { // default - ClassLoader classLoader = core.getResourceLoader().getClassLoader(); - try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) { - config = new TikaConfig(is); - } - } else { - Path configFile = Path.of(tikaConfigLoc); - if (configFile.isAbsolute()) { - config = new TikaConfig(configFile); - } else { // in conf/ - try (InputStream is = core.getResourceLoader().openResource(tikaConfigLoc)) { - config = new TikaConfig(is); - } - } - } + // Store tika config location (backend-specific) + this.tikaConfigLoc = (String) initArgs.get(CONFIG_LOCATION); String 
parseContextConfigLoc = (String) initArgs.get(PARSE_CONTEXT_CONFIG); if (parseContextConfigLoc == null) { // default: @@ -81,20 +63,37 @@ public void inform(SolrCore core) { parseContextConfig = new ParseContextConfig(core.getResourceLoader(), parseContextConfigLoc); } + + // Initialize backend factory once; backends are created lazily on demand + String tikaServerUrl = (String) initArgs.get(TIKASERVER_URL); + backendFactory = + new ExtractionBackendFactory(core, tikaConfigLoc, parseContextConfig, tikaServerUrl); + + // Choose default backend name (do not instantiate yet) + String backendName = (String) initArgs.get(ExtractingParams.EXTRACTION_BACKEND); + defaultBackendName = + (backendName == null || backendName.trim().isEmpty()) + ? LocalTikaExtractionBackend.NAME + : backendName; + } catch (Exception e) { - throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to load Tika Config", e); + throw new SolrException( + ErrorCode.SERVER_ERROR, "Unable to initialize ExtractingRequestHandler", e); } - factory = createFactory(); - } - - protected SolrContentHandlerFactory createFactory() { - return new SolrContentHandlerFactory(); + factory = new SolrContentHandlerFactory(); } @Override protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProcessor processor) { - return new ExtractingDocumentLoader(req, processor, config, parseContextConfig, factory); + // Allow per-request override of backend via request param + String backendParam = req.getParams().get(ExtractingParams.EXTRACTION_BACKEND); + String nameToUse = + (backendParam != null && !backendParam.trim().isEmpty()) + ? 
backendParam + : defaultBackendName; + ExtractionBackend extractionBackend = backendFactory.getBackend(nameToUse); + return new ExtractingDocumentLoader(req, processor, factory, extractionBackend); } // ////////////////////// SolrInfoMBeans methods ////////////////////// diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackend.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackend.java new file mode 100644 index 00000000000..9d15b5a1159 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackend.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.io.InputStream; +import org.apache.tika.metadata.HttpHeaders; +import org.apache.tika.metadata.TikaMetadataKeys; +import org.xml.sax.helpers.DefaultHandler; + +/** Strategy interface for content extraction backends. */ +public interface ExtractionBackend { + /** + * Extract plain text and metadata from the inputStream. Implementations should not close the + * inputStream. This API is backend-neutral and does not expose SAX or XML-specific types. 
+ */ + ExtractionResult extract(InputStream inputStream, ExtractionRequest request) throws Exception; + + /** + * Perform extraction of text from input stream with SAX handler. Sax handler can be + * SolrContentHandler, ToTextContentHandler, ToXMLContentHandler, MatchingContentHandler etc + */ + void extractWithSaxHandler( + InputStream inputStream, + ExtractionRequest request, + ExtractionMetadata md, + DefaultHandler saxContentHandler) + throws Exception; + + /** Build ExtractionMetadata from the request context */ + default ExtractionMetadata buildMetadataFromRequest(ExtractionRequest request) { + ExtractionMetadata md = new ExtractionMetadata(); + md.addIfNotNull(TikaMetadataKeys.RESOURCE_NAME_KEY, request.resourceName); + md.addIfNotNull(HttpHeaders.CONTENT_TYPE, request.contentType); + md.addIfNotNull(ExtractingMetadataConstants.STREAM_NAME, request.streamName); + md.addIfNotNull(ExtractingMetadataConstants.STREAM_SOURCE_INFO, request.streamSourceInfo); + md.addIfNotNull(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(request.streamSize)); + md.addIfNotNull(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, request.contentType); + md.addIfNotNull(HttpHeaders.CONTENT_ENCODING, request.charset); + return md; + } + + /** A short name for debugging/config, e.g., "local" or "dummy". */ + String name(); +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackendFactory.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackendFactory.java new file mode 100644 index 00000000000..7ee0c163152 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionBackendFactory.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.util.Locale; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrCore; + +/** + * Factory for ExtractionBackend instances. Lazily constructs backends by short name (e.g., "local", + * "dummy") and caches them for reuse. + */ +public class ExtractionBackendFactory { + private final SolrCore core; + private final String tikaConfigLoc; + private final ParseContextConfig parseContextConfig; + private final String tikaServerUrl; + private final Map cache = new ConcurrentHashMap<>(); + + public ExtractionBackendFactory( + SolrCore core, + String tikaConfigLoc, + ParseContextConfig parseContextConfig, + String tikaServerUrl) { + this.core = core; + this.tikaConfigLoc = tikaConfigLoc; + this.parseContextConfig = parseContextConfig; + this.tikaServerUrl = tikaServerUrl; + } + + /** Returns a backend instance for the given name, creating it if necessary. 
*/ + public ExtractionBackend getBackend(String name) { + String key = normalize(name); + return cache.computeIfAbsent( + key, + k -> { + try { + return create(k); + } catch (Exception e) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Failed to create extraction backend '" + k + "'", + e); + } + }); + } + + private String normalize(String name) { + if (name == null || name.trim().isEmpty()) return LocalTikaExtractionBackend.NAME; + return name.trim().toLowerCase(Locale.ROOT); + } + + /** Creates a new backend instance for the given normalized name. */ + protected ExtractionBackend create(String normalizedName) throws Exception { + return switch (normalizedName) { + case DummyExtractionBackend.NAME -> new DummyExtractionBackend(); + case TikaServerExtractionBackend.NAME -> new TikaServerExtractionBackend( + tikaServerUrl != null ? tikaServerUrl : "http://localhost:9998"); + case LocalTikaExtractionBackend.NAME -> new LocalTikaExtractionBackend( + core, tikaConfigLoc, parseContextConfig); + default -> throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Unknown extraction backend: " + normalizedName); + }; + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionMetadata.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionMetadata.java new file mode 100644 index 00000000000..67592432fa0 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionMetadata.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** Simple metadata bean */ +public class ExtractionMetadata { + private final Map> map = new LinkedHashMap<>(); + + public void add(String name, String value) { + if (name == null || value == null) return; + map.computeIfAbsent(name, k -> new ArrayList<>()).add(value); + } + + public void addValues(String name, String[] values) { + if (name == null || values == null || values.length == 0) return; + map.computeIfAbsent(name, k -> new ArrayList<>()).addAll(List.of(values)); + } + + public void addIfNotNull(String resourceNameKey, String resourceName) { + if (resourceName != null) { + add(resourceNameKey, resourceName); + } + } + + public void putAll(Map> map) { + this.map.putAll(map); + } + + public String[] getValues(String name) { + List vals = map.get(name); + if (vals == null) return new String[0]; + return vals.toArray(new String[0]); + } + + public String get(String name) { + List vals = map.get(name); + if (vals == null || vals.isEmpty()) return null; + return vals.get(0); + } + + public String[] names() { + return map.keySet().toArray(new String[0]); + } + + public void remove(String name) { + map.remove(name); + } + + public Map> asMap() { + return map; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("ExtractionMetadata{"); + boolean first = true; + for (Map.Entry> e : map.entrySet()) { + if (!first) sb.append(", "); + 
first = false; + sb.append(e.getKey()).append('=').append(e.getValue()); + } + sb.append('}'); + return sb.toString(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof ExtractionMetadata)) return false; + ExtractionMetadata that = (ExtractionMetadata) obj; + return Objects.equals(this.map, that.map); + } + + @Override + public int hashCode() { + return Objects.hash(map); + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionRequest.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionRequest.java new file mode 100644 index 00000000000..99ab4d8d742 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionRequest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +import java.util.HashMap; +import java.util.Map; + +/** Immutable request info needed by extraction backends. 
*/ +public class ExtractionRequest { + public final String streamType; // explicit MIME type (optional) + public final String resourceName; // filename hint + public final String contentType; // HTTP content-type header + public final String charset; // derived charset if available + public final String streamName; + public final String streamSourceInfo; + public final Long streamSize; + public final String resourcePassword; // optional password for encrypted docs + public final java.util.LinkedHashMap + passwordsMap; // optional passwords map + public final String extractFormat; + public final boolean recursive; + public final Map tikaRequestHeaders = new HashMap<>(); + + public ExtractionRequest( + String streamType, + String resourceName, + String contentType, + String charset, + String streamName, + String streamSourceInfo, + Long streamSize, + String resourcePassword, + java.util.LinkedHashMap passwordsMap, + String extractFormat, + boolean recursive, + Map tikaRequestHeaders) { + this.streamType = streamType; + this.resourceName = resourceName; + this.contentType = contentType; + this.charset = charset; + this.streamName = streamName; + this.streamSourceInfo = streamSourceInfo; + this.streamSize = streamSize; + this.resourcePassword = resourcePassword; + this.passwordsMap = passwordsMap; + this.extractFormat = extractFormat; + this.recursive = recursive; + if (tikaRequestHeaders != null) { + this.tikaRequestHeaders.putAll(tikaRequestHeaders); + } + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionResult.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionResult.java new file mode 100644 index 00000000000..97767d15367 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/ExtractionResult.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.extraction; + +/** Immutable extraction result with plain text content and neutral metadata. */ +public final class ExtractionResult { + private final String content; + private final ExtractionMetadata metadata; + + public ExtractionResult(String content, ExtractionMetadata metadata) { + this.content = content == null ? "" : content; + this.metadata = metadata; + } + + /** Extracted textual content (plain text). */ + public String getContent() { + return content; + } + + /** Extracted metadata in neutral, backend-agnostic form. */ + public ExtractionMetadata getMetadata() { + return metadata; + } +} diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/LocalTikaExtractionBackend.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/LocalTikaExtractionBackend.java new file mode 100644 index 00000000000..8ad2adc47c0 --- /dev/null +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/LocalTikaExtractionBackend.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.extraction;

import java.io.InputStream;
import java.nio.file.Path;
import java.util.Locale;
import org.apache.solr.core.SolrCore;
import org.apache.solr.logging.DeprecationLog;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Extraction backend using local in-process Apache Tika. This encapsulates the previous direct
 * usage of Tika from the loader.
 *
 * @deprecated Will be removed soon, please use the 'tikaserver' extraction backend instead.
 */
@Deprecated(since = "9.10.0")
public class LocalTikaExtractionBackend implements ExtractionBackend {
  public static final String NAME = "local";

  private final TikaConfig tikaConfig;
  private final ParseContextConfig parseContextConfig;
  private final AutoDetectParser autoDetectParser;

  /**
   * HtmlMapper that keeps (almost) every element and attribute, only lower-casing names. Moved here
   * from ExtractingDocumentLoader.
   */
  private static class MostlyPassthroughHtmlMapper implements HtmlMapper {
    static final HtmlMapper INSTANCE = new MostlyPassthroughHtmlMapper();

    @Override
    public boolean isDiscardElement(String name) {
      return false; // keep everything
    }

    @Override
    public String mapSafeAttribute(String elementName, String attributeName) {
      // Locale.ROOT for locale-independent lower-casing (was Locale.ENGLISH; identical mapping,
      // now consistent with mapSafeElement below)
      return attributeName.toLowerCase(Locale.ROOT);
    }

    @Override
    public String mapSafeElement(String name) {
      String lowerName = name.toLowerCase(Locale.ROOT);
      // <br> and <body> are suppressed; everything else passes through lower-cased
      return (lowerName.equals("br") || lowerName.equals("body")) ? null : lowerName;
    }
  }

  /** Construct backend from an already-loaded Tika configuration. */
  public LocalTikaExtractionBackend(TikaConfig config, ParseContextConfig parseContextConfig) {
    this.tikaConfig = config;
    this.parseContextConfig = parseContextConfig;
    this.autoDetectParser = new AutoDetectParser(config);
  }

  /**
   * Construct backend by loading TikaConfig based on handler/core configuration without exposing
   * Tika types to the handler.
   *
   * @param core the owning SolrCore, used for resource loading
   * @param tikaConfigLoc location of the Tika config file; {@code null} means the bundled default,
   *     an absolute path is read directly, anything else is resolved against conf/
   * @param parseContextConfig the configured ParseContext factory
   * @throws Exception if the Tika configuration cannot be loaded
   */
  public LocalTikaExtractionBackend(
      SolrCore core, String tikaConfigLoc, ParseContextConfig parseContextConfig) throws Exception {
    TikaConfig cfg;
    if (tikaConfigLoc == null) { // default
      ClassLoader classLoader = core.getResourceLoader().getClassLoader();
      try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
        cfg = new TikaConfig(is);
      }
    } else {
      Path configFile = Path.of(tikaConfigLoc);
      if (configFile.isAbsolute()) {
        cfg = new TikaConfig(configFile);
      } else { // in conf/
        try (InputStream is = core.getResourceLoader().openResource(tikaConfigLoc)) {
          cfg = new TikaConfig(is);
        }
      }
    }
    this.tikaConfig = cfg;
    this.parseContextConfig = parseContextConfig;
    this.autoDetectParser = new AutoDetectParser(cfg);
    DeprecationLog.log("Local Tika", "The 'local' extraction backend is deprecated");
  }

  @Override
  public String name() {
    return NAME;
  }

  /**
   * Picks the parser for the request: an explicit stream type selects the registered parser for
   * that media type (may be {@code null} if none is registered), otherwise auto-detection is used.
   */
  private Parser selectParser(ExtractionRequest request) {
    if (request.streamType != null) {
      MediaType mt = MediaType.parse(request.streamType.trim().toLowerCase(Locale.ROOT));
      return new DefaultParser(tikaConfig.getMediaTypeRegistry()).getParsers().get(mt);
    }
    return autoDetectParser;
  }

  /** Converts the request's neutral metadata into Tika's {@link Metadata} form. */
  private Metadata buildMetadata(ExtractionRequest request) {
    ExtractionMetadata extractionMetadata = buildMetadataFromRequest(request);
    Metadata md = new Metadata();
    for (String name : extractionMetadata.names()) {
      String[] vals = extractionMetadata.getValues(name);
      if (vals != null) {
        for (String v : vals) {
          md.add(name, v);
        }
      }
    }
    return md;
  }

  /** Builds the ParseContext: selected parser, passthrough HTML mapper, and password provider. */
  private ParseContext buildContext(Parser parser, ExtractionRequest request) {
    ParseContext context = parseContextConfig.create();
    context.set(Parser.class, parser);
    context.set(HtmlMapper.class, MostlyPassthroughHtmlMapper.INSTANCE);
    // The provider is always a RegexRulesPasswordProvider, so configure it directly; the previous
    // instanceof guards were always true (dead code).
    RegexRulesPasswordProvider pwd = new RegexRulesPasswordProvider();
    if (request.resourcePassword != null) {
      pwd.setExplicitPassword(request.resourcePassword);
    }
    if (request.passwordsMap != null) {
      pwd.setPasswordMap(request.passwordsMap);
    }
    context.set(PasswordProvider.class, pwd);
    return context;
  }

  /** Copies Tika {@link Metadata} into the neutral {@link ExtractionMetadata} form. */
  private static ExtractionMetadata tikaMetadataToExtractionMetadata(Metadata md) {
    ExtractionMetadata out = new ExtractionMetadata();
    for (String name : md.names()) {
      String[] vals = md.getValues(name);
      if (vals != null) {
        for (String v : vals) {
          out.add(name, v);
        }
      }
    }
    return out;
  }

  @Override
  public ExtractionResult extract(InputStream inputStream, ExtractionRequest request)
      throws Exception {
    Parser parser = selectParser(request);
    if (parser == null) {
      throw new IllegalArgumentException("No Tika parser for stream type: " + request.streamType);
    }
    ParseContext context = buildContext(parser, request);
    Metadata md = buildMetadata(request);
    BodyContentHandler textHandler = new BodyContentHandler(-1); // -1: no write limit
    parser.parse(inputStream, textHandler, md, context);
    return new ExtractionResult(textHandler.toString(), tikaMetadataToExtractionMetadata(md));
  }

  @Override
  public void extractWithSaxHandler(
      InputStream inputStream,
      ExtractionRequest request,
      ExtractionMetadata md,
      DefaultHandler saxContentHandler)
      throws Exception {
    Parser parser = selectParser(request);
    if (parser == null) {
      throw new IllegalArgumentException("No Tika parser for stream type: " + request.streamType);
    }
    ParseContext context = buildContext(parser, request);
    Metadata tikaMetadata = buildMetadata(request);
    parser.parse(inputStream, saxContentHandler, tikaMetadata, context);
    // Copy back whatever Tika added during the parse into the caller's neutral metadata.
    for (String name : tikaMetadata.names()) {
      String[] vals = tikaMetadata.getValues(name);
      if (vals != null) {
        for (String v : vals) {
          md.add(name, v);
        }
      }
    }
  }
}
a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java index 84b4e94171c..8e7f876da83 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java @@ -55,6 +55,17 @@ public String getPassword(Metadata meta) { return null; } + public String getPassword(ExtractionMetadata extractionMetadata) { + if (getExplicitPassword() != null) { + return getExplicitPassword(); + } + + if (passwordMap.size() > 0) + return lookupPasswordFromMap(extractionMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY)); + + return null; + } + private String lookupPasswordFromMap(String fileName) { if (fileName != null && fileName.length() > 0) { for (Entry e : passwordMap.entrySet()) { diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java index 9edba0e925e..22be163c816 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java @@ -30,7 +30,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; -import org.apache.tika.metadata.Metadata; +// note: decoupled from Tika Metadata import org.apache.tika.metadata.TikaMetadataKeys; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,7 +57,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara protected final SolrInputDocument document; - protected final Metadata metadata; + protected final ExtractionMetadata metadata; protected final SolrParams 
params; protected final StringBuilder catchAllBuilder = new StringBuilder(2048); protected final IndexSchema schema; @@ -74,7 +74,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara private Set literalFieldNames = null; - public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { + public SolrContentHandler(ExtractionMetadata metadata, SolrParams params, IndexSchema schema) { this.document = new SolrInputDocument(); this.metadata = metadata; this.params = params; @@ -152,6 +152,13 @@ protected void addContent() { addField(contentFieldName, catchAllBuilder.toString(), null); } + /** Append pre-extracted plain text content to the catch-all builder. */ + public void appendToContent(String text) { + if (text != null && !text.isEmpty()) { + catchAllBuilder.append(text); + } + } + /** * Add in the literals to the document using the {@link #params} and the {@link #LITERALS_PREFIX}. */ diff --git a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java index 1070e744d84..b4fe031a068 100644 --- a/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java +++ b/solr/modules/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java @@ -18,7 +18,6 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.schema.IndexSchema; -import org.apache.tika.metadata.Metadata; /** */ public class SolrContentHandlerFactory { @@ -26,7 +25,7 @@ public class SolrContentHandlerFactory { public SolrContentHandlerFactory() {} public SolrContentHandler createSolrContentHandler( - Metadata metadata, SolrParams params, IndexSchema schema) { + ExtractionMetadata metadata, SolrParams params, IndexSchema schema) { return new SolrContentHandler(metadata, params, schema); } } diff --git 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.extraction;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import org.apache.solr.common.SolrException;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Extraction backend that delegates parsing to a remote Apache Tika Server.
 *
 * <p>This backend uses the JDK {@link HttpClient} to call Tika Server endpoints ({@code /tika} for
 * plain extraction, {@code /rmeta} for recursive extraction). It supports backend-neutral extract()
 * and extractOnly() operations.
 */
public class TikaServerExtractionBackend implements ExtractionBackend {
  public static final String NAME = "tikaserver";

  private final HttpClient httpClient;
  private final String baseUrl; // e.g., http://localhost:9998 (normalized: no trailing slash)
  private final Duration timeout = Duration.ofSeconds(30);
  private final TikaServerParser tikaServerResponseParser = new TikaServerParser();

  /** Creates a backend with a default HttpClient (10 second connect timeout). */
  public TikaServerExtractionBackend(String baseUrl) {
    this(HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(), baseUrl);
  }

  // Visible for tests
  TikaServerExtractionBackend(HttpClient httpClient, String baseUrl) {
    // Strip a single trailing slash so endpoint paths can be appended directly.
    if (baseUrl.endsWith("/")) {
      this.baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
    } else {
      this.baseUrl = baseUrl;
    }
    this.httpClient = httpClient;
  }

  @Override
  public String name() {
    return NAME;
  }

  @Override
  public ExtractionResult extract(InputStream inputStream, ExtractionRequest request)
      throws Exception {
    try (InputStream tikaResponse = callTikaServer(inputStream, request)) {
      ExtractionMetadata md = buildMetadataFromRequest(request);
      BodyContentHandler textHandler = new BodyContentHandler(-1); // -1: no write limit
      if (request.recursive) {
        tikaServerResponseParser.parseRmetaJson(tikaResponse, textHandler, md);
      } else {
        tikaServerResponseParser.parseXml(tikaResponse, textHandler, md);
      }
      return new ExtractionResult(textHandler.toString(), md);
    }
  }

  @Override
  public void extractWithSaxHandler(
      InputStream inputStream,
      ExtractionRequest request,
      ExtractionMetadata md,
      DefaultHandler saxContentHandler)
      throws Exception {
    try (InputStream tikaResponse = callTikaServer(inputStream, request)) {
      if (request.recursive) {
        tikaServerResponseParser.parseRmetaJson(tikaResponse, saxContentHandler, md);
      } else {
        tikaServerResponseParser.parseXml(tikaResponse, saxContentHandler, md);
      }
    }
  }

  /** Returns {@code a} if non-null, else {@code b}. */
  private static String firstNonNull(String a, String b) {
    return a != null ? a : b;
  }

  /**
   * Call the Tika Server to extract text and metadata. Depending on request.recursive, will either
   * return XML (false) or a JSON array (true).
   *
   * @return InputStream of the response body, either XML or JSON depending on request.recursive
   * @throws SolrException if the server responds with a non-2xx status
   */
  private InputStream callTikaServer(InputStream inputStream, ExtractionRequest request)
      throws IOException, InterruptedException {
    String url = baseUrl + (request.recursive ? "/rmeta" : "/tika");
    HttpRequest.Builder b =
        HttpRequest.newBuilder(URI.create(url))
            .timeout(timeout)
            .header("Accept", (request.recursive ? "application/json" : "text/xml"));
    // An explicit stream type overrides the content type reported by the client.
    String contentType = firstNonNull(request.streamType, request.contentType);
    if (contentType != null) {
      b.header("Content-Type", contentType);
    }
    if (!request.tikaRequestHeaders.isEmpty()) {
      request.tikaRequestHeaders.forEach(b::header);
    }
    ExtractionMetadata md = buildMetadataFromRequest(request);
    if (request.resourcePassword != null || request.passwordsMap != null) {
      // Resolve the password locally (explicit or regex-map based) and forward it as a header.
      RegexRulesPasswordProvider passwordProvider = new RegexRulesPasswordProvider();
      if (request.resourcePassword != null) {
        passwordProvider.setExplicitPassword(request.resourcePassword);
      }
      if (request.passwordsMap != null) {
        passwordProvider.setPasswordMap(request.passwordsMap);
      }

      String pwd = passwordProvider.getPassword(md);
      if (pwd != null) {
        b.header("Password", pwd);
      }
    }
    if (request.resourceName != null) {
      b.header("Content-Disposition", "attachment; filename=\"" + request.resourceName + "\"");
    }
    b.PUT(HttpRequest.BodyPublishers.ofInputStream(() -> inputStream));

    // Typed response (was a raw HttpResponse, which forced an unchecked body() result).
    HttpResponse<InputStream> resp =
        httpClient.send(b.build(), HttpResponse.BodyHandlers.ofInputStream());
    int code = resp.statusCode();
    if (code < 200 || code >= 300) {
      throw new SolrException(
          SolrException.ErrorCode.getErrorCode(code),
          "TikaServer " + url + " returned status " + code);
    }
    return resp.body();
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.extraction;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.Utils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Parses Tika Server responses: XHTML from the {@code /tika} endpoint and the JSON array returned
 * by {@code /rmeta}.
 *
 * <p>Thread-safety: a fresh {@link SAXParser} is created per parse because SAXParser instances are
 * not thread-safe and require {@code reset()} between reuses, while a single instance of this class
 * is shared by the backend across concurrent requests.
 */
public class TikaServerParser {
  /** XXE hardening: features disabled on the SAX parser factory. */
  private static final String[] DISABLED_FEATURES = {
    "http://xml.org/sax/features/external-general-entities",
    "http://xml.org/sax/features/external-parameter-entities",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd"
  };

  private final SAXParserFactory factory;

  public TikaServerParser() {
    factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    // Apply each feature independently so one unsupported feature does not prevent the rest
    // from being applied (previously a single broad catch skipped the remaining features).
    for (String feature : DISABLED_FEATURES) {
      try {
        factory.setFeature(feature, false);
      } catch (ParserConfigurationException
          | SAXNotRecognizedException
          | SAXNotSupportedException e) {
        // This parser implementation does not support the feature; continue with the others.
      }
    }
    // Fail fast at construction time if no parser can be created at all.
    newSaxParser();
  }

  /**
   * Creates a fresh parser from the configured factory. SAXParserFactory is not guaranteed
   * thread-safe either, so creation is synchronized on it.
   */
  private SAXParser newSaxParser() {
    try {
      synchronized (factory) {
        return factory.newSAXParser();
      }
    } catch (Exception e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    }
  }

  /**
   * Parses response in XML format from Tika Server /tika endpoint. The result is that the metadata
   * object is populated from {@code <meta>} tags and the content handler receives all SAX events.
   */
  public void parseXml(InputStream inputStream, ContentHandler handler, ExtractionMetadata metadata)
      throws IOException, SAXException {
    DefaultHandler metaExtractingHandler = new TikaXmlResponseSaxContentHandler(handler, metadata);
    InputStream sanitizedStream = XmlSanitizingReader.sanitize(inputStream);
    newSaxParser().parse(sanitizedStream, metaExtractingHandler);
  }

  /**
   * Parses the JSON array returned by the /rmeta endpoint: every entry's fields are copied into
   * {@code md}, and each entry's "X-TIKA:content" XHTML payload is re-parsed through {@code
   * handler} (if non-null).
   */
  void parseRmetaJson(InputStream jsonStream, DefaultHandler handler, ExtractionMetadata md)
      throws Exception {
    Object parsed = Utils.fromJSON(jsonStream);
    if (!(parsed instanceof List)) {
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR, "Unexpected /rmeta response, expected JSON array");
    }
    for (Object element : (List<?>) parsed) {
      if (!(element instanceof Map)) {
        continue;
      }
      Map<?, ?> map = (Map<?, ?>) element;
      // Copy metadata (entrySet avoids a lookup per key, unlike keySet()+get()).
      for (Map.Entry<?, ?> entry : map.entrySet()) {
        String key = String.valueOf(entry.getKey());
        if ("X-TIKA:content".equalsIgnoreCase(key)) {
          continue; // handled below
        }
        Object val = entry.getValue();
        if (val instanceof List) {
          for (Object v : (List<?>) val) {
            if (v != null) {
              md.add(key, String.valueOf(v));
            }
          }
        } else if (val != null) {
          md.add(key, String.valueOf(val));
        }
      }
      Object content = map.get("X-TIKA:content");
      if (content != null && handler != null) {
        String xhtml = String.valueOf(content);
        if (!xhtml.isEmpty()) {
          InputStream inputStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8));
          InputStream sanitizedStream = XmlSanitizingReader.sanitize(inputStream);
          newSaxParser().parse(sanitizedStream, handler);
        }
      }
    }
  }

  /**
   * Custom SAX handler that extracts {@code <meta name=... content=...>} tags found inside {@code
   * <head>} into the metadata object, and forwards every event to the delegate (if any).
   */
  static class TikaXmlResponseSaxContentHandler extends DefaultHandler {
    private final ContentHandler delegate;
    private final ExtractionMetadata metadata;
    private boolean inHead = false;

    public TikaXmlResponseSaxContentHandler(ContentHandler delegate, ExtractionMetadata metadata) {
      this.delegate = delegate;
      this.metadata = metadata;
    }

    @Override
    public void startDocument() throws SAXException {
      if (delegate != null) delegate.startDocument();
    }

    @Override
    public void endDocument() throws SAXException {
      if (delegate != null) delegate.endDocument();
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException {
      // With namespace-aware parsing localName is set; fall back to qName otherwise.
      String ln = localName != null && !localName.isEmpty() ? localName : qName;
      if ("head".equalsIgnoreCase(ln)) {
        inHead = true;
      } else if (inHead && "meta".equalsIgnoreCase(ln) && attributes != null) {
        String name = attributes.getValue("name");
        String content = attributes.getValue("content");
        if (name != null && content != null) {
          metadata.add(name, content);
        }
      }
      if (delegate != null) delegate.startElement(uri, localName, qName, attributes);
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
      String ln = localName != null && !localName.isEmpty() ? localName : qName;
      if ("head".equalsIgnoreCase(ln)) {
        inHead = false;
      }
      if (delegate != null) delegate.endElement(uri, localName, qName);
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
      if (delegate != null) delegate.characters(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
      if (delegate != null) delegate.ignorableWhitespace(ch, start, length);
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
      if (delegate != null) delegate.startPrefixMapping(prefix, uri);
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
      if (delegate != null) delegate.endPrefixMapping(prefix);
    }
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.extraction;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

/**
 * Make sure the XHTML input is valid XML. Pipe text through this reader before passing it to an XML
 * parser.
 *
 * <p>Filtering rules:
 *
 * <ul>
 *   <li>numeric character entities denoting U+0000 (e.g. {@code &#0;}, {@code &#x0;}) are dropped
 *   <li>code points outside the XML 1.0 "Char" production are dropped, including lone surrogates
 *   <li>all other input, including non-null entities, passes through unchanged
 * </ul>
 *
 * <p>Not thread-safe; wrap a fresh instance around each input.
 */
final class XmlSanitizingReader extends Reader {
  private final Reader in;
  private final StringBuilder entityBuf = new StringBuilder();
  private boolean inEntity = false; // true after reading '&', while buffering a potential entity

  // Characters that did not fit into the caller's buffer; drained first on the next read().
  // (Replaces a pushback field that was assigned but never consumed, silently dropping data.)
  private final StringBuilder pendingOut = new StringBuilder();

  // For surrogate tracking to evaluate XML validity by code point; -1 means none pending.
  private int pendingHighSurrogate = -1;

  public XmlSanitizingReader(Reader in) {
    this.in = in;
  }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    if (len == 0) {
      return 0;
    }
    // First deliver anything left over from a previous call.
    int written = drainPending(cbuf, off, len);
    while (written < len) {
      int ci = in.read();
      if (ci == -1) {
        if (inEntity) {
          // EOF in the middle of a potential entity: flush it verbatim, consistent with the
          // over-length flush below (previously these characters were silently dropped).
          inEntity = false;
          written = emit(cbuf, off, written, len, entityBuf);
          entityBuf.setLength(0);
        }
        break;
      }
      char ch = (char) ci;

      if (inEntity) {
        entityBuf.append(ch);
        // Stop conditions: entity terminator, or a defensive cap (real entities are short).
        if (ch == ';' || entityBuf.length() > 12) {
          inEntity = false;
          String ent = entityBuf.toString();
          entityBuf.setLength(0);
          if (!isNullNumericEntity(ent)) {
            // Not a null entity: pass it through unchanged (overflow goes to pendingOut).
            written = emit(cbuf, off, written, len, ent);
          }
        }
        continue;
      }
      if (ch == '&') {
        inEntity = true;
        entityBuf.setLength(0);
        entityBuf.append(ch);
        continue; // don't write yet
      }

      // Filter invalid XML 1.0 characters by code point, pairing surrogates first.
      if (Character.isHighSurrogate(ch)) {
        pendingHighSurrogate = ch; // an earlier unpaired high surrogate, if any, is dropped
        continue;
      }
      if (Character.isLowSurrogate(ch)) {
        if (pendingHighSurrogate != -1) {
          int cp = Character.toCodePoint((char) pendingHighSurrogate, ch);
          pendingHighSurrogate = -1;
          if (isAllowedXmlChar(cp)) {
            // Emit the full pair; if only one char fits, the rest lands in pendingOut
            // (previously the low surrogate could be lost when the buffer filled up).
            written = emit(cbuf, off, written, len, new String(Character.toChars(cp)));
          }
        }
        continue; // a lone low surrogate is invalid and dropped
      }
      pendingHighSurrogate = -1; // high surrogate not followed by a low one: drop it

      if (isAllowedXmlChar(ch)) {
        cbuf[off + written++] = ch;
      }
    }
    return written == 0 ? -1 : written;
  }

  /** Copies buffered leftover characters into the caller's buffer; returns chars written. */
  private int drainPending(char[] cbuf, int off, int len) {
    int n = 0;
    while (n < len && n < pendingOut.length()) {
      cbuf[off + n] = pendingOut.charAt(n);
      n++;
    }
    pendingOut.delete(0, n);
    return n;
  }

  /**
   * Writes {@code chars} into the output buffer while space remains; any overflow is queued in
   * {@link #pendingOut} for the next read. Returns the updated written count.
   */
  private int emit(char[] cbuf, int off, int written, int len, CharSequence chars) {
    for (int i = 0; i < chars.length(); i++) {
      char c = chars.charAt(i);
      if (written < len) {
        cbuf[off + written++] = c;
      } else {
        pendingOut.append(c);
      }
    }
    return written;
  }

  @Override
  public boolean ready() throws IOException {
    // Buffered leftovers can be read without blocking.
    return pendingOut.length() > 0 || in.ready();
  }

  @Override
  public void close() throws IOException {
    in.close();
  }

  /** True for numeric entities denoting U+0000: '&#0;', '&#00;', '&#x0;', '&#X00;', etc. */
  private static boolean isNullNumericEntity(String ent) {
    if (ent == null) return false;
    if (!ent.startsWith("&#") || !ent.endsWith(";")) return false;
    String mid = ent.substring(2, ent.length() - 1);
    if (mid.isEmpty()) return false;
    if (mid.charAt(0) == 'x' || mid.charAt(0) == 'X') {
      // hex form: at least one zero after the 'x', and nothing but zeros
      for (int i = 1; i < mid.length(); i++) {
        if (mid.charAt(i) != '0') return false;
      }
      return mid.length() > 1;
    } else {
      // decimal form: one or more zeros
      for (int i = 0; i < mid.length(); i++) {
        if (mid.charAt(i) != '0') return false;
      }
      return true;
    }
  }

  /** XML 1.0 "Char" production: Tab, LF, CR, and the three legal code point ranges. */
  private static boolean isAllowedXmlChar(int cp) {
    return cp == 0x9
        || cp == 0xA
        || cp == 0xD
        || (cp >= 0x20 && cp <= 0xD7FF)
        || (cp >= 0xE000 && cp <= 0xFFFD)
        || (cp >= 0x10000 && cp <= 0x10FFFF);
  }

  /**
   * Wraps a (UTF-8) InputStream so that the bytes read from the returned stream have passed through
   * an {@link XmlSanitizingReader}. A daemon thread pumps data through a pipe; on pump failure the
   * piped stream is closed so the consumer observes the error as EOF/IOException.
   */
  public static InputStream sanitize(InputStream in) throws IOException {
    PipedOutputStream out = new PipedOutputStream();
    PipedInputStream pipedIn = new PipedInputStream(out);

    Reader reader = new XmlSanitizingReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8);

    Thread worker =
        new Thread(
            () -> {
              try (reader;
                  writer) {
                reader.transferTo(writer);
              } catch (IOException e) {
                try {
                  pipedIn.close();
                } catch (IOException ignored) {
                }
              }
            },
            "XmlSanitizingReaderWorker");
    worker.setDaemon(true);
    worker.start();

    return pipedIn;
  }
}
a/solr/modules/extraction/src/test-files/extraction/example.html +++ b/solr/modules/extraction/src/test-files/extraction/example.html @@ -6,8 +6,8 @@

Here is some text

-
Here is some text in a div
-
This has a link.
+

a h1 tag

+

This has a link in a paragraph.

News
  • diff --git a/solr/modules/extraction/src/test-files/extraction/simple.html b/solr/modules/extraction/src/test-files/extraction/simple.html index 3c807fb1d98..3ec4d4e0d01 100644 --- a/solr/modules/extraction/src/test-files/extraction/simple.html +++ b/solr/modules/extraction/src/test-files/extraction/simple.html @@ -10,7 +10,7 @@ Here is some text

    distinct
    words

    -
    Here is some text in a div
    +

    Here is some text in a h1

    This has a link.