diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 9cc317d..32e0334 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -14,7 +14,7 @@ concurrency:
 # needed to allow julia-actions/cache to delete old caches that it has created
 permissions:
   actions: write
-  contents: read
+  contents: write
 jobs:
   test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
@@ -44,6 +44,16 @@ jobs:
           Pkg.Registry.add(
             RegistrySpec(url = "https://github.com/tensor4all/T4ARegistry.git")
           )'
+      - name: Develop extracted dependencies (temporary)
+        run: |
+          julia --project=@. -e '
+          using Pkg
+          Pkg.Registry.add()
+          Pkg.add([
+            PackageSpec(url = "https://github.com/tensor4all/T4AMatrixCI.jl.git", rev = "main"),
+            PackageSpec(url = "https://github.com/tensor4all/T4ATensorTrain.jl.git", rev = "fix/t4aitensorcompat-uuid"),
+          ])
+          Pkg.instantiate()'
       - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
@@ -65,7 +75,29 @@ jobs:
           Pkg.Registry.add(
             RegistrySpec(url = "https://github.com/tensor4all/T4ARegistry.git")
           )'
+      - name: Develop extracted dependencies (temporary)
+        run: |
+          julia --project=@. -e '
+          using Pkg
+          Pkg.Registry.add()
+          Pkg.add([
+            PackageSpec(url = "https://github.com/tensor4all/T4AMatrixCI.jl.git", rev = "main"),
+            PackageSpec(url = "https://github.com/tensor4all/T4ATensorTrain.jl.git", rev = "fix/t4aitensorcompat-uuid"),
+          ])
+          Pkg.instantiate()'
       - uses: julia-actions/julia-buildpkg@v1
+      - name: Instantiate docs environment (pin extracted dependencies)
+        run: |
+          julia --project=docs/ -e '
+          using Pkg
+          # Ensure default registries are available
+          Pkg.Registry.add()
+          # Pin extracted packages by URL to avoid relying on registry metadata during docs builds
+          Pkg.add([
+            PackageSpec(url = "https://github.com/tensor4all/T4AMatrixCI.jl.git", rev = "main"),
+            PackageSpec(url = "https://github.com/tensor4all/T4ATensorTrain.jl.git", rev = "fix/t4aitensorcompat-uuid"),
+          ])
+          Pkg.instantiate()'
       - uses: julia-actions/julia-docdeploy@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 20fe29d..b8dd623 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,9 @@
 *.jl.mem
 /Manifest.toml
 /docs/build/
+
+# Local logs and build artifacts (do not commit)
+*.log
+/docs/Manifest.toml
+/docs/*.log
+/docs_build.log
diff --git a/Project.toml b/Project.toml
index 5af9239..44e6f88 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,8 @@ EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+T4AMatrixCI = "c72b89c1-c391-41a7-b711-dfcca653bb47"
+T4ATensorTrain = "ec6ec972-96ae-4a21-b859-3911b77e305f"
 
 [weakdeps]
 ITensorMPS = "0d1a4710-d33b-49a5-8f18-73bdf49b47e2"
@@ -26,6 +28,8 @@ Optim = "1"
 QuadGK = "2.9"
 Random = "1.9.0"
 T4AITensorCompat = "0.8"
+T4AMatrixCI = "0.1"
+T4ATensorTrain = "0.1"
 julia = "1.10"
 
 [extras]
diff --git a/docs/Project.toml b/docs/Project.toml
index 47a6d63..d7e0c51 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -5,4 +5,6 @@ ITensors = "9136182c-28ba-11e9-034c-db9fb085ebd5"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 T4AITensorCompat = "864b39ca-388c-4f43-a593-a1076cf4b253"
+T4AMatrixCI = "c72b89c1-c391-41a7-b711-dfcca653bb47"
 T4ATensorCI = "14428447-6903-48c7-83df-f2cb08af9918"
+T4ATensorTrain = "ec6ec972-96ae-4a21-b859-3911b77e305f"
diff --git a/docs/make.jl b/docs/make.jl
index 179e9be..b93c27a 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,3 +1,16 @@
+using Pkg
+
+Pkg.activate(@__DIR__)
+Pkg.develop(PackageSpec(path=joinpath(@__DIR__, "..")))
+for pkgdir in ("T4AMatrixCI.jl", "T4ATensorTrain.jl")
+    local_path = joinpath(@__DIR__, "..", "..", pkgdir)
+    if isdir(local_path)
+        Pkg.develop(PackageSpec(path=local_path))
+    end
+end
+Pkg.resolve()
+Pkg.instantiate()
+
 using T4ATensorCI
 using ITensors
 using ITensorMPS
diff --git a/docs/src/implementation.md b/docs/src/implementation.md
index 336fc07..4e40dc3 100644
--- a/docs/src/implementation.md
+++ b/docs/src/implementation.md
@@ -50,7 +50,7 @@ This function adds one pivot at bond $\ell$ (in the code, we use `p` instead of
 
 To add a pivot, we have to add a row and a column to $T_\ell, P_\ell$ and $T_{\ell + 1}$. Afterwards, we update neighbouring $\Pi$ tensors $\Pi_{\ell-1}$ and $\Pi_{\ell+1}$ for efficiency.
 
-- Construct an $MCI$ ([`MatrixCI`](@ref)) object with row indices $I$, column indices $J$, columns $C$ and rows $R$, where:
+- Construct an $MCI$ (`T4AMatrixCI.MatrixCI`) object with row indices $I$, column indices $J$, columns $C$ and rows $R$, where:
     - Row indices $MCI.I \leftarrow \Pi I_\ell [I_{\ell+1}]$
     - Column indices $MCI.J \leftarrow \Pi J_{\ell+1} [J_\ell]$
     - Column vectors $MCI.C \leftarrow \text{reshape}(T_\ell; D_{\ell-1}\times d_\ell, D_{\ell})$
diff --git a/docs/src/index.md b/docs/src/index.md
index 9463b23..b74462a 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -41,7 +41,7 @@ println("Sum of original function: $sumorig")
 sumtt = sum(tci)
 println("Sum of tensor train: $sumtt")
 ```
-For further information, see [`sum`](@ref).
+For further information, see `sum`.
 
 This factorized sum can be used for efficient evaluation of high-dimensional integrals. This is implemented with Gauss-Kronrod quadrature rules in [`integrate`](@ref). For example, the integral
 ```math
diff --git a/ext/TCIITensorConversion/TCIITensorConversion.jl b/ext/TCIITensorConversion/TCIITensorConversion.jl
index be868ba..3844db6 100644
--- a/ext/TCIITensorConversion/TCIITensorConversion.jl
+++ b/ext/TCIITensorConversion/TCIITensorConversion.jl
@@ -2,6 +2,7 @@ module TCIITensorConversion
 
 import T4ATensorCI as TCI
 import T4ATensorCI: evaluate
+import T4ATensorTrain
 
 using ITensors
 import ITensorMPS
@@ -10,7 +11,10 @@ import ITensorMPS: MPS, MPO
 export MPS, MPO
 export evaluate
 
-include("ttmpsconversion.jl")
+# Conversions between `T4ATensorTrain.TensorTrain` (re-exported by T4ATensorCI)
+# and ITensorMPS types are provided by `T4ATensorTrain`'s extension
+# `TTITensorConversion`. Do not re-define them here to avoid method overwriting
+# during precompilation.
 include("mpsutil.jl")
 
 end
diff --git a/ext/TCIITensorConversion/mpsutil.jl b/ext/TCIITensorConversion/mpsutil.jl
index 542cf69..855312e 100644
--- a/ext/TCIITensorConversion/mpsutil.jl
+++ b/ext/TCIITensorConversion/mpsutil.jl
@@ -8,7 +8,7 @@ Evaluates an MPS or MPO for a given set of index values.
 
 - `indexspec` is a list of tuples, where each tuple contains an `ITensorMPS.Index` object specifying an index, and an `Int` corresponding to the value of the specified index.
 
-If many evaluations are necessary, it may be advantageous to convert your MPS to a [`T4ATensorCI.TTCache`](@ref) object first.
+If many evaluations are necessary, it may be advantageous to convert your MPS to a `T4ATensorCI.TTCache` object first.
 """
 function evaluate(
     mps::Union{ITensorMPS.MPS,ITensorMPS.MPO},
@@ -39,7 +39,7 @@ Evaluates an MPS or MPO for a given set of index values.
 - `indices` is a list of `ITensors.Index` objects that specifies the order in which indices are given.
 - `indexvalues` is a list of integer values in the same order as `indices`.
 
-If many evaluations are necessary, it may be advantageous to convert your MPS to a [`T4ATensorCI.TTCache`](@ref) object first.
+If many evaluations are necessary, it may be advantageous to convert your MPS to a `T4ATensorCI.TTCache` object first.
 """
 function evaluate(
     mps::Union{ITensorMPS.MPS,ITensorMPS.MPO},
diff --git a/ext/TCIITensorConversion/ttmpsconversion.jl b/ext/TCIITensorConversion/ttmpsconversion.jl
deleted file mode 100644
index 6a9e5ef..0000000
--- a/ext/TCIITensorConversion/ttmpsconversion.jl
+++ /dev/null
@@ -1,138 +0,0 @@
-function ITensorMPS.MPS(tt::TCI.TensorTrain{T}; sites=nothing)::MPS where {T}
-    N = length(tt)
-    localdims = [size(t, 2) for t in tt]
-
-    if sites === nothing
-        sites = [Index(localdims[n], "n=$n") for n in 1:N]
-    else
-        all(localdims .== dim.(sites)) ||
-            error("ranks are not consistent with dimension of sites")
-    end
-
-    linkdims = [1, TCI.linkdims(tt)..., 1]
-    links = [Index(linkdims[l + 1], "link,l=$l") for l in 0:N]
-
-    tensors_ = [ITensor(deepcopy(tt[n]), links[n], sites[n], links[n + 1]) for n in 1:N]
-    tensors_[1] *= onehot(links[1] => 1)
-    tensors_[end] *= onehot(links[end] => 1)
-
-    return MPS(tensors_)
-end
-
-"""
-    MPS(tt::TCI.AbstractTensorTrain{T}; sites=nothing)
-
-Convert a tensor train to an `ITensorMPS.MPS`.
-Arguments:
-- `tt`: The tensor train to be converted.
-- `siteindices`: An array of ITensor Index objects, where `sites[n]` corresponds to the index for the nth site.
-
-If `siteindices` is left empty, a default set of indices will be generated.
-"""
-function ITensorMPS.MPS(tt::TCI.AbstractTensorTrain{T}; sites=nothing)::MPS where {T}
-    return MPS(TCI.tensortrain(tt), sites=sites)
-end
-
-function ITensorMPS.MPO(tt::TCI.TensorTrain{T}; sites=nothing)::ITensorMPS.MPO where {T}
-    N = length(tt)
-    localdims = TCI.sitedims(tt)
-
-    if sites === nothing
-        sites = [
-            [Index(legdim, "ell=$ell, n=$n") for (n, legdim) in enumerate(ld)]
-            for (ell, ld) in enumerate(localdims)
-        ]
-    elseif !all(all.(
-            localdimell .== dim.(siteell)
-            for (localdimell, siteell) in zip(localdims, sites)
-        ))
-        error("ranks are not consistent with dimension of sites")
-    end
-
-    linkdims = [1, TCI.linkdims(tt)..., 1]
-    links = [Index(linkdims[l + 1], "link,l=$l") for l in 0:N]
-
-    tensors_ = [ITensor(deepcopy(tt[n]), links[n], sites[n]..., links[n + 1]) for n in 1:N]
-    tensors_[1] *= onehot(links[1] => 1)
-    tensors_[end] *= onehot(links[end] => 1)
-
-    return ITensorMPS.MPO(tensors_)
-end
-
-"""
-    MPO(tt::TCI.AbstractTensorTrain{T}; sites=nothing)
-
-Convert a tensor train to an `ITensorMPS.MPO`.
-Arguments:
-- `tt`: The tensor train to be converted.
-- `sites`: An array of arrays of ITensor Index objects, where `sites[n][m]` corresponds to the mth index attached to the nth site.
-
-If `siteindices` is left empty, a default set of indices will be generated.
-"""
-function ITensorMPS.MPO(tci::TCI.AbstractTensorTrain{T}; sites=nothing)::ITensorMPS.MPO where {T}
-    return ITensorMPS.MPO(TCI.tensortrain(tci), sites=sites)
-end
-
-
-"""
-    function TensorTrain(mps::ITensorMPS.MPS)
-
-Converts an `ITensorMPS.MPS` object into a TensorTrain. Note that this only works if the MPS has a single leg per site. Otherwise, use [`T4ATensorCI.TensorTrain(mps::ITensorMPS.MPO)`](@ref).
-"""
-function TCI.TensorTrain(mps::ITensorMPS.MPS)
-    links = ITensorMPS.linkinds(mps)
-    sites = ITensors.SiteTypes.siteinds(mps)
-    
-    # Detect scalar type from input tensors by promoting all element types
-    T = eltype(Array(mps[1], sites[1], links[1]))
-    for i in 2:length(mps)-1
-        Ti = eltype(Array(mps[i], links[i-1], sites[i], links[i]))
-        T = promote_type(T, Ti)
-    end
-    if length(mps) > 1
-        Ti = eltype(Array(mps[end], links[end], sites[end]))
-        T = promote_type(T, Ti)
-    end
-    
-    Tfirst = zeros(T, 1, dim(sites[1]), dim(links[1]))
-    Tfirst[1, :, :] = Array(mps[1], sites[1], links[1])
-    Tlast = zeros(T, dim(links[end]), dim(sites[end]), 1)
-    Tlast[:, :, 1] = Array(mps[end], links[end], sites[end])
-    return TCI.TensorTrain{T,3}(
-        vcat(
-            [Tfirst],
-            [Array(mps[i], links[i-1], sites[i], links[i]) for i in 2:length(mps)-1],
-            [Tlast]
-        )
-    )
-end
-
-"""
-    function TensorTrain(mps::ITensorMPS.MPO)
-
-Converts an `ITensorMPS.MPO` object into a [`T4ATensorCI.TensorTrain`](@ref).
-"""
-function TCI.TensorTrain{V, N}(mpo::ITensorMPS.MPO; sites=nothing) where {N, V}
-    links = ITensorMPS.linkinds(mpo)
-    if sites === nothing
-        sites = ITensors.SiteTypes.siteinds(mpo)
-    elseif !all(issetequal.(ITensors.SiteTypes.siteinds(mpo), sites))
-        error("Site indices do not correspond to the site indices of the MPO.")
-    end
-
-    # Use the specified type parameter V
-    # Conversion errors will be raised when Array() is called if types are incompatible
-    Tfirst = zeros(V, 1, dim.(sites[1])..., dim(links[1]))
-    Tfirst[1, fill(Colon(), length(sites[1]) + 1)...] = Array(mpo[1], sites[1]..., links[1])
-
-    Tlast = zeros(V, dim(links[end]), dim.(sites[end])..., 1)
-    Tlast[fill(Colon(), length(sites[end]) + 1)..., 1] = Array(mpo[end], links[end], sites[end]...)
-
-    return TCI.TensorTrain{V, N}(
-        vcat(
-            Array{V, N}[Tfirst],
-            Array{V, N}[Array(mpo[i], links[i-1], sites[i]..., links[i]) for i in 2:length(mpo)-1],
-            Array{V, N}[Tlast]
-        )
-    )
-end
diff --git a/ext/TCIT4AITensorCompatConversion/TCIT4AITensorCompatConversion.jl b/ext/TCIT4AITensorCompatConversion/TCIT4AITensorCompatConversion.jl
index 0283a24..cb6f316 100644
--- a/ext/TCIT4AITensorCompatConversion/TCIT4AITensorCompatConversion.jl
+++ b/ext/TCIT4AITensorCompatConversion/TCIT4AITensorCompatConversion.jl
@@ -1,11 +1,14 @@
 module TCIT4AITensorCompatConversion
 
 import T4ATensorCI as TCI
+import T4ATensorTrain
 
 using ITensors
 using T4AITensorCompat
 
-include("t4aitensorcompat_conversion.jl")
+# Conversions between `T4ATensorTrain.TensorTrain` (re-exported by T4ATensorCI)
+# and `T4AITensorCompat.TensorTrain` are provided by `T4ATensorTrain`'s extension
+# `TTT4AITensorCompatConversion`. Do not re-define them here.
 
 end
 
diff --git a/ext/TCIT4AITensorCompatConversion/t4aitensorcompat_conversion.jl b/ext/TCIT4AITensorCompatConversion/t4aitensorcompat_conversion.jl
deleted file mode 100644
index d243e78..0000000
--- a/ext/TCIT4AITensorCompatConversion/t4aitensorcompat_conversion.jl
+++ /dev/null
@@ -1,182 +0,0 @@
-"""
-    T4AITensorCompat.TensorTrain(tt::TCI.TensorTrain{ValueType,N}; sites=nothing)
-
-Convert a `T4ATensorCI.TensorTrain` to a `T4AITensorCompat.TensorTrain`.
-
-# Arguments
-- `tt::TCI.TensorTrain{ValueType,N}`: The tensor train to convert
-- `sites`: Optional site indices. If `nothing`, default indices will be generated.
-
-# Returns
-- `T4AITensorCompat.TensorTrain`: The converted tensor train
-"""
-function T4AITensorCompat.TensorTrain(tt::TCI.TensorTrain{ValueType,N}; sites=nothing) where {ValueType,N}
-    N_sites = length(tt)
-    localdims = TCI.sitedims(tt)
-    
-    # Generate site indices if not provided
-    if sites === nothing
-        if N == 3  # MPS case: one physical index per site
-            sites = [Index(localdims[n][1], "n=$n") for n in 1:N_sites]
-        else  # MPO case: multiple physical indices per site
-            sites = [
-                [Index(localdims[n][ell], "ell=$ell, n=$n") for ell in 1:length(localdims[n])]
-                for n in 1:N_sites
-            ]
-        end
-    end
-    
-    # Generate link indices
-    linkdims = [1, TCI.linkdims(tt)..., 1]
-    links = [Index(linkdims[l + 1], "link,l=$l") for l in 0:N_sites]
-    
-    # Convert each tensor to ITensor
-    tensors = Vector{ITensor}(undef, N_sites)
-    for n in 1:N_sites
-        core = tt[n]
-        if N == 3  # MPS case
-            tensors[n] = ITensor(core, links[n], sites[n], links[n + 1])
-        else  # MPO case
-            tensors[n] = ITensor(core, links[n], sites[n]..., links[n + 1])
-        end
-    end
-    
-    # Set boundary conditions
-    tensors[1] *= onehot(links[1] => 1)
-    tensors[end] *= onehot(links[end] => 1)
-    
-    return T4AITensorCompat.TensorTrain(tensors)
-end
-
-"""
-    T4AITensorCompat.TensorTrain(tt::TCI.AbstractTensorTrain{ValueType}; sites=nothing)
-
-Convert an `AbstractTensorTrain` to a `T4AITensorCompat.TensorTrain` by first converting to `TensorTrain`.
-
-# Arguments
-- `tt::TCI.AbstractTensorTrain{ValueType}`: The tensor train to convert
-- `sites`: Optional site indices. If `nothing`, default indices will be generated.
-
-# Returns
-- `T4AITensorCompat.TensorTrain`: The converted tensor train
-"""
-function T4AITensorCompat.TensorTrain(tt::TCI.AbstractTensorTrain{ValueType}; sites=nothing) where {ValueType}
-    return T4AITensorCompat.TensorTrain(TCI.tensortrain(tt); sites=sites)
-end
-
-"""
-    TCI.TensorTrain(tt::T4AITensorCompat.TensorTrain; sites=nothing)
-    TCI.TensorTrain{V, N}(tt::T4AITensorCompat.TensorTrain; sites=nothing)
-
-Convert a `T4AITensorCompat.TensorTrain` to a `T4ATensorCI.TensorTrain`.
-
-# Arguments
-- `tt::T4AITensorCompat.TensorTrain`: The tensor train to convert
-- `sites`: Optional site indices. If `nothing`, site indices will be extracted from the tensor train.
-- `V`: Optional type parameter for the element type (e.g., Float64, ComplexF64)
-- `N`: Optional type parameter for the number of dimensions per tensor
-
-# Returns
-- `TCI.TensorTrain{ValueType,N}`: The converted tensor train
-"""
-function TCI.TensorTrain(tt::T4AITensorCompat.TensorTrain; sites=nothing)
-    N = length(tt)
-    N == 0 && return TCI.TensorTrain(Vector{Array{Float64,3}}())
-    
-    # Extract site indices if not provided
-    if sites === nothing
-        sites = T4AITensorCompat.siteinds(tt)
-    end
-    
-    # T4AITensorCompat.siteinds always returns Vector{Vector{Index}}
-    # Determine if this is MPS (one index per site) or MPO (multiple indices per site)
-    is_mps = all(length(s) == 1 for s in sites)
-    
-    if is_mps
-        # MPS case: convert to TensorTrain{ValueType,3}
-        # Extract link indices (these are internal links between tensors)
-        links = T4AITensorCompat.linkinds(tt)
-        
-        # Flatten sites to Vector{Index}
-        sites_flat = [s[1] for s in sites]
-        
-        # Detect scalar type from input tensors by promoting all element types
-        # Get element type from first tensor
-        T = eltype(Array(tt[1], sites_flat[1], links[1]))
-        # Promote with middle tensors
-        for i in 2:length(tt)-1
-            Ti = eltype(Array(tt[i], links[i-1], sites_flat[i], links[i]))
-            T = promote_type(T, Ti)
-        end
-        # Promote with last tensor
-        if length(tt) > 1
-            Ti = eltype(Array(tt[end], links[end], sites_flat[end]))
-            T = promote_type(T, Ti)
-        end
-        
-        # Convert each ITensor to Array, following the same logic as ttmpsconversion.jl
-        Tfirst = zeros(T, 1, dim(sites_flat[1]), dim(links[1]))
-        Tfirst[1, :, :] = Array(tt[1], sites_flat[1], links[1])
-        
-        Tlast = zeros(T, dim(links[end]), dim(sites_flat[end]), 1)
-        Tlast[:, :, 1] = Array(tt[end], links[end], sites_flat[end])
-        
-        return TCI.TensorTrain{T,3}(
-            vcat(
-                [Tfirst],
-                [Array(tt[i], links[i-1], sites_flat[i], links[i]) for i in 2:length(tt)-1],
-                [Tlast]
-            )
-        )
-    else
-        # MPO case: convert to TensorTrain{ValueType,N} where N > 3
-        # This is more complex as we need to determine N from the structure
-        error("MPO conversion from T4AITensorCompat.TensorTrain to T4ATensorCI.TensorTrain is not yet implemented. Please convert via ITensorMPS.MPO first.")
-    end
-end
-
-function TCI.TensorTrain{V, N}(tt::T4AITensorCompat.TensorTrain; sites=nothing) where {V, N}
-    N_tt = length(tt)
-    N_tt == 0 && return TCI.TensorTrain{V, N}(Vector{Array{V, N}}())
-    
-    # Extract site indices if not provided
-    if sites === nothing
-        sites = T4AITensorCompat.siteinds(tt)
-    end
-    
-    # T4AITensorCompat.siteinds always returns Vector{Vector{Index}}
-    # Determine if this is MPS (one index per site) or MPO (multiple indices per site)
-    is_mps = all(length(s) == 1 for s in sites)
-    
-    if is_mps && N == 3
-        # MPS case: convert to TensorTrain{V,3}
-        # Extract link indices (these are internal links between tensors)
-        links = T4AITensorCompat.linkinds(tt)
-        
-        # Flatten sites to Vector{Index}
-        sites_flat = [s[1] for s in sites]
-        
-        # Use the specified type parameter V
-        # Conversion errors will be raised when Array() is called if types are incompatible
-        Tfirst = zeros(V, 1, dim(sites_flat[1]), dim(links[1]))
-        Tfirst[1, :, :] = Array(tt[1], sites_flat[1], links[1])
-        
-        Tlast = zeros(V, dim(links[end]), dim(sites_flat[end]), 1)
-        Tlast[:, :, 1] = Array(tt[end], links[end], sites_flat[end])
-        
-        return TCI.TensorTrain{V, N}(
-            vcat(
-                [Tfirst],
-                [Array(tt[i], links[i-1], sites_flat[i], links[i]) for i in 2:length(tt)-1],
-                [Tlast]
-            )
-        )
-    elseif !is_mps && N > 3
-        # MPO case: convert to TensorTrain{V,N} where N > 3
-        # This is more complex as we need to determine N from the structure
-        error("MPO conversion from T4AITensorCompat.TensorTrain to T4ATensorCI.TensorTrain is not yet implemented. Please convert via ITensorMPS.MPO first.")
-    else
-        error("Type parameter N=$N does not match the tensor train structure. MPS requires N=3, MPO requires N>3.")
-    end
-end
-
diff --git a/src/T4ATensorCI.jl b/src/T4ATensorCI.jl
index e1294b6..1410f73 100644
--- a/src/T4ATensorCI.jl
+++ b/src/T4ATensorCI.jl
@@ -5,6 +5,10 @@ using EllipsisNotation
 using BitIntegers
 import QuadGK
 
+# Import from new packages
+import T4AMatrixCI
+import T4ATensorTrain
+
 # To add a method for rank(tci)
 import LinearAlgebra: rank, diag
 import LinearAlgebra as LA
@@ -15,24 +19,98 @@ import Base: isempty, iterate, getindex, lastindex, broadcastable
 import Base: length, size, sum
 import Random
 
+# Import from T4AMatrixCI - use import for types that may be extended
+import T4AMatrixCI
+import T4AMatrixCI: rrlu, rrlu!, arrlu, rrLU, MatrixCI, MatrixLUCI, MatrixACA
+import T4AMatrixCI: left, right, npivots, rowindices, colindices, pivoterrors, lastpivoterror
+# MatrixCI helpers used throughout TCI code (kept for backwards compatibility)
+import T4AMatrixCI: AtimesBinv, AinvtimesB
+import T4AMatrixCI: submatrix, localerror, findnewpivot
+import T4AMatrixCI: addpivotrow!, addpivotcol!, addpivot!
+import T4AMatrixCI: setcols!, setrows!
+import T4AMatrixCI: pivotmatrix, leftmatrix, rightmatrix
+import T4AMatrixCI: nrows, ncols, availablerows, availablecols
+import T4AMatrixCI: row, col
+import T4AMatrixCI: submatrixargmax
+import T4AMatrixCI: colmatrix, rowmatrix, colstimespivotinv, pivotinvtimesrows
+# Re-export for backwards compatibility
+export rrlu, rrlu!, arrlu, rrLU, MatrixCI, MatrixLUCI, MatrixACA
+export left, right, npivots, rowindices, colindices, pivoterrors, lastpivoterror
+export AtimesBinv, AinvtimesB
+export submatrix, localerror, findnewpivot
+export addpivotrow!, addpivotcol!, addpivot!
+export setcols!, setrows!
+export pivotmatrix, leftmatrix, rightmatrix
+export nrows, ncols, availablerows, availablecols
+export row, col
+export crossinterpolate
+export submatrixargmax
+export colmatrix, rowmatrix, colstimespivotinv, pivotinvtimesrows
+export submatrixargmax
+
+# Import from T4ATensorTrain - use import for functions that will be extended
+import T4ATensorTrain
+import T4ATensorTrain: AbstractTensorTrain, TensorTrain, TTCache, BatchEvaluator
+import T4ATensorTrain: TensorTrainFit
+import T4ATensorTrain: tensortrain, sitedims, sitedim, linkdims, linkdim
+import T4ATensorTrain: sitetensors, sitetensor, fulltensor, compress!
+import T4ATensorTrain: add, subtract, contract_naive, contract_zipup
+import T4ATensorTrain: Algorithm, @Algorithm_str
+import T4ATensorTrain: LocalIndex, MultiIndex, _contract, _contractsitetensors
+import T4ATensorTrain: isbatchevaluable
+import T4ATensorTrain: batchevaluate
+import T4ATensorTrain: flatten, to_tensors
+# Re-export for backwards compatibility
+export AbstractTensorTrain, TensorTrain, TTCache, BatchEvaluator
+export TensorTrainFit
+export tensortrain, evaluate, sitedims, sitedim, linkdims, linkdim, rank
+export sitetensors, sitetensor, fulltensor, compress!
+export add, subtract, contract, contract_naive, contract_zipup
+export Algorithm, @Algorithm_str
+export isbatchevaluable
+export batchevaluate
+export flatten, to_tensors
+
 export crossinterpolate1, crossinterpolate2, optfirstpivot
-export tensortrain, TensorTrain, sitedims, evaluate
 
-export contract
+# -----------------------------------------------------------------------------
+# Backwards-compatible `evaluate` entry point
+#
+# Historically, T4ATensorCI owned `evaluate` as a generic and had methods for TT,
+# matrix CI objects, and TCI objects. After extraction, we keep a local `evaluate`
+# and forward to the appropriate package to avoid type piracy.
+# -----------------------------------------------------------------------------
+
+evaluate(args...; kwargs...) = T4ATensorTrain.evaluate(args...; kwargs...)
+
+function evaluate(ci::T4AMatrixCI.MatrixCI, args...; kwargs...)
+    isempty(kwargs) || throw(ArgumentError("Keyword arguments are not supported for evaluate(::MatrixCI, ...)."))
+    return T4AMatrixCI.evaluate(ci, args...)
+end
+
+function evaluate(ci::T4AMatrixCI.MatrixACA, args...; kwargs...)
+    isempty(kwargs) || throw(ArgumentError("Keyword arguments are not supported for evaluate(::MatrixACA, ...)."))
+    return T4AMatrixCI.evaluate(ci, args...)
+end
+export evaluate
+
+# -----------------------------------------------------------------------------
+# Backwards-compatible `crossinterpolate` entry point
+#
+# `T4ATensorCI` historically provided both:
+# - `crossinterpolate(::AbstractMatrix)` -> matrix cross interpolation (now in T4AMatrixCI)
+# - `crossinterpolate(::Type{ValueType}, f, localdims, ...)` -> TensorCI1 constructor (still here)
+#
+# We keep `crossinterpolate` owned by T4ATensorCI and forward the matrix method.
+# -----------------------------------------------------------------------------
+
+crossinterpolate(a::AbstractMatrix; kwargs...) = T4AMatrixCI.crossinterpolate(a; kwargs...)
 
 include("util.jl")
 include("sweepstrategies.jl")
-include("abstractmatrixci.jl")
-include("matrixci.jl")
-include("matrixaca.jl")
-include("matrixlu.jl")
-include("matrixluci.jl")
 include("indexset.jl")
-include("abstracttensortrain.jl")
-include("cachedtensortrain.jl")
 include("batcheval.jl")
 include("cachedfunction.jl")
-include("tensortrain.jl")
 include("tensorci1.jl")
 include("globalpivotfinder.jl")
 include("tensorci2.jl")
diff --git a/src/contraction.jl b/src/contraction.jl
index af847e6..8168402 100644
--- a/src/contraction.jl
+++ b/src/contraction.jl
@@ -1,3 +1,6 @@
+# TCI-specific contraction code
+# The basic contract, contract_naive, contract_zipup are now in T4ATensorTrain.jl
+
 """
 Contraction of two TTOs
 Optionally, the contraction can be done with a function applied to the result.
@@ -68,28 +71,12 @@ _localdims(obj::Contraction{<:Any}, n::Int)::Tuple{Int,Int} =
 
 _getindex(x, indices) = ntuple(i -> x[indices[i]], length(indices))
 
-function _contract(
-    a::AbstractArray{T1,N1},
-    b::AbstractArray{T2,N2},
-    idx_a::NTuple{n1,Int},
-    idx_b::NTuple{n2,Int}
-) where {T1,T2,N1,N2,n1,n2}
-    length(idx_a) == length(idx_b) || error("length(idx_a) != length(idx_b)")
-    # check if idx_a contains only unique elements
-    length(unique(idx_a)) == length(idx_a) || error("idx_a contains duplicate elements")
-    # check if idx_b contains only unique elements
-    length(unique(idx_b)) == length(idx_b) || error("idx_b contains duplicate elements")
-    # check if idx_a and idx_b are subsets of 1:N1 and 1:N2
-    all(1 <= idx <= N1 for idx in idx_a) || error("idx_a contains elements out of range")
-    all(1 <= idx <= N2 for idx in idx_b) || error("idx_b contains elements out of range")
-
-    rest_idx_a = setdiff(1:N1, idx_a)
-    rest_idx_b = setdiff(1:N2, idx_b)
-
-    amat = reshape(permutedims(a, (rest_idx_a..., idx_a...)), prod(_getindex(size(a), rest_idx_a)), prod(_getindex(size(a), idx_a)))
-    bmat = reshape(permutedims(b, (idx_b..., rest_idx_b...)), prod(_getindex(size(b), idx_b)), prod(_getindex(size(b), rest_idx_b)))
-
-    return reshape(amat * bmat, _getindex(size(a), rest_idx_a)..., _getindex(size(b), rest_idx_b)...)
+function _extend_cache(oldcache::Matrix{T}, a_ell::Array{T,4}, b_ell::Array{T,4}, i::Int, j::Int) where {T}
+    # (link_a, link_b) * (link_a, s, link_a') => (link_b, s, link_a')
+    tmp1 = _contract(oldcache, a_ell[:, i, :, :], (1,), (1,))
+
+    # (link_b, s, link_a') * (link_b, s, link_b') => (link_a', link_b')
+    return _contract(tmp1, b_ell[:, :, j, :], (1, 2), (1, 2))
 end
 
 function _unfuse_idx(obj::Contraction{T}, n::Int, idx::Int)::Tuple{Int,Int} where {T}
@@ -100,14 +87,6 @@ function _fuse_idx(obj::Contraction{T}, n::Int, idx::Tuple{Int,Int})::Int where
     return idx[1] + _localdims(obj, n)[1] * (idx[2] - 1)
 end
 
-function _extend_cache(oldcache::Matrix{T}, a_ell::Array{T,4}, b_ell::Array{T,4}, i::Int, j::Int) where {T}
-    # (link_a, link_b) * (link_a, s, link_a') => (link_b, s, link_a')
-    tmp1 = _contract(oldcache, a_ell[:, i, :, :], (1,), (1,))
-
-    # (link_b, s, link_a') * (link_b, s, link_b') => (link_a', link_b')
-    return _contract(tmp1, b_ell[:, :, j, :], (1, 2), (1, 2))
-end
-
 # Compute left environment
 function evaluateleft(
     obj::Contraction{T},
@@ -334,43 +313,6 @@ function batchevaluate(obj::Contraction{T},
     return reshape(res, return_size)
 end
 
-
-function _contractsitetensors(a::Array{T,4}, b::Array{T,4})::Array{T,4} where {T}
-    # indices: (link_a, s1, s2, link_a') * (link_b, s2, s3, link_b')
-    ab::Array{T,6} = _contract(a, b, (3,), (2,))
-    # => indices: (link_a, s1, link_a', link_b, s3, link_b')
-    abpermuted = permutedims(ab, (1, 4, 2, 5, 3, 6))
-    # => indices: (link_a, link_b, s1, s3, link_a', link_b')
-    return reshape(abpermuted,
-        size(a, 1) * size(b, 1),  # link_a * link_b
-        size(a, 2), size(b, 3),  # s1, s3
-        size(a, 4) * size(b, 4)   # link_a' * link_b'
-    )
-end
-
-function contract_naive(
-    a::TensorTrain{T,4}, b::TensorTrain{T,4};
-    tolerance=0.0, maxbonddim=typemax(Int)
-)::TensorTrain{T,4} where {T}
-    return contract_naive(Contraction(a, b); tolerance, maxbonddim)
-end
-
-function contract_naive(
-    obj::Contraction{T};
-    tolerance=0.0, maxbonddim=typemax(Int)
-)::TensorTrain{T,4} where {T}
-    if obj.f isa Function
-        error("Naive contraction implementation cannot contract matrix product with a function. Use algorithm=:TCI instead.")
-    end
-
-    a, b = obj.mpo
-    tt = TensorTrain{T,4}(_contractsitetensors.(sitetensors(a), sitetensors(b)))
-    if tolerance > 0 || maxbonddim < typemax(Int)
-        compress!(tt, :SVD; tolerance, maxbonddim)
-    end
-    return tt
-end
-
 function _reshape_fusesites(t::AbstractArray{T}) where {T}
     shape = size(t)
     return reshape(t, shape[1], prod(shape[2:end-1]), shape[end]), shape[2:end-1]
@@ -436,57 +378,7 @@ function contract_TCI(
 end
 
 """
-See SVD version:
-https://tensornetwork.org/mps/algorithms/zip_up_mpo/
-"""
-function contract_zipup(
-    A::TensorTrain{ValueType,4},
-    B::TensorTrain{ValueType,4};
-    tolerance::Float64=1e-12,
-    method::Symbol=:SVD, # :SVD, :LU
-    maxbonddim::Int=typemax(Int)
-) where {ValueType}
-    if length(A) != length(B)
-        throw(ArgumentError("Cannot contract tensor trains with different length."))
-    end
-    R::Array{ValueType,3} = ones(ValueType, 1, 1, 1)
-
-    sitetensors = Vector{Array{ValueType,4}}(undef, length(A))
-    for n in 1:length(A)
-        # R:     (link_ab, link_an, link_bn)
-        # A[n]:  (link_an, s_n, s_n', link_anp1)
-        RA = _contract(R, A[n], (2,), (1,))
-
-        # RA[n]: (link_ab, link_bn, s_n, s_n' link_anp1)
-        # B[n]:  (link_bn, s_n', s_n'', link_bnp1)
-        # C:     (link_ab, s_n, link_anp1, s_n'', link_bnp1)
-        #  =>    (link_ab, s_n, s_n'', link_anp1, link_bnp1)
-        C = permutedims(_contract(RA, B[n], (2, 4), (1, 2)), (1, 2, 4, 3, 5))
-        if n == length(A)
-            sitetensors[n] = reshape(C, size(C)[1:3]..., 1)
-            break
-        end
-
-        # Cmat:  (link_ab * s_n * s_n'', link_anp1 * link_bnp1)
-
-        #lu = rrlu(Cmat; reltol, abstol, leftorthogonal=true)
-        left, right, newbonddim = _factorize(
-            reshape(C, prod(size(C)[1:3]), prod(size(C)[4:5])),
-            method; tolerance, maxbonddim
-        )
-
-        # U:     (link_ab, s_n, s_n'', link_ab_new)
-        sitetensors[n] = reshape(left, size(C)[1:3]..., newbonddim)
-
-        # R:     (link_ab_new, link_an, link_bn)
-        R = reshape(right, newbonddim, size(C)[4:5]...)
-    end
-
-    return TensorTrain{ValueType,4}(sitetensors)
-end
-
-"""
-    function contract(
+    contract(
         A::TensorTrain{V1,4},
         B::TensorTrain{V2,4};
         algorithm::Symbol=:TCI,
@@ -498,19 +390,9 @@ end
 
 Contract two tensor trains `A` and `B`.
 
-Currently, two implementations are available:
- 1. `algorithm=:TCI` constructs a new TCI that fits the contraction of `A` and `B`.
- 2. `algorithm=:naive` uses a naive tensor contraction and subsequent SVD recompression of the tensor train.
- 2. `algorithm=:zipup` uses a naive tensor contraction with on-the-fly LU decomposition.
-
-Arguments:
-- `A` and `B` are the tensor trains to be contracted.
-- `algorithm` chooses the algorithm used to evaluate the contraction.
-- `tolerance` is the tolerance of the TCI or SVD recompression.
-- `maxbonddim` sets the maximum bond dimension of the resulting tensor train.
-- `f` is a function to be applied elementwise to the result. This option is only available with `algorithm=:TCI`.
-- `method` chooses the method used for the factorization in the `algorithm=:zipup` case (`:SVD` or `:LU`).
-- `kwargs...` are forwarded to [`crossinterpolate2`](@ref) if `algorithm=:TCI`.
+Available implementations:
+- `algorithm=:TCI`: TCI-based contraction (implemented in this package).
+- `algorithm=:naive|:zipup`: delegated to `T4ATensorTrain.contract` (non-TCI).
 """
 function contract(
     A::TensorTrain{V1,4},
@@ -521,24 +403,32 @@ function contract(
     f::Union{Nothing,Function}=nothing,
     kwargs...
 )::TensorTrain{promote_type(V1,V2),4} where {V1,V2}
-    Vres = promote_type(V1, V2)
-    A_ = TensorTrain{Vres,4}(A)
-    B_ = TensorTrain{Vres,4}(B)
     if algorithm === :TCI
+        Vres = promote_type(V1, V2)
+        A_ = TensorTrain{Vres,4}(A)
+        B_ = TensorTrain{Vres,4}(B)
         return contract_TCI(A_, B_; tolerance=tolerance, maxbonddim=maxbonddim, f=f, kwargs...)
-    elseif algorithm === :naive
-        if f !== nothing
-            error("Naive contraction implementation cannot contract matrix product with a function. Use algorithm=:TCI instead.")
-        end
-        return contract_naive(A_, B_; tolerance=tolerance, maxbonddim=maxbonddim)
-    elseif algorithm === :zipup
-        if f !== nothing
-            error("Zipup contraction implementation cannot contract matrix product with a function. Use algorithm=:TCI instead.")
-        end
-        return contract_zipup(A_, B_; tolerance, maxbonddim)
-    else
-        throw(ArgumentError("Unknown algorithm $algorithm."))
     end
+
+    f === nothing || error("Only algorithm=:TCI supports `f`. Use algorithm=:TCI or set f=nothing.")
+    return T4ATensorTrain.contract(A, B; algorithm=algorithm, tolerance=tolerance, maxbonddim=maxbonddim, kwargs...)
+end
+
+# Re-export contract_naive with Contraction argument
+function contract_naive(
+    obj::Contraction{T};
+    tolerance=0.0, maxbonddim=typemax(Int)
+)::TensorTrain{T,4} where {T}
+    if obj.f isa Function
+        error("Naive contraction implementation cannot contract matrix product with a function. Use algorithm=:TCI instead.")
+    end
+
+    a, b = obj.mpo
+    tt = TensorTrain{T,4}(T4ATensorTrain._contractsitetensors.(sitetensors(a), sitetensors(b)))
+    if tolerance > 0 || maxbonddim < typemax(Int)
+        compress!(tt, :SVD; tolerance, maxbonddim)
+    end
+    return tt
 end
 
 function contract(
diff --git a/src/conversion.jl b/src/conversion.jl
index 00a6d26..0dee297 100644
--- a/src/conversion.jl
+++ b/src/conversion.jl
@@ -1,25 +1,3 @@
-function MatrixACA(lu::rrLU{T}) where {T}
-    aca = MatrixACA(T, size(lu)...)
-    aca.rowindices = rowindices(lu)
-    aca.colindices = colindices(lu)
-    aca.u = left(lu)
-    aca.v = right(lu)
-    aca.alpha = 1 ./ diag(lu)
-
-    if lu.leftorthogonal
-        # Set the permuted diagonal of aca.u to the diagonal elements of lu.
-        for j in axes(aca.u, 2)
-            aca.u[:, j] *= diag(lu)[j]
-        end
-    else
-        # Same with aca.v
-        for i in axes(aca.v, 1)
-            aca.v[i, :] *= diag(lu)[i]
-        end
-    end
-    return aca
-end
-
 function TensorCI1{ValueType}(
     tci2::TensorCI2{ValueType},
     f;
diff --git a/test/test_with_aqua.jl b/test/test_with_aqua.jl
index 68a9ad5..379e2f6 100644
--- a/test/test_with_aqua.jl
+++ b/test/test_with_aqua.jl
@@ -2,5 +2,5 @@ using Aqua
 import T4ATensorCI as TCI
 
 @testset "Aqua" begin
-    Aqua.test_all(TCI; ambiguities = false, unbound_args = false, deps_compat = false)
+    Aqua.test_all(TCI; ambiguities = false, unbound_args = false, deps_compat = false, persistent_tasks = false)
 end