diff --git a/docs/RunTorchModel.md b/docs/RunTorchModel.md
index 65030ac42e..6ca11ea635 100644
--- a/docs/RunTorchModel.md
+++ b/docs/RunTorchModel.md
@@ -22,7 +22,7 @@ cd build-light
 cmake -DCMAKE_CXX_COMPILER=/usr/bin/c++ \
       -DONNX_MLIR_ENABLE_PYRUNTIME_LIGHT=ON \
       ..
-make OMCreatONNXMLIRTOrchPackage
+make OMCreateONNXMLIRTorchPackage
 pip3 install -e src/Runtime/python/onnxmlirtorch
 ```
 ## Install from pip repository
diff --git a/src/Runtime/python/CMakeLists.txt b/src/Runtime/python/CMakeLists.txt
index fd340ce068..3153c7229c 100644
--- a/src/Runtime/python/CMakeLists.txt
+++ b/src/Runtime/python/CMakeLists.txt
@@ -119,9 +119,10 @@ add_custom_target(OMCreatePyRuntimePackage
 )
 
 # Target to prepare onnxmlirtorch package
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/onnxmlirtorch/src/onnxmlirtorch/libs)
 add_custom_target(OMCreateONNXMLIRTorchPackage
   COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/onnxmlirtorch ${CMAKE_CURRENT_BINARY_DIR}
-  COMMAND cp ${ONNX_MLIR_BIN_ROOT}/${CMAKE_BUILD_TYPE}/lib/PyRuntimeC.*.so ${CMAKE_CURRENT_BINARY_DIR}/onnxmlirtorch/src/onnxmlirtorch/
+  COMMAND cp ${ONNX_MLIR_BIN_ROOT}/${CMAKE_BUILD_TYPE}/lib/PyRuntimeC.*.so ${CMAKE_CURRENT_BINARY_DIR}/onnxmlirtorch/src/onnxmlirtorch/libs
   COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/onnxmlirdocker.py ${CMAKE_CURRENT_BINARY_DIR}/onnxmlirtorch/src/onnxmlirtorch
   COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/PyRuntime.py ${CMAKE_CURRENT_BINARY_DIR}/onnxmlirtorch/src/onnxmlirtorch
   DEPENDS PyRuntimeC
diff --git a/src/Runtime/python/PyRuntime.py b/src/Runtime/python/PyRuntime.py
index 79c49d0d95..29428e71e0 100644
--- a/src/Runtime/python/PyRuntime.py
+++ b/src/Runtime/python/PyRuntime.py
@@ -15,9 +15,9 @@
 import pkgutil
 
 if __package__ == "onnxmlir" or __package__ == "onnxmlirtorch":
-    loader = pkgutil.get_loader("onnxmlir")
+    loader = pkgutil.get_loader(__package__)
     PyRuntimeC_module = os.path.join(
-        os.path.dirname(loader.get_filename("onnxmlir")), "libs"
+        os.path.dirname(loader.get_filename(__package__)), "libs"
     )
     sys.path.append(PyRuntimeC_module)
 
diff --git a/src/Runtime/python/onnxmlirdocker.py b/src/Runtime/python/onnxmlirdocker.py
index ef874f8ffb..2fd36ef452 100644
--- a/src/Runtime/python/onnxmlirdocker.py
+++ b/src/Runtime/python/onnxmlirdocker.py
@@ -58,6 +58,7 @@ class InferenceSession:
     def __init__(self, model_path, **kwargs):
         self.debug = False
         self.session = None
+        self.output_dir = tempfile.TemporaryDirectory()
         self.handleParameters(model_path, **kwargs)
         if self.session is not None:
             return
@@ -117,7 +118,7 @@ def handleParameters(self, model_path, **kwargs):
             self.compiled_model += ".so"
             self.output_dirname = os.path.dirname(self.compiled_model)
         else:
-            self.output_dirname = tempfile.TemporaryDirectory().name
+            self.output_dirname = self.output_dir.name
             self.compiled_model = os.path.join(
                 self.output_dirname, self.model_basename.removesuffix(self.model_suffix)
             )
@@ -292,7 +293,7 @@ def Compile(self):
 
     def getSession(self):
         # When the script is used in package onnxmlir, the files to be imported
-        # are within the package. Path in the pakcage should be used.
+        # are within the package. Path in the package should be used.
         # Otherwise, env variable ONNX_MLIR_HOME is used to for import path
         if __package__ == "onnxmlir" or __package__ == "onnxmlirtorch":
             try:
diff --git a/src/Runtime/python/onnxmlirtorch/src/onnxmlirtorch/onnxmlirtorch.py b/src/Runtime/python/onnxmlirtorch/src/onnxmlirtorch/onnxmlirtorch.py
index 086ecd5f52..64713141f2 100644
--- a/src/Runtime/python/onnxmlirtorch/src/onnxmlirtorch/onnxmlirtorch.py
+++ b/src/Runtime/python/onnxmlirtorch/src/onnxmlirtorch/onnxmlirtorch.py
@@ -123,13 +123,16 @@ class config:
     cache_size = 3
 
 
+globalSessionCache = SessionCache(config.cache_size)
+
+
 class ONNXMLIRTorch:
     def __init__(self, torch_model, **kwargs):
         self.torch_model = torch_model
         # Temporary directory
         self.workdir = tempfile.TemporaryDirectory()
         self.default_model_name = "model"
-        self.sessionCache = SessionCache(config.cache_size)
+        self.sessionCache = globalSessionCache
         if "compile_tag" in kwargs.keys():
             self.tag = kwargs["compile_tag"]
         else:
@@ -195,5 +198,5 @@ def forward(self, *args, **kwargs):
         _, sess = cached_session
 
         # Run the inference
-        outputs = sess.run(None, np_args)
+        outputs = sess.run(np_args)
         return [torch.from_numpy(output) for output in outputs]
diff --git a/src/Runtime/python/onnxmlirtorch/tests/torch_compile_add.py b/src/Runtime/python/onnxmlirtorch/tests/torch_compile_add.py
index b46a5bca19..2ef8a3659c 100644
--- a/src/Runtime/python/onnxmlirtorch/tests/torch_compile_add.py
+++ b/src/Runtime/python/onnxmlirtorch/tests/torch_compile_add.py
@@ -45,13 +45,13 @@ def forward(self, x, y):
 output = opt_mod(input, input)
 print("output: ", output)
 
-
-# Second inference
+# Second inference: different input shapes, so the model is recompiled.
 input1 = torch.randn(3)
 input2 = torch.randn(3)
 output1 = opt_mod(input1, input2)
 print("output: ", output1)
 
+# Third inference: reuses the compiled .so from the cache; no recompilation.
 input3 = torch.randn(2)
 output2 = opt_mod(input3, input3)
 print("output: ", output2)
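
The main behavioral change in this patch is moving the `SessionCache` from a per-instance attribute to module scope, so every `ONNXMLIRTorch` wrapper shares one LRU cache of compiled sessions and a recompile in one wrapper benefits all of them. Below is a minimal self-contained sketch of that pattern; the `SessionCache` and `Wrapper` classes here are illustrative stand-ins, not the actual implementations in `onnxmlirtorch.py`:

```python
import collections


class SessionCache:
    # Stand-in for onnxmlirtorch's cache: maps a key (e.g. input
    # shapes) to a compiled session, evicting the least recently
    # used entry once cache_size is exceeded.
    def __init__(self, cache_size):
        self.cache_size = cache_size
        self.entries = collections.OrderedDict()

    def get(self, key):
        if key in self.entries:
            self.entries.move_to_end(key)  # mark as recently used
            return self.entries[key]
        return None

    def put(self, key, session):
        self.entries[key] = session
        self.entries.move_to_end(key)
        if len(self.entries) > self.cache_size:
            self.entries.popitem(last=False)  # evict the oldest entry


# Module-level cache, mirroring the patch: shared by all instances.
globalSessionCache = SessionCache(cache_size=3)


class Wrapper:
    # Hypothetical stand-in for ONNXMLIRTorch: each instance now
    # references the shared cache instead of creating its own.
    def __init__(self):
        self.sessionCache = globalSessionCache

    def forward(self, shape):
        sess = self.sessionCache.get(shape)
        if sess is None:
            sess = f"compiled-for-{shape}"  # pretend this compiles a .so
            self.sessionCache.put(shape, sess)
        return sess


a, b = Wrapper(), Wrapper()
a.forward((2,))
# Cache hit in a *different* instance: no recompilation, which is the
# sharing the module-level cache buys over the old per-instance cache.
assert b.forward((2,)) == "compiled-for-(2,)"
```

This also matches the comments added to `torch_compile_add.py` above: a new input shape compiles and caches a new session, while a previously seen shape is served from the shared cache.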