From cbafdf9500d24e57eb3ecce73166e0b9197219df Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 16:23:18 -0500
Subject: [PATCH 1/7] Do not gitignore .github

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index df1576d7e..33d279478 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 .env
 debug*
 .*
+!.github
 # Byte-compiled / optimized / DLL files
 biomni_release/biomni_env/biomni_tools/
 open_source_process.ipynb

From cdf617663c11088e65c668483dee97a23d6856da Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 16:26:00 -0500
Subject: [PATCH 2/7] Add rudimentary codespell config

---
 pyproject.toml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0fad3a913..dd1fc1c7a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,7 +78,7 @@ ignore = [
     # First line should be in imperative mood; try rephrasing
     "D401",
     ## Disable one in each pair of mutually incompatible rules
-    # We don’t want a blank line before a class docstring
+    # We don't want a blank line before a class docstring
     "D203",
     # We want docstrings to start immediately after the opening triple quote
     "D213",
@@ -103,3 +103,8 @@ ignore = [
     # strip with multi characters
     "B005"
 ]
+
+[tool.codespell]
+skip = '.git*,*.pdf,*.svg,*.css,*.min.*,*/i18n/*,*/build/*,biomni_env/*'
+check-hidden = true
+ignore-regex = '^\s*"image/\S+": ".*'

From 63974e22cd033ca6e86b79bfb7ef323afd5e8e43 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 16:26:17 -0500
Subject: [PATCH 3/7] Add GitHub Action to run codespell on pushes and PRs to main

---
 .github/workflows/codespell.yml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/workflows/codespell.yml

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 000000000..b23166743
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

From 8535e2d5c42e377d83c082018a635978cece29e4 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 16:26:40 -0500
Subject: [PATCH 4/7] Add pre-commit definition for codespell

---
 .pre-commit-config.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7b5c4676b..c434dbe4f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,6 +32,13 @@ repos:
           - id: check-merge-conflict
           - id: no-commit-to-branch
             args: ["--branch=main"]
+    - repo: https://github.com/codespell-project/codespell
+      # Configuration for codespell is in pyproject.toml
+      rev: v2.4.1
+      hooks:
+          - id: codespell
+            additional_dependencies:
+                - tomli; python_version<'3.11'
     #- repo: https://github.com/pre-commit/mirrors-mypy
     # rev: v1.16.1
     #  hooks:

From 48669e6987de642896e06dcb8241c6008d09296c Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 16:28:10 -0500
Subject: [PATCH 5/7] Update codespell config with additional exclusions

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index dd1fc1c7a..e796c63c8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,6 +105,7 @@ ignore = [
 ]
 
 [tool.codespell]
-skip = '.git*,*.pdf,*.svg,*.css,*.min.*,*/i18n/*,*/build/*,biomni_env/*'
+skip = '.git*,*.pdf,*.svg,*.css,*.min.*,*/i18n/*,*/build/*,biomni_env/*,.cache,.npm'
 check-hidden = true
 ignore-regex = '^\s*"image/\S+": ".*'
+ignore-words-list = 'scarches,hsa,ser,abl,basf,optmizer,transferrin,te,tbe,tre,ther,commun,theis,inactivate'

From 6584439c8cc694e65b4b192e5d5daf52007dcb98 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 21:35:16 -0500
Subject: [PATCH 6/7] Fix ambiguous typos requiring context review

Fixed ambiguous typos:
- seach -> search (biomni/tool/literature.py:190) - docstring example
- respons -> response (tutorials/biomni_101.ipynb:778) - truncated table cell
- checkpoin -> checkpoint (tutorials/biomni_101.ipynb:771) - truncated table cell
---
 biomni/tool/literature.py  | 2 +-
 tutorials/biomni_101.ipynb | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/biomni/tool/literature.py b/biomni/tool/literature.py
index e4b31b143..d36eaf93f 100644
--- a/biomni/tool/literature.py
+++ b/biomni/tool/literature.py
@@ -187,7 +187,7 @@ def search_google(query: str, num_results: int = 3, language: str = "en") -> lis
     """Search using Google search.
 
     Args:
-        query (str): The search query (e.g., "protocol text or seach question")
+        query (str): The search query (e.g., "protocol text or search question")
         num_results (int): Number of results to return (default: 10)
         language (str): Language code for search results (default: 'en')
         pause (float): Pause between searches to avoid rate limiting (default: 2.0 seconds)
diff --git a/tutorials/biomni_101.ipynb b/tutorials/biomni_101.ipynb
index d7366c4e1..7f2a406c2 100644
--- a/tutorials/biomni_101.ipynb
+++ b/tutorials/biomni_101.ipynb
@@ -768,14 +768,14 @@
       "Rank Gene     Category             Expected Effect\n",
       "--------------------------------------------------------------------------------\n",
       "1    TOX      Transcription Factors KO should reduce exhaustion (master exhaustion TF)\n",
-      "2    PDCD1    Immune Checkpoints   KO should reduce exhaustion (remove PD-1 checkpoin\n",
+      "2    PDCD1    Immune Checkpoints   KO should reduce exhaustion (remove PD-1 checkpoint\n",
       "3    EOMES    Transcription Factors KO should reduce exhaustion (exhaustion-promoting \n",
       "4    HAVCR2   Immune Checkpoints   KO should reduce exhaustion (remove TIM-3 checkpoi\n",
       "5    LAG3     Immune Checkpoints   KO should reduce exhaustion (remove LAG-3 checkpoi\n",
       "6    TIGIT    Immune Checkpoints   KO should reduce exhaustion (remove TIGIT checkpoi\n",
       "7    CTLA4    Immune Checkpoints   KO should enhance early activation\n",
       "8    BATF     Transcription Factors KO should reduce exhaustion (exhaustion-promoting \n",
-      "9    HIF1A    Metabolic Regulators KO may reduce exhaustion (metabolic stress respons\n",
+      "9    HIF1A    Metabolic Regulators KO may reduce exhaustion (metabolic stress response\n",
       "10   MYC      Metabolic Regulators KO may increase exhaustion (metabolic reprogrammin\n",
       "11   LCK      TCR Signaling        KO should reduce TCR signaling strength\n",
       "12   ZAP70    TCR Signaling        KO should reduce TCR signaling strength\n",

From 4f7edeced4570b6c7050a4d02f397f1933520663 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 20 Jan 2026 22:03:29 -0500
Subject: [PATCH 7/7] [DATALAD RUNCMD] chore: fix non-ambiguous typos with
 codespell -w

Fixed typos automatically:
- succint -> succinct (biomni/utils.py:273)
- becuase -> because (biomni/utils.py:687)
- softwares -> software (biomni/agent/a1.py:1100, 1128)
- arugments -> arguments (biomni/tool/genomics.py:77)
- referece -> reference (biomni/tool/genomics.py:867)
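- re-used -> reused (Addgene protocol: Affinity Purification of Recombinant Antibodies)
- heterogenous -> heterogeneous (Addgene protocol: Isolating a Monoclonal Cell Population)
- useable -> usable (Addgene protocol: How to Perform Sequence Analysis)
- tendancy -> tendency (Addgene protocol: How to Run an Agarose Gel)
- homogenous -> homogeneous (Addgene protocol: Generating Stable Cell Lines)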
- softwares -> software (biomni_env/README.md:12)
- Automaticlly -> Automatically (biomni_env/bio_env_py310.yml:14)
- isses -> issues (biomni_env/new_software_v008.sh:13)

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "uvx codespell -w",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 biomni/agent/a1.py                                            | 4 ++--
 biomni/tool/genomics.py                                       | 4 ++--
 ... of Recombinant Antibodies with Protein A or Protein G.txt | 2 +-
 ...ting a Monoclonal Cell Population by Limiting Dilution.txt | 2 +-
 .../Addgene_ Protocol - How to Perform Sequence Analysis.txt  | 2 +-
 .../addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt | 2 +-
 ...Addgene_ Virus Protocol - Generating Stable Cell Lines.txt | 2 +-
 biomni/utils.py                                               | 4 ++--
 biomni_env/README.md                                          | 2 +-
 biomni_env/bio_env_py310.yml                                  | 2 +-
 biomni_env/new_software_v008.sh                               | 2 +-
 11 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/biomni/agent/a1.py b/biomni/agent/a1.py
index 7a62e59f1..3f5c06fe1 100644
--- a/biomni/agent/a1.py
+++ b/biomni/agent/a1.py
@@ -1097,7 +1097,7 @@ def format_item_with_description(name, description):
         # Base prompt
         prompt_modifier = """
 You are a helpful biomedical assistant assigned with the task of problem-solving.
-To achieve this, you will be using an interactive coding environment equipped with a variety of tool functions, data, and softwares to assist you throughout the process.
+To achieve this, you will be using an interactive coding environment equipped with a variety of tool functions, data, and software to assist you throughout the process.
 
 Given a task, make a plan first. The plan should be a numbered list of steps that you will take to solve the task. Be specific and detailed.
 Format your plan as a checklist with empty checkboxes like this:
@@ -1125,7 +1125,7 @@ def format_item_with_description(name, description):
    - For Python code (default): <execute> print("Hello World!") </execute>
    - For R code: <execute> #!R\nlibrary(ggplot2)\nprint("Hello from R") </execute>
    - For Bash scripts and commands: <execute> #!BASH\necho "Hello from Bash"\nls -la </execute>
-   - For CLI softwares, use Bash scripts.
+   - For CLI software, use Bash scripts.
 
 2) When you think it is ready, directly provide a solution that adheres to the required format for the given task to the user. Your solution should be enclosed using "<solution>" tag, for example: The answer is <solution> A </solution>. IMPORTANT: You must end the solution block with </solution> tag.
 
diff --git a/biomni/tool/genomics.py b/biomni/tool/genomics.py
index d84d26bc6..272930313 100644
--- a/biomni/tool/genomics.py
+++ b/biomni/tool/genomics.py
@@ -74,7 +74,7 @@ def unsupervised_celltype_transfer_between_scRNA_datasets(
         prediction_mode="retrain",
     ).adata
 
-    # passing arugments this way decreases chance of LLM generation and parsing errors
+    # passing arguments this way decreases chance of LLM generation and parsing errors
     flags = {
         "CELLTYPIST": CELLTYPIST,
         "KNN_BBKNN": KNN_BBKNN,
@@ -864,7 +864,7 @@ def get_uce_embeddings_scRNA(
     DATA_ROOT="/dfs/project/bioagentos/data/singlecell/",
     custom_args=None,
 ):
-    """The UCE embeddings are usually our default tools to get cell embeddings, we map UCE embeddings to IMA referece dataset and get the cell types for a better understanding.
+    """The UCE embeddings are usually our default tools to get cell embeddings, we map UCE embeddings to IMA reference dataset and get the cell types for a better understanding.
     The custom_args is a list of strings that will be passed as command line arguments to the UCE script,
     like ["--adata_path", adata_file, "--dir", output_dir]. The default value is None.
     """
diff --git a/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt b/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt
index 9a87b9856..bfc98fc1e 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt	
@@ -209,7 +209,7 @@ Section 1: Affinity chromatography
 23. (Optional) Regenerate the column by washing with 25 mL of Protein A/G binding
     buffer and store in 20% ethanol at 4 °C.
 
-    💡 PRO TIP: Columns may be re-used up to 5x when purifying the same
+    💡 PRO TIP: Columns may be reused up to 5x when purifying the same
     recombinant antibody.
 
 Section 2: Buffer exchange
diff --git a/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt b/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt
index 0c04f2c9d..2f415afac 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt	
@@ -8,7 +8,7 @@ INTRODUCTION
 This protocol describes how to generate a monoclonal cell line from a polyclonal
 pool of stable cells.
 
-Transducing cells with lentivirus results in a heterogenous polyclonal population
+Transducing cells with lentivirus results in a heterogeneous polyclonal population
 that varies in the number of integration events and the site(s) of proviral
 integration across cells. Selective pressure on this heterogeneous cell pool
 could lead to reduced transgene expression over time, as the lower expressing
diff --git a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt
index 418ea2d11..3ce952f86 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt	
@@ -53,7 +53,7 @@ recommends using Addgene's sequencing results as a reference for primer design.
 SEQUENCING RESULTS
 ================================================================================
 
-A good sequencing reaction will produce between 300-900 base pairs of useable
+A good sequencing reaction will produce between 300-900 base pairs of usable
 sequence. You should receive your sequencing results as a trace file (.ab1) which
 graphically depicts the sequence as a series of colored peaks corresponding to
 one of the four nucleotide bases. This is an example of a trace file from a
diff --git a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt
index 54ab07e5c..d689d1db9 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt	
@@ -46,7 +46,7 @@ Microwave for 1-3 min until the agarose is completely dissolved (but do not over
 Caution
 HOT! Be careful stirring, eruptive boiling can occur.
 Pro-Tip
-It is a good idea to microwave for 30-45 sec, stop and swirl, and then continue towards a boil. Keep an eye on it the solution has a tendancy to boil over. Placing saran wrap over the top of the flask can help with this, but is not necessary if you pay close attention.
+It is a good idea to microwave for 30-45 sec, stop and swirl, and then continue towards a boil. Keep an eye on it the solution has a tendency to boil over. Placing saran wrap over the top of the flask can help with this, but is not necessary if you pay close attention.
 Let agarose solution cool down to about 50 °C (about when you can comfortably keep your hand on the flask), about 5 mins.
 Optional
 : Add ethidium bromide (EtBr) to a final concentration of approximately 0.2-0.5 μg/mL (usually about 2-3 μl of lab stock solution per 100 mL gel). EtBr binds to the DNA and allows you to visualize the DNA under ultraviolet (UV) light.
diff --git a/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt b/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt
index 9bc463a21..df539a27e 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt	
@@ -12,7 +12,7 @@ Mol Bio Protocols
 Viral Service
 Introduction
 This protocol can be used to generate stable cell lines expressing a gene of interest from an integrated lentiviral vector. Unlike the short-term protein expression observed using transient transfection approaches, generating cell lines using lentiviral vectors enables long-term protein expression studies. Moreover, repeating experiments in a stable cell line, as opposed to transiently-transfected cells, increases reproducibility, as it eliminates the variation associated with repeated transient transfection.
-Some lentiviral vectors deliver mammalian antibiotic resistance (e.g., puromycin, blasticidin), which enables selection of a stable cell culture after transduction. Performing antibiotic selection on transduced cells enables elimination of untransduced cells, resulting in a more homogenous (but still polyclonal) cell population. Depending on the transducibility of the cell line used, this antibiotic selection may be a vital step for obtaining a population of cells that have taken up the lentiviral transgene. Note that not all lentiviral vectors deliver antibiotic resistance.
+Some lentiviral vectors deliver mammalian antibiotic resistance (e.g., puromycin, blasticidin), which enables selection of a stable cell culture after transduction. Performing antibiotic selection on transduced cells enables elimination of untransduced cells, resulting in a more homogeneous (but still polyclonal) cell population. Depending on the transducibility of the cell line used, this antibiotic selection may be a vital step for obtaining a population of cells that have taken up the lentiviral transgene. Note that not all lentiviral vectors deliver antibiotic resistance.
 This protocol was established using 293T cells but can be adapted to alternative cell lines.
 Workflow Timeline
 Day 0:
diff --git a/biomni/utils.py b/biomni/utils.py
index 09e1ef91a..0e5bb2c5e 100644
--- a/biomni/utils.py
+++ b/biomni/utils.py
@@ -270,7 +270,7 @@ def function_to_api_schema(function_string, llm):
     For variable without default values, set them as None, not null.
     For variable with boolean values, use capitalized True or False, not true or false.
     Do not add any return type in the docstring.
-    Be as clear and succint as possible for the descriptions. Please do not make it overly verbose.
+    Be as clear and succinct as possible for the descriptions. Please do not make it overly verbose.
     Here is the code snippet:
     {code}
     """
@@ -684,7 +684,7 @@ def on_chat_model_start(self, serialized, messages, **kwargs):
 
 class NodeLogger(BaseCallbackHandler):
     def on_llm_end(self, response, **kwargs):  # response of type LLMResult
-        for generations in response.generations:  # response.generations of type List[List[Generations]] becuase "each input could have multiple candidate generations"
+        for generations in response.generations:  # response.generations of type List[List[Generations]] because "each input could have multiple candidate generations"
             for generation in generations:
                 generated_text = generation.message.content
                 # token_usage = generation.message.response_metadata["token_usage"]
diff --git a/biomni_env/README.md b/biomni_env/README.md
index a561401c9..cb69f2812 100644
--- a/biomni_env/README.md
+++ b/biomni_env/README.md
@@ -9,7 +9,7 @@ This directory contains scripts and configuration files to set up a comprehensiv
    ```
 
 2. Setting up the environment:
-- (a) If you want to use or try out the basic agent without the full E1 or install your own softwares, run the following script:
+- (a) If you want to use or try out the basic agent without the full E1 or install your own software, run the following script:
 
 ```bash
 conda env create -f environment.yml
diff --git a/biomni_env/bio_env_py310.yml b/biomni_env/bio_env_py310.yml
index db0b40f4e..823da6b4f 100644
--- a/biomni_env/bio_env_py310.yml
+++ b/biomni_env/bio_env_py310.yml
@@ -11,7 +11,7 @@ dependencies:
   - pip
 
 # Purpose: Python 3.10 compatibility environment for tools not yet supporting newer Python versions.
-# Automaticlly setup on setup.sh execution.
+# Automatically setup on setup.sh execution.
 # Contains tools that require Python 3.10 or earlier versions.
 # Can be create with:
 #   micromamba create -f bio_env_py310.yml   OR   conda env create -f bio_env_py310.yml
diff --git a/biomni_env/new_software_v008.sh b/biomni_env/new_software_v008.sh
index 306d68857..1005f5795 100644
--- a/biomni_env/new_software_v008.sh
+++ b/biomni_env/new_software_v008.sh
@@ -10,7 +10,7 @@ pip install pybiomart
 pip install fair-esm
 pip install uv
 uv pip install transcriptformer
-pip install "zarr>=2.0,<3.0" #this resolved transcripformer download isses
+pip install "zarr>=2.0,<3.0" #this resolved transcripformer download issues
 uv tool install arc-state
 pip install nnunet nibabel nilearn
 pip install mi-googlesearch-python