diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 00000000..b2316674
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/.gitignore b/.gitignore
index df1576d7..33d27947 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 .env
 debug*
 .*
+!.github
 # Byte-compiled / optimized / DLL files
 biomni_release/biomni_env/biomni_tools/
 open_source_process.ipynb
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7b5c4676..c434dbe4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,6 +32,13 @@ repos:
           - id: check-merge-conflict
           - id: no-commit-to-branch
             args: ["--branch=main"]
+    - repo: https://github.com/codespell-project/codespell
+      # Configuration for codespell is in pyproject.toml
+      rev: v2.4.1
+      hooks:
+          - id: codespell
+            additional_dependencies:
+                - tomli; python_version<'3.11'
     #- repo: https://github.com/pre-commit/mirrors-mypy
     # rev: v1.16.1
     #  hooks:
diff --git a/biomni/agent/a1.py b/biomni/agent/a1.py
index 7a62e59f..3f5c06fe 100644
--- a/biomni/agent/a1.py
+++ b/biomni/agent/a1.py
@@ -1097,7 +1097,7 @@ def format_item_with_description(name, description):
         # Base prompt
         prompt_modifier = """
 You are a helpful biomedical assistant assigned with the task of problem-solving.
-To achieve this, you will be using an interactive coding environment equipped with a variety of tool functions, data, and softwares to assist you throughout the process.
+To achieve this, you will be using an interactive coding environment equipped with a variety of tool functions, data, and software to assist you throughout the process.
 
 Given a task, make a plan first. The plan should be a numbered list of steps that you will take to solve the task. Be specific and detailed.
 Format your plan as a checklist with empty checkboxes like this:
@@ -1125,7 +1125,7 @@ def format_item_with_description(name, description):
    - For Python code (default): <execute> print("Hello World!") </execute>
    - For R code: <execute> #!R\nlibrary(ggplot2)\nprint("Hello from R") </execute>
    - For Bash scripts and commands: <execute> #!BASH\necho "Hello from Bash"\nls -la </execute>
-   - For CLI softwares, use Bash scripts.
+   - For CLI software, use Bash scripts.
 
 2) When you think it is ready, directly provide a solution that adheres to the required format for the given task to the user. Your solution should be enclosed using "<solution>" tag, for example: The answer is <solution> A </solution>. IMPORTANT: You must end the solution block with </solution> tag.
 
diff --git a/biomni/tool/genomics.py b/biomni/tool/genomics.py
index d84d26bc..27293031 100644
--- a/biomni/tool/genomics.py
+++ b/biomni/tool/genomics.py
@@ -74,7 +74,7 @@ def unsupervised_celltype_transfer_between_scRNA_datasets(
         prediction_mode="retrain",
     ).adata
 
-    # passing arugments this way decreases chance of LLM generation and parsing errors
+    # passing arguments this way decreases chance of LLM generation and parsing errors
     flags = {
         "CELLTYPIST": CELLTYPIST,
         "KNN_BBKNN": KNN_BBKNN,
@@ -864,7 +864,7 @@ def get_uce_embeddings_scRNA(
     DATA_ROOT="/dfs/project/bioagentos/data/singlecell/",
     custom_args=None,
 ):
-    """The UCE embeddings are usually our default tools to get cell embeddings, we map UCE embeddings to IMA referece dataset and get the cell types for a better understanding.
+    """The UCE embeddings are usually our default tools to get cell embeddings, we map UCE embeddings to IMA reference dataset and get the cell types for a better understanding.
     The custom_args is a list of strings that will be passed as command line arguments to the UCE script,
     like ["--adata_path", adata_file, "--dir", output_dir]. The default value is None.
     """
diff --git a/biomni/tool/literature.py b/biomni/tool/literature.py
index e4b31b14..d36eaf93 100644
--- a/biomni/tool/literature.py
+++ b/biomni/tool/literature.py
@@ -187,7 +187,7 @@ def search_google(query: str, num_results: int = 3, language: str = "en") -> lis
     """Search using Google search.
 
     Args:
-        query (str): The search query (e.g., "protocol text or seach question")
+        query (str): The search query (e.g., "protocol text or search question")
         num_results (int): Number of results to return (default: 10)
         language (str): Language code for search results (default: 'en')
         pause (float): Pause between searches to avoid rate limiting (default: 2.0 seconds)
diff --git a/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt b/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt
index 9a87b985..bfc98fc1 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Affinity Purification of Recombinant Antibodies with Protein A or Protein G.txt	
@@ -209,7 +209,7 @@ Section 1: Affinity chromatography
 23. (Optional) Regenerate the column by washing with 25 mL of Protein A/G binding
     buffer and store in 20% ethanol at 4 °C.
 
-    💡 PRO TIP: Columns may be re-used up to 5x when purifying the same
+    💡 PRO TIP: Columns may be reused up to 5x when purifying the same
     recombinant antibody.
 
 Section 2: Buffer exchange
diff --git a/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt b/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt
index 0c04f2c9..2f415afa 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Isolating a Monoclonal Cell Population by Limiting Dilution.txt	
@@ -8,7 +8,7 @@ INTRODUCTION
 This protocol describes how to generate a monoclonal cell line from a polyclonal
 pool of stable cells.
 
-Transducing cells with lentivirus results in a heterogenous polyclonal population
+Transducing cells with lentivirus results in a heterogeneous polyclonal population
 that varies in the number of integration events and the site(s) of proviral
 integration across cells. Selective pressure on this heterogeneous cell pool
 could lead to reduced transgene expression over time, as the lower expressing
diff --git a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt
index 418ea2d1..3ce952f8 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Perform Sequence Analysis.txt	
@@ -53,7 +53,7 @@ recommends using Addgene's sequencing results as a reference for primer design.
 SEQUENCING RESULTS
 ================================================================================
 
-A good sequencing reaction will produce between 300-900 base pairs of useable
+A good sequencing reaction will produce between 300-900 base pairs of usable
 sequence. You should receive your sequencing results as a trace file (.ab1) which
 graphically depicts the sequence as a series of colored peaks corresponding to
 one of the four nucleotide bases. This is an example of a trace file from a
diff --git a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt
index 54ab07e5..d689d1db 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Protocol - How to Run an Agarose Gel.txt	
@@ -46,7 +46,7 @@ Microwave for 1-3 min until the agarose is completely dissolved (but do not over
 Caution
 HOT! Be careful stirring, eruptive boiling can occur.
 Pro-Tip
-It is a good idea to microwave for 30-45 sec, stop and swirl, and then continue towards a boil. Keep an eye on it the solution has a tendancy to boil over. Placing saran wrap over the top of the flask can help with this, but is not necessary if you pay close attention.
+It is a good idea to microwave for 30-45 sec, stop and swirl, and then continue towards a boil. Keep an eye on it, as the solution has a tendency to boil over. Placing saran wrap over the top of the flask can help with this, but is not necessary if you pay close attention.
 Let agarose solution cool down to about 50 °C (about when you can comfortably keep your hand on the flask), about 5 mins.
 Optional
 : Add ethidium bromide (EtBr) to a final concentration of approximately 0.2-0.5 μg/mL (usually about 2-3 μl of lab stock solution per 100 mL gel). EtBr binds to the DNA and allows you to visualize the DNA under ultraviolet (UV) light.
diff --git a/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt b/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt
index 9bc463a2..df539a27 100644
--- a/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt	
+++ b/biomni/tool/protocols/addgene/Addgene_ Virus Protocol - Generating Stable Cell Lines.txt	
@@ -12,7 +12,7 @@ Mol Bio Protocols
 Viral Service
 Introduction
 This protocol can be used to generate stable cell lines expressing a gene of interest from an integrated lentiviral vector. Unlike the short-term protein expression observed using transient transfection approaches, generating cell lines using lentiviral vectors enables long-term protein expression studies. Moreover, repeating experiments in a stable cell line, as opposed to transiently-transfected cells, increases reproducibility, as it eliminates the variation associated with repeated transient transfection.
-Some lentiviral vectors deliver mammalian antibiotic resistance (e.g., puromycin, blasticidin), which enables selection of a stable cell culture after transduction. Performing antibiotic selection on transduced cells enables elimination of untransduced cells, resulting in a more homogenous (but still polyclonal) cell population. Depending on the transducibility of the cell line used, this antibiotic selection may be a vital step for obtaining a population of cells that have taken up the lentiviral transgene. Note that not all lentiviral vectors deliver antibiotic resistance.
+Some lentiviral vectors deliver mammalian antibiotic resistance (e.g., puromycin, blasticidin), which enables selection of a stable cell culture after transduction. Performing antibiotic selection on transduced cells enables elimination of untransduced cells, resulting in a more homogeneous (but still polyclonal) cell population. Depending on the transducibility of the cell line used, this antibiotic selection may be a vital step for obtaining a population of cells that have taken up the lentiviral transgene. Note that not all lentiviral vectors deliver antibiotic resistance.
 This protocol was established using 293T cells but can be adapted to alternative cell lines.
 Workflow Timeline
 Day 0:
diff --git a/biomni/utils.py b/biomni/utils.py
index 09e1ef91..0e5bb2c5 100644
--- a/biomni/utils.py
+++ b/biomni/utils.py
@@ -270,7 +270,7 @@ def function_to_api_schema(function_string, llm):
     For variable without default values, set them as None, not null.
     For variable with boolean values, use capitalized True or False, not true or false.
     Do not add any return type in the docstring.
-    Be as clear and succint as possible for the descriptions. Please do not make it overly verbose.
+    Be as clear and succinct as possible for the descriptions. Please do not make it overly verbose.
     Here is the code snippet:
     {code}
     """
@@ -684,7 +684,7 @@ def on_chat_model_start(self, serialized, messages, **kwargs):
 
 class NodeLogger(BaseCallbackHandler):
     def on_llm_end(self, response, **kwargs):  # response of type LLMResult
-        for generations in response.generations:  # response.generations of type List[List[Generations]] becuase "each input could have multiple candidate generations"
+        for generations in response.generations:  # response.generations of type List[List[Generations]] because "each input could have multiple candidate generations"
             for generation in generations:
                 generated_text = generation.message.content
                 # token_usage = generation.message.response_metadata["token_usage"]
diff --git a/biomni_env/README.md b/biomni_env/README.md
index a561401c..cb69f281 100644
--- a/biomni_env/README.md
+++ b/biomni_env/README.md
@@ -9,7 +9,7 @@ This directory contains scripts and configuration files to set up a comprehensiv
    ```
 
 2. Setting up the environment:
-- (a) If you want to use or try out the basic agent without the full E1 or install your own softwares, run the following script:
+- (a) If you want to use or try out the basic agent without the full E1 or install your own software, run the following script:
 
 ```bash
 conda env create -f environment.yml
diff --git a/biomni_env/bio_env_py310.yml b/biomni_env/bio_env_py310.yml
index db0b40f4..823da6b4 100644
--- a/biomni_env/bio_env_py310.yml
+++ b/biomni_env/bio_env_py310.yml
@@ -11,7 +11,7 @@ dependencies:
   - pip
 
 # Purpose: Python 3.10 compatibility environment for tools not yet supporting newer Python versions.
-# Automaticlly setup on setup.sh execution.
+# Automatically set up on setup.sh execution.
 # Contains tools that require Python 3.10 or earlier versions.
 # Can be create with:
 #   micromamba create -f bio_env_py310.yml   OR   conda env create -f bio_env_py310.yml
diff --git a/biomni_env/new_software_v008.sh b/biomni_env/new_software_v008.sh
index 306d6885..1005f579 100644
--- a/biomni_env/new_software_v008.sh
+++ b/biomni_env/new_software_v008.sh
@@ -10,7 +10,7 @@ pip install pybiomart
 pip install fair-esm
 pip install uv
 uv pip install transcriptformer
-pip install "zarr>=2.0,<3.0" #this resolved transcripformer download isses
+pip install "zarr>=2.0,<3.0" #this resolved transcriptformer download issues
 uv tool install arc-state
 pip install nnunet nibabel nilearn
 pip install mi-googlesearch-python
diff --git a/pyproject.toml b/pyproject.toml
index 0fad3a91..e796c63c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,7 +78,7 @@ ignore = [
     # First line should be in imperative mood; try rephrasing
     "D401",
     ## Disable one in each pair of mutually incompatible rules
-    # We don’t want a blank line before a class docstring
+    # We don't want a blank line before a class docstring
     "D203",
     # We want docstrings to start immediately after the opening triple quote
     "D213",
@@ -103,3 +103,9 @@ ignore = [
     # strip with multi characters
     "B005"
 ]
+
+[tool.codespell]
+skip = '.git*,*.pdf,*.svg,*.css,*.min.*,*/i18n/*,*/build/*,biomni_env/*,.cache,.npm'
+check-hidden = true
+ignore-regex = '^\s*"image/\S+": ".*'
+ignore-words-list = 'scarches,hsa,ser,abl,basf,optmizer,transferrin,te,tbe,tre,ther,commun,theis,inactivate'
diff --git a/tutorials/biomni_101.ipynb b/tutorials/biomni_101.ipynb
index d7366c4e..7f2a406c 100644
--- a/tutorials/biomni_101.ipynb
+++ b/tutorials/biomni_101.ipynb
@@ -768,14 +768,14 @@
       "Rank Gene     Category             Expected Effect\n",
       "--------------------------------------------------------------------------------\n",
       "1    TOX      Transcription Factors KO should reduce exhaustion (master exhaustion TF)\n",
-      "2    PDCD1    Immune Checkpoints   KO should reduce exhaustion (remove PD-1 checkpoin\n",
+      "2    PDCD1    Immune Checkpoints   KO should reduce exhaustion (remove PD-1 checkpoint\n",
       "3    EOMES    Transcription Factors KO should reduce exhaustion (exhaustion-promoting \n",
       "4    HAVCR2   Immune Checkpoints   KO should reduce exhaustion (remove TIM-3 checkpoi\n",
       "5    LAG3     Immune Checkpoints   KO should reduce exhaustion (remove LAG-3 checkpoi\n",
       "6    TIGIT    Immune Checkpoints   KO should reduce exhaustion (remove TIGIT checkpoi\n",
       "7    CTLA4    Immune Checkpoints   KO should enhance early activation\n",
       "8    BATF     Transcription Factors KO should reduce exhaustion (exhaustion-promoting \n",
-      "9    HIF1A    Metabolic Regulators KO may reduce exhaustion (metabolic stress respons\n",
+      "9    HIF1A    Metabolic Regulators KO may reduce exhaustion (metabolic stress response\n",
       "10   MYC      Metabolic Regulators KO may increase exhaustion (metabolic reprogrammin\n",
       "11   LCK      TCR Signaling        KO should reduce TCR signaling strength\n",
       "12   ZAP70    TCR Signaling        KO should reduce TCR signaling strength\n",