Skip to content

Commit 1d83080

Browse files
authored
Merge branch 'main' into olmo2
2 parents f0801c5 + 4f812ef commit 1d83080

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4097
-235
lines changed

.github/workflows/documentation.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
- name: Install dependencies
1515
run: |
1616
pip install -r docs/requirements.txt
17-
pip install -r requirements.txt
17+
pip install -e ".[dev]"
1818
- name: Sphinx build
1919
run: |
2020
sphinx-build docs/source docs/build

.github/workflows/python-app.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,11 @@ jobs:
2323
uses: actions/setup-python@v3
2424
with:
2525
python-version: "3.10"
26-
cache: 'pip'
2726
- name: Install dependencies
2827
run: |
2928
python -m pip install --upgrade pip
3029
pip install flake8 pytest
31-
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
30+
if [ -f pyproject.toml ]; then pip install -e ".[dev]"; fi
3231
- name: Lint with flake8
3332
run: |
3433
# stop the build if there are Python syntax errors or undefined names

MANIFEST.in

Lines changed: 0 additions & 1 deletion
This file was deleted.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<br />
22
<div align="center">
3-
<h1 align="center"><img src="https://i.ibb.co/BNkhQH3/pyvene-logo.png"></h1>
3+
<h1 align="center"><img src="https://i.ibb.co/BNkhQH3/pyvene-logo.png" height="100"></h1>
44
<a href="https://arxiv.org/abs/2403.07809"><strong>Read our paper »</strong></a> | <a href="https://stanfordnlp.github.io/pyvene/"><strong>Read the docs »</strong></a>
55
</div>
66

pyproject.toml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
[project]
2+
name = "pyvene"
3+
version = "0.1.8"
4+
description = "Use Activation Intervention to Interpret Causal Mechanism of Model"
5+
readme = "README.md"
6+
authors = [
7+
{ name="Zhengxuan Wu", email="[email protected]" }
8+
]
9+
classifiers = [
10+
"Development Status :: 3 - Alpha",
11+
"Intended Audience :: Science/Research",
12+
"License :: OSI Approved :: Apache Software License",
13+
"Operating System :: POSIX :: Linux",
14+
"Programming Language :: Python :: 3",
15+
"Programming Language :: Python :: 3.9",
16+
]
17+
license = { text = "Apache License 2.0" }
18+
requires-python = ">=3.9"
19+
dependencies = [
20+
"torch>=2.0.0",
21+
"transformers>=4.55.0.dev0",
22+
"tokenizers>=0.20.0",
23+
"datasets>=3.0.1",
24+
"protobuf>=3.20.0",
25+
"matplotlib>=3.7.4",
26+
"ipywidgets>=8.1.1",
27+
"plotnine>=0.12.4",
28+
"huggingface-hub>=0.25.1",
29+
"numpy>1.24.4",
30+
"fsspec>=2023.6.0",
31+
"accelerate>=0.34.2",
32+
"sentencepiece>=0.2.0",
33+
]
34+
35+
[dependency-groups]
36+
dev = [
37+
"flake8>=7.1.1",
38+
"pytest>=8.3.4",
39+
"ipykernel>=6.29.5",
40+
]
41+
42+
[project.urls]
43+
Documentation = "https://stanfordnlp.github.io/pyvene"
44+
Homepage = "https://github.com/stanfordnlp/pyvene"
45+
Repository = "https://github.com/stanfordnlp/pyvene.git"
46+
47+
[build-system]
48+
requires = ["hatchling"]
49+
build-backend = "hatchling.build"
50+
51+
[tool.hatch.build.targets.wheel]
52+
packages = ["pyvene"]
53+
54+
[tool.hatch.build]
55+
include = [
56+
"pyvene/**/*.py",
57+
"pyvene/**/*.json",
58+
"pyvene/**/*.yaml",
59+
"pyvene/**/*.yml",
60+
"pyvene/**/*.txt",
61+
"pyvene/**/*.md",
62+
"pyvene/**/*.ipynb",
63+
]
64+
65+
[tool.pytest.ini_options]
66+
testpaths = ["tests"]
67+
python_files = ["test_*.py"]
68+
python_classes = ["*Test", "Test*"]
69+
python_functions = ["test_*"]
70+
addopts = "-v"
71+
72+
[tool.flake8]
73+
max-line-length = 127
74+
extend-ignore = ["E203"]
75+
exclude = [".git", "__pycache__", "build", "dist", "*.egg-info"]
76+
per-file-ignores = [
77+
"__init__.py: F401"
78+
]

pyvene/__init__.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22

33
# Generic APIs
44
from .data_generators.causal_model import CausalModel
5-
from .models.intervenable_base import IntervenableModel, IntervenableNdifModel, build_intervenable_model
5+
from .models.intervenable_base import (
6+
IntervenableModel,
7+
IntervenableNdifModel,
8+
build_intervenable_model,
9+
)
610
from .models.configuration_intervenable_model import IntervenableConfig
711
from .models.configuration_intervenable_model import RepresentationConfig
812

@@ -37,7 +41,10 @@
3741
# Utils
3842
from .models.basic_utils import *
3943
from .models.intervention_utils import _do_intervention_by_swap
40-
from .models.intervenable_modelcard import type_to_module_mapping, type_to_dimension_mapping
44+
from .models.intervenable_modelcard import (
45+
type_to_module_mapping,
46+
type_to_dimension_mapping,
47+
)
4148
from .models.gpt2.modelings_intervenable_gpt2 import create_gpt2
4249
from .models.gpt2.modelings_intervenable_gpt2 import create_gpt2_lm
4350
from .models.blip.modelings_intervenable_blip import create_blip
@@ -51,5 +58,8 @@
5158
from .models.gru.modelings_gru import GRUConfig
5259
from .models.llama.modelings_intervenable_llama import create_llama
5360
from .models.mlp.modelings_intervenable_mlp import create_mlp_classifier
54-
from .models.backpack_gpt2.modelings_intervenable_backpack_gpt2 import create_backpack_gpt2
55-
61+
from .models.backpack_gpt2.modelings_intervenable_backpack_gpt2 import (
62+
create_backpack_gpt2,
63+
)
64+
from .models.olmo.modelings_intervenable_olmo import create_olmo
65+
from .models.gpt_oss.modelings_intervenable_gpt_oss import create_gpt_oss

pyvene/models/README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
## How to add new models?
2+
3+
You can prompt an LM to generate new files, or modify existing ones in this folder, by following these steps:
4+
5+
- Get the relevant implementation file from `https://github.com/huggingface/transformers/blob/main/src/transformers/models/` (e.g., the implementation for `gpt-oss` [here](https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_oss/modeling_gpt_oss.py)).
6+
7+
- Copy the whole transformer model src file.
8+
9+
- Create a new folder for your new model.
10+
11+
- Copy one of the existing model files to your new folder (e.g., `/gpt2/modelings_intervenable_gpt2.py` along with the default `__init__.py` file).
12+
13+
- Prompt a language model with the following template:
14+
15+
```text
16+
[YOUR_EXAMPLE_PYVENE_MODEL_FILE_COPY]
17+
18+
Generate a new mapping file based on the existing one above for the following new model:
19+
20+
[HF_TRANSFORMER_MODEL_SRC_FILE_COPY]
21+
22+
You also need to pay attention to these details:
23+
- [OTHER_REQ_GOES_HERE] (e.g., you need to take care of the MoE structure)
24+
```

pyvene/models/blip/modelings_intervenable_blip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
# 'vis.attention_output': ("vision_model.encoder.layers[%s].self_attn", CONST_OUTPUT_HOOK),
2323
# 'vis.attention_input': ("vision_model.encoder.layers[%s].self_attn", CONST_INPUT_HOOK),
2424
"block_input": ("text_encoder.encoder.layer[%s]", CONST_INPUT_HOOK),
25-
"block_output": ("text_encoder.encoder.layer[%s]", CONST_INPUT_HOOK),
25+
"block_output": ("text_encoder.encoder.layer[%s]", CONST_OUTPUT_HOOK),
2626
"mlp_activation": (
2727
"text_encoder.encoder.layer[%s].intermediate.dense",
2828
CONST_OUTPUT_HOOK,

pyvene/models/blip/modelings_intervenable_blip_itm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
# 'vis.attention_output': ("vision_model.encoder.layers[%s].self_attn", CONST_OUTPUT_HOOK),
2323
# 'vis.attention_input': ("vision_model.encoder.layers[%s].self_attn", CONST_INPUT_HOOK),
2424
"block_input": ("text_encoder.encoder.layer[%s]", CONST_INPUT_HOOK),
25-
"block_output": ("text_encoder.encoder.layer[%s]", CONST_INPUT_HOOK),
25+
"block_output": ("text_encoder.encoder.layer[%s]", CONST_OUTPUT_HOOK),
2626
"mlp_activation": (
2727
"text_encoder.encoder.layer[%s].intermediate.dense",
2828
CONST_OUTPUT_HOOK,

pyvene/models/esm/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)