diff --git a/circuit_tracer/attribution/attribute_nnsight.py b/circuit_tracer/attribution/attribute_nnsight.py index ea8fdf7..d50aa1d 100644 --- a/circuit_tracer/attribution/attribute_nnsight.py +++ b/circuit_tracer/attribution/attribute_nnsight.py @@ -285,7 +285,7 @@ def _run_attribution( full_edge_matrix[-n_logits:] = edge_matrix[actual_max_feature_nodes:] graph = Graph( - input_string=model.tokenizer.decode(input_ids), + input_string=str(model.tokenizer.decode(input_ids)), input_tokens=input_ids, logit_targets=targets.logit_targets, logit_probabilities=targets.logit_probabilities, diff --git a/circuit_tracer/replacement_model/replacement_model_nnsight.py b/circuit_tracer/replacement_model/replacement_model_nnsight.py index 390314c..a9dd36d 100644 --- a/circuit_tracer/replacement_model/replacement_model_nnsight.py +++ b/circuit_tracer/replacement_model/replacement_model_nnsight.py @@ -963,7 +963,7 @@ def feature_intervention_generate( with tracer.invoke(): out = save(self.generator.output) return ( - tokenizer.decode(out.squeeze(0)), + str(tokenizer.decode(out.squeeze(0))), torch.cat(all_logits, dim=0), (activation_cache[0] if return_activations else None), ) diff --git a/circuit_tracer/utils/hf_utils.py b/circuit_tracer/utils/hf_utils.py index eb92da9..fb876d1 100644 --- a/circuit_tracer/utils/hf_utils.py +++ b/circuit_tracer/utils/hf_utils.py @@ -10,10 +10,11 @@ import torch import yaml from huggingface_hub import get_token, hf_api, hf_hub_download, snapshot_download -from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER from huggingface_hub.utils.tqdm import tqdm as hf_tqdm from tqdm.contrib.concurrent import thread_map +from circuit_tracer.utils.tl_nnsight_mapping import TRANSFORMERS_GTE_5_0_0 + logger = logging.getLogger(__name__) @@ -336,8 +337,16 @@ def _download(uri: str) -> str: force_download=False, ) - if HF_HUB_ENABLE_HF_TRANSFER: - # Use a simple loop for sequential download if HF_TRANSFER is enabled + # Check for high-performance transfer mode for which we use sequential downloads. + # In huggingface_hub v1.0+ (transformers v5+), HF_XET_HIGH_PERFORMANCE replaces + # the deprecated HF_HUB_ENABLE_HF_TRANSFER environment variable. + if TRANSFORMERS_GTE_5_0_0: + use_sequential = os.environ.get("HF_XET_HIGH_PERFORMANCE", "0") == "1" + else: + use_sequential = os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "0") == "1" + + if use_sequential: + # Use a simple loop for sequential download when high-performance transfer is enabled results = [_download(uri) for uri in uri_list] return dict(zip(uri_list, results)) diff --git a/circuit_tracer/utils/tl_nnsight_mapping.py b/circuit_tracer/utils/tl_nnsight_mapping.py index e33fb5b..7467fe9 100644 --- a/circuit_tracer/utils/tl_nnsight_mapping.py +++ b/circuit_tracer/utils/tl_nnsight_mapping.py @@ -1,6 +1,13 @@ from dataclasses import dataclass from typing import Any, Literal +from packaging import version +import transformers + +# Version detection for transformers v5+ compatibility +TRANSFORMERS_VERSION = version.parse(transformers.__version__) +TRANSFORMERS_GTE_5_0_0 = TRANSFORMERS_VERSION >= version.parse("5.0.0") + @dataclass class TransformerLens_NNSight_Mapping: @@ -71,30 +78,43 @@ class TransformerLens_NNSight_Mapping: }, ) +# Gemma 3 Conditional (multimodal) mapping +# In transformers v5+, the model structure is: model.language_model.layers[...] +# In transformers v4, the model structure is: language_model.layers[...] +_gemma3_cond_prefix = "model.language_model" if TRANSFORMERS_GTE_5_0_0 else "language_model" gemma_3_conditional_mapping = TransformerLens_NNSight_Mapping( model_architecture="Gemma3ForConditionalGeneration", - attention_location_pattern="language_model.layers[{layer}].self_attn.source.attention_interface_0.source.nn_functional_dropout_0", + attention_location_pattern=f"{_gemma3_cond_prefix}.layers[{{layer}}].self_attn.source.attention_interface_0.source.nn_functional_dropout_0", layernorm_scale_location_patterns=[ - "language_model.layers[{layer}].input_layernorm.source.self__norm_0.source.torch_rsqrt_0", - "language_model.layers[{layer}].self_attn.q_norm.source.self__norm_0.source.torch_rsqrt_0", - "language_model.layers[{layer}].self_attn.k_norm.source.self__norm_0.source.torch_rsqrt_0", - "language_model.layers[{layer}].post_attention_layernorm.source.self__norm_0.source.torch_rsqrt_0", - "language_model.layers[{layer}].pre_feedforward_layernorm.source.self__norm_0.source.torch_rsqrt_0", - "language_model.layers[{layer}].post_feedforward_layernorm.source.self__norm_0.source.torch_rsqrt_0", - "language_model.norm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].input_layernorm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].self_attn.q_norm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].self_attn.k_norm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].post_attention_layernorm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].pre_feedforward_layernorm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].post_feedforward_layernorm.source.self__norm_0.source.torch_rsqrt_0", + f"{_gemma3_cond_prefix}.norm.source.self__norm_0.source.torch_rsqrt_0", ], - pre_logit_location="language_model", - embed_location="language_model.embed_tokens", - embed_weight="language_model.embed_tokens.weight", + pre_logit_location=_gemma3_cond_prefix, + embed_location=f"{_gemma3_cond_prefix}.embed_tokens", + embed_weight=f"{_gemma3_cond_prefix}.embed_tokens.weight", unembed_weight="lm_head.weight", feature_hook_mapping={ "ln2.hook_normalized": ( - "language_model.layers[{layer}].pre_feedforward_layernorm.source.self__norm_0", + f"{_gemma3_cond_prefix}.layers[{{layer}}].pre_feedforward_layernorm.source.self__norm_0", + "output", + ), + "hook_resid_mid": ( + f"{_gemma3_cond_prefix}.layers[{{layer}}].pre_feedforward_layernorm", + "input", + ), + "mlp.hook_in": ( + f"{_gemma3_cond_prefix}.layers[{{layer}}].pre_feedforward_layernorm", + "output", + ), + "hook_mlp_out": ( + f"{_gemma3_cond_prefix}.layers[{{layer}}].post_feedforward_layernorm", "output", ), - "hook_resid_mid": ("language_model.layers[{layer}].pre_feedforward_layernorm", "input"), - "mlp.hook_in": ("language_model.layers[{layer}].pre_feedforward_layernorm", "output"), - "hook_mlp_out": ("language_model.layers[{layer}].post_feedforward_layernorm", "output"), }, ) @@ -139,6 +159,15 @@ class TransformerLens_NNSight_Mapping: ) +# GptOss (MoE) mapping +# In transformers v5, GptOssMLP.forward() explicitly reshapes hidden_states from 2D to 3D +# before returning, changing which NNSight .source reference captures the 3D output. +# In transformers v4, the 3D output was captured via self_experts_0. +_gpt_oss_mlp_hook = ( + "model.layers[{layer}].mlp.source.hidden_states_reshape_1" + if TRANSFORMERS_GTE_5_0_0 + else "model.layers[{layer}].mlp.source.self_experts_0" +) gpt_oss_mapping = TransformerLens_NNSight_Mapping( model_architecture="GptOssForCausalLM", attention_location_pattern="model.layers[{layer}].self_attn.source.attention_interface_0.source.nn_functional_dropout_0", @@ -154,8 +183,8 @@ class TransformerLens_NNSight_Mapping: feature_hook_mapping={ "hook_resid_mid": ("model.layers[{layer}].post_attention_layernorm", "input"), "mlp.hook_in": ("model.layers[{layer}].post_attention_layernorm", "output"), - "mlp.hook_out": ("model.layers[{layer}].mlp.source.self_experts_0", "output"), - "hook_mlp_out": ("model.layers[{layer}].mlp.source.self_experts_0", "output"), + "mlp.hook_out": (_gpt_oss_mlp_hook, "output"), + "hook_mlp_out": (_gpt_oss_mlp_hook, "output"), }, ) diff --git a/pyproject.toml b/pyproject.toml index 0a7e4ee..93b6a81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,11 +6,12 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "einops>=0.8.0", - "huggingface_hub<1.0.0", + "huggingface_hub>=0.25.0", "ipykernel>=6.29.5,<7.0.0", "ipywidgets>=8.1.7", "nnsight>=0.5.13", "numpy>=1.24.0", + "packaging>=20.0", "pydantic>=2.0.0", "safetensors>=0.5.0", "seaborn>=0.13.2", @@ -18,11 +19,11 @@ dependencies = [ "torch>=2.0.0", "tqdm>=4.60.0", "transformer-lens>=2.16.0", - "transformers>=4.56.0,<=4.57.3", + "transformers>=4.56.0", ] [project.optional-dependencies] -dev = ["pytest>=8.0.0", "ruff>=0.12.7", "pyright>=1.1.403", "ipython>=8.37.0"] +dev = ["pytest>=8.0.0", "pytest-rerunfailures>=14.0", "ruff>=0.12.7", "pyright>=1.1.403", "ipython>=8.37.0"] [project.scripts] circuit-tracer = "circuit_tracer.__main__:main" diff --git a/tests/test_attributions_gemma3_nnsight.py b/tests/test_attributions_gemma3_nnsight.py index d5c6f1f..1076f78 100644 --- a/tests/test_attributions_gemma3_nnsight.py +++ b/tests/test_attributions_gemma3_nnsight.py @@ -396,6 +396,8 @@ def load_gemma3_with_dummy_transcoders(): def load_gemma3_with_dummy_clt(): cfg = gemma_3_config + assert cfg.num_hidden_layers is not None + assert cfg.hidden_size is not None clt = CrossLayerTranscoder( n_layers=cfg.num_hidden_layers, diff --git a/tests/test_attributions_gpt_oss_nnsight.py b/tests/test_attributions_gpt_oss_nnsight.py index 4916880..b3c8923 100644 --- a/tests/test_attributions_gpt_oss_nnsight.py +++ b/tests/test_attributions_gpt_oss_nnsight.py @@ -387,7 +387,9 @@ def load_large_gpt_oss_model_with_dummy_clt(): gpt_oss_large_cfg.num_attention_heads = 16 gpt_oss_large_cfg.num_key_value_heads = 16 gpt_oss_large_cfg.pad_token_id = 127 - gpt_oss_large_cfg.torch_dtype = "float64" + gpt_oss_large_cfg.torch_dtype = ( + "float32" # transformers v5 MoE integration layer doesn't support float64 + ) clt = CrossLayerTranscoder( n_layers=gpt_oss_large_cfg.num_hidden_layers, @@ -398,7 +400,7 @@ def load_large_gpt_oss_model_with_dummy_clt(): lazy_encoder=False, feature_input_hook="hook_resid_mid", feature_output_hook="hook_mlp_out", - dtype=torch.float64, + dtype=torch.float32, # transformers v5 MoE integration layer doesn't support float64 ) initialize_transcoder_weights(clt.W_enc, clt.W_dec, clt.b_enc, clt.b_dec) @@ -473,7 +475,7 @@ def test_large_gpt_oss_model(): gpt_oss_large_cfg.num_attention_heads = 16 gpt_oss_large_cfg.num_key_value_heads = 16 gpt_oss_large_cfg.pad_token_id = 127 - gpt_oss_large_cfg.torch_dtype = "float64" + gpt_oss_large_cfg.torch_dtype = "float32" # transformers v5 MoE doesn't support float64 model = load_dummy_gpt_oss_model(gpt_oss_large_cfg) # type:ignore # Save original property to restore later @@ -484,8 +486,14 @@ def test_large_gpt_oss_model(): graph = attribute(s, model) assert isinstance(model, NNSightReplacementModel) - verify_token_and_error_edges(model, graph) - verify_feature_edges(model, graph) + # Use relaxed tolerances for float32 (transformers v5 MoE doesn't support float64) + # With 16 layers and random weights, accumulated float32 errors require looser atol + verify_token_and_error_edges( + model, graph, act_atol=1.5, act_rtol=1e-2, logit_atol=1.5, logit_rtol=1e-2 + ) + verify_feature_edges( + model, graph, act_atol=1.5, act_rtol=1e-2, logit_atol=1.5, logit_rtol=1e-2 + ) finally: # Restore original property tokenizer_class.all_special_ids = original_all_special_ids # type:ignore diff --git a/tests/utils/test_hf_utils.py b/tests/utils/test_hf_utils.py index a128157..24bdea5 100644 --- a/tests/utils/test_hf_utils.py +++ b/tests/utils/test_hf_utils.py @@ -69,7 +69,12 @@ def test_gated_without_access(self, mock_get_token, mock_repo_info, mock_downloa # Setup: Pre-flight check passes, as repo_info just returns metadata. mock_repo_info.return_value = mock.MagicMock(private=False, gated=True) # Setup: The download itself will fail. - mock_download.side_effect = GatedRepoError("User has not accepted terms.") + # huggingface_hub >=1.3.4 requires a response parameter for HfHubHTTPError subclasses + mock_response = mock.MagicMock() + mock_response.status_code = 403 + mock_download.side_effect = GatedRepoError( + "User has not accepted terms.", response=mock_response + ) # Execute & Assert: Check that the GatedRepoError is raised by the function. with self.assertRaises(GatedRepoError): @@ -99,7 +104,12 @@ def test_private_no_access_or_non_existent(self, mock_get_token, mock_repo_info, """Tests a private repo the user can't see, or a repo that doesn't exist. Pre-flight check fails and error is propagated """ - mock_repo_info.side_effect = RepositoryNotFoundError("Repo not found.") + # huggingface_hub >=1.3.4 requires a response parameter for HfHubHTTPError subclasses + mock_response = mock.MagicMock() + mock_response.status_code = 404 + mock_repo_info.side_effect = RepositoryNotFoundError( + "Repo not found.", response=mock_response + ) with self.assertRaises(RepositoryNotFoundError): download_hf_uris([TEST_URI])