IndexError: index out of range in self #440

Open
Soham07081 opened this issue Jun 19, 2024 · 0 comments

Hey @NielsRogge

I am encountering an “index out of range in self” error while fine-tuning IDEFICS2. Initially I suspected the issue was with my own dataset, but I hit the same error even when using your dataset.

Could you please provide more details or guidance on how to resolve this issue?

```
IndexError                                Traceback (most recent call last)
Cell In[32], line 18
1 # from lightning.pytorch.loggers import WandbLogger
2
3 # wandb_logger = WandbLogger(project="Idefics2-PL", name="demo-run-cord")
5 trainer = L.Trainer(
6 accelerator="cpu",
7 # devices=[0],
(...)
15 callbacks=[PushToHubCallback()],
16 )
---> 18 trainer.fit(model_module)

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py:543, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
541 self.state.status = TrainerStatus.RUNNING
542 self.training = True
--> 543 call._call_and_handle_interrupt(
544 self, self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
545 )

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:44, in _call_and_handle_interrupt(trainer, trainer_fn, *args, **kwargs)
42 if trainer.strategy.launcher is not None:
43 return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
---> 44 return trainer_fn(*args, **kwargs)
46 except _TunerExitException:
47 _call_teardown_hook(trainer)

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py:579, in Trainer._fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
572 assert self.state.fn is not None
573 ckpt_path = self._checkpoint_connector._select_ckpt_path(
574 self.state.fn,
575 ckpt_path,
576 model_provided=True,
577 model_connected=self.lightning_module is not None,
578 )
--> 579 self._run(model, ckpt_path=ckpt_path)
581 assert self.state.stopped
582 self.training = False

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py:986, in Trainer._run(self, model, ckpt_path)
981 self._signal_connector.register_signal_handlers()
983 # ----------------------------
984 # RUN THE TRAINER
985 # ----------------------------
--> 986 results = self._run_stage()
988 # ----------------------------
989 # POST-Training CLEAN UP
990 # ----------------------------
991 log.debug(f"{self.__class__.__name__}: trainer tearing down")

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py:1030, in Trainer._run_stage(self)
1028 self._run_sanity_check()
1029 with torch.autograd.set_detect_anomaly(self._detect_anomaly):
-> 1030 self.fit_loop.run()
1031 return None
1032 raise RuntimeError(f"Unexpected state {self.state}")

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:205, in _FitLoop.run(self)
203 try:
204 self.on_advance_start()
--> 205 self.advance()
206 self.on_advance_end()
207 self._restarting = False

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:363, in _FitLoop.advance(self)
361 with self.trainer.profiler.profile("run_training_epoch"):
362 assert self._data_fetcher is not None
--> 363 self.epoch_loop.run(self._data_fetcher)

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py:140, in _TrainingEpochLoop.run(self, data_fetcher)
138 while not self.done:
139 try:
--> 140 self.advance(data_fetcher)
141 self.on_advance_end(data_fetcher)
142 self._restarting = False

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py:250, in _TrainingEpochLoop.advance(self, data_fetcher)
247 with trainer.profiler.profile("run_training_batch"):
248 if trainer.lightning_module.automatic_optimization:
249 # in automatic optimization, there can only be one optimizer
--> 250 batch_output = self.automatic_optimization.run(trainer.optimizers[0], batch_idx, kwargs)
251 else:
252 batch_output = self.manual_optimization.run(kwargs)

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py:183, in _AutomaticOptimization.run(self, optimizer, batch_idx, kwargs)
172 if (
173 # when the strategy handles accumulation, we want to always call the optimizer step
174 not self.trainer.strategy.handles_gradient_accumulation and self.trainer.fit_loop._should_accumulate()
(...)
180 # -------------------
181 # automatic_optimization=True: perform ddp sync only when performing optimizer_step
182 with _block_parallel_sync_behavior(self.trainer.strategy, block=True):
--> 183 closure()
185 # ------------------------------
186 # BACKWARD PASS
187 # ------------------------------
188 # gradient update with accumulated gradients
189 else:
190 self._optimizer_step(batch_idx, closure)

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py:144, in Closure.__call__(self, *args, **kwargs)
142 @override
143 def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]:
--> 144 self._result = self.closure(*args, **kwargs)
145 return self._result.loss

File /opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py:129, in Closure.closure(self, *args, **kwargs)
126 @override
127 @torch.enable_grad()
128 def closure(self, *args: Any, **kwargs: Any) -> ClosureResult:
--> 129 step_output = self._step_fn()
131 if step_output.closure_loss is None:
132 self.warning_cache.warn("training_step returned None. If this was on purpose, ignore this warning...")

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py:317, in _AutomaticOptimization._training_step(self, kwargs)
306 """Performs the actual train step with the tied hooks.
307
308 Args:
(...)
313
314 """
315 trainer = self.trainer
--> 317 training_step_output = call._call_strategy_hook(trainer, "training_step", *kwargs.values())
318 self.trainer.strategy.post_training_step() # unused hook - call anyway for backward compatibility
320 if training_step_output is None and trainer.world_size > 1:

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:311, in _call_strategy_hook(trainer, hook_name, *args, **kwargs)
308 return None
310 with trainer.profiler.profile(f"[Strategy]{trainer.strategy.__class__.__name__}.{hook_name}"):
--> 311 output = fn(*args, **kwargs)
313 # restore current_fx when nested context
314 pl_module._current_fx_name = prev_fx_name

File /opt/conda/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py:390, in Strategy.training_step(self, *args, **kwargs)
388 if self.model != self.lightning_module:
389 return self._forward_redirection(self.model, self.lightning_module, "training_step", *args, **kwargs)
--> 390 return self.lightning_module.training_step(*args, **kwargs)

Cell In[27], line 29, in Idefics2ModelPLModule.training_step(self, batch, batch_idx)
22 # print(f"attention_mask: {len(attention_mask)}")
23 # print(f"pixel_values: {len(pixel_values)}")
24 # print(f"pixel_attention_mask: {len(pixel_attention_mask)}")
25 print(f"labels: {labels}")
---> 29 outputs = self.model(input_ids=input_ids,
30 attention_mask=attention_mask,
31 pixel_values=pixel_values,
32 pixel_attention_mask=pixel_attention_mask,
33 labels=labels)
34 loss = outputs.loss
36 self.log("train_loss", loss)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None

File /opt/conda/lib/python3.10/site-packages/transformers/models/idefics2/modeling_idefics2.py:1830, in Idefics2ForConditionalGeneration.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1827 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1829 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
-> 1830 outputs = self.model(
1831 input_ids=input_ids,
1832 attention_mask=attention_mask,
1833 position_ids=position_ids,
1834 past_key_values=past_key_values,
1835 inputs_embeds=inputs_embeds,
1836 pixel_values=pixel_values,
1837 pixel_attention_mask=pixel_attention_mask,
1838 image_hidden_states=image_hidden_states,
1839 use_cache=use_cache,
1840 output_attentions=output_attentions,
1841 output_hidden_states=output_hidden_states,
1842 return_dict=return_dict,
1843 )
1845 hidden_states = outputs[0]
1846 logits = self.lm_head(hidden_states)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None

File /opt/conda/lib/python3.10/site-packages/transformers/models/idefics2/modeling_idefics2.py:1606, in Idefics2Model.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, use_cache, output_attentions, output_hidden_states, return_dict)
1603 raise ValueError("When first calling the model, if input_embeds are passed, input_ids should not be None.")
1605 if inputs_embeds is None:
-> 1606 inputs_embeds = self.text_model.get_input_embeddings()(input_ids)
1608 # START VISUAL INPUTS INTEGRATION
1609 if pixel_values is not None and image_hidden_states is not None:

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/sparse.py:162, in Embedding.forward(self, input)
161 def forward(self, input: Tensor) -> Tensor:
--> 162 return F.embedding(
163 input, self.weight, self.padding_idx, self.max_norm,
164 self.norm_type, self.scale_grad_by_freq, self.sparse)

File /opt/conda/lib/python3.10/site-packages/torch/nn/functional.py:2233, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2227 # Note [embedding_renorm set_grad_enabled]
2228 # XXX: equivalent to
2229 # with torch.no_grad():
2230 # torch.embedding_renorm_
2231 # remove once script supports set_grad_enabled
2232 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2233 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

IndexError: index out of range in self
```
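
Note: this particular `IndexError` is raised by `F.embedding` when some value in `input_ids` is greater than or equal to the number of rows in the model's input embedding table, which typically happens when special tokens are added to the tokenizer/processor without resizing the model's embeddings. A minimal diagnostic sketch, assuming `model`, `processor`, and a batched `input_ids` tensor from the collate function are available (these names are placeholders, not taken from the notebook):

```python
# Diagnostic sketch. `model`, `processor`, and `input_ids` are placeholder names
# for the Idefics2ForConditionalGeneration being fine-tuned, its processor, and
# one batched input_ids tensor produced by the collate function.
embed = model.get_input_embeddings()   # token embedding table of the text model
num_rows = embed.num_embeddings        # valid token ids are 0 .. num_rows - 1
max_id = int(input_ids.max())

print("tokenizer length:", len(processor.tokenizer))
print("embedding rows  :", num_rows)
print("max input_id    :", max_id)

if max_id >= num_rows:
    # This is exactly the condition that makes F.embedding raise
    # "IndexError: index out of range in self". If extra special tokens were
    # added to the tokenizer, resizing the embeddings usually resolves it.
    model.resize_token_embeddings(len(processor.tokenizer))
```

A mismatch between the processor checkpoint and the model checkpoint can also produce out-of-range token ids, so it is worth confirming that both are loaded from the same IDEFICS2 checkpoint.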
