model: recipes/torchrecipes/rec at main · facebookresearch/recipes · GitHub
error:
dlrm_main/0 [0]:[rank0]: Traceback (most recent call last):
dlrm_main/0 [0]:[rank0]:   File "/opt/meituan/zhaozheng09/user/recipes/torchrecipes/rec/dlrm_main.py", line 260, in <module>
dlrm_main/0 [0]:[rank0]:     invoke_main() # pragma: no cover
dlrm_main/0 [0]:[rank0]:   File "/opt/meituan/zhaozheng09/user/recipes/torchrecipes/rec/dlrm_main.py", line 256, in invoke_main
dlrm_main/0 [0]:[rank0]:     main(sys.argv[1:])
dlrm_main/0 [0]:[rank0]:   File "/opt/meituan/zhaozheng09/user/recipes/torchrecipes/rec/dlrm_main.py", line 252, in main
dlrm_main/0 [0]:[rank0]:     trainer.fit(sharded_model, datamodule=datamodule)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
dlrm_main/0 [0]:[rank0]:     call._call_and_handle_interrupt(
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
dlrm_main/0 [0]:[rank0]:     return trainer_fn(*args, **kwargs)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
dlrm_main/0 [0]:[rank0]:     self._run(model, ckpt_path=ckpt_path)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 981, in _run
dlrm_main/0 [0]:[rank0]:     results = self._run_stage()
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1023, in _run_stage
dlrm_main/0 [0]:[rank0]:     self._run_sanity_check()
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1052, in _run_sanity_check
dlrm_main/0 [0]:[rank0]:     val_loop.run()
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/loops/utilities.py", line 178, in _decorator
dlrm_main/0 [0]:[rank0]:     return loop_run(self, *args, **kwargs)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 135, in run
dlrm_main/0 [0]:[rank0]:     self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 397, in _evaluation_step
dlrm_main/0 [0]:[rank0]:     output = call._call_strategy_hook(trainer, hook_name, *step_args)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/trainer/call.py", line 319, in _call_strategy_hook
dlrm_main/0 [0]:[rank0]:     output = fn(*args, **kwargs)
dlrm_main/0 [0]:[rank0]:   File "/conda/envs/torch2.4.1/lib/python3.9/site-packages/pytorch_lightning/strategies/strategy.py", line 414, in validation_step
dlrm_main/0 [0]:[rank0]:     return self.lightning_module.validation_step(*args, **kwargs)
dlrm_main/0 [0]:[rank0]: TypeError: validation_step() missing 1 required positional argument: 'batch_idx'