incorrect looping or insufficient data for the requested number of epochs and evaluations #116
Hi, could you attach your code for CustomDataset?
Thanks for your response and follow-up. The relevant parts of my preprocessing script are:

def parse_room(room, dataset_root, output_root, align_angle=True):
    ...

def main_process():
    ...

if __name__ == "__main__":
    ...
Oh sorry, I mean the …
Thanks for your response. Also, I have placed my configuration file above.
Could you show me your local version of the code around datasets/defaults.py, line 208?
You're right. I initially faced some problems, and to solve them I used this function instead of the default:

def get_data_list(self):
    data_list = []
    # Append the directory itself, not the files inside
    return data_list
Check our code here: https://github.com/Pointcept/Pointcept/blob/main/pointcept/datasets/defaults.py#L86 and compare it with your local get_data_list function (how it collects the data paths).
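For context, a minimal sketch of what such a data-listing helper could look like, modeled on the upstream pattern linked above. This is an illustration under that assumption, not Pointcept's verbatim code; it assumes each sample lives as one entry (file or scene directory) under <data_root>/<split>/:

import os
from glob import glob
from collections.abc import Sequence

# Intended as a method of the custom dataset class.
def get_data_list(self):
    # One entry per sample under <data_root>/<split>/.
    if isinstance(self.split, str):
        data_list = glob(os.path.join(self.data_root, self.split, "*"))
    elif isinstance(self.split, Sequence):
        data_list = []
        for split in self.split:
            data_list += glob(os.path.join(self.data_root, split, "*"))
    else:
        raise NotImplementedError(f"Unsupported split: {self.split}")
    return data_list

The list should end up with one path per sample. If each split directory is appended directly (as the comment in the snippet above suggests), three train splits yield a data list of length 3, which would explain the failing index in the traceback below.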
Hi, first of all, thanks for your innovation.
I am using Point Transformer V3 on my custom dataset, and here is my configuration file:
_base_ = ["../_base_/default_runtime.py"]
# misc custom setting
batch_size = 3 # bs: total bs in all gpus
num_worker = 4
mix_prob = 0.8
empty_cache = False
enable_amp = True
# model settings
model = dict(
type="DefaultSegmentorV2",
num_classes=12,
backbone_out_channels=64,
backbone=dict(
type="PT-v3m1",
in_channels=7,
order=("z", "z-trans", "hilbert", "hilbert-trans"),
stride=(2, 2, 2, 2),
enc_depths=(2, 2, 2, 6, 2),
enc_channels=(32, 64, 128, 256, 512),
enc_num_head=(2, 4, 8, 16, 32),
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
dec_depths=(2, 2, 2, 2),
dec_channels=(64, 64, 128, 256),
dec_num_head=(4, 4, 8, 16),
dec_patch_size=(1024, 1024, 1024, 1024),
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
attn_drop=0.0,
proj_drop=0.0,
drop_path=0.3,
shuffle_orders=True,
pre_norm=True,
enable_rpe=False,
enable_flash=True,
upcast_attention=False,
upcast_softmax=False,
cls_mode=False,
pdnorm_bn=False,
pdnorm_ln=False,
pdnorm_decouple=True,
pdnorm_adaptive=False,
pdnorm_affine=True,
# pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
),
criteria=[
dict(type="CrossEntropyLoss",
weight=[1.0000, 5.7017, 1.8023, 7.3687,
1.0904, 4.4289, 3.1646, 5.8000,
2.4000, 1.3571, 7.7758, 9.4042],
loss_weight=1.0, ignore_index=-1),
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
],
)
# scheduler settings
epoch = 800
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
scheduler = dict(
type="OneCycleLR",
max_lr=0.002,
pct_start=0.04,
anneal_strategy="cos",
div_factor=10.0,
final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0002)]
# dataset settings
dataset_type = "S3DISDataset"
data = dict(
num_classes=12,
ignore_index=-1,
names=[
"other",
"sidewalk",
"road and asphalt",
"curb cut",
"Vegetation",
"Tree",
"post",
"Ramp",
"Road-sign",
"Building",
"steps",
"Door",
],
train=dict(
type=dataset_type,
split=("ND-Train-1", "ND-Train-2", "ND-Train-3"),
data_root="/home/mehrdad/Codes/PTV3-2/processed_output/Train",
transform=[
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
# dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
dict(type="RandomScale", scale=[0.9, 1.1]),
dict(type="RandomFlip", p=0.5),
dict(type="RandomJitter", sigma=0.005, clip=0.02),
dict(
type="GridSample",
grid_size=0.05,
hash_type="fnv",
mode="train",
return_grid_coord=True,
),
dict(type="SphereCrop", point_max=250000, mode="random"),
dict(type="CenterShift", apply_z=False),
dict(type="ToTensor"),
dict(
type="Collect",
keys=("coord", "grid_coord", "segment"),
feat_keys=("coord","color", "strength"),
),
],
test_mode=False,
),
val=dict(
type=dataset_type,
split="ND-Validation",
data_root="/home/mehrdad/Codes/PTV3-2/processed_output/Validation",
transform=[
dict(type="CenterShift", apply_z=True),
dict(
type="Copy",
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
),
dict(
type="GridSample",
grid_size=0.05,
hash_type="fnv",
mode="train",
return_grid_coord=True,
),
dict(type="CenterShift", apply_z=False),
dict(type="ToTensor"),
dict(
type="Collect",
keys=(
"coord",
"grid_coord",
"origin_coord",
"segment",
"origin_segment",
),
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
feat_keys=("coord", "color", "strength"),
),
],
test_mode=False,
),
# test=dict(
# type=dataset_type,
# split="ND-Validation",
# data_root="/home/mehrdad/Codes/PTV3-2/processed_output/Validation",
# transform=[
# dict(type="CenterShift", apply_z=True),
# dict(type="NormalizeColor"),
# ],
# test_mode=True,
# test_cfg=dict(
# voxelize=dict(
# type="GridSample",
# grid_size=0.02,
# hash_type="fnv",
# mode="test",
# keys=("coord", "color", "strength"),
# return_grid_coord=True,
# ),
# crop=None,
# post_transform=[
# dict(type="CenterShift", apply_z=False),
# dict(type="ToTensor"),
# dict(
# type="Collect",
# keys=("coord", "grid_coord", "segment", "index"),
# feat_keys=("color", "strength"),
# ),
# ],
# aug_transform=[
# [dict(type="RandomScale", scale=[0.9, 0.9])],
# [dict(type="RandomScale", scale=[0.95, 0.95])],
# [dict(type="RandomScale", scale=[1, 1])],
# [dict(type="RandomScale", scale=[1.05, 1.05])],
# [dict(type="RandomScale", scale=[1.1, 1.1])],
# [
# dict(type="RandomScale", scale=[0.9, 0.9]),
# dict(type="RandomFlip", p=1),
# ],
# [
# dict(type="RandomScale", scale=[0.95, 0.95]),
# dict(type="RandomFlip", p=1),
# ],
# [
# dict(type="RandomScale", scale=[1, 1]),
# dict(type="RandomFlip", p=1),
# ],
# [
# dict(type="RandomScale", scale=[1.05, 1.05]),
# dict(type="RandomFlip", p=1),
# ],
# [
# dict(type="RandomScale", scale=[1.1, 1.1]),
# dict(type="RandomFlip", p=1),
# ],
# ],
# ),
# ),
)
When I set epoch = 100 and eval_epoch = 100, the model works well. I also understand the looping behaviour: setting epoch = 800 with eval_epoch = 100 gives train_epoch = 100 and loop = 8, i.e. the data list is repeated 8 times within each training epoch.
But when I set epoch = 800 and eval_epoch = 100, I face this error (a small sketch of the indexing arithmetic follows the traceback):
Exception has occurred: SystemExit
1
File "/home/mehrdad/Codes/PTV3-2/pointcept/engines/train.py", line 161, in train
for (
IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/mehrdad/anaconda3/envs/mehrdad_env/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
data = fetcher.fetch(index) # type: ignore[possibly-undefined]
File "/home/mehrdad/anaconda3/envs/mehrdad_env/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/mehrdad/anaconda3/envs/mehrdad_env/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/mehrdad/Codes/PTV3-2/pointcept/datasets/defaults.py", line 208, in getitem
raise IndexError(f"Index {idx} out of range for data list of length {len(self.data_list)}")
IndexError: Index 14 out of range for data list of length 3
During handling of the above exception, another exception occurred:
File "/home/mehrdad/Codes/PTV3-2/pointcept/utils/events.py", line 612, in exit
sys.exit(1) # This prevents double logging the error to the console
File "/home/mehrdad/Codes/PTV3-2/pointcept/engines/train.py", line 174, in train
self.after_train()
File "/home/mehrdad/Codes/PTV3-2/tools/train.py", line 20, in main_worker
trainer.train()
File "/home/mehrdad/Codes/PTV3-2/pointcept/engines/launch.py", line 89, in launch
main_func(*cfg)
File "/home/mehrdad/Codes/PTV3-2/tools/train.py", line 27, in main
launch(
File "/home/mehrdad/Codes/PTV3-2/tools/train.py", line 38, in
main()
SystemExit: 1
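To illustrate the looping arithmetic described above, here is a toy sketch. The class name LoopedDataset is made up for illustration; the modulo convention is how such looped datasets are typically implemented, and a __getitem__ that skips it fails exactly like the traceback:

class LoopedDataset:
    """Toy illustration of the epoch/eval_epoch looping convention.

    With epoch=800 and eval_epoch=100, the trainer runs 100 real epochs
    and repeats the data list loop=8 times inside each one, so the
    reported dataset length is len(data_list) * loop.
    """

    def __init__(self, data_list, loop):
        self.data_list = data_list   # e.g. 3 entries, as in the traceback
        self.loop = loop             # e.g. 800 // 100 = 8

    def __len__(self):
        return len(self.data_list) * self.loop   # 3 * 8 = 24

    def __getitem__(self, idx):
        # The DataLoader requests idx in [0, 24); the modulo wraps it
        # back into the real data list. Without it, idx=14 on a list of
        # length 3 raises exactly the IndexError shown above.
        return self.data_list[idx % len(self.data_list)]

ds = LoopedDataset(data_list=["scene_a", "scene_b", "scene_c"], loop=8)
assert len(ds) == 24
assert ds[14] == "scene_c"   # 14 % 3 == 2

With the modulo in place, index 14 maps to 14 % 3 = 2. A __getitem__ that indexes self.data_list[idx] directly, or that raises as soon as idx >= len(self.data_list) (as the modified defaults.py line 208 above does), fails whenever loop > 1.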
I should mention that I reused the S3DIS dataset configuration for my custom dataset, but mine is an outdoor dataset with a massive number of points (about 900 million).
I would be thankful for any help.