Index Error when loading dataset #29

Open
nhuytan opened this issue Sep 5, 2024 · 0 comments
nhuytan commented Sep 5, 2024

I get an IndexError when loading a dataset. Any suggestions for how to fix it?

IndexError: index 8389024 is out of bounds for axis 0 with size 66462
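The failure happens inside `dd.read_parquet` when FISHscale computes the row count, so one way to narrow it down might be to read each parsed parquet file on its own and see which one breaks. A minimal diagnostic sketch (the glob pattern is a hypothetical placeholder, not FISHscale's actual folder layout):

```python
# Hedged diagnostic sketch (not FISHscale code): read each parquet file
# individually to find the one that raises the IndexError.
# "path/to/parsed/*.parquet" is a hypothetical placeholder path.
import glob
import dask.dataframe as dd

for f in sorted(glob.glob("path/to/parsed/*.parquet")):
    try:
        # Same row-count computation that fails in data_handling.py
        n = dd.read_parquet(f, engine="fastparquet").shape[0].compute()
        print(f"OK     {f} ({n} rows)")
    except IndexError as exc:
        print(f"FAILED {f}: {exc}")
```

Full traceback: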

{
"name": "IndexError",
"message": "index 8389024 is out of bounds for axis 0 with size 66462",
"stack": "---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_22712\2632409830.py in <module>
13 verbose=True,
14 exclude_genes=['Control1', 'Control2', 'Control3', 'Control4', 'Control5','Control6', 'Control7', 'Control8'],
---> 15 z=[-140, 600, 1200, 1810, 2420, 3000, 3600])
16
17 # Have olfactory bulb pointing left.

c:\users\anonymous\fishscale\FISHscale\utils\dataset.py in __init__(self, data, data_folder, unique_genes, MultiDataset_name, color_input, verbose, grid_layout, columns_layout, x_label, y_label, z_label, gene_label, other_columns, exclude_genes, z, pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, parse_num_threads)
732 self.load_from_files(data, x_label, y_label, z_label, z, gene_label, other_columns, unique_genes, exclude_genes,
733 pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, color_input,
--> 734 parse_num_threads)
735 else:
736 raise Exception(f'Input for "data" not understood. Should be list with initiated Datasets or valid path to files.')

c:\users\anonymous\fishscale\FISHscale\utils\dataset.py in load_from_files(self, filepath, x_label, y_label, z_label, z, gene_label, other_columns, unique_genes, exclude_genes, pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, color_input, num_threads)
920 part_of_multidataset=True)
921 lazy_result.append(lr)
--> 922 futures = dask.persist(*lazy_result, num_workers=1, num_threads = num_threads)
923 self.datasets = dask.compute(*futures)
924 self.datasets_names = [d.dataset_name for d in self.datasets]

c:\Anaconda\envs\my_env\lib\site-packages\dask\base.py in persist(traverse, optimize_graph, scheduler, *args, **kwargs)
833 postpersists.append((rebuild, a_keys, state))
834
--> 835 results = schedule(dsk, keys, **kwargs)
836 d = dict(zip(keys, results))
837 results2 = [r({k: d[k] for k in ks}, *s) for r, ks, s in postpersists]

c:\Anaconda\envs\my_env\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
87 get_id=_thread_get_id,
88 pack_exception=pack_exception,
---> 89 **kwargs,
90 )
91

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
504 _execute_task(task, data) # Re-execute locally
505 else:
--> 506 raise_exception(exc, tb)
507 res, worker_id = loads(res_info)
508 state["cache"][key] = res

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in reraise(exc, tb)
312 if exc.__traceback__ is not tb:
313 raise exc.with_traceback(tb)
--> 314 raise exc
315
316

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
217 try:
218 task, data = loads(task_info)
--> 219 result = _execute_task(task, data)
220 id = get_id()
221 result = dumps((result, id))

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg

c:\Anaconda\envs\my_env\lib\site-packages\dask\utils.py in apply(func, args, kwargs)
37 def apply(func, args, kwargs=None):
38 if kwargs:
---> 39 return func(*args, **kwargs)
40 else:
41 return func(*args)

c:\users\anonymous\fishscale\FISHscale\utils\dataset.py in __init__(self, filename, x_label, y_label, z_label, z, gene_label, other_columns, unique_genes, exclude_genes, pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, color_input, working_selection, verbose, part_of_multidataset, image)
206 self.load_data(self.filename, x_label, y_label, gene_label, self.other_columns, x_offset, y_offset, z_offset,
207 self.pixel_size.magnitude, unique_genes, exclude_genes, self.polygon, self.select_valid,
--> 208 reparse, z_label)
209
210 #Gene metadata

c:\users\anonymous\fishscale\FISHscale\utils\data_handling.py in load_data(self, filename, x_label, y_label, gene_label, other_columns, x_offset, y_offset, z_offset, pixel_size, unique_genes, exclude_genes, polygon, select_valid, reparse, z_label)
522 #Load selected genes
523 self.df = dd.read_parquet(filter_filelist)
--> 524 self.shape = (self.df.shape[0].compute(), self.df.shape[1])
525 else:
526 #Load all genes

c:\Anaconda\envs\my_env\lib\site-packages\dask\base.py in compute(self, **kwargs)
288 dask.base.compute
289 """
--> 290 (result,) = compute(self, traverse=False, **kwargs)
291 return result
292

c:\Anaconda\envs\my_env\lib\site-packages\dask\base.py in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
571 postcomputes.append(x.dask_postcompute())
572
--> 573 results = schedule(dsk, keys, **kwargs)
574 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
575

c:\Anaconda\envs\my_env\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
87 get_id=_thread_get_id,
88 pack_exception=pack_exception,
---> 89 **kwargs,
90 )
91

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
504 _execute_task(task, data) # Re-execute locally
505 else:
--> 506 raise_exception(exc, tb)
507 res, worker_id = loads(res_info)
508 state["cache"][key] = res

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in reraise(exc, tb)
312 if exc.__traceback__ is not tb:
313 raise exc.with_traceback(tb)
--> 314 raise exc
315
316

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
217 try:
218 task, data = loads(task_info)
--> 219 result = _execute_task(task, data)
220 id = get_id()
221 result = dumps((result, id))

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg

c:\Anaconda\envs\my_env\lib\site-packages\dask\optimization.py in __call__(self, *args)
967 if not len(args) == len(self.inkeys):
968 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 969 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
970
971 def __reduce__(self):

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in get(dsk, out, cache)
147 for key in toposort(dsk):
148 task = dsk[key]
--> 149 result = _execute_task(task, cache)
150 cache[key] = result
151 result = _execute_task(out, cache)

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\core.py in __call__(self, part)
95 self.columns,
96 self.index,
---> 97 self.common_kwargs,
98 )
99

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\core.py in read_parquet_part(fs, engine, meta, part, columns, index, kwargs)
501 dfs = [
502 func(fs, rg, columns.copy(), index, **toolz.merge(kwargs, kw))
--> 503 for (rg, kw) in part
504 ]
505 df = concat(dfs, axis=0) if len(dfs) > 1 else dfs[0]

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\core.py in <listcomp>(.0)
501 dfs = [
502 func(fs, rg, columns.copy(), index, **toolz.merge(kwargs, kw))
--> 503 for (rg, kw) in part
504 ]
505 df = concat(dfs, axis=0) if len(dfs) > 1 else dfs[0]

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\fastparquet.py in read_partition(cls, fs, pieces, columns, index, categories, root_cats, root_file_scheme, base_path, **kwargs)
1031 categories=categories,
1032 index=index,
-> 1033 **kwargs.get("read", {}),
1034 )
1035

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\fastparquet.py in pf_to_pandas(cls, pf, fs, columns, categories, index, open_file_options, **kwargs)
1125 partition_meta=pf.partition_meta,
1126 infile=infile,
-> 1127 **kwargs,
1128 )
1129 start += thislen

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\api.py in read_row_group_file(self, rg, columns, categories, index, assign, partition_meta, row_filter, infile)
363 selfmade=self.selfmade, index=index,
364 assign=assign, scheme=self.file_scheme, partition_meta=partition_meta,
--> 365 row_filter=row_filter
366 )
367 if ret:

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\core.py in read_row_group(file, rg, columns, categories, schema_helper, cats, selfmade, index, assign, scheme, partition_meta, row_filter)
607 raise RuntimeError('Going with pre-allocation!')
608 read_row_group_arrays(file, rg, columns, categories, schema_helper,
--> 609 cats, selfmade, assign=assign, row_filter=row_filter)
610
611 for cat in cats:

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\core.py in read_row_group_arrays(file, rg, columns, categories, schema_helper, cats, selfmade, assign, row_filter)
581 selfmade=selfmade, assign=out[name],
582 catdef=out.get(name+'-catdef', None),
--> 583 row_filter=row_filter)
584
585 if _is_map_like(schema_helper, column):

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\core.py in read_col(column, schema_helper, infile, use_cat, selfmade, assign, catdef, row_filter)
547 piece[:] = i.codes
548 elif d and not use_cat:
--> 549 piece[:] = dic[val]
550 elif not use_cat:
551 piece[:] = convert(val, se)

IndexError: index 8389024 is out of bounds for axis 0 with size 66462"
}
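The bottom frame is fastparquet's `read_col` applying a dictionary page (`piece[:] = dic[val]`), and an index of 8389024 against a dictionary of size 66462 often points to a corrupted file or a fastparquet/dask version mismatch rather than anything in the calling code. As a hedged sanity check that the files themselves decode, one could try dask's pyarrow engine on the same data (assumes pyarrow is installed; the path is a placeholder):

```python
# Hedged sanity-check sketch: decode the same parquet files with pyarrow
# instead of fastparquet. If this succeeds, the files are readable and
# the problem is likely on the fastparquet side.
# "path/to/parsed/*.parquet" is a hypothetical placeholder path.
import dask.dataframe as dd

df = dd.read_parquet("path/to/parsed/*.parquet", engine="pyarrow")
print(df.shape[0].compute())
```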
