Skip to content

Commit

Permalink
Update README and docstrings (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored Nov 4, 2024
1 parent 6c00b07 commit 05b53b6
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 7 deletions.
50 changes: 46 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ files = os.listdir(bw_dir)
bw_files = [f"{bw_dir}/{f}" for f in files]

features = pd.DataFrame({
"chrom": ["chr1", "chr1"],
"start": [1000, 2000],
"end": [1500, 2500]
"seqnames": ["chr1", "chr1"],
"starts": [1000, 2000],
"ends": [1500, 2500]
})

# Build GenomicArray
garr.build_genomicarray(
dataset = garr.build_genomicarray(
files=bw_files,
output_path=tempdir,
features=features,
Expand All @@ -80,6 +80,48 @@ garr.build_genomicarray(
The build process stores missing intervals from a bigwig file as `np.nan`. The
default is to choose an aggregate function that works with `np.nan`.

### Query a `GenomicArrayDataset`

Users can either reuse the `dataset` object returned when building the arrays, or create a new `GenomicArrayDataset` object by pointing it to the path where the files were created.

```python
# Create a GenomicArrayDataset object from the existing dataset
dataset = GenomicArrayDataset(dataset_path=tempdir)

# Query data for the first 10 regions across all samples
coverage_data = dataset[0:10, :]

print(coverage_data.matrix)
print(coverage_data.feature_annotation)
```

## output 1
array([[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , 0.5],
[1. , nan]], dtype=float32)

## output 2
seqnames starts ends genarr_feature_index
0 chr1 300 315 0
1 chr1 320 335 1
2 chr1 340 355 2
3 chr1 360 375 3
4 chr1 380 395 4
5 chr1 400 415 5
6 chr1 420 435 6
7 chr1 440 455 7
8 chr1 460 475 8
9 chr1 480 495 9
10 chr1 500 515 10


<!-- pyscaffold-notes -->

Expand Down
3 changes: 1 addition & 2 deletions src/genomicarrays/GenomicArrayDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from genomicarrays import GenomicArrayDataset
garr = GenomicArrayDataset(dataset_path="/path/to/genomicarray/dir")
feature_indices = 1:10
result1 = garr[feature_indices, 0]
result1 = garr[0:10, 0]
print(result1)
"""
Expand Down
2 changes: 1 addition & 1 deletion src/genomicarrays/GenomicArrayDatasetSlice.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from genomicarrays import GenomicArrayDataset
garr = GenomicArrayDataset(dataset_path="/path/to/genomicarray/dir")
feature_indices = slice(1, 10)
feature_indices = slice(0, 10)
result1 = garr[feature_indices, 0]
print(result1)
Expand Down
7 changes: 7 additions & 0 deletions src/genomicarrays/build_genomicarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@
# or just provide the path
bw2 = "path/to/object2.bw"
features = pd.DataFrame({
"seqnames": ["chr1", "chr1"],
"starts": [1000, 2000],
"ends": [1500, 2500]
})
# Build GenomicArray
dataset = build_genomicarray(
features=features,
output_path=tempdir,
files=[bw1, bw2],
matrix_options=MatrixOptions(dtype=np.float32),
Expand Down

0 comments on commit 05b53b6

Please sign in to comment.