Skip to content

Commit

Permalink
update fam mask generation
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Jul 19, 2024
1 parent 638017c commit 76126db
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/fraposa_pgsc/fraposa.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def read_bed(bed_filepref, dtype=np.int8, filt_iid=None):
raise ValueError("Samples with duplicated FID + IID detected, please remove and retry")

bed = np.zeros(shape=(p, len(matched_ids)), dtype=dtype)
fam_mask = fam.iid.isin((x.IID for x in matched_ids)) & fam.fid.isin(x.FID for x in matched_ids)
# the `in` operator compares SampleID objects via __hash__/__eq__, which both use (fid, iid)
fam_mask = pd.Series((x in fam_ids for x in matched_ids))
i_extract = np.where(fam_mask == True)
for (i, (snp, genotypes)) in enumerate(pyp):
bed[i,:] = genotypes[i_extract]
Expand Down
32 changes: 26 additions & 6 deletions src/fraposa_pgsc/sampleid.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
from dataclasses import dataclass
class SampleID:
    """A sample ID from a plink fam file, including FID and IID.

    A stored family ID of ``"0"`` is treated as missing; in that case the
    effective family ID exposed by :attr:`fid` is the individual ID.
    Equality and hashing are both based on the effective ``(fid, iid)``
    pair, so instances can be compared, used as set members or dictionary
    keys, and tested with ``in``.

    Args:
        fid: Family ID as given in the fam file (``"0"`` means missing).
        iid: Individual ID.
    """

    def __init__(self, fid: str, iid: str):
        self._fid = fid
        self._iid = iid

    def __repr__(self) -> str:
        # Shows the effective fid (after the "0" fallback), not the raw value
        return f"{type(self).__name__}(fid={self.fid!r}, iid={self.iid!r})"

    @property
    def fid(self) -> str:
        """Effective family ID: the IID when the stored FID is ``"0"``."""
        # "0" means missing, so fall back to the individual ID
        return self._iid if self._fid == "0" else self._fid

    @property
    def iid(self) -> str:
        """Individual ID, exactly as passed to the constructor."""
        return self._iid

    def __hash__(self) -> int:
        # Must stay consistent with __eq__: both use the effective (fid, iid)
        return hash((self.fid, self.iid))

    def __eq__(self, other):
        # Let Python try the reflected comparison for unrelated types
        if not isinstance(other, SampleID):
            return NotImplemented

        return self.fid == other.fid and self.iid == other.iid

0 comments on commit 76126db

Please sign in to comment.