Skip to content

Commit

Permalink
Merge pull request #20 from pdimens/dev
Browse files Browse the repository at this point in the history
release 0.0.4
  • Loading branch information
pdimens committed Feb 9, 2021
2 parents 48f2ece + a1b8534 commit 583dc6a
Show file tree
Hide file tree
Showing 16 changed files with 253 additions and 76 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/TagBot.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
name: TagBot
on:
schedule:
- cron: 0 0 * * *
issue_comment:
types:
- created
workflow_dispatch:
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.DOCUMENTER_KEY }}
10 changes: 6 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
name = "PopGenSims"
uuid = "e670a858-c023-4621-870e-30d0f49b8322"
authors = ["Pavel Dimens <[email protected]> and contributors"]
version = "0.0.3"
version = "0.0.4"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
PopGen = "af524d12-c74b-11e9-22a8-3b091653023f"
RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
DataFrames = "0.21, 0.22"
InvertedIndices = "1.0"
PooledArrays = "0.5"
StaticArrays = "0.12, 1.0"
PopGen = "0.0.3, 0.1, 0.4"
PooledArrays = "1.0, 1.1"
PopGen = "0.4"
RandomNumbers = "1.4"
StaticArrays = "1.0"
StatsBase = "0.33"
julia = "1.4"

Expand Down
19 changes: 8 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
![logo](PopGenSims.png)
[![alt text](https://img.shields.io/badge/docs-stable-informational?style=for-the-badge&logo=Read%20The%20Docs&logoColor=white)](https://pdimens.github.io/PopGen.jl/docs/simulations/simulate_samples)
![logo](misc/PopGenSims.png)
[![alt text](https://img.shields.io/badge/docs-stable-informational?style=for-the-badge&logo=Read%20The%20Docs&logoColor=white)](https://pdimens.github.io/PopGen.jl/docs/simulations/simulate_samples)
[![alt text](https://img.shields.io/badge/slack-join%20PopGen.jl-9d72b1?style=for-the-badge&logo=slack)](https://join.slack.com/t/popgenjl/shared_invite/zt-deam65n8-DuBs2z1oDtsbBuRplJW~Pg)
![build_status](https://img.shields.io/github/workflow/status/pdimens/PopGenSims.jl/CI_dev?label=Dev%20Build&logo=GitHub&style=for-the-badge)



## Create population genetics simulations

This package builds off of [PopGen.jl](http://github.com/pdimens/PopGen.jl) and
This package builds off of [PopGen.jl](http://github.com/BioJulia/PopGen.jl) and
simulates offspring that would be generated under certain conditions. With this package you can simulate the offspring of specific individuals, simulate full-sibs, half-sibs, unrelated individuals, and parent-offspring pairs for use with PopGen.jl. You can also randomly generate samples given population-specific allele frequencies.

### Installation
```julia
julia>]

pkg(v1.5)> add "http://github.com/pdimens/PopGenSims.jl"
```

### Usage
Documentation for PopGenSims.jl has been integrated into [the PopGen.jl docs](https://pdimens.github.io/PopGen.jl/docs/simulations/simulate_samples)
![installation](misc/install.png)
File renamed without changes
Binary file added misc/install.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 2 additions & 3 deletions src/Cross.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export cross, backcross

function sample_genotype(geno::T, n_alleles::Int) where T<:Genotype
sample(SVector(geno), n_alleles, replace = false)
sample(Xoroshiro128Star(), SVector(geno), n_alleles, replace = false)
end

function sample_genotype(geno::Missing, n_alleles::Int)
Expand All @@ -12,7 +12,7 @@ function haploid_cross!(data::DataFrame, p1::T, p2::T; n::Int) where T <: GenoAr
iter_df = DataFrames.groupby(data, :name)
for simulation in iter_df
all_alleles = getindex.(collect.(zip.(p1,p2)), 1)
offspring_geno = Tuple(Base.Iterators.flatten(rand.(all_alleles, 1)) |> collect)
offspring_geno = Tuple(Base.Iterators.flatten(rand.(Xoroshiro128Star(), all_alleles, 1)) |> collect)
miss_idx = reduce(union, findall.(i -> i == (missing,), offspring_geno))
simulation.genotype[Not(miss_idx)] .= offspring_geno[Not(miss_idx)]
end
Expand Down Expand Up @@ -71,7 +71,6 @@ function cross(data::PopData, parent1::String, parent2::String; n::Int = 100, ge
out_population = fill(generation, n * length(loci))
out_geno = similar(p1, n * length(loci))
out_loci = DataFrame(:name => out_offspring, :population => out_population, :locus => out_loci_names, :genotype => out_geno)
#categorical!(out_loci, [:name, :population, :locus], compress = true)
out_loci.name = PooledArray(out_loci.name)
out_loci.population = PooledArray(out_loci.population)
out_loci.locus = PooledArray(out_loci.locus)
Expand Down
10 changes: 6 additions & 4 deletions src/PopGenSims.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
module PopGenSims

using DataFrames, PooledArrays, StaticArrays
using StatsBase: sample
using StatsBase: sample, Weights
using RandomNumbers.Xorshifts
using PopGen:
PopData,
allele_freq,
Genotype,
GenoArray,
get_genotypes,
PopData,
read_from,
write_to,
sort
sort,
write_to

include("Cross.jl")
include("Samples.jl")
Expand Down
67 changes: 39 additions & 28 deletions src/Samples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ julia> sample_locus(d, 3, 3)
function sample_locus(locus::Dict, n::Int, ploidy::Signed)
isempty(locus) && return fill(missing, n)
k,v = collect(keys(locus)), collect(values(locus))
alleles = [sample(k, Weights(v), n) for i in 1:ploidy]
alleles = [sample(Xoroshiro128Star(), k, Weights(v), n) for i in 1:ploidy]
Tuple.(sort.(eachrow(hcat(alleles...))))
end

Expand All @@ -55,34 +55,38 @@ PopData Object
Coordinates: absent
julia> sims.meta
1700×5 DataFrame
│ Row │ name │ population │ ploidy │ longitude │ latitude │
│ │ String │ String │ Int64 │ Missing │ Missing │
├──────┼──────────┼────────────┼────────┼───────────┼──────────┤
│ 1 │ sim_1 │ 1 │ 2 │ missing │ missing │
│ 2 │ sim_2 │ 1 │ 2 │ missing │ missing │
│ 3 │ sim_3 │ 1 │ 2 │ missing │ missing │
│ 4 │ sim_4 │ 1 │ 2 │ missing │ missing │
│ 1696 │ sim_1696 │ 17 │ 2 │ missing │ missing │
│ 1697 │ sim_1697 │ 17 │ 2 │ missing │ missing │
│ 1698 │ sim_1698 │ 17 │ 2 │ missing │ missing │
│ 1699 │ sim_1699 │ 17 │ 2 │ missing │ missing │
│ 1700 │ sim_1700 │ 17 │ 2 │ missing │ missing │
1700×5 DataFrame
Row │ name population ploidy longitude latitude
│ String String Int8 Missing Missing
──────┼───────────────────────────────────────────────────
1 │ sim_1 1 2 missing missing
2 │ sim_2 1 2 missing missing
3 │ sim_3 1 2 missing missing
4 │ sim_4 1 2 missing missing
5 │ sim_5 1 2 missing missing
⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮
1697 │ sim_1697 17 2 missing missing
1698 │ sim_1698 17 2 missing missing
1699 │ sim_1699 17 2 missing missing
1700 │ sim_1700 17 2 missing missing
1691 rows omitted
julia> sims.loci
15300×4 DataFrame
│ Row │ name │ population │ locus │ genotype │
│ │ String │ String │ String │ Tuple…? │
├───────┼──────────┼────────────┼────────┼────────────┤
│ 1 │ sim_1 │ 1 │ fca8 │ (135, 135) │
│ 2 │ sim_1 │ 1 │ fca23 │ (132, 140) │
│ 3 │ sim_1 │ 1 │ fca43 │ (139, 139) │
│ 4 │ sim_1 │ 1 │ fca45 │ (126, 126) │
│ 15297 │ sim_1700 │ 17 │ fca78 │ (142, 142) │
│ 15298 │ sim_1700 │ 17 │ fca90 │ (199, 199) │
│ 15299 │ sim_1700 │ 17 │ fca96 │ (113, 113) │
│ 15300 │ sim_1700 │ 17 │ fca37 │ (208, 208) │
Row │ name population locus genotype
│ String String String Tuple…?
───────┼──────────────────────────────────────────
1 │ sim_1 1 fca8 (135, 143)
2 │ sim_1 1 fca23 (136, 146)
3 │ sim_1 1 fca43 (141, 145)
4 │ sim_1 1 fca45 (120, 126)
5 │ sim_1 1 fca77 (156, 156)
⋮ │ ⋮ ⋮ ⋮ ⋮
15297 │ sim_1700 17 fca78 (150, 150)
15298 │ sim_1700 17 fca90 (197, 197)
15299 │ sim_1700 17 fca96 (113, 113)
15300 │ sim_1700 17 fca37 (208, 208)
15291 rows omitted
```
"""
function simulate(data::PopData; n::Int = 100)
Expand Down Expand Up @@ -113,14 +117,21 @@ function simulate(data::PopData; n::Int = 100)
for pop in pops
out_gdf[(population = pop,)].genotype .= reduce(hcat, geno_gdf[(population = pop,)].frq) |> permutedims |> vec
end
transform!(
geno_out,
:name => PooledArray => :name,
:population => PooledArray => :population,
:locus => PooledArray => :locus,
:genotype
)

# regenerate meta info
meta_df = DataFrames.combine(
groupby(geno_out, :name),
:name => first => :name,
:population => first => :population
)
meta_df[!, :ploidy] .= 2
meta_df[!, :ploidy] .= ploidy
meta_df[!, :longitude] .= missing
meta_df[!, :latitude] .= missing

Expand Down
18 changes: 9 additions & 9 deletions src/Sibship.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ function _cross(parent1::Vector{Vector{T}}, parent2::Vector{Vector{T}}) where T
ploidy = length(first(parent1))
ploidy == 1 && error("Haploid crosses are not yet supported. Please file and issue or pull request")
if ploidy == 2
p1_contrib = rand.(parent1)
p2_contrib = rand.(parent2)
p1_contrib = rand.(Xoroshiro128Star(),parent1)
p2_contrib = rand.(Xoroshiro128Star(),parent2)
geno_out = sort.(zip(p1_contrib, p2_contrib))
elseif iseven(ploidy)
n_allele = ploidy ÷ 2
p1_contrib = sample.(parent1, n_allele, replace = false)
p2_contrib = sample.(parent2, n_allele, replace = false)
p1_contrib = sample.(Xoroshiro128Star(),parent1, n_allele, replace = false)
p2_contrib = sample.(Xoroshiro128Star(),parent2, n_allele, replace = false)
geno_out = Tuple.(sort!.(append!.(p1_contrib, p2_contrib)))
else
# special method to provide a 50% chance of one parent giving more alleles than the other
rng = rand()
rng = rand(Xoroshiro128Star())
contrib_1 = ploidy ÷ 2
contrib_2 = ploidy - contrib_1
p1_contrib = rng > 0.5 ? sample.(parent1, contrib_1, replace = false) : sample.(parent1, contrib_2, replace = false)
p2_contrib = rng > 0.5 ? sample.(parent2, contrib_2, replace = false) : sample.(parent2, contrib_1, replace = false)
p1_contrib = rng > 0.5 ? sample.(Xoroshiro128Star(),parent1, contrib_1, replace = false) : sample.(Xoroshiro128Star(),parent1, contrib_2, replace = false)
p2_contrib = rng > 0.5 ? sample.(Xoroshiro128Star(),parent2, contrib_2, replace = false) : sample.(Xoroshiro128Star(),parent2, contrib_1, replace = false)
geno_out = Tuple.(sort!.(append!.(p1_contrib, p2_contrib)))
end
return geno_out
Expand Down Expand Up @@ -195,9 +195,9 @@ julia> fullsib_sims.meta_df100×5 DataFrame
"""
function simulate_sibship(data::PopData; n::Int = 500, relationship::String = "nothing", ploidy::Int = 0)
if relationship == "nothing"
error("Please use the keyword \'relationship\' and specify one of: \n- \"fullsib\" \n- \"halfsib\" \n- \"unrelated\"\n- \"parent-offspring\"")
throw(ArgumentError("Please use the keyword \'relationship\' and specify one of: \n- \"fullsib\" \n- \"halfsib\" \n- \"unrelated\"\n- \"parent-offspring\""))
elseif relationship ["fullsib", "halfsib", "unrelated", "parent-offspring"]
error("relationship = \"$relationship\" is invalid, please specify one of: \n- \"fullsib\" \n- \"halfsib\" \n- \"unrelated\"\n- \"parent-offspring\"")
throw(ArgumentError("relationship = \"$relationship\" is invalid, please specify one of: \n- \"fullsib\" \n- \"halfsib\" \n- \"unrelated\"\n- \"parent-offspring\""))
end
# automatic ploidy finding
if ploidy == 0
Expand Down
20 changes: 7 additions & 13 deletions src/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ function Base.append!(data::PopData, data2::PopData)

append!(data.meta, data2.meta)

data.loci.name = decompress(data.loci.name)
append!(data.loci, data2.loci)
return data
end
Expand Down Expand Up @@ -125,7 +124,6 @@ function append(data::PopData, data2::PopData)

append!(tmp.meta, tmp2.meta)

tmp.loci.name = decompress(tmp.loci.name)
append!(tmp.loci, tmp2.loci)
return tmp
end
Expand All @@ -135,20 +133,16 @@ function allele_pool(locus::T) where T <: GenoArray
Tuple(Base.Iterators.flatten(skipmissing(locus)))
end


function Base.sort(x::NTuple{N,T}) where N where T <: Signed
Tuple(sort(SVector(x)))
end

function allele_pool(data::PopData)
# index dataframe by locus
idx_df = groupby(data.loci, [:locus])
# instantiate dict to store alleles
allele_dict = Dict{String,NTuple}()
#allele_dict = Dict{String,Tuple}()
# pull out loci names
loc = getindex.(keys(idx_df), :locus)
[allele_dict[i] = allele_pool(idx_df[(;locus = i)].genotype) for i in loc]
return String.(loc), allele_dict
allele_dict = Dict(i => allele_pool(idx_df[(;locus = i)].genotype) for i in loc)
#[allele_dict[i] = allele_pool(idx_df[(;locus = i)].genotype) for i in loc]
return string.(loc), allele_dict
end

"""
Expand All @@ -162,7 +156,7 @@ an individual with a given `ploidy`. Returns a Vector of genotypes.
```
julia> cats = nancycats() ;
julia> loc, alleles = allele_pool(cats) ;
julia> simulate_parent(alleles, loc, ploidy = 2)
julia> simulate_sample(alleles, loc, ploidy = 2)
9-element Array{Array{Int16,1},1}:
[139, 129]
[146, 146]
Expand All @@ -175,6 +169,6 @@ julia> simulate_parent(alleles, loc, ploidy = 2)
[208, 208]
```
"""
function simulate_sample(alleles::Dict{String,NTuple}, loc::Vector{String}; ploidy::Int)
map(i -> rand(alleles[i], ploidy) ,loc)
function simulate_sample(alleles::Dict{String,<:Tuple}, loc::Vector{String}; ploidy::Int)
map(i -> rand(Xoroshiro128Star(), alleles[i], ploidy) ,loc)
end
3 changes: 3 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[deps]
PopGen = "af524d12-c74b-11e9-22a8-3b091653023f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
35 changes: 35 additions & 0 deletions test/cross.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
module TestCross

using PopGen
using PopGenSims
using Test

cats = @nancycats ;

@testset "sample_genotypes" begin
@test length(PopGenSims.sample_genotype(cats.loci.genotype[30], 2)) == 2
@test eltype(PopGenSims.sample_genotype(cats.loci.genotype[30],1)) == eltype(cats.loci.genotype[30])
@test length(PopGenSims.sample_genotype(missing, 2)) == 1
@test first(PopGenSims.sample_genotype(missing, 2)) === missing
end

@testset "crosses" begin
f1 = cross(cats, "N111", "N107", n = 10)
@test typeof(f1) == PopData
@test length(f1.meta.name) == 10
@test length(f1.loci.name) == 90
f1 = cross(cats, "N111", "N107", n = 10, generation = "firstgen")
@test typeof(f1) == PopData
@test length(f1.meta.name) == 10
@test length(f1.loci.name) == 90
f2 = cross(cats => "N111", f1 => "firstgen_offspring_10", n = 10)
@test typeof(f2) == PopData
@test length(f2.meta.name) == 10
@test length(f2.loci.name) == 90
f2 = cross(cats => "N111", f1 => "firstgen_offspring_10", n = 10, generation = "F2")
@test typeof(f2) == PopData
@test length(f2.meta.name) == 10
@test length(f2.loci.name) == 90
end

end # module TestCross
Loading

2 comments on commit 583dc6a

@pdimens
Copy link
Owner Author

@pdimens pdimens commented on 583dc6a Feb 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register branch=release

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/29677

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.0.4 -m "<description of version>" 583dc6a0a40ef4d6fa29af29de2333ad71864f3d
git push origin v0.0.4

Please sign in to comment.