From d5e30a15fc0f177ea26cb240ce03eb1724a27b10 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Sun, 8 Nov 2020 20:15:21 +0000 Subject: [PATCH 1/2] Fix conversion to DataFrame --- src/corpus.jl | 12 ++++++------ test/corpus.jl | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/corpus.jl b/src/corpus.jl index 5cf4d033..facc4909 100644 --- a/src/corpus.jl +++ b/src/corpus.jl @@ -90,12 +90,12 @@ Base.length(crps::Corpus) = length(crps.documents) function Base.convert(::Type{DataFrame}, crps::Corpus) df = DataFrame() n = length(crps) - df[:Language] = Array{Union{String,Missing}}(n) - df[:Title] = Array{Union{String,Missing}}(n) - df[:Author] = Array{Union{String,Missing}}(n) - df[:TimeStamp] = Array{Union{String,Missing}}(n) - df[:Length] = Array{Union{Int,Missing}}(n) - df[:Text] = Array{Union{String,Missing}}(n) + df[!, :Language] = Array{Union{String,Missing}}(undef, n) + df[!, :Title] = Array{Union{String,Missing}}(undef, n) + df[!, :Author] = Array{Union{String,Missing}}(undef, n) + df[!, :TimeStamp] = Array{Union{String,Missing}}(undef, n) + df[!, :Length] = Array{Union{Int,Missing}}(undef, n) + df[!, :Text] = Array{Union{String,Missing}}(undef, n) for i in 1:n d = crps.documents[i] df[i, :Language] = string(language(d)) diff --git a/test/corpus.jl b/test/corpus.jl index 51c5cacc..19f224ea 100644 --- a/test/corpus.jl +++ b/test/corpus.jl @@ -41,3 +41,15 @@ @test answer == lexicon(crps) end + +using DataFrames + +@testset "Convert Corpus to DataFrames" begin + crps = Corpus([StringDocument("hello world"), StringDocument("goodbye world")]) + df = convert(DataFrame, crps) + @test typeof(df) == DataFrames.DataFrame + @test df[1, :Text] == "hello world" + @test df[1, :Length] == 11 + @test df[2, :Text] == "goodbye world" + @test df[2, :Length] == 13 +end \ No newline at end of file From d361a45d10be3385ff446865541b79f4de428c15 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Mon, 9 Nov 2020 16:27:46 +0530 Subject: [PATCH 2/2] fix 
link to the docs in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 12cbc475..3673aa6c 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Please see the detailed `example` and `API Documentation` linked above. The pac ## Introduction -TextAnalysis provides support for standard tools and models for working with textual data and natural languages in the Julia langauges. Please see the [documentation](https://juliahub.com/docs/TextAnalysis/5Mwett) for more. +TextAnalysis provides support for standard tools and models for working with textual data and natural languages in the Julia language. Please see the [documentation](https://juliahub.com/docs/TextAnalysis/5Mwet) for more. - **License** : [MIT License](https://github.com/JuliaText/TextAnalysis.jl/blob/master/LICENSE.md)