diff --git a/README.md b/README.md index dad1df74..8214dff9 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A Julia package for working with text. ## Introduction -TextAnalysis provides support for standard tools and models for working with textual data and natural languages in the Julia langauges. Please see the [documentation](https://juliahub.com/docs/TextAnalysis/5Mwett) for more. +TextAnalysis provides support for standard tools and models for working with textual data and natural languages in the Julia language. Please see the [documentation](https://juliahub.com/docs/TextAnalysis/5Mwet) for more. - **License** : [MIT License](https://github.com/JuliaText/TextAnalysis.jl/blob/master/LICENSE.md) diff --git a/src/corpus.jl b/src/corpus.jl index 5cf4d033..facc4909 100644 --- a/src/corpus.jl +++ b/src/corpus.jl @@ -90,12 +90,12 @@ Base.length(crps::Corpus) = length(crps.documents) function Base.convert(::Type{DataFrame}, crps::Corpus) df = DataFrame() n = length(crps) - df[:Language] = Array{Union{String,Missing}}(n) - df[:Title] = Array{Union{String,Missing}}(n) - df[:Author] = Array{Union{String,Missing}}(n) - df[:TimeStamp] = Array{Union{String,Missing}}(n) - df[:Length] = Array{Union{Int,Missing}}(n) - df[:Text] = Array{Union{String,Missing}}(n) + df[!, :Language] = Array{Union{String,Missing}}(undef, n) + df[!, :Title] = Array{Union{String,Missing}}(undef, n) + df[!, :Author] = Array{Union{String,Missing}}(undef, n) + df[!, :TimeStamp] = Array{Union{String,Missing}}(undef, n) + df[!, :Length] = Array{Union{Int,Missing}}(undef, n) + df[!, :Text] = Array{Union{String,Missing}}(undef, n) for i in 1:n d = crps.documents[i] df[i, :Language] = string(language(d)) diff --git a/test/corpus.jl b/test/corpus.jl index 51c5cacc..19f224ea 100644 --- a/test/corpus.jl +++ b/test/corpus.jl @@ -41,3 +41,15 @@ @test answer == lexicon(crps) end + +using DataFrames + +@testset "Convert Corpus to DataFrames" begin + crps = Corpus([StringDocument("hello world"), 
StringDocument("goodbye world")]) + df = convert(DataFrame, crps) + @test typeof(df) == DataFrames.DataFrame + @test df[1, :Text] == "hello world" + @test df[1, :Length] == 11 + @test df[2, :Text] == "goodbye world" + @test df[2, :Length] == 13 +end \ No newline at end of file