From 22c6c24457fe45cf2fa6bd8f53721bf00de48cba Mon Sep 17 00:00:00 2001 From: Joakim Wennergren Date: Fri, 26 Jan 2018 18:37:14 +0100 Subject: [PATCH] More streams. Less OutOfMemoryException --- CsvQuery/Csv/CsvAnalyzer.cs | 7 ++----- CsvQuery/Csv/CsvSettings.cs | 15 ++++++++++++++- CsvQuery/Forms/QueryWindow.cs | 32 +++++++++++++++++++++----------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/CsvQuery/Csv/CsvAnalyzer.cs b/CsvQuery/Csv/CsvAnalyzer.cs index 0a722fb..b4d4ebc 100644 --- a/CsvQuery/Csv/CsvAnalyzer.cs +++ b/CsvQuery/Csv/CsvAnalyzer.cs @@ -22,11 +22,6 @@ private class Stat /// public static CsvSettings Analyze(string csvString) { - // TODO: strings with quoted values (e.g. 'hej,san') - // Not sure how to detect this, but we could just run the variance analysis - // 3 times, one for none, one for ' and one for " and see which has best variances - // That wouldn't detect escape chars though, or odd variants like [this] - var result = DetectW3C(csvString); if (result != null) return result; @@ -120,6 +115,8 @@ public static CsvSettings Analyze(string csvString) var separatorQuoted = GetSeparatorFromVariance(variancesQuoted, occurrencesQuoted, linesQuoted, out var uncertancyQuoted); if (uncertancyQuoted < uncertancy) result.Separator = separatorQuoted; + else if (uncertancy < uncertancyQuoted) // It was better ignoring quotes! + result.TextQualifier = '\0'; if (result.Separator != default(char)) return result; diff --git a/CsvQuery/Csv/CsvSettings.cs b/CsvQuery/Csv/CsvSettings.cs index 9857a88..a399a63 100644 --- a/CsvQuery/Csv/CsvSettings.cs +++ b/CsvQuery/Csv/CsvSettings.cs @@ -44,15 +44,28 @@ public CsvSettings(char separator, char quoteEscapeChar, char commentChar, bool? 
this.HasHeader = hasHeader; } + /// /// Parses a big text blob into rows and columns, using the settings /// /// Big blob of text /// Parsed data public List Parse(string text) + { + using (var reader = new StringReader(text)) + { + return Parse(reader); + } + } + + /// + /// Parses the text supplied by a reader into rows and columns, using the settings + /// + /// Reader supplying the text to parse + /// Parsed data + public List Parse(TextReader reader) { // The actual _parsing_ .NET can handle. Well, VisualBasic anyway... - using(var reader = new StringReader(text)) using (var parser = new Microsoft.VisualBasic.FileIO.TextFieldParser(reader)) { var errors = new StringBuilder(); diff --git a/CsvQuery/Forms/QueryWindow.cs b/CsvQuery/Forms/QueryWindow.cs index 6d8b5cb..7258b28 100644 --- a/CsvQuery/Forms/QueryWindow.cs +++ b/CsvQuery/Forms/QueryWindow.cs @@ -173,13 +173,14 @@ private void Analyze(bool silent) var watch = new DiagnosticTimer(); var bufferId = NotepadPPGateway.GetCurrentBufferId(); - string text; + var textLength = PluginBase.CurrentScintillaGateway.GetTextLength(); + var text = PluginBase.CurrentScintillaGateway.GetTextRange(0, Math.Min(100000, textLength)); // var text = PluginBase.CurrentScintillaGateway.GetAllText(); - using (var sr = new StreamReader(ScintillaStreams.StreamAllText(), Encoding.UTF8)) - { - text = sr.ReadToEnd(); - } + //using (var sr = new StreamReader(ScintillaStreams.StreamAllText(), Encoding.UTF8)) + //{ + // text = sr.ReadToEnd(); + //} watch.Checkpoint("GetText"); @@ -197,10 +198,13 @@ private void Analyze(bool silent) } watch.Checkpoint("Analyze"); - Parse(csvSettings, watch, text, bufferId); + using (var sr = new StreamReader(ScintillaStreams.StreamAllText(), Encoding.UTF8)) + { + Parse(csvSettings, watch, sr, bufferId); + } } - private void Parse(CsvSettings csvSettings, DiagnosticTimer watch, string text, IntPtr bufferId) + private void Parse(CsvSettings csvSettings, DiagnosticTimer watch, TextReader text, IntPtr bufferId) { var data = 
csvSettings.Parse(text); watch.Checkpoint("Parse"); @@ -227,10 +231,16 @@ private void Parse(CsvSettings csvSettings, DiagnosticTimer watch, string text, public void StartParse(CsvSettings settings) { - StartSomething(() => Parse(settings, - new DiagnosticTimer(), - PluginBase.CurrentScintillaGateway.GetAllText(), - NotepadPPGateway.GetCurrentBufferId())); + StartSomething(() => + { + using (var sr = new StreamReader(ScintillaStreams.StreamAllText(), Encoding.UTF8)) + { + Parse(settings, + new DiagnosticTimer(), + sr, + NotepadPPGateway.GetCurrentBufferId()); + } + }); } private void Execute(IntPtr bufferId, DiagnosticTimer watch)