From ce6c7a6a0af01528058b7cb2315ffa8d75e36e2e Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Wed, 7 Jan 2026 20:51:08 +0300 Subject: [PATCH 01/21] Setup project --- All.sln | 9 +++++++++ Salve/Program.cs | 2 ++ Salve/Salve.csproj | 10 ++++++++++ 3 files changed, 21 insertions(+) create mode 100644 Salve/Program.cs create mode 100644 Salve/Salve.csproj diff --git a/All.sln b/All.sln index 28bc5fc3..c18b8ba9 100644 --- a/All.sln +++ b/All.sln @@ -132,6 +132,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "misc", "misc", "{2918C7E3-3 Ficus\Readme.md = Ficus\Readme.md EndProjectSection EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Salve", "Salve", "{BA9BD6EC-204D-40AA-86A7-E70087EB9A1D}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Salve", "Salve\Salve.csproj", "{11FB9788-D067-4AD5-A17D-4685DE9DD366}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -238,6 +242,10 @@ Global {1BBDEB95-1084-4981-AE5A-EFFC74E095E4}.Debug|Any CPU.Build.0 = Debug|Any CPU {1BBDEB95-1084-4981-AE5A-EFFC74E095E4}.Release|Any CPU.ActiveCfg = Release|Any CPU {1BBDEB95-1084-4981-AE5A-EFFC74E095E4}.Release|Any CPU.Build.0 = Release|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Debug|Any CPU.Build.0 = Debug|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Release|Any CPU.ActiveCfg = Release|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {FEF48EF9-EDFC-4464-8942-64FC9FF000CA} = {C3E829BA-4C31-4F55-BF4D-C0D11B0B70B4} @@ -277,5 +285,6 @@ Global {B1223284-182E-4C3A-92CD-F59FBD42FFD6} = {0EC07BD3-AB92-48A0-B68B-05B3F2A767A3} {1BBDEB95-1084-4981-AE5A-EFFC74E095E4} = {D50BD31E-296B-468E-817A-60AF1CE7A759} {2918C7E3-3FC8-48A5-ADDF-8358E888B40C} = {5F2E0AEA-6AAF-4130-B486-A5F4F5A8A4BD} + {11FB9788-D067-4AD5-A17D-4685DE9DD366} = {BA9BD6EC-204D-40AA-86A7-E70087EB9A1D} EndGlobalSection EndGlobal diff --git a/Salve/Program.cs b/Salve/Program.cs new file mode 100644 index 00000000..3751555c --- /dev/null +++ b/Salve/Program.cs @@ -0,0 +1,2 @@ +// See https://aka.ms/new-console-template for more information +Console.WriteLine("Hello, World!"); diff --git a/Salve/Salve.csproj b/Salve/Salve.csproj new file mode 100644 index 00000000..ed9781c2 --- /dev/null +++ b/Salve/Salve.csproj @@ -0,0 +1,10 @@ + + + + Exe + net10.0 + enable + enable + + + From e905d6ca370bb6a74a1799d319dc05566276ad2e Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Thu, 8 Jan 2026 22:09:09 +0300 Subject: [PATCH 02/21] WIP on parsing rustc output --- Directory.Packages.props | 1 + Salve/Program.cs | 132 ++++++++++++++++++++++++++++++++++++++- Salve/Salve.csproj | 9 +++ 3 files changed, 140 insertions(+), 2 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index fa41122e..352a9176 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -5,6 +5,7 @@ + diff --git a/Salve/Program.cs b/Salve/Program.cs index 3751555c..f67ccdb9 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -1,2 +1,130 @@ -// See https://aka.ms/new-console-template for more information -Console.WriteLine("Hello, World!"); +using System.ComponentModel; +using System.Diagnostics; +using Bxes.Models.Domain; +using Bxes.Writer.Stream; +using JetBrains.Annotations; +using Spectre.Console; +using Spectre.Console.Cli; + +var app = new CommandApp(); +app.Configure(cfg => { cfg.AddCommand("serialize-to-bxes"); }); + +app.Run(args); + + +internal enum ParserKind +{ + Rustc +} + +[UsedImplicitly] +internal class SerializeOutputToBxesCommand : Command +{ + [UsedImplicitly] + public class Settings : CommandSettings + { + [CommandArgument(0, "")] + [Description("The parser which should be used to parse command output")] + public required ParserKind ParserKind { get; init; } + + [CommandArgument(1, "")] + [Description("The output path of a bXES file")] + public required string OutputFilePath { get; init; } + + [CommandArgument(2, "")] + [Description("Command executable")] + public required string Executable { get; init; } + + [CommandOption("--args")] + [Description("Command arguments")] + public required string? Arguments { get; init; } + + [CommandOption("--workdir")] + [Description("Working directory")] + public required string? WorkingDirectory { get; init; } + } + + + protected override int Execute(CommandContext context, Settings settings, CancellationToken cancellationToken) + { + try + { + var info = new ProcessStartInfo + { + FileName = settings.Executable, + RedirectStandardOutput = true, + RedirectStandardError = true, + Arguments = settings.Arguments, + CreateNoWindow = true + }; + + var process = new Process + { + StartInfo = info + }; + + var processor = LogsProcessorFactory.Create(settings.ParserKind, settings.OutputFilePath); + + try + { + // ReSharper disable once AccessToDisposedClosure + process.OutputDataReceived += (sender, args) => processor.Process(args.Data); + // ReSharper disable once AccessToDisposedClosure + process.ErrorDataReceived += (sender, args) => processor.Process(args.Data); + + if (!process.Start()) + { + throw new Exception("Failed to start process"); + } + + process.BeginOutputReadLine(); + process.BeginErrorReadLine(); + + process.WaitForExit(); + } + finally + { + processor.Dispose(); + } + + return 0; + } + catch (Exception ex) + { + AnsiConsole.WriteException(ex); + return 1; + } + } +} + +internal static class LogsProcessorFactory +{ + public static ILogsProcessor Create(ParserKind parserKind, string outputPath) => parserKind switch + { + ParserKind.Rustc => new RustcLogsParser(outputPath), + _ => throw new ArgumentOutOfRangeException(nameof(parserKind), parserKind, null) + }; +} + +internal interface ILogsProcessor : IDisposable +{ + void Process(string? line); +} + +internal class RustcLogsParser(string outputPath) : ILogsProcessor +{ + private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); + + public void Process(string? line) + { + if (line is { }) + { + AnsiConsole.WriteLine(line); + } + } + + public void Dispose() + { + myWriter.Dispose(); + } +} \ No newline at end of file diff --git a/Salve/Salve.csproj b/Salve/Salve.csproj index ed9781c2..e992bcf8 100644 --- a/Salve/Salve.csproj +++ b/Salve/Salve.csproj @@ -7,4 +7,13 @@ enable + + + + + + + + + From 5c84d9dcc7df5e80b451bd8ac3fe6a38c1af185e Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Thu, 8 Jan 2026 23:34:37 +0300 Subject: [PATCH 03/21] WIP --- Salve/Program.cs | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index f67ccdb9..4d5cdca1 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -1,6 +1,8 @@ using System.ComponentModel; using System.Diagnostics; using Bxes.Models.Domain; +using Bxes.Models.Domain.Values; +using Bxes.Utils; using Bxes.Writer.Stream; using JetBrains.Annotations; using Spectre.Console; @@ -49,11 +51,20 @@ protected override int Execute(CommandContext context, Settings settings, Cancel { try { + var directory = Path.GetDirectoryName(settings.OutputFilePath); + if (!Directory.Exists(directory)) + { + throw new Exception($"Directory {directory} does not exist"); + } + + PathUtil.EnsureDeleted(settings.OutputFilePath); + var info = new ProcessStartInfo { FileName = settings.Executable, RedirectStandardOutput = true, RedirectStandardError = true, + WorkingDirectory = settings.WorkingDirectory, Arguments = settings.Arguments, CreateNoWindow = true }; @@ -64,13 +75,14 @@ protected override int Execute(CommandContext context, Settings settings, Cancel }; var processor = LogsProcessorFactory.Create(settings.ParserKind, settings.OutputFilePath); + processor.Initialize(); try { // ReSharper disable once AccessToDisposedClosure - process.OutputDataReceived += (sender, args) => processor.Process(args.Data); + process.OutputDataReceived += (_, args) => processor.Process(args.Data); // ReSharper disable once AccessToDisposedClosure - process.ErrorDataReceived += (sender, args) => processor.Process(args.Data); + process.ErrorDataReceived += (_, args) => processor.Process(args.Data); if (!process.Start()) { @@ -108,6 +120,7 @@ internal static class LogsProcessorFactory internal interface ILogsProcessor : IDisposable { + void Initialize(); void Process(string? line); } @@ -115,12 +128,21 @@ internal class RustcLogsParser(string outputPath) : ILogsProcessor { private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); + + public void Initialize() => myWriter.HandleEvent(new BxesTraceVariantStartEvent(1, [])); + public void Process(string? line) { - if (line is { }) - { - AnsiConsole.WriteLine(line); - } + if (line is null) return; + + line = line.Trim(); + + if (!line.StartsWith("INFO")) return; + + var @event = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(line), []); + myWriter.HandleEvent(new BxesEventEvent(@event)); + + AnsiConsole.MarkupLine($"[green]Processed event:[/] {@event.Name}"); } public void Dispose() From a156aaa58f7355d672d1503aa232d47ce6e8dc75 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Thu, 8 Jan 2026 23:45:52 +0300 Subject: [PATCH 04/21] Locks and logging --- Salve/Program.cs | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 4d5cdca1..7dd0aa88 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -127,26 +127,54 @@ internal interface ILogsProcessor : IDisposable internal class RustcLogsParser(string outputPath) : ILogsProcessor { private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); + private readonly Lock myLock = new(); + + private volatile bool myIsDisposed; public void Initialize() => myWriter.HandleEvent(new BxesTraceVariantStartEvent(1, [])); public void Process(string? line) { - if (line is null) return; + if (myIsDisposed || line is null) return; line = line.Trim(); - if (!line.StartsWith("INFO")) return; + if (!line.StartsWith("INFO")) + { + AnsiConsole.Markup("[yellow]Skipping line:[/]"); + AnsiConsole.WriteLine(line); + return; + } var @event = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(line), []); - myWriter.HandleEvent(new BxesEventEvent(@event)); + + using (myLock.EnterScope()) + { + if (myIsDisposed) + { + AnsiConsole.MarkupLine($"[red]The writer is disposed, will not write event [/] {@event.Name}"); + return; + } + + myWriter.HandleEvent(new BxesEventEvent(@event)); + } AnsiConsole.MarkupLine($"[green]Processed event:[/] {@event.Name}"); } public void Dispose() { - myWriter.Dispose(); + using var _ = myLock.EnterScope(); + + try + { + myWriter.Dispose(); + AnsiConsole.WriteLine("Disposed writer"); + } + finally + { + myIsDisposed = true; + } } } \ No newline at end of file From ae7e0364c52627dc8a561964c3177713b6e0f301 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 00:35:12 +0300 Subject: [PATCH 05/21] Store events for further processing --- Salve/Program.cs | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 7dd0aa88..14cae80e 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -1,5 +1,6 @@ using System.ComponentModel; using System.Diagnostics; +using System.Text.RegularExpressions; using Bxes.Models.Domain; using Bxes.Models.Domain.Values; using Bxes.Utils; @@ -124,10 +125,17 @@ internal interface ILogsProcessor : IDisposable void Process(string? line); } -internal class RustcLogsParser(string outputPath) : ILogsProcessor +internal partial class RustcLogsParser(string outputPath) : ILogsProcessor { + private readonly record struct Event(string Message, string Group); + + + [GeneratedRegex("rustc_([a-z])+::([a-z])+")] + private static partial Regex MessageGroupRegex(); + private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); private readonly Lock myLock = new(); + private readonly List myEvents = []; private volatile bool myIsDisposed; @@ -140,33 +148,49 @@ public void Process(string? line) line = line.Trim(); - if (!line.StartsWith("INFO")) + if (!ShouldProcess(line, out var group)) { AnsiConsole.Markup("[yellow]Skipping line:[/]"); AnsiConsole.WriteLine(line); return; } - var @event = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(line), []); - using (myLock.EnterScope()) { if (myIsDisposed) { - AnsiConsole.MarkupLine($"[red]The writer is disposed, will not write event [/] {@event.Name}"); + AnsiConsole.MarkupLine($"[red]The writer is disposed, will not write event [/] {line}"); return; } - myWriter.HandleEvent(new BxesEventEvent(@event)); + myEvents.Add(new Event(line, group.ToString())); } - AnsiConsole.MarkupLine($"[green]Processed event:[/] {@event.Name}"); + AnsiConsole.MarkupLine($"[green]Processed event:[/] [gray]{line}[/], group [bold]{group}[/]"); + } + + private static bool ShouldProcess(string line, out ReadOnlySpan messageGroup) + { + messageGroup = default; + + if (!line.StartsWith("INFO")) return false; + if (MessageGroupRegex().Match(line) is not { } match) return false; + + messageGroup = match.ValueSpan; + + return true; } public void Dispose() { using var _ = myLock.EnterScope(); + foreach (var @event in myEvents) + { + var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Message), []); + myWriter.HandleEvent(new BxesEventEvent(bxesEvent)); + } + try { myWriter.Dispose(); From ade9743ca84fde5c251c853d785f3c642b329cef Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 13:06:22 +0300 Subject: [PATCH 06/21] Added dbscan --- Directory.Packages.props | 1 + Salve/Program.cs | 84 +++++++++++++++++++++++++++++++++++++++- Salve/Salve.csproj | 1 + 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 352a9176..c298f055 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -4,6 +4,7 @@ + diff --git a/Salve/Program.cs b/Salve/Program.cs index 14cae80e..3e48931c 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -1,10 +1,12 @@ using System.ComponentModel; using System.Diagnostics; +using System.Numerics; using System.Text.RegularExpressions; using Bxes.Models.Domain; using Bxes.Models.Domain.Values; using Bxes.Utils; using Bxes.Writer.Stream; +using Dbscan; using JetBrains.Annotations; using Spectre.Console; using Spectre.Console.Cli; @@ -127,7 +129,75 @@ internal interface ILogsProcessor : IDisposable internal partial class RustcLogsParser(string outputPath) : ILogsProcessor { - private readonly record struct Event(string Message, string Group); + private record Event(string Message, string Group) : IPointData + { + public Point Point => default; + } + + private class EventsIndex(List events) : ISpatialIndex> + { + private readonly Dictionary>> myEventsByGroups = + events + .GroupBy(e => e.Group) + .ToDictionary(e => e.Key, e => e.Select(evt => new PointInfo(evt)).ToList()); + + + public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); + + public IReadOnlyList> Search(in IPointData p, double epsilon) + { + var point = (PointInfo)p; + var result = new List>(); + + foreach (var evt in myEventsByGroups[point.Item.Group]) + { + if (ReferenceEquals(evt, point)) + { + continue; + } + + var distance = CalculateEditDistance(point.Item.Message.AsSpan(), evt.Item.Message.AsSpan()); + if (distance <= epsilon) + { + result.Add(evt); + } + } + + return result; + } + + private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan second) where T : IEqualityOperators + { + if (first.Length == 0) return second.Length; + if (second.Length == 0) return first.Length; + + var current = 1; + var previous = 0; + + var r = new int[2, second.Length + 1]; + for (var i = 0; i <= second.Length; i++) + { + r[previous, i] = i; + } + + for (var i = 0; i < first.Length; i++) + { + r[current, 0] = i + 1; + for (var j = 1; j <= second.Length; j++) + { + var cost = (second[j - 1] == first[i]) ? 0 : 1; + r[current, j] = Min(r[previous, j] + 1, r[current, j - 1] + 1, r[previous, j - 1] + cost); + } + + previous = (previous + 1) % 2; + current = (current + 1) % 2; + } + + return r[previous, second.Length]; + } + + private static int Min(int e1, int e2, int e3) => Math.Min(Math.Min(e1, e2), e3); + } [GeneratedRegex("rustc_([a-z])+::([a-z])+")] @@ -185,6 +255,18 @@ public void Dispose() { using var _ = myLock.EnterScope(); + var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(myEvents), 3, 1); + foreach (var cluster in clusters.Clusters) + { + AnsiConsole.MarkupLine("[blue]CLUSTER[/]"); + foreach (var obj in cluster.Objects) + { + AnsiConsole.WriteLine(obj.Message); + } + + AnsiConsole.WriteLine(); + } + foreach (var @event in myEvents) { var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Message), []); diff --git a/Salve/Salve.csproj b/Salve/Salve.csproj index e992bcf8..5894bd42 100644 --- a/Salve/Salve.csproj +++ b/Salve/Salve.csproj @@ -8,6 +8,7 @@ + From 2606e41d38e994c29ae9141213a24928b123d590 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 13:25:20 +0300 Subject: [PATCH 07/21] Use word index for better clustering --- Salve/Program.cs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 3e48931c..36e7e463 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -136,12 +136,22 @@ private record Event(string Message, string Group) : IPointData private class EventsIndex(List events) : ISpatialIndex> { + private const char Separator = ' '; + + private readonly Dictionary>> myEventsByGroups = events .GroupBy(e => e.Group) .ToDictionary(e => e.Key, e => e.Select(evt => new PointInfo(evt)).ToList()); + private readonly SortedList myWordsIndex = new( + events.SelectMany(e => e.Message.Split(Separator)) + .ToHashSet() + .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) + ); + + public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); public IReadOnlyList> Search(in IPointData p, double epsilon) @@ -149,6 +159,8 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) var point = (PointInfo)p; var result = new List>(); + var firstWord = ConvertMessageToWord(point.Item.Message); + foreach (var evt in myEventsByGroups[point.Item.Group]) { if (ReferenceEquals(evt, point)) @@ -156,7 +168,9 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) continue; } - var distance = CalculateEditDistance(point.Item.Message.AsSpan(), evt.Item.Message.AsSpan()); + var secondWord = ConvertMessageToWord(evt.Item.Message); + var distance = CalculateEditDistance(firstWord, secondWord); + if (distance <= epsilon) { result.Add(evt); @@ -164,6 +178,8 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) } return result; + + int[] ConvertMessageToWord(string message) => message.Split(Separator).Select(word => myWordsIndex[word]).ToArray(); } private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan second) where T : IEqualityOperators From 8e59020148b776db6f143292256e8352ea3d6132 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 14:01:51 +0300 Subject: [PATCH 08/21] Find LCS for all messages in cluster --- Salve/Program.cs | 101 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 84 insertions(+), 17 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 36e7e463..19734422 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -129,29 +129,21 @@ internal interface ILogsProcessor : IDisposable internal partial class RustcLogsParser(string outputPath) : ILogsProcessor { + private const char Separator = ' '; + private record Event(string Message, string Group) : IPointData { public Point Point => default; } - private class EventsIndex(List events) : ISpatialIndex> + private class EventsIndex(List events, SortedList index) : ISpatialIndex> { - private const char Separator = ' '; - - private readonly Dictionary>> myEventsByGroups = events .GroupBy(e => e.Group) .ToDictionary(e => e.Key, e => e.Select(evt => new PointInfo(evt)).ToList()); - private readonly SortedList myWordsIndex = new( - events.SelectMany(e => e.Message.Split(Separator)) - .ToHashSet() - .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) - ); - - public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); public IReadOnlyList> Search(in IPointData p, double epsilon) @@ -159,7 +151,7 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) var point = (PointInfo)p; var result = new List>(); - var firstWord = ConvertMessageToWord(point.Item.Message); + var firstWord = ConvertMessageToWord(point.Item.Message, index); foreach (var evt in myEventsByGroups[point.Item.Group]) { @@ -168,7 +160,7 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) continue; } - var secondWord = ConvertMessageToWord(evt.Item.Message); + var secondWord = ConvertMessageToWord(evt.Item.Message, index); var distance = CalculateEditDistance(firstWord, secondWord); if (distance <= epsilon) @@ -178,8 +170,6 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) } return result; - - int[] ConvertMessageToWord(string message) => message.Split(Separator).Select(word => myWordsIndex[word]).ToArray(); } private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan second) where T : IEqualityOperators @@ -271,13 +261,31 @@ public void Dispose() { using var _ = myLock.EnterScope(); - var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(myEvents), 3, 1); + var index = new SortedList( + myEvents.SelectMany(e => e.Message.Split(Separator)) + .ToHashSet() + .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) + ); + + var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(myEvents, index), 3, 1); foreach (var cluster in clusters.Clusters) { + if (cluster.Objects.Count is 0) continue; + AnsiConsole.MarkupLine("[blue]CLUSTER[/]"); - foreach (var obj in cluster.Objects) + + var lcs = ConvertMessageToWord(cluster.Objects[0].Message, index); + foreach (var obj in cluster.Objects.Skip(1)) { AnsiConsole.WriteLine(obj.Message); + + lcs = FindLcs(ConvertMessageToWord(obj.Message, index), lcs); + } + + AnsiConsole.Markup("[blue]LCS:[/] "); + foreach (var idx in lcs) + { + AnsiConsole.Markup($"{index.GetKeyAtIndex(index.IndexOfValue(idx))} "); } AnsiConsole.WriteLine(); @@ -299,4 +307,63 @@ public void Dispose() myIsDisposed = true; } } + + public static T[] FindLcs(ReadOnlySpan first, ReadOnlySpan second) + where T : IEqualityOperators + { + var n = first.Length; + var m = second.Length; + var dp = new int[n + 1, m + 1]; + + for (var i = 1; i <= n; i++) + { + for (var j = 1; j <= m; j++) + { + if (first[i - 1] == second[j - 1]) + { + dp[i, j] = dp[i - 1, j - 1] + 1; + } + else + { + dp[i, j] = Math.Max(dp[i - 1, j], dp[i, j - 1]); + } + } + } + + var lcs = RestoreLcs(first, second, dp, n, m); + + return lcs; + } + + private static int[] ConvertMessageToWord(string message, SortedList index) => + message.Split(Separator).Select(word => index[word]).ToArray(); + + public static T[] RestoreLcs(ReadOnlySpan x, ReadOnlySpan y, int[,] dp, int n, int m) + where T : IEqualityOperators + { + int i = n, j = m; + List lcs = []; + + while (i > 0 && j > 0) + { + if (x[i - 1] == y[j - 1]) + { + lcs.Add(x[i - 1]); + i--; + j--; + } + else if (dp[i - 1, j] > dp[i, j - 1]) + { + i--; + } + else + { + j--; + } + } + + lcs.Reverse(); + + return lcs.ToArray(); + } } \ No newline at end of file From 646bc07fe40a34367aaa525b9fe12a4bed77fb3b Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 14:50:48 +0300 Subject: [PATCH 09/21] Small fixes --- Salve/Program.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 19734422..fcf03cbc 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -206,7 +206,7 @@ private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan< } - [GeneratedRegex("rustc_([a-z])+::([a-z])+")] + [GeneratedRegex("rustc_[a-z]+::[a-z]+")] private static partial Regex MessageGroupRegex(); private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); @@ -242,7 +242,8 @@ public void Process(string? line) myEvents.Add(new Event(line, group.ToString())); } - AnsiConsole.MarkupLine($"[green]Processed event:[/] [gray]{line}[/], group [bold]{group}[/]"); + AnsiConsole.MarkupLine( + $"[green]Processed event:[/] [gray]{Markup.Escape(line)}[/], group [bold]{Markup.Escape(group.ToString())}[/]"); } private static bool ShouldProcess(string line, out ReadOnlySpan messageGroup) @@ -285,10 +286,11 @@ public void Dispose() AnsiConsole.Markup("[blue]LCS:[/] "); foreach (var idx in lcs) { - AnsiConsole.Markup($"{index.GetKeyAtIndex(index.IndexOfValue(idx))} "); + Console.Write($"{index.GetKeyAtIndex(index.IndexOfValue(idx))} "); } AnsiConsole.WriteLine(); + AnsiConsole.WriteLine(); } foreach (var @event in myEvents) @@ -330,9 +332,7 @@ public static T[] FindLcs(ReadOnlySpan first, ReadOnlySpan second) } } - var lcs = RestoreLcs(first, second, dp, n, m); - - return lcs; + return RestoreLcs(first, second, dp, n, m); } private static int[] ConvertMessageToWord(string message, SortedList index) => From 2d236b4f031716f4711a57d4e2ac2a830e1a22e8 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 15:03:29 +0300 Subject: [PATCH 10/21] Consider messages of a similar index length to be in the same event group --- Salve/Program.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Salve/Program.cs b/Salve/Program.cs index fcf03cbc..b6f645f4 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -161,6 +161,8 @@ public IReadOnlyList> Search(in IPointData p, double epsilon) } var secondWord = ConvertMessageToWord(evt.Item.Message, index); + if (secondWord.Length != firstWord.Length) continue; + var distance = CalculateEditDistance(firstWord, secondWord); if (distance <= epsilon) From 59c21c522c47525b28eeee79dce264b30dd576a3 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 15:14:54 +0300 Subject: [PATCH 11/21] Write unclustered objects --- Salve/Program.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index b6f645f4..c6730d59 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -280,7 +280,7 @@ public void Dispose() var lcs = ConvertMessageToWord(cluster.Objects[0].Message, index); foreach (var obj in cluster.Objects.Skip(1)) { - AnsiConsole.WriteLine(obj.Message); + Console.WriteLine(obj.Message); lcs = FindLcs(ConvertMessageToWord(obj.Message, index), lcs); } @@ -295,6 +295,12 @@ public void Dispose() AnsiConsole.WriteLine(); } + AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); + foreach (var obj in clusters.UnclusteredObjects) + { + Console.WriteLine(obj.Message); + } + foreach (var @event in myEvents) { var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Message), []); From 43fb1b3e556ae7aedfd1e1a0be53164d83088191 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 17:52:29 +0300 Subject: [PATCH 12/21] Draft event classes extraction --- Salve/Program.cs | 92 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 10 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index c6730d59..5ba68cf8 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -1,6 +1,7 @@ using System.ComponentModel; using System.Diagnostics; using System.Numerics; +using System.Text; using System.Text.RegularExpressions; using Bxes.Models.Domain; using Bxes.Models.Domain.Values; @@ -131,8 +132,10 @@ internal partial class RustcLogsParser(string outputPath) : ILogsProcessor { private const char Separator = ' '; - private record Event(string Message, string Group) : IPointData + private class Event(string message, string group) : IPointData { + public string Message { get; set; } = message; + public string Group => group; public Point Point => default; } @@ -280,19 +283,78 @@ public void Dispose() var lcs = ConvertMessageToWord(cluster.Objects[0].Message, index); foreach (var obj in cluster.Objects.Skip(1)) { - Console.WriteLine(obj.Message); - - lcs = FindLcs(ConvertMessageToWord(obj.Message, index), lcs); + lcs = FindLcs(ConvertMessageToWord(obj.Message, index), lcs).Lcs; } AnsiConsole.Markup("[blue]LCS:[/] "); foreach (var idx in lcs) { - Console.Write($"{index.GetKeyAtIndex(index.IndexOfValue(idx))} "); + Console.Write($"{WordByIndex(idx)} "); + } + + var referenceIndices = FindLcs(ConvertMessageToWord(cluster.Objects[0].Message, index), lcs).FirstIndices; + foreach (var obj in cluster.Objects) + { + var word = ConvertMessageToWord(obj.Message, index); + var indices = FindLcs(word, lcs).FirstIndices; + if (!indices.SequenceEqual(referenceIndices)) + { + throw new Exception("Broken message group"); + } + + var newMessage = new StringBuilder(); + newMessage.Append('['); + var lcsIndex = 0; + var addedPlaceholders = 0; + + for (var i = 0; i < word.Length; ++i) + { + if (lcsIndex >= indices.Count || i != indices[lcsIndex]) + { + newMessage.Append($"({addedPlaceholders + 1})"); + ++addedPlaceholders; + } + else + { + newMessage.Append(WordByIndex(word[i])); + ++lcsIndex; + } + + if (i < word.Length - 1) + { + newMessage.Append(' '); + } + } + + newMessage.Append(']'); + + lcsIndex = 0; + for (var i = 0; i < word.Length; ++i) + { + if (lcsIndex < indices.Count && i == indices[lcsIndex]) + { + lcsIndex++; + continue; + } + + newMessage.Append($"{{{WordByIndex(word[i])}}}"); + } + + obj.Message = newMessage.ToString(); + } + + AnsiConsole.WriteLine(); + + foreach (var obj in cluster.Objects) + { + Console.WriteLine(obj.Message); } AnsiConsole.WriteLine(); AnsiConsole.WriteLine(); + continue; + + string WordByIndex(int i) => index.GetKeyAtIndex(index.IndexOfValue(i)); } AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); @@ -318,7 +380,9 @@ public void Dispose() } } - public static T[] FindLcs(ReadOnlySpan first, ReadOnlySpan second) + private record struct LcsInfo(T[] Lcs, List FirstIndices, List SecondIndices); + + private static LcsInfo FindLcs(ReadOnlySpan first, ReadOnlySpan second) where T : IEqualityOperators { var n = first.Length; @@ -346,17 +410,23 @@ public static T[] FindLcs(ReadOnlySpan first, ReadOnlySpan second) private static int[] ConvertMessageToWord(string message, SortedList index) => message.Split(Separator).Select(word => index[word]).ToArray(); - public static T[] RestoreLcs(ReadOnlySpan x, ReadOnlySpan y, int[,] dp, int n, int m) + private static LcsInfo RestoreLcs(ReadOnlySpan first, ReadOnlySpan second, int[,] dp, int n, int m) where T : IEqualityOperators { int i = n, j = m; List lcs = []; + List firstIndices = []; + List secondIndices = []; while (i > 0 && j > 0) { - if (x[i - 1] == y[j - 1]) + if (first[i - 1] == second[j - 1]) { - lcs.Add(x[i - 1]); + firstIndices.Add(i - 1); + secondIndices.Add(j - 1); + + lcs.Add(first[i - 1]); + i--; j--; } @@ -370,8 +440,10 @@ public static T[] RestoreLcs(ReadOnlySpan x, ReadOnlySpan y, int[,] dp, } } + firstIndices.Reverse(); + secondIndices.Reverse(); lcs.Reverse(); - return lcs.ToArray(); + return new LcsInfo(lcs.ToArray(), firstIndices, secondIndices); } } \ No newline at end of file From a4e58fe1644b0a4141c74791469866de0419ec66 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 19:24:50 +0300 Subject: [PATCH 13/21] WIP --- Salve/Program.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 5ba68cf8..adffbc5f 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -211,9 +211,12 @@ private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan< } - [GeneratedRegex("rustc_[a-z]+::[a-z]+")] + [GeneratedRegex("rustc_[a-z]+(::[a-z]+)*")] private static partial Regex MessageGroupRegex(); + [GeneratedRegex("[0-9]+ms")] + private static partial Regex MsRegex(); + private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); private readonly Lock myLock = new(); private readonly List myEvents = []; @@ -228,6 +231,7 @@ public void Process(string? line) if (myIsDisposed || line is null) return; line = line.Trim(); + line = MsRegex().Replace(line, string.Empty).Trim(); if (!ShouldProcess(line, out var group)) { @@ -255,7 +259,7 @@ private static bool ShouldProcess(string line, out ReadOnlySpan messageGro { messageGroup = default; - if (!line.StartsWith("INFO")) return false; + if (!line.StartsWith("INFO") && !line.StartsWith("DEBUG")) return false; if (MessageGroupRegex().Match(line) is not { } match) return false; messageGroup = match.ValueSpan; From f66de533e6d48a854d31aa99de73b33f97a6de72 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 19:33:45 +0300 Subject: [PATCH 14/21] Cache tokens --- Salve/Program.cs | 60 ++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index adffbc5f..cf517f49 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -139,34 +139,41 @@ private class Event(string message, string group) : IPointData public Point Point => default; } - private class EventsIndex(List events, SortedList index) : ISpatialIndex> + private record EventWithTokens(Event Event, int[] Tokens) : IPointData { - private readonly Dictionary>> myEventsByGroups = + public Point Point => Event.Point; + } + + private class EventsIndex(List events, SortedList index) : ISpatialIndex> + { + private readonly Dictionary>> myEventsByGroups = events .GroupBy(e => e.Group) - .ToDictionary(e => e.Key, e => e.Select(evt => new PointInfo(evt)).ToList()); + .ToDictionary( + e => e.Key, + e => e + .Select(evt => new PointInfo(new EventWithTokens(evt, ConvertMessageToTokens(evt.Message, index)))) + .ToList() + ); - public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); + public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); - public IReadOnlyList> Search(in IPointData p, double epsilon) + public IReadOnlyList> Search(in IPointData p, double epsilon) { - var point = (PointInfo)p; - var result = new List>(); - - var firstWord = ConvertMessageToWord(point.Item.Message, index); + var point = (PointInfo)p; + var result = new List>(); - foreach (var evt in myEventsByGroups[point.Item.Group]) + foreach (var evt in myEventsByGroups[point.Item.Event.Group]) { if (ReferenceEquals(evt, point)) { continue; } - var secondWord = ConvertMessageToWord(evt.Item.Message, index); - if (secondWord.Length != firstWord.Length) continue; + if (point.Item.Tokens.Length != evt.Item.Tokens.Length) continue; - var distance = CalculateEditDistance(firstWord, secondWord); + var distance = CalculateEditDistance(point.Item.Tokens, evt.Item.Tokens); if (distance <= epsilon) { @@ -284,10 +291,10 @@ public void Dispose() AnsiConsole.MarkupLine("[blue]CLUSTER[/]"); - var lcs = ConvertMessageToWord(cluster.Objects[0].Message, index); + var lcs = cluster.Objects[0].Tokens; foreach (var obj in cluster.Objects.Skip(1)) { - lcs = FindLcs(ConvertMessageToWord(obj.Message, index), lcs).Lcs; + lcs = FindLcs(obj.Tokens, lcs).Lcs; } AnsiConsole.Markup("[blue]LCS:[/] "); @@ -296,11 +303,10 @@ public void Dispose() Console.Write($"{WordByIndex(idx)} "); } - var referenceIndices = FindLcs(ConvertMessageToWord(cluster.Objects[0].Message, index), lcs).FirstIndices; + var referenceIndices = FindLcs(cluster.Objects[0].Tokens, lcs).FirstIndices; foreach (var obj in cluster.Objects) { - var word = ConvertMessageToWord(obj.Message, index); - var indices = FindLcs(word, lcs).FirstIndices; + var indices = FindLcs(obj.Tokens, lcs).FirstIndices; if (!indices.SequenceEqual(referenceIndices)) { throw new Exception("Broken message group"); @@ -311,7 +317,7 @@ public void Dispose() var lcsIndex = 0; var addedPlaceholders = 0; - for (var i = 0; i < word.Length; ++i) + for (var i = 0; i < obj.Tokens.Length; ++i) { if (lcsIndex >= indices.Count || i != indices[lcsIndex]) { @@ -320,11 +326,11 @@ public void Dispose() } else { - newMessage.Append(WordByIndex(word[i])); + newMessage.Append(WordByIndex(obj.Tokens[i])); ++lcsIndex; } - if (i < word.Length - 1) + if (i < obj.Tokens.Length - 1) { newMessage.Append(' '); } @@ -333,7 +339,7 @@ public void Dispose() newMessage.Append(']'); lcsIndex = 0; - for (var i = 0; i < word.Length; ++i) + for (var i = 0; i < obj.Tokens.Length; ++i) { if (lcsIndex < indices.Count && i == indices[lcsIndex]) { @@ -341,17 +347,17 @@ public void Dispose() continue; } - newMessage.Append($"{{{WordByIndex(word[i])}}}"); + newMessage.Append($"{{{WordByIndex(obj.Tokens[i])}}}"); } - obj.Message = newMessage.ToString(); + obj.Event.Message = newMessage.ToString(); } AnsiConsole.WriteLine(); foreach (var obj in cluster.Objects) { - Console.WriteLine(obj.Message); + Console.WriteLine(obj.Event.Message); } AnsiConsole.WriteLine(); @@ -364,7 +370,7 @@ public void Dispose() AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); foreach (var obj in clusters.UnclusteredObjects) { - Console.WriteLine(obj.Message); + Console.WriteLine(obj.Event.Message); } foreach (var @event in myEvents) @@ -411,7 +417,7 @@ private static LcsInfo FindLcs(ReadOnlySpan first, ReadOnlySpan seco return RestoreLcs(first, second, dp, n, m); } - private static int[] ConvertMessageToWord(string message, SortedList index) => + private static int[] ConvertMessageToTokens(string message, SortedList index) => message.Split(Separator).Select(word => index[word]).ToArray(); private static LcsInfo RestoreLcs(ReadOnlySpan first, ReadOnlySpan second, int[,] dp, int n, int m) From 4f65c16b6c73c5a321be8ed4281461d66f67dfa8 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 20:13:05 +0300 Subject: [PATCH 15/21] Filter by tokens length, remove reference indices for now, fix group regex --- Salve/Program.cs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index cf517f49..6de220b3 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -153,6 +153,7 @@ private class EventsIndex(List events, SortedList index) : I e => e.Key, e => e .Select(evt => new PointInfo(new EventWithTokens(evt, ConvertMessageToTokens(evt.Message, index)))) + .Where(evt => evt.Item.Tokens.Length < 8) .ToList() ); @@ -218,7 +219,7 @@ private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan< } - [GeneratedRegex("rustc_[a-z]+(::[a-z]+)*")] + [GeneratedRegex("rustc(_[a-z]+)+(::[a-z_]+)*")] private static partial Regex MessageGroupRegex(); [GeneratedRegex("[0-9]+ms")] @@ -284,7 +285,8 @@ public void Dispose() .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) ); - var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(myEvents, index), 3, 1); + var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(myEvents, index), 4, 2); + foreach (var cluster in clusters.Clusters) { if (cluster.Objects.Count is 0) continue; @@ -303,15 +305,9 @@ public void Dispose() Console.Write($"{WordByIndex(idx)} "); } - var referenceIndices = FindLcs(cluster.Objects[0].Tokens, lcs).FirstIndices; foreach (var obj in cluster.Objects) { var indices = FindLcs(obj.Tokens, lcs).FirstIndices; - if (!indices.SequenceEqual(referenceIndices)) - { - throw new Exception("Broken message group"); - } - var newMessage = new StringBuilder(); newMessage.Append('['); var lcsIndex = 0; From 2d8f72fe598b8006d38455250b585e0b82ffb008 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 20:51:54 +0300 Subject: [PATCH 16/21] Serialize only filtered events --- Salve/Program.cs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 6de220b3..0409685e 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -144,16 +144,15 @@ private record EventWithTokens(Event Event, int[] Tokens) : IPointData public Point Point => Event.Point; } - private class EventsIndex(List events, SortedList index) : ISpatialIndex> + private class EventsIndex(List events) : ISpatialIndex> { private readonly Dictionary>> myEventsByGroups = events - .GroupBy(e => e.Group) + .GroupBy(e => e.Event.Group) .ToDictionary( e => e.Key, e => e - .Select(evt => new PointInfo(new EventWithTokens(evt, ConvertMessageToTokens(evt.Message, index)))) - .Where(evt => evt.Item.Tokens.Length < 8) + .Select(evt => new PointInfo(evt)) .ToList() ); @@ -285,7 +284,12 @@ public void Dispose() .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) ); - var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(myEvents, index), 4, 2); + var eventsWithTokens = myEvents + .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Message, index))) + .Where(et => et.Tokens.Length < 8) + .ToList(); + + var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(eventsWithTokens), 4, 2); foreach (var cluster in clusters.Clusters) { @@ -366,12 +370,13 @@ public void Dispose() AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); foreach (var obj in clusters.UnclusteredObjects) { + obj.Event.Message = $"[{obj.Event.Message}]"; Console.WriteLine(obj.Event.Message); } - foreach (var @event in myEvents) + foreach (var @event in eventsWithTokens) { - var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Message), []); + var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Event.Message), []); myWriter.HandleEvent(new BxesEventEvent(bxesEvent)); } From 2162b27cfe15781e57b20080cc26860e251dafe1 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 22:00:53 +0300 Subject: [PATCH 17/21] Refactorings --- Salve/ClusteringUtils.cs | 102 ++++++++ Salve/ILogsProcessor.cs | 7 + Salve/Program.cs | 347 +--------------------------- Salve/RustcLogsParser.EventIndex.cs | 59 +++++ Salve/RustcLogsParser.cs | 218 +++++++++++++++++ 5 files changed, 388 insertions(+), 345 deletions(-) create mode 100644 Salve/ClusteringUtils.cs create mode 100644 Salve/ILogsProcessor.cs create mode 100644 Salve/RustcLogsParser.EventIndex.cs create mode 100644 Salve/RustcLogsParser.cs diff --git a/Salve/ClusteringUtils.cs b/Salve/ClusteringUtils.cs new file mode 100644 index 00000000..f90b699f --- /dev/null +++ b/Salve/ClusteringUtils.cs @@ -0,0 +1,102 @@ +using System.Numerics; + +namespace Salve; + +internal static class ClusteringUtils +{ + public static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan second) where T : IEqualityOperators + { + if (first.Length == 0) return second.Length; + if (second.Length == 0) return first.Length; + + var current = 1; + var previous = 0; + + var r = new int[2, second.Length + 1]; + for (var i = 0; i <= second.Length; i++) + { + r[previous, i] = i; + } + + for (var i = 0; i < first.Length; i++) + { + r[current, 0] = i + 1; + for (var j = 1; j <= second.Length; j++) + { + var cost = (second[j - 1] == first[i]) ? 0 : 1; + r[current, j] = Min(r[previous, j] + 1, r[current, j - 1] + 1, r[previous, j - 1] + cost); + } + + previous = (previous + 1) % 2; + current = (current + 1) % 2; + } + + return r[previous, second.Length]; + } + + private static int Min(int e1, int e2, int e3) => Math.Min(Math.Min(e1, e2), e3); + + public record struct LcsInfo(T[] Lcs, List FirstIndices, List SecondIndices); + + public static LcsInfo FindLcs(ReadOnlySpan first, ReadOnlySpan second) + where T : IEqualityOperators + { + var n = first.Length; + var m = second.Length; + var dp = new int[n + 1, m + 1]; + + for (var i = 1; i <= n; i++) + { + for (var j = 1; j <= m; j++) + { + if (first[i - 1] == second[j - 1]) + { + dp[i, j] = dp[i - 1, j - 1] + 1; + } + else + { + dp[i, j] = Math.Max(dp[i - 1, j], dp[i, j - 1]); + } + } + } + + return RestoreLcs(first, second, dp, n, m); + } + + private static LcsInfo RestoreLcs(ReadOnlySpan first, ReadOnlySpan second, int[,] dp, int n, int m) + where T : IEqualityOperators + { + int i = n, j = m; + List lcs = []; + List firstIndices = []; + List secondIndices = []; + + while (i > 0 && j > 0) + { + if (first[i - 1] == second[j - 1]) + { + firstIndices.Add(i - 1); + secondIndices.Add(j - 1); + + lcs.Add(first[i - 1]); + + i--; + j--; + } + else if (dp[i - 1, j] > dp[i, j - 1]) + { + i--; + } + else + { + j--; + } + } + + firstIndices.Reverse(); + secondIndices.Reverse(); + lcs.Reverse(); + + return new LcsInfo(lcs.ToArray(), firstIndices, secondIndices); + } +} \ No newline at end of file diff --git a/Salve/ILogsProcessor.cs b/Salve/ILogsProcessor.cs new file mode 100644 index 00000000..69b06200 --- /dev/null +++ b/Salve/ILogsProcessor.cs @@ -0,0 +1,7 @@ +namespace Salve; + +internal interface ILogsProcessor : IDisposable +{ + void Initialize(); + void Process(string? line); +} \ No newline at end of file diff --git a/Salve/Program.cs b/Salve/Program.cs index 0409685e..97542a00 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -1,14 +1,8 @@ using System.ComponentModel; using System.Diagnostics; -using System.Numerics; -using System.Text; -using System.Text.RegularExpressions; -using Bxes.Models.Domain; -using Bxes.Models.Domain.Values; using Bxes.Utils; -using Bxes.Writer.Stream; -using Dbscan; using JetBrains.Annotations; +using Salve; using Spectre.Console; using Spectre.Console.Cli; @@ -117,344 +111,7 @@ internal static class LogsProcessorFactory { public static ILogsProcessor Create(ParserKind parserKind, string outputPath) => parserKind switch { - ParserKind.Rustc => new RustcLogsParser(outputPath), + ParserKind.Rustc => new Salve.RustcLogsParser(outputPath), _ => throw new ArgumentOutOfRangeException(nameof(parserKind), parserKind, null) }; -} - -internal interface ILogsProcessor : IDisposable -{ - void Initialize(); - void Process(string? line); -} - -internal partial class RustcLogsParser(string outputPath) : ILogsProcessor -{ - private const char Separator = ' '; - - private class Event(string message, string group) : IPointData - { - public string Message { get; set; } = message; - public string Group => group; - public Point Point => default; - } - - private record EventWithTokens(Event Event, int[] Tokens) : IPointData - { - public Point Point => Event.Point; - } - - private class EventsIndex(List events) : ISpatialIndex> - { - private readonly Dictionary>> myEventsByGroups = - events - .GroupBy(e => e.Event.Group) - .ToDictionary( - e => e.Key, - e => e - .Select(evt => new PointInfo(evt)) - .ToList() - ); - - - public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); - - public IReadOnlyList> Search(in IPointData p, double epsilon) - { - var point = (PointInfo)p; - var result = new List>(); - - foreach (var evt in myEventsByGroups[point.Item.Event.Group]) - { - if (ReferenceEquals(evt, point)) - { - continue; - } - - if (point.Item.Tokens.Length != evt.Item.Tokens.Length) continue; - - var distance = CalculateEditDistance(point.Item.Tokens, evt.Item.Tokens); - - if (distance <= epsilon) - { - result.Add(evt); - } - } - - return result; - } - - private static int CalculateEditDistance(ReadOnlySpan first, ReadOnlySpan second) where T : IEqualityOperators - { - if (first.Length == 0) return second.Length; - if (second.Length == 0) return first.Length; - - var current = 1; - var previous = 0; - - var r = new int[2, second.Length + 1]; - for (var i = 0; i <= second.Length; i++) - { - r[previous, i] = i; - } - - for (var i = 0; i < first.Length; i++) - { - r[current, 0] = i + 1; - for (var j = 1; j <= second.Length; j++) - { - var cost = (second[j - 1] == first[i]) ? 0 : 1; - r[current, j] = Min(r[previous, j] + 1, r[current, j - 1] + 1, r[previous, j - 1] + cost); - } - - previous = (previous + 1) % 2; - current = (current + 1) % 2; - } - - return r[previous, second.Length]; - } - - private static int Min(int e1, int e2, int e3) => Math.Min(Math.Min(e1, e2), e3); - } - - - [GeneratedRegex("rustc(_[a-z]+)+(::[a-z_]+)*")] - private static partial Regex MessageGroupRegex(); - - [GeneratedRegex("[0-9]+ms")] - private static partial Regex MsRegex(); - - private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); - private readonly Lock myLock = new(); - private readonly List myEvents = []; - - private volatile bool myIsDisposed; - - - public void Initialize() => myWriter.HandleEvent(new BxesTraceVariantStartEvent(1, [])); - - public void Process(string? line) - { - if (myIsDisposed || line is null) return; - - line = line.Trim(); - line = MsRegex().Replace(line, string.Empty).Trim(); - - if (!ShouldProcess(line, out var group)) - { - AnsiConsole.Markup("[yellow]Skipping line:[/]"); - AnsiConsole.WriteLine(line); - return; - } - - using (myLock.EnterScope()) - { - if (myIsDisposed) - { - AnsiConsole.MarkupLine($"[red]The writer is disposed, will not write event [/] {line}"); - return; - } - - myEvents.Add(new Event(line, group.ToString())); - } - - AnsiConsole.MarkupLine( - $"[green]Processed event:[/] [gray]{Markup.Escape(line)}[/], group [bold]{Markup.Escape(group.ToString())}[/]"); - } - - private static bool ShouldProcess(string line, out ReadOnlySpan messageGroup) - { - messageGroup = default; - - if (!line.StartsWith("INFO") && !line.StartsWith("DEBUG")) return false; - if (MessageGroupRegex().Match(line) is not { } match) return false; - - messageGroup = match.ValueSpan; - - return true; - } - - public void Dispose() - { - using var _ = myLock.EnterScope(); - - var index = new SortedList( - myEvents.SelectMany(e => e.Message.Split(Separator)) - .ToHashSet() - .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) - ); - - var eventsWithTokens = myEvents - .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Message, index))) - .Where(et => et.Tokens.Length < 8) - .ToList(); - - var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(eventsWithTokens), 4, 2); - - foreach (var cluster in clusters.Clusters) - { - if (cluster.Objects.Count is 0) continue; - - AnsiConsole.MarkupLine("[blue]CLUSTER[/]"); - - var lcs = cluster.Objects[0].Tokens; - foreach (var obj in cluster.Objects.Skip(1)) - { - lcs = FindLcs(obj.Tokens, lcs).Lcs; - } - - AnsiConsole.Markup("[blue]LCS:[/] "); - foreach (var idx in lcs) - { - Console.Write($"{WordByIndex(idx)} "); - } - - foreach (var obj in cluster.Objects) - { - var indices = FindLcs(obj.Tokens, lcs).FirstIndices; - var newMessage = new StringBuilder(); - newMessage.Append('['); - var lcsIndex = 0; - var addedPlaceholders = 0; - - for (var i = 0; i < obj.Tokens.Length; ++i) - { - if (lcsIndex >= indices.Count || i != indices[lcsIndex]) - { - newMessage.Append($"({addedPlaceholders + 1})"); - ++addedPlaceholders; - } - else - { - newMessage.Append(WordByIndex(obj.Tokens[i])); - ++lcsIndex; - } - - if (i < obj.Tokens.Length - 1) - { - newMessage.Append(' '); - } - } - - newMessage.Append(']'); - - lcsIndex = 0; - for (var i = 0; i < obj.Tokens.Length; ++i) - { - if (lcsIndex < indices.Count && i == indices[lcsIndex]) - { - lcsIndex++; - continue; - } - - newMessage.Append($"{{{WordByIndex(obj.Tokens[i])}}}"); - } - - obj.Event.Message = newMessage.ToString(); - } - - AnsiConsole.WriteLine(); - - foreach (var obj in cluster.Objects) - { - Console.WriteLine(obj.Event.Message); - } - - AnsiConsole.WriteLine(); - AnsiConsole.WriteLine(); - continue; - - string WordByIndex(int i) => index.GetKeyAtIndex(index.IndexOfValue(i)); - } - - AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); - foreach (var obj in clusters.UnclusteredObjects) - { - obj.Event.Message = $"[{obj.Event.Message}]"; - Console.WriteLine(obj.Event.Message); - } - - foreach (var @event in eventsWithTokens) - { - var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Event.Message), []); - myWriter.HandleEvent(new BxesEventEvent(bxesEvent)); - } - - try - { - myWriter.Dispose(); - AnsiConsole.WriteLine("Disposed writer"); - } - finally - { - myIsDisposed = true; - } - } - - private record struct LcsInfo(T[] Lcs, List FirstIndices, List SecondIndices); - - private static LcsInfo FindLcs(ReadOnlySpan first, ReadOnlySpan second) - where T : IEqualityOperators - { - var n = first.Length; - var m = second.Length; - var dp = new int[n + 1, m + 1]; - - for (var i = 1; i <= n; i++) - { - for (var j = 1; j <= m; j++) - { - if (first[i - 1] == second[j - 1]) - { - dp[i, j] = dp[i - 1, j - 1] + 1; - } - else - { - dp[i, j] = Math.Max(dp[i - 1, j], dp[i, j - 1]); - } - } - } - - return RestoreLcs(first, second, dp, n, m); - } - - private static int[] ConvertMessageToTokens(string message, SortedList index) => - message.Split(Separator).Select(word => index[word]).ToArray(); - - private static LcsInfo RestoreLcs(ReadOnlySpan first, ReadOnlySpan second, int[,] dp, int n, int m) - where T : IEqualityOperators - { - int i = n, j = m; - List lcs = []; - List firstIndices = []; - List secondIndices = []; - - while (i > 0 && j > 0) - { - if (first[i - 1] == second[j - 1]) - { - firstIndices.Add(i - 1); - secondIndices.Add(j - 1); - - lcs.Add(first[i - 1]); - - i--; - j--; - } - else if (dp[i - 1, j] > dp[i, j - 1]) - { - i--; - } - else - { - j--; - } - } - - firstIndices.Reverse(); - secondIndices.Reverse(); - lcs.Reverse(); - - return new LcsInfo(lcs.ToArray(), firstIndices, secondIndices); - } } \ No newline at end of file diff --git a/Salve/RustcLogsParser.EventIndex.cs b/Salve/RustcLogsParser.EventIndex.cs new file mode 100644 index 00000000..e1f4fefc --- /dev/null +++ b/Salve/RustcLogsParser.EventIndex.cs @@ -0,0 +1,59 @@ +using Dbscan; + +namespace Salve; + +internal partial class RustcLogsParser +{ + private class Event(string message, string group) : IPointData + { + public string Message { get; set; } = message; + public string Group => group; + public Point Point => default; + } + + private record EventWithTokens(Event Event, int[] Tokens) : IPointData + { + public Point Point => Event.Point; + } + + private class EventsIndex(List events) : ISpatialIndex> + { + private readonly Dictionary>> myEventsByGroups = + events + .GroupBy(e => e.Event.Group) + .ToDictionary( + e => e.Key, + e => e + .Select(evt => new PointInfo(evt)) + .ToList() + ); + + + public IReadOnlyList> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); + + public IReadOnlyList> Search(in IPointData p, double epsilon) + { + var point = (PointInfo)p; + var result = new List>(); + + foreach (var evt in myEventsByGroups[point.Item.Event.Group]) + { + if (ReferenceEquals(evt, point)) + { + continue; + } + + if (point.Item.Tokens.Length != evt.Item.Tokens.Length) continue; + + var distance = ClusteringUtils.CalculateEditDistance(point.Item.Tokens, evt.Item.Tokens); + + if (distance <= epsilon) + { + result.Add(evt); + } + } + + return result; + } + } +} \ No newline at end of file diff --git a/Salve/RustcLogsParser.cs b/Salve/RustcLogsParser.cs new file mode 100644 index 00000000..7a8143da --- /dev/null +++ b/Salve/RustcLogsParser.cs @@ -0,0 +1,218 @@ +using System.Text; +using System.Text.RegularExpressions; +using Bxes.Models.Domain; +using Bxes.Models.Domain.Values; +using Bxes.Writer.Stream; +using Dbscan; +using Spectre.Console; +using WordsIndex = System.Collections.Generic.SortedList; + +namespace Salve; + +internal partial class RustcLogsParser(string outputPath) : ILogsProcessor +{ + private const char Separator = ' '; + + + [GeneratedRegex("rustc(_[a-z]+)+(::[a-z_]+)*")] + private static partial Regex MessageGroupRegex(); + + [GeneratedRegex("[0-9]+ms")] + private static partial Regex MsRegex(); + + private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); + private readonly Lock myLock = new(); + private readonly List myEvents = []; + + private volatile bool myIsDisposed; + + + public void Initialize() => myWriter.HandleEvent(new BxesTraceVariantStartEvent(1, [])); + + public void Process(string? line) + { + if (myIsDisposed || line is null) return; + + line = line.Trim(); + line = MsRegex().Replace(line, string.Empty).Trim(); + + if (!ShouldProcess(line, out var group)) + { + AnsiConsole.Markup("[yellow]Skipping line:[/]"); + AnsiConsole.WriteLine(line); + return; + } + + using (myLock.EnterScope()) + { + if (myIsDisposed) + { + AnsiConsole.MarkupLine($"[red]The writer is disposed, will not write event [/] {line}"); + return; + } + + myEvents.Add(new Event(line, group.ToString())); + } + + AnsiConsole.MarkupLine( + $"[green]Processed event:[/] [gray]{Markup.Escape(line)}[/], group [bold]{Markup.Escape(group.ToString())}[/]"); + } + + private static bool ShouldProcess(string line, out ReadOnlySpan messageGroup) + { + messageGroup = default; + + if (!line.StartsWith("INFO") && !line.StartsWith("DEBUG")) return false; + if (MessageGroupRegex().Match(line) is not { } match) return false; + + messageGroup = match.ValueSpan; + + return true; + } + + public void Dispose() + { + using var _ = myLock.EnterScope(); + + var index = new WordsIndex( + myEvents.SelectMany(e => e.Message.Split(Separator)) + .ToHashSet() + .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) + ); + + var eventsWithTokens = myEvents + .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Message, index))) + .Where(et => et.Tokens.Length < 8) + .ToList(); + + var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(eventsWithTokens), 4, 2); + + ProcessClusters(clusters, index); + LogUnclusteredEvents(clusters); + + foreach (var @event in eventsWithTokens) + { + var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Event.Message), []); + myWriter.HandleEvent(new BxesEventEvent(bxesEvent)); + } + + DisposeWriter(); + } + + private static void LogUnclusteredEvents(ClusterSet clusters) + { + AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); + foreach (var obj in clusters.UnclusteredObjects) + { + obj.Event.Message = $"[{obj.Event.Message}]"; + Console.WriteLine(obj.Event.Message); + } + } + + private void DisposeWriter() + { + try + { + myWriter.Dispose(); + AnsiConsole.WriteLine("Disposed writer"); + } + finally + { + myIsDisposed = true; + } + } + + private static void ProcessClusters(ClusterSet clusters, WordsIndex index) + { + foreach (var cluster in clusters.Clusters) + { + if (cluster.Objects.Count is 0) continue; + + var lcs = cluster.Objects.Skip(1) + .Aggregate(cluster.Objects[0].Tokens, (current, obj) => ClusteringUtils.FindLcs(obj.Tokens, current).Lcs); + + AnsiConsole.MarkupLine("[blue]CLUSTER[/]"); + AnsiConsole.Markup("[blue]LCS:[/] "); + foreach (var idx in lcs) + { + Console.Write($"{index.WordByToken(idx)} "); + } + + AdjustEventsNames(cluster, lcs, index); + + AnsiConsole.WriteLine(); + + foreach (var obj in cluster.Objects) + { + Console.WriteLine(obj.Event.Message); + } + + AnsiConsole.WriteLine(); + AnsiConsole.WriteLine(); + } + } + + private static void AdjustEventsNames(Cluster cluster, int[] lcs, WordsIndex index) + { + foreach (var evt in cluster.Objects) + { + evt.Event.Message = CreateNewEventName(evt, lcs, index); + } + } + + private static string CreateNewEventName(EventWithTokens evt, int[] lcs, WordsIndex index) + { + var indices = ClusteringUtils.FindLcs(evt.Tokens, lcs).FirstIndices; + + var newMessage = new StringBuilder(); + newMessage.Append('['); + + var lcsIndex = 0; + var addedPlaceholders = 0; + for (var i = 0; i < evt.Tokens.Length; ++i) + { + if (lcsIndex >= indices.Count || i != indices[lcsIndex]) + { + newMessage.Append($"({addedPlaceholders + 1})"); + ++addedPlaceholders; + } + else + { + newMessage.Append(index.WordByToken(evt.Tokens[i])); + ++lcsIndex; + } + + if (i < evt.Tokens.Length - 1) + { + newMessage.Append(' '); + } + } + + newMessage.Append(']'); + + lcsIndex = 0; + for (var i = 0; i < evt.Tokens.Length; ++i) + { + if (lcsIndex < indices.Count && i == indices[lcsIndex]) + { + lcsIndex++; + continue; + } + + newMessage.Append($"{{{index.WordByToken(evt.Tokens[i])}}}"); + } + + return newMessage.ToString(); + } + + private static int[] ConvertMessageToTokens(string message, WordsIndex index) => + message.Split(Separator).Select(word => index[word]).ToArray(); +} + +internal static class IndexExtensions +{ + extension(WordsIndex index) + { + public string WordByToken(int i) => index.GetKeyAtIndex(index.IndexOfValue(i)); + } +} \ No newline at end of file From 660375cb79c21f09de09b8632cfdaa47cc4e2ec9 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 22:24:00 +0300 Subject: [PATCH 18/21] Refactor commands, add use groups as event names option --- Salve/Program.cs | 41 +++++++++++++--------------------------- Salve/RustcLogsParser.cs | 20 +++++++++++++++----- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index 97542a00..ef8527ba 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -7,41 +7,35 @@ using Spectre.Console.Cli; var app = new CommandApp(); -app.Configure(cfg => { cfg.AddCommand("serialize-to-bxes"); }); +app.Configure(cfg => { cfg.AddCommand("rustc-logs-to-bxes"); }); app.Run(args); -internal enum ParserKind -{ - Rustc -} - [UsedImplicitly] -internal class SerializeOutputToBxesCommand : Command +internal class RustcLogsToBxes : Command { [UsedImplicitly] public class Settings : CommandSettings { - [CommandArgument(0, "")] - [Description("The parser which should be used to parse command output")] - public required ParserKind ParserKind { get; init; } - [CommandArgument(1, "")] [Description("The output path of a bXES file")] public required string OutputFilePath { get; init; } - [CommandArgument(2, "")] - [Description("Command executable")] - public required string Executable { get; init; } - [CommandOption("--args")] [Description("Command arguments")] - public required string? Arguments { get; init; } + // ReSharper disable once UnassignedGetOnlyAutoProperty + public string? Arguments { get; init; } [CommandOption("--workdir")] [Description("Working directory")] - public required string? WorkingDirectory { get; init; } + // ReSharper disable once UnassignedGetOnlyAutoProperty + public string? WorkingDirectory { get; init; } + + [CommandOption("--group-names-as-event-names")] + [Description("Use groups names (FQNs) as event names")] + // ReSharper disable once UnassignedGetOnlyAutoProperty + public bool UseGroupsAsEventNames { get; init; } } @@ -59,7 +53,7 @@ protected override int Execute(CommandContext context, Settings settings, Cancel var info = new ProcessStartInfo { - FileName = settings.Executable, + FileName = "rustc", RedirectStandardOutput = true, RedirectStandardError = true, WorkingDirectory = settings.WorkingDirectory, @@ -72,7 +66,7 @@ protected override int Execute(CommandContext context, Settings settings, Cancel StartInfo = info }; - var processor = LogsProcessorFactory.Create(settings.ParserKind, settings.OutputFilePath); + var processor = new RustcLogsParser(settings.OutputFilePath, settings.UseGroupsAsEventNames); processor.Initialize(); try @@ -105,13 +99,4 @@ protected override int Execute(CommandContext context, Settings settings, Cancel return 1; } } -} - -internal static class LogsProcessorFactory -{ - public static ILogsProcessor Create(ParserKind parserKind, string outputPath) => parserKind switch - { - ParserKind.Rustc => new Salve.RustcLogsParser(outputPath), - _ => throw new ArgumentOutOfRangeException(nameof(parserKind), parserKind, null) - }; } \ No newline at end of file diff --git a/Salve/RustcLogsParser.cs b/Salve/RustcLogsParser.cs index 7a8143da..2b9b9406 100644 --- a/Salve/RustcLogsParser.cs +++ b/Salve/RustcLogsParser.cs @@ -9,17 +9,17 @@ namespace Salve; -internal partial class RustcLogsParser(string outputPath) : ILogsProcessor +internal partial class RustcLogsParser(string outputPath, bool useGroupsAsEventNames) : ILogsProcessor { private const char Separator = ' '; - [GeneratedRegex("rustc(_[a-z]+)+(::[a-z_]+)*")] private static partial Regex MessageGroupRegex(); [GeneratedRegex("[0-9]+ms")] private static partial Regex MsRegex(); + private readonly SingleFileBxesStreamWriterImpl myWriter = new(outputPath, 1); private readonly Lock myLock = new(); private readonly List myEvents = []; @@ -85,10 +85,20 @@ public void Dispose() .Where(et => et.Tokens.Length < 8) .ToList(); - var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(eventsWithTokens), 4, 2); + if (useGroupsAsEventNames) + { + foreach (var evt in eventsWithTokens) + { + evt.Event.Message = evt.Event.Group; + } + } + else + { + var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(eventsWithTokens), 4, 2); - ProcessClusters(clusters, index); - LogUnclusteredEvents(clusters); + ProcessClusters(clusters, index); + LogUnclusteredEvents(clusters); + } foreach (var @event in eventsWithTokens) { From 03bc6eb3246d2d205ebd7f34161bc70581f8bcd8 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Fri, 9 Jan 2026 23:13:52 +0300 Subject: [PATCH 19/21] Add option for max tokens --- Salve/Program.cs | 9 ++++++++- Salve/RustcLogsParser.cs | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index ef8527ba..d027a306 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -36,6 +36,13 @@ public class Settings : CommandSettings [Description("Use groups names (FQNs) as event names")] // ReSharper disable once UnassignedGetOnlyAutoProperty public bool UseGroupsAsEventNames { get; init; } + + [CommandOption("--max-tokens-in-event")] + [Description("Maximum tokens in events")] + public int MaxTokensInEvent { get; init; } + + + public RustcLogsParser CreateProcessor() => new(OutputFilePath, UseGroupsAsEventNames, MaxTokensInEvent); } @@ -66,7 +73,7 @@ protected override int Execute(CommandContext context, Settings settings, Cancel StartInfo = info }; - var processor = new RustcLogsParser(settings.OutputFilePath, settings.UseGroupsAsEventNames); + var processor = settings.CreateProcessor(); processor.Initialize(); try diff --git a/Salve/RustcLogsParser.cs b/Salve/RustcLogsParser.cs index 2b9b9406..5b82205b 100644 --- a/Salve/RustcLogsParser.cs +++ b/Salve/RustcLogsParser.cs @@ -9,7 +9,7 @@ namespace Salve; -internal partial class RustcLogsParser(string outputPath, bool useGroupsAsEventNames) : ILogsProcessor +internal partial class RustcLogsParser(string outputPath, bool useGroupsAsEventNames, int maxTokensInEvent) : ILogsProcessor { private const char Separator = ' '; @@ -82,7 +82,7 @@ public void Dispose() var eventsWithTokens = myEvents .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Message, index))) - .Where(et => et.Tokens.Length < 8) + .Where(et => et.Tokens.Length <= maxTokensInEvent) .ToList(); if (useGroupsAsEventNames) From 7db8191c978f81b0e0e216920c6786af2c990c89 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Sat, 10 Jan 2026 00:48:10 +0300 Subject: [PATCH 20/21] Supporting method events --- Salve/Program.cs | 9 +++- Salve/RustcLogsParser.EventIndex.cs | 18 +++++-- Salve/RustcLogsParser.cs | 82 +++++++++++++++++++++-------- 3 files changed, 81 insertions(+), 28 deletions(-) diff --git a/Salve/Program.cs b/Salve/Program.cs index d027a306..cd8df6eb 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -39,10 +39,15 @@ public class Settings : CommandSettings [CommandOption("--max-tokens-in-event")] [Description("Maximum tokens in events")] - public int MaxTokensInEvent { get; init; } + public int MaxTokensInEvent { get; init; } = 10; + [CommandOption("--leave-only-method-events")] + [Description("Leave only methods tracing events")] + public bool LeaveOnlyMethodEvents { get; init; } - public RustcLogsParser CreateProcessor() => new(OutputFilePath, UseGroupsAsEventNames, MaxTokensInEvent); + + public RustcLogsParser CreateProcessor() => + new(OutputFilePath, UseGroupsAsEventNames, MaxTokensInEvent, LeaveOnlyMethodEvents); } diff --git a/Salve/RustcLogsParser.EventIndex.cs b/Salve/RustcLogsParser.EventIndex.cs index e1f4fefc..c708c4a3 100644 --- a/Salve/RustcLogsParser.EventIndex.cs +++ b/Salve/RustcLogsParser.EventIndex.cs @@ -4,9 +4,16 @@ namespace Salve; internal partial class RustcLogsParser { - private class Event(string message, string group) : IPointData + private enum EventKind { - public string Message { get; set; } = message; + Message, + Method + } + + private class Event(EventKind kind, string message, string group) : IPointData + { + public EventKind Kind => kind; + public string Name { get; set; } = message; public string Group => group; public Point Point => default; } @@ -29,7 +36,10 @@ private class EventsIndex(List events) : ISpatialIndex> Search() => myEventsByGroups.Values.SelectMany(v => v).ToList(); + public IReadOnlyList> Search() => + myEventsByGroups.Values.SelectMany(v => v).Where(e => ShouldCluster(e.Item.Event)).ToList(); + + private static bool ShouldCluster(Event e) => e.Kind is EventKind.Message; public IReadOnlyList> Search(in IPointData p, double epsilon) { @@ -38,7 +48,7 @@ public IReadOnlyList> Search(in IPointData p, double foreach (var evt in myEventsByGroups[point.Item.Event.Group]) { - if (ReferenceEquals(evt, point)) + if (!ShouldCluster(evt.Item.Event) || ReferenceEquals(evt, point)) { continue; } diff --git a/Salve/RustcLogsParser.cs b/Salve/RustcLogsParser.cs index 5b82205b..d4c097c1 100644 --- a/Salve/RustcLogsParser.cs +++ b/Salve/RustcLogsParser.cs @@ -9,12 +9,17 @@ namespace Salve; -internal partial class RustcLogsParser(string outputPath, bool useGroupsAsEventNames, int maxTokensInEvent) : ILogsProcessor +internal partial class RustcLogsParser( + string outputPath, + bool useGroupsAsEventNames, + int maxTokensInEvent, + bool leaveOnlyMethodEvents) + : ILogsProcessor { private const char Separator = ' '; [GeneratedRegex("rustc(_[a-z]+)+(::[a-z_]+)*")] - private static partial Regex MessageGroupRegex(); + private static partial Regex FqnRegex(); [GeneratedRegex("[0-9]+ms")] private static partial Regex MsRegex(); @@ -34,12 +39,24 @@ public void Process(string? line) if (myIsDisposed || line is null) return; line = line.Trim(); + + var kind = (FqnRegex().Match(line) is { Index: 0, Length: > 0 }) switch + { + true => EventKind.Method, + false => EventKind.Message + }; + + if (leaveOnlyMethodEvents && kind is EventKind.Message) + { + LogSkippedLine(line); + return; + } + line = MsRegex().Replace(line, string.Empty).Trim(); - if (!ShouldProcess(line, out var group)) + if (!ShouldProcess(line, kind, out var group)) { - AnsiConsole.Markup("[yellow]Skipping line:[/]"); - AnsiConsole.WriteLine(line); + LogSkippedLine(line); return; } @@ -51,21 +68,41 @@ public void Process(string? line) return; } - myEvents.Add(new Event(line, group.ToString())); + var groupStr = group.ToString(); + var name = kind switch + { + EventKind.Method => groupStr, + EventKind.Message => line, + _ => throw new ArgumentOutOfRangeException() + }; + + myEvents.Add(new Event(kind, name, groupStr)); } AnsiConsole.MarkupLine( $"[green]Processed event:[/] [gray]{Markup.Escape(line)}[/], group [bold]{Markup.Escape(group.ToString())}[/]"); } - private static bool ShouldProcess(string line, out ReadOnlySpan messageGroup) + private static void LogSkippedLine(string line) + { + AnsiConsole.Markup("[yellow]Skipping line:[/]"); + AnsiConsole.WriteLine(line); + } + + private static bool ShouldProcess(string line, EventKind kind, out ReadOnlySpan eventGroup) { - messageGroup = default; + eventGroup = default; + + if (kind is EventKind.Method) + { + eventGroup = FqnRegex().Match(line).ValueSpan; + return true; + } if (!line.StartsWith("INFO") && !line.StartsWith("DEBUG")) return false; - if (MessageGroupRegex().Match(line) is not { } match) return false; + if (FqnRegex().Match(line) is not { } match) return false; - messageGroup = match.ValueSpan; + eventGroup = match.ValueSpan; return true; } @@ -75,13 +112,13 @@ public void Dispose() using var _ = myLock.EnterScope(); var index = new WordsIndex( - myEvents.SelectMany(e => e.Message.Split(Separator)) + myEvents.SelectMany(e => e.Name.Split(Separator)) .ToHashSet() .Select((e, index) => (e, index)).ToDictionary(p => p.e, p => p.index) ); var eventsWithTokens = myEvents - .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Message, index))) + .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Name, index))) .Where(et => et.Tokens.Length <= maxTokensInEvent) .ToList(); @@ -89,7 +126,7 @@ public void Dispose() { foreach (var evt in eventsWithTokens) { - evt.Event.Message = evt.Event.Group; + evt.Event.Name = evt.Event.Group; } } else @@ -97,25 +134,25 @@ public void Dispose() var clusters = Dbscan.Dbscan.CalculateClusters(new EventsIndex(eventsWithTokens), 4, 2); ProcessClusters(clusters, index); - LogUnclusteredEvents(clusters); + ProcessUnclusteredEvents(clusters.UnclusteredObjects); } foreach (var @event in eventsWithTokens) { - var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Event.Message), []); + var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Event.Name), []); myWriter.HandleEvent(new BxesEventEvent(bxesEvent)); } DisposeWriter(); } - private static void LogUnclusteredEvents(ClusterSet clusters) + private static void ProcessUnclusteredEvents(IReadOnlyList events) { AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]"); - foreach (var obj in clusters.UnclusteredObjects) + foreach (var evt in events) { - obj.Event.Message = $"[{obj.Event.Message}]"; - Console.WriteLine(obj.Event.Message); + evt.Event.Name = evt.Event.Group; + Console.WriteLine(evt.Event.Name); } } @@ -143,6 +180,7 @@ private static void ProcessClusters(ClusterSet clusters, WordsI AnsiConsole.MarkupLine("[blue]CLUSTER[/]"); AnsiConsole.Markup("[blue]LCS:[/] "); + foreach (var idx in lcs) { Console.Write($"{index.WordByToken(idx)} "); @@ -154,7 +192,7 @@ private static void ProcessClusters(ClusterSet clusters, WordsI foreach (var obj in cluster.Objects) { - Console.WriteLine(obj.Event.Message); + Console.WriteLine(obj.Event.Name); } AnsiConsole.WriteLine(); @@ -166,11 +204,11 @@ private static void AdjustEventsNames(Cluster cluster, int[] lc { foreach (var evt in cluster.Objects) { - evt.Event.Message = CreateNewEventName(evt, lcs, index); + evt.Event.Name = CreateNewClusteredEventName(evt, lcs, index); } } - private static string CreateNewEventName(EventWithTokens evt, int[] lcs, WordsIndex index) + private static string CreateNewClusteredEventName(EventWithTokens evt, int[] lcs, WordsIndex index) { var indices = ClusteringUtils.FindLcs(evt.Tokens, lcs).FirstIndices; From d1a2d3d0abf4067a6cf2a3608b5d15b49579c959 Mon Sep 17 00:00:00 2001 From: aerooneqq Date: Sat, 10 Jan 2026 23:20:48 +0300 Subject: [PATCH 21/21] Refactorings --- Salve/ILogsProcessor.cs | 7 ------- Salve/Program.cs | 2 +- Salve/{ => Rust}/RustcLogsParser.EventIndex.cs | 2 +- Salve/{ => Rust}/RustcLogsParser.cs | 3 +-- 4 files changed, 3 insertions(+), 11 deletions(-) delete mode 100644 Salve/ILogsProcessor.cs rename Salve/{ => Rust}/RustcLogsParser.EventIndex.cs (98%) rename Salve/{ => Rust}/RustcLogsParser.cs (99%) diff --git a/Salve/ILogsProcessor.cs b/Salve/ILogsProcessor.cs deleted file mode 100644 index 69b06200..00000000 --- a/Salve/ILogsProcessor.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace Salve; - -internal interface ILogsProcessor : IDisposable -{ - void Initialize(); - void Process(string? line); -} \ No newline at end of file diff --git a/Salve/Program.cs b/Salve/Program.cs index cd8df6eb..7e4c6829 100644 --- a/Salve/Program.cs +++ b/Salve/Program.cs @@ -2,7 +2,7 @@ using System.Diagnostics; using Bxes.Utils; using JetBrains.Annotations; -using Salve; +using Salve.Rust; using Spectre.Console; using Spectre.Console.Cli; diff --git a/Salve/RustcLogsParser.EventIndex.cs b/Salve/Rust/RustcLogsParser.EventIndex.cs similarity index 98% rename from Salve/RustcLogsParser.EventIndex.cs rename to Salve/Rust/RustcLogsParser.EventIndex.cs index c708c4a3..c73ecfa7 100644 --- a/Salve/RustcLogsParser.EventIndex.cs +++ b/Salve/Rust/RustcLogsParser.EventIndex.cs @@ -1,6 +1,6 @@ using Dbscan; -namespace Salve; +namespace Salve.Rust; internal partial class RustcLogsParser { diff --git a/Salve/RustcLogsParser.cs b/Salve/Rust/RustcLogsParser.cs similarity index 99% rename from Salve/RustcLogsParser.cs rename to Salve/Rust/RustcLogsParser.cs index d4c097c1..d57dfa0e 100644 --- a/Salve/RustcLogsParser.cs +++ b/Salve/Rust/RustcLogsParser.cs @@ -7,14 +7,13 @@ using Spectre.Console; using WordsIndex = System.Collections.Generic.SortedList; -namespace Salve; +namespace Salve.Rust; internal partial class RustcLogsParser( string outputPath, bool useGroupsAsEventNames, int maxTokensInEvent, bool leaveOnlyMethodEvents) - : ILogsProcessor { private const char Separator = ' ';