diff --git a/All.sln b/All.sln index 28bc5fc3..c18b8ba9 100644 --- a/All.sln +++ b/All.sln @@ -132,6 +132,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "misc", "misc", "{2918C7E3-3 Ficus\Readme.md = Ficus\Readme.md EndProjectSection EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Salve", "Salve", "{BA9BD6EC-204D-40AA-86A7-E70087EB9A1D}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Salve", "Salve\Salve.csproj", "{11FB9788-D067-4AD5-A17D-4685DE9DD366}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -238,6 +242,10 @@ Global {1BBDEB95-1084-4981-AE5A-EFFC74E095E4}.Debug|Any CPU.Build.0 = Debug|Any CPU {1BBDEB95-1084-4981-AE5A-EFFC74E095E4}.Release|Any CPU.ActiveCfg = Release|Any CPU {1BBDEB95-1084-4981-AE5A-EFFC74E095E4}.Release|Any CPU.Build.0 = Release|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Debug|Any CPU.Build.0 = Debug|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Release|Any CPU.ActiveCfg = Release|Any CPU + {11FB9788-D067-4AD5-A17D-4685DE9DD366}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {FEF48EF9-EDFC-4464-8942-64FC9FF000CA} = {C3E829BA-4C31-4F55-BF4D-C0D11B0B70B4} @@ -277,5 +285,6 @@ Global {B1223284-182E-4C3A-92CD-F59FBD42FFD6} = {0EC07BD3-AB92-48A0-B68B-05B3F2A767A3} {1BBDEB95-1084-4981-AE5A-EFFC74E095E4} = {D50BD31E-296B-468E-817A-60AF1CE7A759} {2918C7E3-3FC8-48A5-ADDF-8358E888B40C} = {5F2E0AEA-6AAF-4130-B486-A5F4F5A8A4BD} + {11FB9788-D067-4AD5-A17D-4685DE9DD366} = {BA9BD6EC-204D-40AA-86A7-E70087EB9A1D} EndGlobalSection EndGlobal diff --git a/Directory.Packages.props b/Directory.Packages.props index fa41122e..c298f055 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -4,7 +4,9 @@ + + diff --git a/Salve/ClusteringUtils.cs b/Salve/ClusteringUtils.cs new file mode 100644 index 
00000000..f90b699f
--- /dev/null
+++ b/Salve/ClusteringUtils.cs
@@ -0,0 +1,126 @@
+using System.Numerics;
+
+namespace Salve;
+
+/// <summary>
+/// Sequence-comparison helpers used to cluster tokenized log messages.
+/// </summary>
+internal static class ClusteringUtils
+{
+    /// <summary>
+    /// Computes the Levenshtein edit distance between two sequences: the minimum number of
+    /// single-element insertions, deletions and substitutions that turn one into the other.
+    /// </summary>
+    /// <param name="first">The first sequence.</param>
+    /// <param name="second">The second sequence.</param>
+    /// <returns>The edit distance; the length of the other sequence when one is empty.</returns>
+    public static int CalculateEditDistance<T>(ReadOnlySpan<T> first, ReadOnlySpan<T> second)
+        where T : IEqualityOperators<T, T, bool>
+    {
+        if (first.Length == 0) return second.Length;
+        if (second.Length == 0) return first.Length;
+
+        // Only the previous and the current row of the DP table are ever read.
+        var previous = new int[second.Length + 1];
+        var current = new int[second.Length + 1];
+
+        for (var j = 0; j <= second.Length; j++)
+        {
+            previous[j] = j;
+        }
+
+        for (var i = 0; i < first.Length; i++)
+        {
+            current[0] = i + 1;
+            for (var j = 1; j <= second.Length; j++)
+            {
+                var cost = second[j - 1] == first[i] ? 0 : 1;
+                current[j] = Min(previous[j] + 1, current[j - 1] + 1, previous[j - 1] + cost);
+            }
+
+            (previous, current) = (current, previous);
+        }
+
+        // After the final swap the last computed row is the one called "previous".
+        return previous[second.Length];
+    }
+
+    private static int Min(int e1, int e2, int e3) => Math.Min(Math.Min(e1, e2), e3);
+
+    /// <summary>
+    /// A longest common subsequence together with the positions of its elements in both inputs.
+    /// </summary>
+    /// <param name="Lcs">The elements of the common subsequence, in order.</param>
+    /// <param name="FirstIndices">Ascending indices of those elements in the first sequence.</param>
+    /// <param name="SecondIndices">Ascending indices of those elements in the second sequence.</param>
+    public record struct LcsInfo<T>(T[] Lcs, List<int> FirstIndices, List<int> SecondIndices);
+
+    /// <summary>
+    /// Finds a longest common subsequence of two sequences using a full (n + 1) x (m + 1) DP table.
+    /// </summary>
+    /// <param name="first">The first sequence.</param>
+    /// <param name="second">The second sequence.</param>
+    /// <returns>The subsequence and the indices of its elements in each input.</returns>
+    public static LcsInfo<T> FindLcs<T>(ReadOnlySpan<T> first, ReadOnlySpan<T> second)
+        where T : IEqualityOperators<T, T, bool>
+    {
+        var n = first.Length;
+        var m = second.Length;
+
+        // dp[i, j] is the LCS length of first[..i] and second[..j]; row 0 and column 0 stay zero.
+        var dp = new int[n + 1, m + 1];
+
+        for (var i = 1; i <= n; i++)
+        {
+            for (var j = 1; j <= m; j++)
+            {
+                dp[i, j] = first[i - 1] == second[j - 1]
+                    ? dp[i - 1, j - 1] + 1
+                    : Math.Max(dp[i - 1, j], dp[i, j - 1]);
+            }
+        }
+
+        return RestoreLcs(first, second, dp, n, m);
+    }
+
+    /// <summary>
+    /// Walks the filled DP table back from its bottom-right corner and collects one LCS.
+    /// </summary>
+    private static LcsInfo<T> RestoreLcs<T>(
+        ReadOnlySpan<T> first, ReadOnlySpan<T> second, int[,] dp, int n, int m)
+        where T : IEqualityOperators<T, T, bool>
+    {
+        int i = n, j = m;
+        List<T> lcs = [];
+        List<int> firstIndices = [];
+        List<int> secondIndices = [];
+
+        while (i > 0 && j > 0)
+        {
+            if (first[i - 1] == second[j - 1])
+            {
+                firstIndices.Add(i - 1);
+                secondIndices.Add(j - 1);
+                lcs.Add(first[i - 1]);
+
+                i--;
+                j--;
+            }
+            else if (dp[i - 1, j] > dp[i, j - 1])
+            {
+                i--;
+            }
+            else
+            {
+                j--;
+            }
+        }
+
+        // The elements were collected back to front.
+        firstIndices.Reverse();
+        secondIndices.Reverse();
+        lcs.Reverse();
+
+        return new LcsInfo<T>(lcs.ToArray(), firstIndices, secondIndices);
+    }
+}
\ No newline at end of file
diff --git a/Salve/Program.cs b/Salve/Program.cs
new file mode 100644
index 00000000..7e4c6829
--- /dev/null
+++ b/Salve/Program.cs
@@ -0,0 +1,128 @@
+using System.ComponentModel;
+using System.Diagnostics;
+using Bxes.Utils;
+using JetBrains.Annotations;
+using Salve.Rust;
+using Spectre.Console;
+using Spectre.Console.Cli;
+
+var app = new CommandApp();
+app.Configure(cfg => { cfg.AddCommand<RustcLogsToBxes>("rustc-logs-to-bxes"); });
+
+// Propagate the command's exit code to the operating system instead of always exiting with 0.
+return app.Run(args);
+
+
+/// <summary>
+/// Runs <c>rustc</c>, passes every line it prints to a <see cref="RustcLogsParser"/> and stores
+/// the resulting events in a bXES file.
+/// </summary>
+[UsedImplicitly]
+internal class RustcLogsToBxes : Command<RustcLogsToBxes.Settings>
+{
+    /// <summary>Command-line settings of the command.</summary>
+    [UsedImplicitly]
+    public class Settings : CommandSettings
+    {
+        // NOTE(review): the angle-bracketed argument name was stripped from this template in the
+        // reviewed text; "<output-path>" is a reconstruction, confirm it against the original.
+        [CommandArgument(1, "<output-path>")]
+        [Description("The output path of a bXES file")]
+        public required string OutputFilePath { get; init; }
+
+        [CommandOption("--args")]
+        [Description("Command arguments")]
+        // ReSharper disable once UnassignedGetOnlyAutoProperty
+        public string? Arguments { get; init; }
+
+        [CommandOption("--workdir")]
+        [Description("Working directory")]
+        // ReSharper disable once UnassignedGetOnlyAutoProperty
+        public string? WorkingDirectory { get; init; }
+
+        [CommandOption("--group-names-as-event-names")]
+        [Description("Use groups names (FQNs) as event names")]
+        // ReSharper disable once UnassignedGetOnlyAutoProperty
+        public bool UseGroupsAsEventNames { get; init; }
+
+        [CommandOption("--max-tokens-in-event")]
+        [Description("Maximum tokens in events")]
+        public int MaxTokensInEvent { get; init; } = 10;
+
+        [CommandOption("--leave-only-method-events")]
+        [Description("Leave only methods tracing events")]
+        public bool LeaveOnlyMethodEvents { get; init; }
+
+
+        /// <summary>Creates the parser configured from these settings.</summary>
+        public RustcLogsParser CreateProcessor() =>
+            new(OutputFilePath, UseGroupsAsEventNames, MaxTokensInEvent, LeaveOnlyMethodEvents);
+    }
+
+
+    /// <summary>
+    /// Starts <c>rustc</c> with the configured arguments and converts its output to bXES.
+    /// </summary>
+    /// <returns>0 on success; 1 when any step throws (the exception is printed).</returns>
+    protected override int Execute(CommandContext context, Settings settings, CancellationToken cancellationToken)
+    {
+        try
+        {
+            // A bare file name has an empty directory part, so resolve the path before the check.
+            var directory = Path.GetDirectoryName(Path.GetFullPath(settings.OutputFilePath));
+            if (!Directory.Exists(directory))
+            {
+                throw new DirectoryNotFoundException($"Directory {directory} does not exist");
+            }
+
+            PathUtil.EnsureDeleted(settings.OutputFilePath);
+
+            var info = new ProcessStartInfo
+            {
+                FileName = "rustc",
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                WorkingDirectory = settings.WorkingDirectory,
+                Arguments = settings.Arguments,
+                CreateNoWindow = true
+            };
+
+            // Process owns OS handles and the redirected streams, so release it deterministically.
+            using var process = new Process { StartInfo = info };
+
+            var processor = settings.CreateProcessor();
+            processor.Initialize();
+
+            try
+            {
+                // ReSharper disable once AccessToDisposedClosure
+                process.OutputDataReceived += (_, e) => processor.Process(e.Data);
+                // ReSharper disable once AccessToDisposedClosure
+                process.ErrorDataReceived += (_, e) => processor.Process(e.Data);
+
+                if (!process.Start())
+                {
+                    throw new InvalidOperationException("Failed to start process");
+                }
+
+                process.BeginOutputReadLine();
+                process.BeginErrorReadLine();
+
+                // WaitForExit() without a timeout also waits until both redirected streams hit EOF.
+                process.WaitForExit();
+            }
+            finally
+            {
+                // Disposing the parser is what clusters the collected events and writes the file.
+                processor.Dispose();
+            }
+
+            return 0;
+        }
+        catch (Exception ex)
+        {
+            AnsiConsole.WriteException(ex);
+            return 1;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Salve/Rust/RustcLogsParser.EventIndex.cs b/Salve/Rust/RustcLogsParser.EventIndex.cs
new file mode 100644
index 00000000..c73ecfa7
--- /dev/null
+++ b/Salve/Rust/RustcLogsParser.EventIndex.cs
@@ -0,0 +1,86 @@
+using Dbscan;
+
+namespace Salve.Rust;
+
+internal partial class RustcLogsParser
+{
+    /// <summary>How a log line was classified.</summary>
+    private enum EventKind
+    {
+        /// <summary>A line that does not start with a rustc fully qualified name.</summary>
+        Message,
+
+        /// <summary>A line that starts with a rustc fully qualified name.</summary>
+        Method
+    }
+
+    /// <summary>A single event collected from the rustc output.</summary>
+    private class Event(EventKind kind, string message, string group) : IPointData
+    {
+        public EventKind Kind => kind;
+        public string Name { get; set; } = message;
+        public string Group => group;
+
+        // Every event reports the same point: neighbours are found by EventsIndex via the
+        // edit distance between token sequences, not via coordinates.
+        public Point Point => default;
+    }
+
+    /// <summary>An event together with its name converted to word tokens.</summary>
+    private record EventWithTokens(Event Event, int[] Tokens) : IPointData
+    {
+        public Point Point => Event.Point;
+    }
+
+    /// <summary>
+    /// The spatial index handed to DBSCAN. Two events are neighbours when they belong to the same
+    /// group, are both messages, have the same number of tokens and their token edit distance
+    /// does not exceed epsilon.
+    /// </summary>
+    private class EventsIndex(List<EventWithTokens> events) : ISpatialIndex<PointInfo<EventWithTokens>>
+    {
+        private readonly Dictionary<string, List<PointInfo<EventWithTokens>>> myEventsByGroups =
+            events
+                .GroupBy(e => e.Event.Group)
+                .ToDictionary(
+                    e => e.Key,
+                    e => e
+                        .Select(evt => new PointInfo<EventWithTokens>(evt))
+                        .ToList()
+                );
+
+
+        /// <summary>Returns every event that takes part in clustering (messages only).</summary>
+        public IReadOnlyList<PointInfo<EventWithTokens>> Search() =>
+            myEventsByGroups.Values.SelectMany(v => v).Where(e => ShouldCluster(e.Item.Event)).ToList();
+
+        private static bool ShouldCluster(Event e) => e.Kind is EventKind.Message;
+
+        /// <summary>Returns the neighbours of <paramref name="p"/> within <paramref name="epsilon"/>.</summary>
+        public IReadOnlyList<PointInfo<EventWithTokens>> Search(in IPointData p, double epsilon)
+        {
+            var point = (PointInfo<EventWithTokens>)p;
+            var result = new List<PointInfo<EventWithTokens>>();
+
+            foreach (var evt in myEventsByGroups[point.Item.Event.Group])
+            {
+                if (!ShouldCluster(evt.Item.Event) || ReferenceEquals(evt, point))
+                {
+                    continue;
+                }
+
+                // Only messages with the same number of tokens are compared.
+                if (point.Item.Tokens.Length != evt.Item.Tokens.Length) continue;
+
+                var distance = ClusteringUtils.CalculateEditDistance<int>(point.Item.Tokens, evt.Item.Tokens);
+
+                if (distance <= epsilon)
+                {
+                    result.Add(evt);
+                }
+            }
+
+            return result;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Salve/Rust/RustcLogsParser.cs b/Salve/Rust/RustcLogsParser.cs
new file mode 100644
index 00000000..d57dfa0e
--- /dev/null
+++ b/Salve/Rust/RustcLogsParser.cs
@@ -0,0 +1,332 @@
+using System.Text;
+using System.Text.RegularExpressions;
+using Bxes.Models.Domain;
+using Bxes.Models.Domain.Values;
+using Bxes.Writer.Stream;
+using Dbscan;
+using Spectre.Console;
+using WordsIndex = System.Collections.Generic.SortedList<string, int>;
+
+namespace Salve.Rust;
+
+/// <summary>
+/// Collects rustc log lines as events and, when disposed, clusters similar messages, renames the
+/// events accordingly and writes all of them to a bXES file.
+/// </summary>
+internal partial class RustcLogsParser(
+    string outputPath,
+    bool useGroupsAsEventNames,
+    int maxTokensInEvent,
+    bool leaveOnlyMethodEvents)
+{
+    private const char Separator = ' ';
+
+    // Arguments passed to DBSCAN: the largest token edit distance between neighbouring events
+    // and the minimum number of points per cluster.
+    private const double ClusteringEpsilon = 4;
+    private const int MinPointsPerCluster = 2;
+
+    [GeneratedRegex("rustc(_[a-z]+)+(::[a-z_]+)*")]
+    private static partial Regex FqnRegex();
+
+    [GeneratedRegex("[0-9]+ms")]
+    private static partial Regex MsRegex();
+
+
+    // NOTE(review): the generic argument of the writer (and of BxesEventEvent in WriteEvents) was
+    // stripped from the reviewed text; InMemoryEventImpl is the event type written here - confirm.
+    private readonly SingleFileBxesStreamWriterImpl<InMemoryEventImpl> myWriter = new(outputPath, 1);
+    private readonly Lock myLock = new();
+    private readonly List<Event> myEvents = [];
+
+    private volatile bool myIsDisposed;
+
+
+    /// <summary>Writes the trace-variant start event that precedes the events.</summary>
+    public void Initialize() => myWriter.HandleEvent(new BxesTraceVariantStartEvent(1, []));
+
+    /// <summary>
+    /// Classifies one line of rustc output and, unless it is skipped, stores it as an event.
+    /// Additions to the event list are serialized with a lock.
+    /// </summary>
+    /// <param name="line">A line of rustc output; <c>null</c> is ignored.</param>
+    public void Process(string? line)
+    {
+        if (myIsDisposed || line is null) return;
+
+        line = line.Trim();
+
+        // A line that starts with a rustc fully qualified name is treated as a method event.
+        var kind = FqnRegex().Match(line) is { Index: 0, Length: > 0 }
+            ? EventKind.Method
+            : EventKind.Message;
+
+        if (leaveOnlyMethodEvents && kind is EventKind.Message)
+        {
+            LogSkippedLine(line);
+            return;
+        }
+
+        // Drop "<digits>ms" fragments (presumably durations) before the line is stored.
+        line = MsRegex().Replace(line, string.Empty).Trim();
+
+        if (!ShouldProcess(line, kind, out var group))
+        {
+            LogSkippedLine(line);
+            return;
+        }
+
+        var groupStr = group.ToString();
+
+        using (myLock.EnterScope())
+        {
+            if (myIsDisposed)
+            {
+                // The line is arbitrary text: escape it so that '[' and ']' are not parsed as markup.
+                AnsiConsole.MarkupLine(
+                    $"[red]The writer is disposed, will not write event [/] {Markup.Escape(line)}");
+                return;
+            }
+
+            var name = kind switch
+            {
+                EventKind.Method => groupStr,
+                EventKind.Message => line,
+                _ => throw new ArgumentOutOfRangeException()
+            };
+
+            myEvents.Add(new Event(kind, name, groupStr));
+        }
+
+        AnsiConsole.MarkupLine(
+            $"[green]Processed event:[/] [gray]{Markup.Escape(line)}[/], group [bold]{Markup.Escape(groupStr)}[/]");
+    }
+
+    private static void LogSkippedLine(string line)
+    {
+        AnsiConsole.Markup("[yellow]Skipping line:[/]");
+        AnsiConsole.WriteLine(line);
+    }
+
+    /// <summary>
+    /// Decides whether a line becomes an event and extracts its group (the first rustc FQN in it).
+    /// Method lines are always kept; other lines must start with INFO or DEBUG and contain an FQN.
+    /// </summary>
+    private static bool ShouldProcess(string line, EventKind kind, out ReadOnlySpan<char> eventGroup)
+    {
+        eventGroup = default;
+
+        // Regex.Match never returns null; a failed search is reported through Match.Success.
+        var match = FqnRegex().Match(line);
+
+        if (kind is EventKind.Method)
+        {
+            eventGroup = match.ValueSpan;
+            return true;
+        }
+
+        if (!line.StartsWith("INFO", StringComparison.Ordinal) &&
+            !line.StartsWith("DEBUG", StringComparison.Ordinal))
+        {
+            return false;
+        }
+
+        if (!match.Success) return false;
+
+        eventGroup = match.ValueSpan;
+        return true;
+    }
+
+    /// <summary>
+    /// Names the collected events, writes them to the bXES file and disposes the writer.
+    /// Calling it again does nothing.
+    /// </summary>
+    public void Dispose()
+    {
+        using var _ = myLock.EnterScope();
+
+        if (myIsDisposed) return;
+
+        try
+        {
+            WriteEvents();
+        }
+        finally
+        {
+            // Dispose the writer even when clustering or writing throws.
+            DisposeWriter();
+        }
+    }
+
+    private void WriteEvents()
+    {
+        // Every distinct word gets an integer token. Distinct() compares words ordinally, so the
+        // sorted list has to as well: with the default culture-sensitive comparer two different
+        // words can compare as equal, and the constructor then throws on a duplicate key.
+        var tokensByWords = myEvents
+            .SelectMany(e => Tokenize(e.Name))
+            .Distinct()
+            .Select((word, token) => (word, token))
+            .ToDictionary(p => p.word, p => p.token);
+
+        var index = new WordsIndex(tokensByWords, StringComparer.Ordinal);
+
+        var eventsWithTokens = myEvents
+            .Select(e => new EventWithTokens(e, ConvertMessageToTokens(e.Name, index)))
+            .Where(et => et.Tokens.Length <= maxTokensInEvent)
+            .ToList();
+
+        if (useGroupsAsEventNames)
+        {
+            foreach (var evt in eventsWithTokens)
+            {
+                evt.Event.Name = evt.Event.Group;
+            }
+        }
+        else
+        {
+            var clusters = Dbscan.Dbscan.CalculateClusters(
+                new EventsIndex(eventsWithTokens), ClusteringEpsilon, MinPointsPerCluster);
+
+            ProcessClusters(clusters, index);
+            ProcessUnclusteredEvents(clusters.UnclusteredObjects);
+        }
+
+        foreach (var @event in eventsWithTokens)
+        {
+            var bxesEvent = new InMemoryEventImpl(DateTime.UtcNow.Ticks, new BxesStringValue(@event.Event.Name), []);
+            myWriter.HandleEvent(new BxesEventEvent<InMemoryEventImpl>(bxesEvent));
+        }
+    }
+
+    private static void ProcessUnclusteredEvents(IReadOnlyList<EventWithTokens> events)
+    {
+        AnsiConsole.MarkupLine("[blue]UNCLUSTERED[/]");
+        foreach (var evt in events)
+        {
+            evt.Event.Name = evt.Event.Group;
+            Console.WriteLine(evt.Event.Name);
+        }
+    }
+
+    private void DisposeWriter()
+    {
+        try
+        {
+            myWriter.Dispose();
+            AnsiConsole.WriteLine("Disposed writer");
+        }
+        finally
+        {
+            myIsDisposed = true;
+        }
+    }
+
+    private static void ProcessClusters(ClusterSet<EventWithTokens> clusters, WordsIndex index)
+    {
+        foreach (var cluster in clusters.Clusters)
+        {
+            if (cluster.Objects.Count is 0) continue;
+
+            // Fold the pairwise LCS over all members to get the tokens they have in common.
+            var lcs = cluster.Objects.Skip(1)
+                .Aggregate(
+                    cluster.Objects[0].Tokens,
+                    (current, obj) => ClusteringUtils.FindLcs<int>(obj.Tokens, current).Lcs);
+
+            AnsiConsole.MarkupLine("[blue]CLUSTER[/]");
+            AnsiConsole.Markup("[blue]LCS:[/] ");
+
+            foreach (var idx in lcs)
+            {
+                Console.Write($"{index.WordByToken(idx)} ");
+            }
+
+            AdjustEventsNames(cluster, lcs, index);
+
+            AnsiConsole.WriteLine();
+
+            foreach (var obj in cluster.Objects)
+            {
+                Console.WriteLine(obj.Event.Name);
+            }
+
+            AnsiConsole.WriteLine();
+            AnsiConsole.WriteLine();
+        }
+    }
+
+    private static void AdjustEventsNames(Cluster<EventWithTokens> cluster, int[] lcs, WordsIndex index)
+    {
+        foreach (var evt in cluster.Objects)
+        {
+            evt.Event.Name = CreateNewClusteredEventName(evt, lcs, index);
+        }
+    }
+
+    /// <summary>
+    /// Builds the name of a clustered event: the message in square brackets with every word that
+    /// is not part of the cluster's LCS replaced by a numbered placeholder such as <c>(1)</c>,
+    /// followed by the replaced words, each in curly braces.
+    /// </summary>
+    private static string CreateNewClusteredEventName(EventWithTokens evt, int[] lcs, WordsIndex index)
+    {
+        var indices = ClusteringUtils.FindLcs<int>(evt.Tokens, lcs).FirstIndices;
+
+        var newMessage = new StringBuilder();
+        newMessage.Append('[');
+
+        var lcsIndex = 0;
+        var addedPlaceholders = 0;
+        for (var i = 0; i < evt.Tokens.Length; ++i)
+        {
+            if (lcsIndex >= indices.Count || i != indices[lcsIndex])
+            {
+                newMessage.Append($"({addedPlaceholders + 1})");
+                ++addedPlaceholders;
+            }
+            else
+            {
+                newMessage.Append(index.WordByToken(evt.Tokens[i]));
+                ++lcsIndex;
+            }
+
+            if (i < evt.Tokens.Length - 1)
+            {
+                newMessage.Append(' ');
+            }
+        }
+
+        newMessage.Append(']');
+
+        lcsIndex = 0;
+        for (var i = 0; i < evt.Tokens.Length; ++i)
+        {
+            if (lcsIndex < indices.Count && i == indices[lcsIndex])
+            {
+                lcsIndex++;
+                continue;
+            }
+
+            newMessage.Append($"{{{index.WordByToken(evt.Tokens[i])}}}");
+        }
+
+        return newMessage.ToString();
+    }
+
+    // Consecutive separators would otherwise yield empty "words" that take part in clustering.
+    private static string[] Tokenize(string message) =>
+        message.Split(Separator, StringSplitOptions.RemoveEmptyEntries);
+
+    private static int[] ConvertMessageToTokens(string message, WordsIndex index) =>
+        Tokenize(message).Select(word => index[word]).ToArray();
+}
+
+internal static class IndexExtensions
+{
+    extension(WordsIndex index)
+    {
+        // IndexOfValue is a linear scan: tokens are the values of the sorted list, not its keys.
+        public string WordByToken(int i) => index.GetKeyAtIndex(index.IndexOfValue(i));
+    }
+}
\ No newline at end of file
diff --git a/Salve/Salve.csproj b/Salve/Salve.csproj
new file mode 100644
index 00000000..5894bd42
--- /dev/null
+++ b/Salve/Salve.csproj
@@ -0,0 +1,20 @@
+
+
+
+ Exe
+ net10.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
+
+