Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
use flake
6 changes: 3 additions & 3 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ jobs:

steps:
- uses: actions/checkout@v3
- name: Setup .NET 7.0
uses: actions/setup-dotnet@v3
- name: Setup .NET Core
uses: actions/setup-dotnet@v3.2.0
with:
dotnet-version: 7.0.x
dotnet-version: 9.0.x
- name: Install dependencies
run: dotnet restore
- name: Build
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
appsettings.Secret.yml
SS14.Watchdog/data.db
SS14.Watchdog/data.db-journal


# Created by https://www.gitignore.io/api/csharp
Expand Down
9 changes: 9 additions & 0 deletions SECURITY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Reporting a security vulnerability
You can report a security vulnerability through Discord or through email.

If you want to send an email, you can contact us at <telecommunications@spacestation14.com>.
If you want to contact us through Discord, you can join [our server](https://discord.gg/MwDDf6t)
and then **privately** message anyone with the `@Wizard` or `@SS14 Maintainer` role.

In either case, **do not publicly disclose the vulnerability until we explicitly give
you permission to do so**.
10 changes: 5 additions & 5 deletions SS14.Watchdog.Tests/SS14.Watchdog.Tests.csproj
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<TargetFramework>net9.0</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Moq" Version="4.18.4" />
<PackageReference Include="nunit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.5.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.6.1" />
<PackageReference Include="Moq" Version="4.20.72" />
<PackageReference Include="nunit" Version="4.3.2" />
<PackageReference Include="NUnit3TestAdapter" Version="4.6.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- When using the "Systemd" process manager, the unit name of the service process.
ALTER TABLE ServerInstance ADD COLUMN PersistedSystemdUnit TEXT;

99 changes: 94 additions & 5 deletions SS14.Watchdog/Components/ProcessManagement/IProcessManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace SS14.Watchdog.Components.ProcessManagement;
/// Responsible for managing game server processes: start, stop, persistence.
/// </summary>
/// <seealso cref="IProcessHandle"/>
/// <seealso cref="ProcessOptions"/>
public interface IProcessManager
{
bool CanPersist { get; }
Expand All @@ -29,6 +30,7 @@ Task<IProcessHandle> StartServer(
/// </summary>
/// <param name="Program">The program to run to launch the game server. Full path.</param>
/// <param name="WorkingDirectory">The working directory of the launched process.</param>
/// <seealso cref="IProcessHandle"/>
public sealed record ProcessStartData(
string Program,
string WorkingDirectory,
Expand All @@ -41,12 +43,99 @@ public sealed record ProcessStartData(
/// </summary>
public interface IProcessHandle
{
bool HasExited { get; }
int ExitCode { get; }

void DumpProcess(string file, DumpType type);

Task WaitForExitAsync(CancellationToken cancel = default);

void Kill();
}
Task Kill();

Task<ProcessExitStatus?> GetExitStatusAsync();
}

/// <summary>
/// Status for how a process has exited.
/// </summary>
/// <param name="Reason">
/// The reason why the process exited. See <see cref="ProcessExitReason"/> for possible values.
/// </param>
/// <param name="Status">
/// Reason-specific value.
/// For <see cref="ProcessExitReason.ExitCode"/> this is the exit code.
/// For <see cref="ProcessExitReason.Signal"/> and <see cref="ProcessExitReason.CoreDump"/> this is the signal that killed the process.
/// </param>
/// <seealso cref="IProcessHandle"/>
public sealed record ProcessExitStatus(ProcessExitReason Reason, int Status)
{
    /// <summary>
    /// Creates an exit status for a reason that has no reason-specific value.
    /// <see cref="Status"/> is set to 0.
    /// </summary>
    /// <param name="reason">The reason why the process exited.</param>
    public ProcessExitStatus(ProcessExitReason reason) : this(reason, 0)
    {
    }

    /// <summary>
    /// Whether this exit counts as clean. That is the case when the reason is
    /// <see cref="ProcessExitReason.ReasonUnavailable"/> or <see cref="ProcessExitReason.Success"/>,
    /// or when the reason is <see cref="ProcessExitReason.ExitCode"/> and the exit code is 0.
    /// </summary>
    public bool IsClean => Reason == ProcessExitReason.ReasonUnavailable
                           || (Reason == ProcessExitReason.ExitCode && Status == 0)
                           || Reason == ProcessExitReason.Success;
}

/// <summary>
/// The possible reasons reported by <see cref="ProcessExitStatus"/> for a process exit.
/// </summary>
public enum ProcessExitReason
{
    // These somewhat correspond to systemd's values for "Result" on a Service, kinda.
    // https://www.freedesktop.org/software/systemd/man/org.freedesktop.systemd1.html#Properties2

    /// <summary>
    /// The exit reason could not be determined.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This happens on POSIX with the "basic" process manager after restarting the watchdog,
    /// as it is not possible to get the exit status of persisted processes.
    /// </para>
    /// </remarks>
    ReasonUnavailable = 0,

    /// <summary>
    /// The process exited "successfully" according to systemd.
    /// </summary>
    /// <remarks>
    /// This probably means exit code 0, but the two are kept distinct as technically they're not equal.
    /// </remarks>
    Success = 1,

    /// <summary>
    /// The process exited with a recorded exit code.
    /// </summary>
    ExitCode = 2,

    /// <summary>
    /// The process was killed by an uncaught signal.
    /// </summary>
    /// <remarks>
    /// This won't apply if the process is killed with SIGTERM,
    /// as the game handles that and manually returns exit code signum + 128.
    /// </remarks>
    Signal = 3,

    /// <summary>
    /// The process crashed and dumped core.
    /// </summary>
    CoreDump = 4,

    /// <summary>
    /// A systemd operation failed.
    /// </summary>
    SystemdFailed = 5,

    /// <summary>
    /// A service operation timed out.
    /// </summary>
    Timeout = 6,

    /// <summary>
    /// The process was killed by the Linux OOM killer.
    /// </summary>
    OomKill = 7,

    /// <summary>
    /// Catch-all for other unhandled status codes.
    /// </summary>
    Other = 8,
}
28 changes: 22 additions & 6 deletions SS14.Watchdog/Components/ProcessManagement/ProcessManagerBasic.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,13 @@ private void PersistPid(IServerInstance instance, Process process)

_logger.LogDebug("Process looks good, guess we're using this!");

return Task.FromResult<IProcessHandle?>(new Handle(process));
return Task.FromResult<IProcessHandle?>(new Handle(process) { IsRecovered = true });
}

private sealed class Handle : IProcessHandle
{
private readonly Process _process;

public bool HasExited => _process.HasExited;
public int ExitCode => _process.ExitCode;
public bool IsRecovered;

public Handle(Process process)
{
Expand All @@ -187,9 +185,27 @@ public async Task WaitForExitAsync(CancellationToken cancel = default)
await _process.WaitForExitAsync(cancel);
}

public void Kill()
/// <summary>
/// Reports how the wrapped process exited.
/// </summary>
/// <returns>
/// A completed task holding <see langword="null"/> while the process has not exited yet,
/// otherwise the exit status of the process.
/// </returns>
public Task<ProcessExitStatus?> GetExitStatusAsync()
{
    ProcessExitStatus? exitStatus;

    if (!_process.HasExited)
    {
        // Still running: there is no exit status to report yet.
        exitStatus = null;
    }
    else if (!OperatingSystem.IsWindows() && IsRecovered)
    {
        // On POSIX the exit code can only be fetched for our own immediate children.
        // A process started by a previous watchdog instance and "recovered" from persistence
        // is not our child, so its exit code cannot be determined. Windows has no such limitation.
        exitStatus = new ProcessExitStatus(ProcessExitReason.ReasonUnavailable);
    }
    else
    {
        exitStatus = new ProcessExitStatus(ProcessExitReason.ExitCode, _process.ExitCode);
    }

    return Task.FromResult<ProcessExitStatus?>(exitStatus);
}

/// <summary>
/// Kills the wrapped process together with its entire process tree.
/// </summary>
/// <returns>An already-completed task; the kill request is issued synchronously.</returns>
public Task Kill()
{
    // entireProcessTree: true also takes down any descendants of the process.
    _process.Kill(entireProcessTree: true);
    return Task.CompletedTask;
}
}
}
}
Loading