Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions html/js/i18n.js
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ const i18n = {
disabled: 'Disabled',
disabledFailedSync: 'The detection was saved but synchronization failed. The detection has been disabled. Check SOC logs for details.',
disconnected: 'Disconnected from manager',
diskUsage: 'Disk Usage',
diskUsageElastic: 'Elastic Storage Used',
diskUsageInfluxDb: 'InfluxDB Storage Used',
diskUsageNsm: 'NSM Partition Usage',
Expand Down Expand Up @@ -924,6 +925,7 @@ const i18n = {
rulePassBadChars: 'The password must not contain the following characters: " \' $ & !',
rules: 'Rules',
ruleset: 'Ruleset',
runningTroubleshoot: 'Running troubleshoot script...',
save: 'Save',
saveSuccess: 'Save successful!',
searchUsername: 'Search User',
Expand Down Expand Up @@ -1147,6 +1149,7 @@ const i18n = {
trafficManOut: 'Outbound Mgmt Traffic',
trafficManOutAbbr: 'Mgmt Out',
transcriptCyberChefHelp: 'Send the transcript to CyberChef',
troubleshootElasticsearch: 'Troubleshoot Elasticsearch',
ttr: 'Time Tracking',
tuneDetection: 'Tune Detection',
tuneDetectionHelp: 'Tune the detection that triggered this alert',
Expand Down
44 changes: 44 additions & 0 deletions html/js/routes/grid.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ routes.push({ path: '/grid', name: 'grid', component: {
gridMemberTestConfirmDialog: false,
gridMemberRestartConfirmDialog: false,
gridMemberUploadConfirmDialog: false,
troubleshootDialog: false,
troubleshootData: null,
troubleshootError: null,
troubleshootLoading: false,
uploadForm: { valid: true, attachment: null },
maxUploadSizeBytes: 25 * 1024 * 1024,
staleMetricsMs: 120000,
Expand Down Expand Up @@ -360,6 +364,46 @@ routes.push({ path: '/grid', name: 'grid', component: {
this.$root.stopLoading();
this.hideRestartConfirm();
},
canTroubleshoot(node) {
// Manager-type nodes only
return ['so-manager', 'so-managersearch', 'so-standalone'].indexOf(node.role) != -1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think there would be more node types here, like eval, import, etc.

},
showTroubleshootDialog(node) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These new JS functions need unit tests.

this.selectedNode = node;
this.troubleshootData = null;
this.troubleshootError = null;
this.troubleshootDialog = true;
this.runTroubleshoot();
},
hideTroubleshootDialog() {
this.troubleshootDialog = false;
this.troubleshootData = null;
this.troubleshootError = null;
this.selectedNode = null;
},
async runTroubleshoot() {
const nodeId = this.getNodeName(this.selectedNode);
this.troubleshootLoading = true;
this.troubleshootError = null;
try {
const response = await this.$root.papi.post('gridmembers/' + nodeId + "/estroubleshoot", null, {
params: {gridId: this.selectedNode.gridId}
});
try {
this.troubleshootData = JSON.parse(response.data.output);
} catch (parseError) {
this.troubleshootError = 'Failed to parse troubleshoot output';
}
} catch (error) {
this.troubleshootError = error.message || error;
}
this.troubleshootLoading = false;
},
getStatusColor(status) {
if (status === 'green' || status === 'ok') return 'success';
if (status === 'yellow' || status === 'high') return 'warning';
return 'error';
},
hasContainer(item, container) {
return item && item.containers && item.containers.find(function(x) {
return x.Name == container;
Expand Down
111 changes: 111 additions & 0 deletions html/pages/grid.html
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ <h4 v-if="metricsEnabled" data-aid="grid_summary">{{ i18n.gridEps }} {{ formatCo
<span class="filter label">{{ i18n.eventstoreStatus }}:</span>
<span id="node_eventstoreStatus" :class="areMetricsCurrent(item) ? 'filter value text-' + colorNodeStatus(item.eventstoreStatus) : 'filter value stale'">
{{ $root.localizeMessage(item.eventstoreStatus) }}
<v-icon v-if="canTroubleshoot(item)" style="font-size: 10px; vertical-align: middle;" :title="i18n.troubleshootElasticsearch" @click="showTroubleshootDialog(item)" class="theme-icon cursor-pointer ml-1" data-aid="grid_node_troubleshoot">fa-circle-info</v-icon>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I try to avoid inline styles. Was the icon too big by default? If so, the vuetify v-icon component should have a size property.

</span>
</div>
<div v-if="item.metricsEnabled" class="text-no-wrap">
Expand Down Expand Up @@ -529,4 +530,114 @@ <h4 v-if="metricsEnabled" data-aid="grid_summary">{{ i18n.gridEps }} {{ formatCo
</v-card-actions>
</v-card>
</v-dialog>
<v-dialog v-model="troubleshootDialog" persistent max-width="900" scrollable data-aid="grid_troubleshoot_dialog">
<v-card>
<v-card-title class="text-h5 d-flex align-center">
{{ i18n.troubleshootElasticsearch }}
<v-spacer></v-spacer>
<v-btn icon variant="text" @click="hideTroubleshootDialog" data-aid="grid_troubleshoot_close">
<v-icon>fa-times</v-icon>
</v-btn>
</v-card-title>
<v-card-text>
<div v-if="troubleshootLoading" class="text-center py-4">
<v-progress-circular indeterminate color="primary"></v-progress-circular>
<p class="mt-2">{{ i18n.runningTroubleshoot }}</p>
</div>
<v-alert v-else-if="troubleshootError" type="error" class="mb-4">{{ troubleshootError }}</v-alert>
<div v-else-if="troubleshootData">
<!-- Elasticsearch Not Accessible -->
<v-alert v-if="!troubleshootData.elasticsearchStatus?.accessible" type="error" class="mb-4">
{{ troubleshootData.elasticsearchStatus?.errorMessage || 'Elasticsearch is not accessible' }}
</v-alert>
<div v-else>
<!-- Issues Section - Show problems first -->
<div v-if="troubleshootData.elasticsearchStatus?.healthReport?.nonGreenCount > 0 && troubleshootData.elasticsearchStatus?.healthReport?.indicators?.length">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These inline scripts are getting complex. Would be cleaner and testable to move them to grid.js functions. Ex: hasEsTroubleshootIssues(). Similar comment for others in this file.

<div class="text-subtitle-1 font-weight-bold mb-3">Issues Found</div>
<v-alert v-for="(indicator, issueIdx) in (troubleshootData?.elasticsearchStatus?.healthReport?.indicators || []).filter(i => i && i.status !== 'green' && !['repository_integrity', 'slm'].includes(i.name))"
:key="issueIdx"
:type="indicator?.status === 'red' ? 'error' : 'warning'"
variant="tonal"
class="mb-3">
<div class="mb-2">{{ indicator?.symptom }}</div>
<div v-for="(diag, diagIdx) in (indicator?.diagnosis || [])" :key="diagIdx">
<template v-if="diag">
<p class="mb-2 text-caption">{{ diag.cause }}</p>
<p v-if="diag.affectedNodes?.length > 0" class="mb-2 text-caption">
Affected node{{ diag.affectedNodes.length > 1 ? 's' : '' }}: {{ diag.affectedNodes.length <= 5 ? diag.affectedNodes.join(', ') : diag.affectedNodes.slice(0, 5).join(', ') + ' and ' + (diag.affectedNodes.length - 5) + ' more' }}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it could use some help.

  • "Affected node" should be localized
  • Probably need to move that logic/join statement into a helper function. Ex: getEsTroubleshootAffectedNodes()

Several other English phrases in this PR also need to be localized.

</p>
<p class="mb-2 text-caption">Recommendation: {{ diag.action }}</p>
<v-expansion-panels v-if="diag.affectedIndices?.length > 0" flat>
<v-expansion-panel>
<v-expansion-panel-title class="py-0 text-caption">
{{ diag.affectedIndices.length }} affected indices
</v-expansion-panel-title>
<v-expansion-panel-text>
<div style="max-height: 150px; overflow-y: auto;" class="text-caption">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another example where you should try to stick to vuetify style classes instead of inlining styles. Matthew can assist here if this is going to be too time consuming to get ramped up on.

<div v-for="(index, indexIdx) in (diag.affectedIndices || []).slice(0, 50)" :key="indexIdx">{{ index }}</div>
<div v-if="diag.affectedIndices?.length > 50" class="font-italic">...and {{ diag.affectedIndices.length - 50 }} more</div>
</div>
</v-expansion-panel-text>
</v-expansion-panel>
</v-expansion-panels>
</template>
</div>
</v-alert>
</div>

<!-- Disk Usage Table -->
<div class="text-subtitle-1 font-weight-bold mb-2">{{ i18n.diskUsage }}</div>
<div v-if="troubleshootData.diskUsage?.nodes?.length" style="max-height: 300px; overflow-y: auto;">
<v-table density="compact">
<template #default>
<thead>
<tr>
<th>Node</th>
<th>IP</th>
<th>Disk Used</th>
<th>Available</th>
<th>Status</th>
</tr>
</thead>
<tbody>
<template v-for="(node, nodeIdx) in (troubleshootData?.diskUsage?.nodes || [])" :key="nodeIdx">
<tr v-if="node">
<td>{{ node.name }}</td>
<td>{{ node.ip }}</td>
<td>{{ node.usedPercent != null ? node.usedPercent.toFixed(1) + '%' : '—' }}</td>
<td>{{ node.availableBytes }}</td>
<td><v-chip :color="getStatusColor(node.status)" size="x-small">{{ node.status }}</v-chip></td>
</tr>
</template>
</tbody>
</template>
</v-table>
</div>
<div class="text-caption text--secondary mb-4">
Elasticsearch watermarks —
writes blocked at <strong>{{ troubleshootData.diskUsage?.watermarks?.floodDisplay }}</strong> (flood),
rebalancing starts at <strong>{{ troubleshootData.diskUsage?.watermarks?.highDisplay }}</strong> (high),
new shards blocked at <strong>{{ troubleshootData.diskUsage?.watermarks?.lowDisplay }}</strong> (low)
</div>

<!-- All Checks Summary -->
<div v-if="troubleshootData.elasticsearchStatus?.healthReport?.indicators?.length" class="mt-4">
<div class="text-subtitle-1 font-weight-bold mb-2">All Checks</div>
<div class="d-flex flex-wrap ga-2">
<v-chip v-for="(indicator, indicatorIdx) in (troubleshootData?.elasticsearchStatus?.healthReport?.indicators || []).filter(i => i && !['repository_integrity', 'slm'].includes(i.name))" :key="indicatorIdx"
:color="getStatusColor(indicator?.status)" variant="flat" size="small">
{{ indicator?.displayName }}
</v-chip>
</div>
</div>
</div>
</div>
</v-card-text>
<v-card-actions>
<v-spacer></v-spacer>
<v-btn text @click="runTroubleshoot" :disabled="troubleshootLoading" data-aid="grid_troubleshoot_refresh">{{ i18n.refresh }}</v-btn>
<v-btn text @click="hideTroubleshootDialog" data-aid="grid_troubleshoot_close_btn">{{ i18n.close }}</v-btn>
</v-card-actions>
</v-card>
</v-dialog>
</v-container>
19 changes: 17 additions & 2 deletions server/gridmembershandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ func (h *GridMembersHandler) postImport(w http.ResponseWriter, r *http.Request)
// @Tags Grid
// @Security bearer[grid/write]
// @Param id path string true "The grid member ID to be managed" example(so_standalone)
// @Param operation path string true "The operation to perform: add, reject, delete, test, restart" example(reject)
// @Param operation path string true "The operation to perform: add, reject, delete, test, restart, estroubleshoot" example(reject)
// @Success 200 "The operation was executed successfully"
// @Failure 401 "Request was not properly authenticated"
// @Failure 403 "Insufficient permissions for this request"
Expand All @@ -292,11 +292,26 @@ func (h *GridMembersHandler) postManageMembers(w http.ResponseWriter, r *http.Re
}

op := chi.URLParam(r, "operation")
if op != "add" && op != "reject" && op != "delete" && op != "test" && op != "restart" {
if op != "add" && op != "reject" && op != "delete" && op != "test" && op != "restart" && op != "estroubleshoot" {
web.Respond(w, r, http.StatusBadRequest, errors.New("Invalid operation"))
return
}

// Handle Elasticsearch troubleshoot operation separately since it returns output
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently you have a mixture of reusing the manage member handler but then branching out to a new gridMemberStore function. There are two ways to handle this:

  1. Go with the new Diagnose() salt-store function which would require this logic to move to a new postDiagnose() API handler.
  - or -
  2. Remove this if logic here and pass in the new op into the existing ManageMember() function, and have it do the branching, like test/restart ops.

if op == "estroubleshoot" {
// Extract node name from minion ID (e.g., "manager_standalone" -> "manager")
parts := strings.SplitN(id, "_", 2)
nodeName := parts[0]

output, err := h.server.GridMembersstore.RunTroubleshoot(ctx, nodeName, "/usr/sbin/so-elasticsearch-troubleshoot")
if err != nil {
web.Respond(w, r, http.StatusInternalServerError, err)
return
}
web.Respond(w, r, http.StatusOK, map[string]string{"output": output})
return
}

err := h.server.GridMembersstore.ManageMember(ctx, op, id)
if err != nil {
web.Respond(w, r, http.StatusBadRequest, err)
Expand Down
1 change: 1 addition & 0 deletions server/gridmembersstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type GridMembersstore interface {
ManageMember(ctx context.Context, operation string, id string) error
SendFile(ctx context.Context, node string, from string, to string, cleanup bool) error
Import(ctx context.Context, node string, file string, importer string) (*string, error)
RunTroubleshoot(ctx context.Context, node string, script string) (string, error)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would go away in favor of us either reusing the existing ManageMember func or creating a new Diagnose func.

}

//go:generate mockgen -destination mock/mock_gridmembersstore.go -package mock . GridMembersstore
15 changes: 15 additions & 0 deletions server/mock/mock_gridmembersstore.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions server/modules/salt/saltstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -1164,6 +1164,28 @@ func (store *Saltstore) Import(ctx context.Context, node string, file string, im
return &output, err
}

func (store *Saltstore) RunTroubleshoot(ctx context.Context, node string, script string) (string, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider renaming this to Diagnose() and pass in an enum like elastic or storage. Then have a switch determine which script to use for that incoming enum. Or have a single bash script called so-diagnose that accepts the enum and the bash script can branch out to different diagnostic routines.

But first read the other comments below.

if err := store.server.CheckAuthorized(ctx, "read", "grid"); err != nil {
return "", err
}

args := map[string]string{
"command": "run-troubleshoot",
"node": node,
"script": script + " --json",
}

// Use long timeout since troubleshoot scripts may take up to 120 seconds
ctxTimeout := options.WithTimeoutMs(ctx, store.longRelayTimeoutMs)

output, err := store.execCommand(ctxTimeout, args)
if err != nil {
return "", err
}

return output, nil
}

func (store *Saltstore) lookupEmailFromId(ctx context.Context, id string) string {
user, _ := store.server.Userstore.GetUserById(ctx, id)
if user != nil && user.Id == id {
Expand Down
Loading