diff --git a/html/js/i18n.js b/html/js/i18n.js
index d2fc39bd5..111478479 100644
--- a/html/js/i18n.js
+++ b/html/js/i18n.js
@@ -471,6 +471,7 @@ const i18n = {
       disabled: 'Disabled',
       disabledFailedSync: 'The detection was saved but synchronization failed. The detection has been disabled. Check SOC logs for details.',
       disconnected: 'Disconnected from manager',
+      diskUsage: 'Disk Usage',
       diskUsageElastic: 'Elastic Storage Used',
       diskUsageInfluxDb: 'InfluxDB Storage Used',
       diskUsageNsm: 'NSM Partition Usage',
@@ -924,6 +925,7 @@ const i18n = {
       rulePassBadChars: 'The password must not contain the following characters: " \' $ & !',
       rules: 'Rules',
       ruleset: 'Ruleset',
+      runningTroubleshoot: 'Running troubleshoot script...',
       save: 'Save',
       saveSuccess: 'Save successful!',
       searchUsername: 'Search User',
@@ -1147,6 +1149,7 @@ const i18n = {
       trafficManOut: 'Outbound Mgmt Traffic',
       trafficManOutAbbr: 'Mgmt Out',
       transcriptCyberChefHelp: 'Send the transcript to CyberChef',
+      troubleshootElasticsearch: 'Troubleshoot Elasticsearch',
       ttr: 'Time Tracking',
       tuneDetection: 'Tune Detection',
       tuneDetectionHelp: 'Tune the detection that triggered this alert',
diff --git a/html/js/routes/grid.js b/html/js/routes/grid.js
index 66a0582d1..97d20a5e7 100644
--- a/html/js/routes/grid.js
+++ b/html/js/routes/grid.js
@@ -58,6 +58,10 @@ routes.push({ path: '/grid', name: 'grid', component: {
     gridMemberTestConfirmDialog: false,
     gridMemberRestartConfirmDialog: false,
     gridMemberUploadConfirmDialog: false,
+    troubleshootDialog: false,
+    troubleshootData: null,
+    troubleshootError: null,
+    troubleshootLoading: false,
     uploadForm: { valid: true, attachment: null },
     maxUploadSizeBytes: 25 * 1024 * 1024,
     staleMetricsMs: 120000,
@@ -360,6 +364,66 @@ routes.push({ path: '/grid', name: 'grid', component: {
       this.$root.stopLoading();
       this.hideRestartConfirm();
     },
+    // Troubleshooting is offered for manager-type nodes only; a missing node is never eligible.
+    canTroubleshoot(node) {
+      return !!node && ['so-manager', 'so-managersearch', 'so-standalone'].includes(node.role);
+    },
+    // Opens the troubleshoot dialog for the given node with cleared results and starts a run.
+    showTroubleshootDialog(node) {
+      this.selectedNode = node;
+      this.troubleshootData = null;
+      this.troubleshootError = null;
+      this.troubleshootDialog = true;
+      // Not awaited on purpose: runTroubleshoot() reports its failures through troubleshootError.
+      this.runTroubleshoot();
+    },
+    // Closes the troubleshoot dialog and clears its results, error and selected node.
+    hideTroubleshootDialog() {
+      this.troubleshootDialog = false;
+      this.troubleshootData = null;
+      this.troubleshootError = null;
+      this.selectedNode = null;
+    },
+    // Posts the estroubleshoot operation for the selected node and stores either the parsed
+    // JSON report (troubleshootData) or a failure message (troubleshootError).
+    async runTroubleshoot() {
+      // Pin the node this run belongs to. The request can outlive the dialog, so the user may
+      // close it, or reopen it for another node, before the response arrives.
+      const node = this.selectedNode;
+      if (!node) return;
+      this.troubleshootLoading = true;
+      this.troubleshootError = null;
+      try {
+        // Resolved inside the try so a failure surfaces in the dialog instead of as an
+        // unhandled rejection (showTroubleshootDialog does not await this method).
+        const nodeId = this.getNodeName(node);
+        const response = await this.$root.papi.post('gridmembers/' + nodeId + "/estroubleshoot", null, {
+          params: {gridId: node.gridId}
+        });
+        // Drop a stale response rather than showing one node's report under another node.
+        if (this.selectedNode !== node) return;
+        try {
+          this.troubleshootData = JSON.parse(response.data.output);
+        } catch (parseError) {
+          this.troubleshootError = 'Failed to parse troubleshoot output';
+        }
+      } catch (error) {
+        if (this.selectedNode === node) {
+          this.troubleshootError = error.message || error;
+        }
+      } finally {
+        // Leave the spinner alone when a newer run for a different node owns it.
+        if (!this.selectedNode || this.selectedNode === node) {
+          this.troubleshootLoading = false;
+        }
+      }
+    },
+    // Maps a status keyword to 'success', 'warning' or 'error'; unrecognized values are 'error'.
+    getStatusColor(status) {
+      if (status === 'green' || status === 'ok') return 'success';
+      if (status === 'yellow' || status === 'high') return 'warning';
+      return 'error';
+    },
     hasContainer(item, container) {
       return item && item.containers && item.containers.find(function(x) {
         return x.Name == container;
diff --git a/html/pages/grid.html b/html/pages/grid.html
index 631266c4f..9a20d317b 100644
--- a/html/pages/grid.html
+++ b/html/pages/grid.html
@@ -213,6 +213,7 @@

{{ i18n.gridEps }} {{ formatCo {{ i18n.eventstoreStatus }}: {{ $root.localizeMessage(item.eventstoreStatus) }} + fa-circle-info
@@ -529,4 +530,114 @@

{{ i18n.gridEps }} {{ formatCo + + + + {{ i18n.troubleshootElasticsearch }} + + + fa-times + + + +
+ +

{{ i18n.runningTroubleshoot }}

+
+ {{ troubleshootError }} +
+ + + {{ troubleshootData.elasticsearchStatus?.errorMessage || 'Elasticsearch is not accessible' }} + +
+ +
+
Issues Found
+ +
{{ indicator?.symptom }}
+
+ +
+
+
+ + +
{{ i18n.diskUsage }}
+
+ + + +
+
+ Elasticsearch watermarks — + writes blocked at {{ troubleshootData.diskUsage?.watermarks?.floodDisplay }} (flood), + rebalancing starts at {{ troubleshootData.diskUsage?.watermarks?.highDisplay }} (high), + new shards blocked at {{ troubleshootData.diskUsage?.watermarks?.lowDisplay }} (low) +
+ + +
+
All Checks
+
+ + {{ indicator?.displayName }} + +
+
+
+
+
+ + + {{ i18n.refresh }} + {{ i18n.close }} + +
+
diff --git a/server/gridmembershandler.go b/server/gridmembershandler.go
index 44928ee14..f40570ba4 100644
--- a/server/gridmembershandler.go
+++ b/server/gridmembershandler.go
@@ -275,7 +275,7 @@ func (h *GridMembersHandler) postImport(w http.ResponseWriter, r *http.Request)
 // @Tags Grid
 // @Security bearer[grid/write]
 // @Param id path string true "The grid member ID to be managed" example(so_standalone)
-// @Param operation path string true "The operation to perform: add, reject, delete, test, restart" example(reject)
+// @Param operation path string true "The operation to perform: add, reject, delete, test, restart, estroubleshoot" example(reject)
 // @Success 200 "The operation was executed successfully"
 // @Failure 401 "Request was not properly authenticated"
 // @Failure 403 "Insufficient permissions for this request"
@@ -292,11 +292,30 @@ func (h *GridMembersHandler) postManageMembers(w http.ResponseWriter, r *http.Re
 	}
 
 	op := chi.URLParam(r, "operation")
-	if op != "add" && op != "reject" && op != "delete" && op != "test" && op != "restart" {
+	if op != "add" && op != "reject" && op != "delete" && op != "test" && op != "restart" && op != "estroubleshoot" {
 		web.Respond(w, r, http.StatusBadRequest, errors.New("Invalid operation"))
 		return
 	}
 
+	// Handle Elasticsearch troubleshoot operation separately since it returns output
+	if op == "estroubleshoot" {
+		// Minion IDs look like "<node>_<role>" (e.g., "manager_standalone" -> "manager").
+		// Cut at the last underscore so a node name that itself contains underscores
+		// is not truncated at its first one.
+		nodeName := id
+		if idx := strings.LastIndex(id, "_"); idx >= 0 {
+			nodeName = id[:idx]
+		}
+
+		output, err := h.server.GridMembersstore.RunTroubleshoot(ctx, nodeName, "/usr/sbin/so-elasticsearch-troubleshoot")
+		if err != nil {
+			web.Respond(w, r, http.StatusInternalServerError, err)
+			return
+		}
+		web.Respond(w, r, http.StatusOK, map[string]string{"output": output})
+		return
+	}
+
 	err := h.server.GridMembersstore.ManageMember(ctx, op, id)
 	if err != nil {
 		web.Respond(w, r, http.StatusBadRequest, err)
diff --git a/server/gridmembersstore.go b/server/gridmembersstore.go
index 5e7136130..1ea4b3c73 100644
--- a/server/gridmembersstore.go
+++ b/server/gridmembersstore.go
@@ -17,6 +17,7 @@ type GridMembersstore interface {
 	ManageMember(ctx context.Context, operation string, id string) error
 	SendFile(ctx context.Context, node string, from string, to string, cleanup bool) error
 	Import(ctx context.Context, node string, file string, importer string) (*string, error)
+	RunTroubleshoot(ctx context.Context, node string, script string) (string, error)
 }
 
 //go:generate mockgen -destination mock/mock_gridmembersstore.go -package mock . GridMembersstore
diff --git a/server/mock/mock_gridmembersstore.go b/server/mock/mock_gridmembersstore.go
index 115a8e1cd..46c5185f9 100644
--- a/server/mock/mock_gridmembersstore.go
+++ b/server/mock/mock_gridmembersstore.go
@@ -85,6 +85,21 @@ func (mr *MockGridMembersstoreMockRecorder) ManageMember(ctx, operation, id any)
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ManageMember", reflect.TypeOf((*MockGridMembersstore)(nil).ManageMember), ctx, operation, id)
 }
 
+// RunTroubleshoot mocks base method.
+func (m *MockGridMembersstore) RunTroubleshoot(ctx context.Context, node, script string) (string, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "RunTroubleshoot", ctx, node, script)
+	ret0, _ := ret[0].(string)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// RunTroubleshoot indicates an expected call of RunTroubleshoot.
+func (mr *MockGridMembersstoreMockRecorder) RunTroubleshoot(ctx, node, script any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RunTroubleshoot", reflect.TypeOf((*MockGridMembersstore)(nil).RunTroubleshoot), ctx, node, script)
+}
+
 // SendFile mocks base method.
 func (m *MockGridMembersstore) SendFile(ctx context.Context, node, from, to string, cleanup bool) error {
 	m.ctrl.T.Helper()
diff --git a/server/modules/salt/saltstore.go b/server/modules/salt/saltstore.go
index 662dc114a..e303717ad 100644
--- a/server/modules/salt/saltstore.go
+++ b/server/modules/salt/saltstore.go
@@ -1164,6 +1164,32 @@ func (store *Saltstore) Import(ctx context.Context, node string, file string, im
 	return &output, err
 }
 
+// RunTroubleshoot submits a "run-troubleshoot" command for node through execCommand, passing
+// script with " --json" appended, and returns the command output. The caller needs read
+// access to the grid; an authorization or execution failure yields "" and the error.
+func (store *Saltstore) RunTroubleshoot(ctx context.Context, node string, script string) (string, error) {
+	// NOTE(review): the REST operation is annotated grid/write but only grid read is checked here - confirm intent.
+	if err := store.server.CheckAuthorized(ctx, "read", "grid"); err != nil {
+		return "", err
+	}
+
+	args := map[string]string{
+		"command": "run-troubleshoot",
+		"node":    node,
+		"script":  script + " --json",
+	}
+
+	// Use long timeout since troubleshoot scripts may take up to 120 seconds
+	ctxTimeout := options.WithTimeoutMs(ctx, store.longRelayTimeoutMs)
+
+	output, err := store.execCommand(ctxTimeout, args)
+	if err != nil {
+		return "", err
+	}
+
+	return output, nil
+}
+
 func (store *Saltstore) lookupEmailFromId(ctx context.Context, id string) string {
 	user, _ := store.server.Userstore.GetUserById(ctx, id)
 	if user != nil && user.Id == id {