diff --git a/build/config/ceems_api_server/ceems_api_server.yml b/build/config/ceems_api_server/ceems_api_server.yml index d6a27f3d..ce536be6 100644 --- a/build/config/ceems_api_server/ceems_api_server.yml +++ b/build/config/ceems_api_server/ceems_api_server.yml @@ -34,6 +34,14 @@ ceems_api_server: # retention_period: 30d + # Time zone to be used when storing times of different events in the DB. + # It takes a value defined in IANA (https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + # like `Europe/Paris` + # + # A special value `Local` can be used to use server local time zone. + # + time_zone: Local + # CEEMS API server is capable of creating DB backups using SQLite backup API. Created # DB backups will be saved to this path. NOTE that for huge DBs, this backup can take # a considerable amount of time. @@ -266,19 +274,17 @@ clusters: [] # # When SLURM resource manager is configured to fetch job data using `sacct` command, # # execution mode of the command will be decided as follows: # # - # # - If the current user running `ceems_api_server` is `root` or `slurm` user, `sacct` - # # command will be executed natively as that user. + # # - If the current user running `ceems_api_server` is `root`, `sacct` + # # command will be executed as that user in a security context. # # - # # - If above check fails, `sacct` command will be attempted to execute as `slurm` user. - # # If the `ceems_api_server` process have enough privileges setup using Linux capabilities - # # in the systemd unit file, this will succeed and `sacct` will be always executed - # # as `slurm` user. + # # - If the `ceems_api_server` process has `CAP_SETUID` and `CAP_SETGID` capabilities, `sacct` + # # command will be executed as `root` user in a security context. # # - # # - If above check fails as well, we attempt to execute `sacct` with `sudo` prefix. If + # # - As a last attempt, we attempt to execute `sacct` with `sudo` prefix. If # # the current user running `ceems_api_server` is in the list of sudoers, this check # # will pass and `sacct` will be always executed as `sudo sacct ` to fetch jobs. # # - # # If none of the above checks, pass, `sacct` will be executed as the current user + # # If none of the above conditions are true, `sacct` will be executed as the current user # # which might not give job data of _all_ users in the cluster. # # # # If the operators are unsure which method to use, there is a default systemd diff --git a/internal/common/helpers.go b/internal/common/helpers.go index 9fa9d4d2..31e5d7ca 100644 --- a/internal/common/helpers.go +++ b/internal/common/helpers.go @@ -35,7 +35,7 @@ func Round(value int64, nearest int64) int64 { // TimeTrack tracks execution time of each function. func TimeTrack(start time.Time, name string, logger *slog.Logger) { elapsed := time.Since(start) - logger.Debug(name, "elapsed_time", elapsed) + logger.Debug(name, "duration", elapsed) } // SanitizeFloat replaces +/-Inf and NaN with zero. diff --git a/pkg/api/base/base.go b/pkg/api/base/base.go index 47143921..1b8792a0 100644 --- a/pkg/api/base/base.go +++ b/pkg/api/base/base.go @@ -55,6 +55,9 @@ var ( // DatetimeLayout to be used in the package. var DatetimeLayout = fmt.Sprintf("%sT%s", time.DateOnly, time.TimeOnly) +// DatetimezoneLayout to be used in the package. +var DatetimezoneLayout = DatetimeLayout + "-0700" + // CLI args with global scope. 
var ( ConfigFilePath string diff --git a/pkg/api/cli/cli.go b/pkg/api/cli/cli.go index 3baddc81..9a521a3d 100644 --- a/pkg/api/cli/cli.go +++ b/pkg/api/cli/cli.go @@ -60,6 +60,7 @@ func (c *CEEMSAPIAppConfig) UnmarshalYAML(unmarshal func(interface{}) error) err RetentionPeriod: model.Duration(30 * 24 * time.Hour), UpdateInterval: model.Duration(15 * time.Minute), BackupInterval: model.Duration(24 * time.Hour), + TimeLocation: ceems_db.TimeLocation{Location: time.Local}, LastUpdateTime: todayMidnight, }, Admin: ceems_db.AdminConfig{ diff --git a/pkg/api/db/db.go b/pkg/api/db/db.go index d7bfa68c..b6412091 100644 --- a/pkg/api/db/db.go +++ b/pkg/api/db/db.go @@ -46,6 +46,30 @@ type AdminConfig struct { Grafana common.GrafanaWebConfig `yaml:"grafana"` } +type TimeLocation struct { + *time.Location +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (t *TimeLocation) UnmarshalYAML(unmarshal func(interface{}) error) error { + var tmp string + + err := unmarshal(&tmp) + if err != nil { + return err + } + + // Attempt to create location from string + loc, err := time.LoadLocation(tmp) + if err != nil { + return err + } + + *t = TimeLocation{loc} + + return nil +} + // DataConfig is the container for the data related config. type DataConfig struct { Path string `yaml:"path"` @@ -54,6 +78,7 @@ type DataConfig struct { UpdateInterval model.Duration `yaml:"update_interval"` BackupInterval model.Duration `yaml:"backup_interval"` LastUpdateTime time.Time `yaml:"update_from"` + TimeLocation TimeLocation `yaml:"time_zone"` SkipDeleteOldUnits bool } @@ -72,14 +97,15 @@ type storageConfig struct { dbBackupPath string retentionPeriod time.Duration lastUpdateTime time.Time + timeLocation *time.Location skipDeleteOldUnits bool } // String implements Stringer interface for storageConfig. 
func (s *storageConfig) String() string { return fmt.Sprintf( - "DB File Path: %s; Retention Period: %s; Last Updated At: %s", - s.dbPath, s.retentionPeriod, s.lastUpdateTime, + "DB File Path: %s; Retention Period: %s; Location: %s; Last Updated At: %s", + s.dbPath, s.retentionPeriod, s.timeLocation, s.lastUpdateTime, ) } @@ -191,7 +217,7 @@ func New(c *Config) (*stats, error) { c.Data.LastUpdateTime.Minute(), c.Data.LastUpdateTime.Second(), c.Data.LastUpdateTime.Nanosecond(), - time.Now().Location(), + c.Data.TimeLocation.Location, ) c.Logger.Info("DB will be updated from", "time", c.Data.LastUpdateTime) @@ -220,6 +246,7 @@ func New(c *Config) (*stats, error) { dbBackupPath: c.Data.BackupPath, retentionPeriod: time.Duration(c.Data.RetentionPeriod), lastUpdateTime: c.Data.LastUpdateTime, + timeLocation: c.Data.TimeLocation.Location, skipDeleteOldUnits: c.Data.SkipDeleteOldUnits, } @@ -259,7 +286,7 @@ func (s *stats) Collect(ctx context.Context) error { // Measure elapsed time defer common.TimeTrack(time.Now(), "Data collection", s.logger) - currentTime := time.Now() + currentTime := time.Now().In(s.storage.timeLocation) // If duration is less than 1 day do single update if currentTime.Sub(s.storage.lastUpdateTime) < 24*time.Hour { @@ -733,7 +760,7 @@ func (s *stats) createBackup(ctx context.Context) error { backupDBFileName := fmt.Sprintf( "%s-%s.db", strings.Split(base.CEEMSDBName, ".")[0], - time.Now().Format("200601021504"), + time.Now().In(s.storage.timeLocation).Format("200601021504"), ) backupDBFilePath := filepath.Join(filepath.Dir(s.storage.dbPath), backupDBFileName) diff --git a/pkg/api/db/db_test.go b/pkg/api/db/db_test.go index 15b569da..75ab8e20 100644 --- a/pkg/api/db/db_test.go +++ b/pkg/api/db/db_test.go @@ -570,6 +570,7 @@ func prepareMockConfig(tmpDir string) (*Config, error) { BackupPath: dataBackupDir, LastUpdateTime: time.Now(), RetentionPeriod: model.Duration(24 * time.Hour), + TimeLocation: TimeLocation{Location: time.UTC}, }, Admin: AdminConfig{ Users: []string{"adm1", "adm2"}, diff --git a/pkg/api/helper/helper.go b/pkg/api/helper/helper.go index cff21ccb..976dea4c 100644 --- a/pkg/api/helper/helper.go +++ b/pkg/api/helper/helper.go @@ -109,7 +109,7 @@ func NodelistParser(nodelistExp string) []string { // TimeToTimestamp converts a date in a given layout to unix timestamp of the date. 
func TimeToTimestamp(layout string, date string) int64 { if t, err := time.Parse(layout, date); err == nil { - return t.Local().UnixMilli() + return t.UnixMilli() } return 0 diff --git a/pkg/api/helper/helper_test.go b/pkg/api/helper/helper_test.go index 1156eb68..8620065d 100644 --- a/pkg/api/helper/helper_test.go +++ b/pkg/api/helper/helper_test.go @@ -7,141 +7,160 @@ import ( "github.com/stretchr/testify/assert" ) -type nodelistParserTest struct { - nodelist string - expected []string -} - -var nodelistParserTests = []nodelistParserTest{ - { - "compute-a-0", []string{"compute-a-0"}, - }, - { - "compute-a-[0-1]", []string{"compute-a-0", "compute-a-1"}, - }, - { - "compute-a-[0-1,5-6]", []string{"compute-a-0", "compute-a-1", "compute-a-5", "compute-a-6"}, - }, - { - "compute-a-[0-1]-b-[3-4]", - []string{"compute-a-0-b-3", "compute-a-0-b-4", "compute-a-1-b-3", "compute-a-1-b-4"}, - }, - { - "compute-a-[0-1,3,5-6]-b-[3-4,5,7-9]", - []string{ - "compute-a-0-b-3", - "compute-a-0-b-4", - "compute-a-0-b-5", - "compute-a-0-b-7", - "compute-a-0-b-8", - "compute-a-0-b-9", - "compute-a-1-b-3", - "compute-a-1-b-4", - "compute-a-1-b-5", - "compute-a-1-b-7", - "compute-a-1-b-8", - "compute-a-1-b-9", - "compute-a-3-b-3", - "compute-a-3-b-4", - "compute-a-3-b-5", - "compute-a-3-b-7", - "compute-a-3-b-8", - "compute-a-3-b-9", - "compute-a-5-b-3", - "compute-a-5-b-4", - "compute-a-5-b-5", - "compute-a-5-b-7", - "compute-a-5-b-8", - "compute-a-5-b-9", - "compute-a-6-b-3", - "compute-a-6-b-4", - "compute-a-6-b-5", - "compute-a-6-b-7", - "compute-a-6-b-8", - "compute-a-6-b-9", +func TestNodelistParser(t *testing.T) { + tests := []struct { + nodelist string + expected []string + }{ + { + "compute-a-0", []string{"compute-a-0"}, }, - }, - { - "compute-a-[0-1]-b-[3-4],compute-c,compute-d", - []string{ - "compute-a-0-b-3", "compute-a-0-b-4", - "compute-a-1-b-3", "compute-a-1-b-4", "compute-c", "compute-d", + { + "compute-a-[0-1]", []string{"compute-a-0", "compute-a-1"}, }, - }, - { - "compute-a-[0-2,5,7-9]-b-[3-4,7,9-12],compute-c,compute-d", - []string{ - "compute-a-0-b-3", - "compute-a-0-b-4", - "compute-a-0-b-7", - "compute-a-0-b-9", - "compute-a-0-b-10", - "compute-a-0-b-11", - "compute-a-0-b-12", - "compute-a-1-b-3", - "compute-a-1-b-4", - "compute-a-1-b-7", - "compute-a-1-b-9", - "compute-a-1-b-10", - "compute-a-1-b-11", - "compute-a-1-b-12", - "compute-a-2-b-3", - "compute-a-2-b-4", - "compute-a-2-b-7", - "compute-a-2-b-9", - "compute-a-2-b-10", - "compute-a-2-b-11", - "compute-a-2-b-12", - "compute-a-5-b-3", - "compute-a-5-b-4", - "compute-a-5-b-7", - "compute-a-5-b-9", - "compute-a-5-b-10", - "compute-a-5-b-11", - "compute-a-5-b-12", - "compute-a-7-b-3", - "compute-a-7-b-4", - "compute-a-7-b-7", - "compute-a-7-b-9", - "compute-a-7-b-10", - "compute-a-7-b-11", - "compute-a-7-b-12", - "compute-a-8-b-3", - "compute-a-8-b-4", - "compute-a-8-b-7", - "compute-a-8-b-9", - "compute-a-8-b-10", - "compute-a-8-b-11", - "compute-a-8-b-12", - "compute-a-9-b-3", - "compute-a-9-b-4", - "compute-a-9-b-7", - "compute-a-9-b-9", - "compute-a-9-b-10", - "compute-a-9-b-11", - "compute-a-9-b-12", - "compute-c", - "compute-d", + { + "compute-a-[0-1,5-6]", []string{"compute-a-0", "compute-a-1", "compute-a-5", "compute-a-6"}, }, - }, -} + { + "compute-a-[0-1]-b-[3-4]", + []string{"compute-a-0-b-3", "compute-a-0-b-4", "compute-a-1-b-3", "compute-a-1-b-4"}, + }, + { + "compute-a-[0-1,3,5-6]-b-[3-4,5,7-9]", + []string{ + "compute-a-0-b-3", + "compute-a-0-b-4", + "compute-a-0-b-5", + "compute-a-0-b-7", + "compute-a-0-b-8", + 
"compute-a-0-b-9", + "compute-a-1-b-3", + "compute-a-1-b-4", + "compute-a-1-b-5", + "compute-a-1-b-7", + "compute-a-1-b-8", + "compute-a-1-b-9", + "compute-a-3-b-3", + "compute-a-3-b-4", + "compute-a-3-b-5", + "compute-a-3-b-7", + "compute-a-3-b-8", + "compute-a-3-b-9", + "compute-a-5-b-3", + "compute-a-5-b-4", + "compute-a-5-b-5", + "compute-a-5-b-7", + "compute-a-5-b-8", + "compute-a-5-b-9", + "compute-a-6-b-3", + "compute-a-6-b-4", + "compute-a-6-b-5", + "compute-a-6-b-7", + "compute-a-6-b-8", + "compute-a-6-b-9", + }, + }, + { + "compute-a-[0-1]-b-[3-4],compute-c,compute-d", + []string{ + "compute-a-0-b-3", "compute-a-0-b-4", + "compute-a-1-b-3", "compute-a-1-b-4", "compute-c", "compute-d", + }, + }, + { + "compute-a-[0-2,5,7-9]-b-[3-4,7,9-12],compute-c,compute-d", + []string{ + "compute-a-0-b-3", + "compute-a-0-b-4", + "compute-a-0-b-7", + "compute-a-0-b-9", + "compute-a-0-b-10", + "compute-a-0-b-11", + "compute-a-0-b-12", + "compute-a-1-b-3", + "compute-a-1-b-4", + "compute-a-1-b-7", + "compute-a-1-b-9", + "compute-a-1-b-10", + "compute-a-1-b-11", + "compute-a-1-b-12", + "compute-a-2-b-3", + "compute-a-2-b-4", + "compute-a-2-b-7", + "compute-a-2-b-9", + "compute-a-2-b-10", + "compute-a-2-b-11", + "compute-a-2-b-12", + "compute-a-5-b-3", + "compute-a-5-b-4", + "compute-a-5-b-7", + "compute-a-5-b-9", + "compute-a-5-b-10", + "compute-a-5-b-11", + "compute-a-5-b-12", + "compute-a-7-b-3", + "compute-a-7-b-4", + "compute-a-7-b-7", + "compute-a-7-b-9", + "compute-a-7-b-10", + "compute-a-7-b-11", + "compute-a-7-b-12", + "compute-a-8-b-3", + "compute-a-8-b-4", + "compute-a-8-b-7", + "compute-a-8-b-9", + "compute-a-8-b-10", + "compute-a-8-b-11", + "compute-a-8-b-12", + "compute-a-9-b-3", + "compute-a-9-b-4", + "compute-a-9-b-7", + "compute-a-9-b-9", + "compute-a-9-b-10", + "compute-a-9-b-11", + "compute-a-9-b-12", + "compute-c", + "compute-d", + }, + }, + } -func TestNodelistParser(t *testing.T) { - for _, test := range nodelistParserTests { + for _, test := range tests { output := NodelistParser(test.nodelist) assert.Equal(t, test.expected, output) } } func TestTimeToTimestamp(t *testing.T) { - var expectedTimeStamp int64 = 1136239445000 + tests := []struct { + name string + time string + expected int64 + }{ + { + name: "time string in CET location", + time: "2024-11-12T15:23:02+0100", + expected: 1731421382000, + }, + { + name: "time string in DST", + time: "2024-10-03T12:51:40+0200", + expected: 1727952700000, + }, + { + name: "time string in UTC", + time: "2024-11-12T15:23:02+0000", + expected: 1731424982000, + }, + } - timeFormat := base.DatetimeLayout + "-0700" - timeStamp := TimeToTimestamp(timeFormat, "2006-01-02T15:04:05-0700") - assert.Equal(t, expectedTimeStamp, timeStamp) + for _, test := range tests { + timeStamp := TimeToTimestamp(base.DatetimezoneLayout, test.time) + assert.Equal(t, test.expected, timeStamp, test.name) + } // Check failure case - timeStamp = TimeToTimestamp(timeFormat, "2006-01-0215:04:05-0700") + timeStamp := TimeToTimestamp(base.DatetimezoneLayout, "Unknown") assert.Equal(t, int64(0), timeStamp) } diff --git a/pkg/api/http/docs/docs.go b/pkg/api/http/docs/docs.go index fb11a641..4e6d0040 100644 --- a/pkg/api/http/docs/docs.go +++ b/pkg/api/http/docs/docs.go @@ -358,7 +358,7 @@ const docTemplate = `{ "BasicAuth": [] } ], - "description": "This user endpoint will fetch compute units of the current user. 
The\ncurrent user is always identified by the header ` + "`" + `X-Grafana-User` + "`" + ` in\nthe request.\n\nIf multiple query parameters are passed, for instance, ` + "`" + `?uuid=\u003cuuid\u003e\u0026project=\u003cproject\u003e` + "`" + `,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. That means if the compute unit's ` + "`" + `uuid` + "`" + ` does not belong to the queried\nproject, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter ` + "`" + `running` + "`" + `.\n\nIf ` + "`" + `to` + "`" + ` query parameter is not provided, current time will be used. If ` + "`" + `from` + "`" + `\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if ` + "`" + `to` + "`" + ` is provided, ` + "`" + `from` + "`" + ` will be calculated as ` + "`" + `to` + "`" + ` - 24hrs.\n\nTo limit the number of fields in the response, use ` + "`" + `field` + "`" + ` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", + "description": "This user endpoint will fetch compute units of the current user. The\ncurrent user is always identified by the header ` + "`" + `X-Grafana-User` + "`" + ` in\nthe request.\n\nIf multiple query parameters are passed, for instance, ` + "`" + `?uuid=\u003cuuid\u003e\u0026project=\u003cproject\u003e` + "`" + `,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. That means if the compute unit's ` + "`" + `uuid` + "`" + ` does not belong to the queried\nproject, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter ` + "`" + `running` + "`" + `.\n\nIf ` + "`" + `to` + "`" + ` query parameter is not provided, current time will be used. If ` + "`" + `from` + "`" + `\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if ` + "`" + `to` + "`" + ` is provided, ` + "`" + `from` + "`" + ` will be calculated as ` + "`" + `to` + "`" + ` - 24hrs. If query\nparameter ` + "`" + `timezone` + "`" + ` is provided, the unit's created, start and end time strings\nwill be presented in that time zone.\n\nTo limit the number of fields in the response, use ` + "`" + `field` + "`" + ` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", "produces": [ "application/json" ], @@ -422,6 +422,12 @@ const docTemplate = `{ "name": "to", "in": "query" }, + { + "type": "string", + "description": "Time zone in IANA format", + "name": "timezone", + "in": "query" + }, { "type": "array", "items": { @@ -468,7 +474,7 @@ const docTemplate = `{ "BasicAuth": [] } ], - "description": "This admin endpoint will fetch compute units of _any_ user, compute unit and/or project. The\ncurrent user is always identified by the header ` + "`" + `X-Grafana-User` + "`" + ` in\nthe request.\n\nThe user who is making the request must be in the list of admin users\nconfigured for the server.\n\nIf multiple query parameters are passed, for instance, ` + "`" + `?uuid=\u003cuuid\u003e\u0026user=\u003cuser\u003e` + "`" + `,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. 
That means if the compute unit's ` + "`" + `uuid` + "`" + ` does not belong to the queried\nuser, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter ` + "`" + `running` + "`" + `.\n\nIf ` + "`" + `to` + "`" + ` query parameter is not provided, current time will be used. If ` + "`" + `from` + "`" + `\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if ` + "`" + `to` + "`" + ` is provided, ` + "`" + `from` + "`" + ` will be calculated as ` + "`" + `to` + "`" + ` - 24hrs.\n\nTo limit the number of fields in the response, use ` + "`" + `field` + "`" + ` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", + "description": "This admin endpoint will fetch compute units of _any_ user, compute unit and/or project. The\ncurrent user is always identified by the header ` + "`" + `X-Grafana-User` + "`" + ` in\nthe request.\n\nThe user who is making the request must be in the list of admin users\nconfigured for the server.\n\nIf multiple query parameters are passed, for instance, ` + "`" + `?uuid=\u003cuuid\u003e\u0026user=\u003cuser\u003e` + "`" + `,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. That means if the compute unit's ` + "`" + `uuid` + "`" + ` does not belong to the queried\nuser, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter ` + "`" + `running` + "`" + `.\n\nIf ` + "`" + `to` + "`" + ` query parameter is not provided, current time will be used. If ` + "`" + `from` + "`" + `\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if ` + "`" + `to` + "`" + ` is provided, ` + "`" + `from` + "`" + ` will be calculated as ` + "`" + `to` + "`" + ` - 24hrs. If query\nparameter ` + "`" + `timezone` + "`" + ` is provided, the unit's created, start and end time strings\nwill be presented in that time zone.\n\nTo limit the number of fields in the response, use ` + "`" + `field` + "`" + ` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", "produces": [ "application/json" ], @@ -542,6 +548,12 @@ const docTemplate = `{ "name": "to", "in": "query" }, + { + "type": "string", + "description": "Time zone in IANA format", + "name": "timezone", + "in": "query" + }, { "type": "array", "items": { diff --git a/pkg/api/http/docs/swagger.json b/pkg/api/http/docs/swagger.json index 4bf31649..92657a6e 100644 --- a/pkg/api/http/docs/swagger.json +++ b/pkg/api/http/docs/swagger.json @@ -350,7 +350,7 @@ "BasicAuth": [] } ], - "description": "This user endpoint will fetch compute units of the current user. The\ncurrent user is always identified by the header `X-Grafana-User` in\nthe request.\n\nIf multiple query parameters are passed, for instance, `?uuid=\u003cuuid\u003e\u0026project=\u003cproject\u003e`,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. That means if the compute unit's `uuid` does not belong to the queried\nproject, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter `running`.\n\nIf `to` query parameter is not provided, current time will be used. 
If `from`\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if `to` is provided, `from` will be calculated as `to` - 24hrs.\n\nTo limit the number of fields in the response, use `field` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", + "description": "This user endpoint will fetch compute units of the current user. The\ncurrent user is always identified by the header `X-Grafana-User` in\nthe request.\n\nIf multiple query parameters are passed, for instance, `?uuid=\u003cuuid\u003e\u0026project=\u003cproject\u003e`,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. That means if the compute unit's `uuid` does not belong to the queried\nproject, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter `running`.\n\nIf `to` query parameter is not provided, current time will be used. If `from`\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if `to` is provided, `from` will be calculated as `to` - 24hrs. If query\nparameter `timezone` is provided, the unit's created, start and end time strings\nwill be presented in that time zone.\n\nTo limit the number of fields in the response, use `field` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", "produces": [ "application/json" ], @@ -414,6 +414,12 @@ "name": "to", "in": "query" }, + { + "type": "string", + "description": "Time zone in IANA format", + "name": "timezone", + "in": "query" + }, { "type": "array", "items": { @@ -460,7 +466,7 @@ "BasicAuth": [] } ], - "description": "This admin endpoint will fetch compute units of _any_ user, compute unit and/or project. The\ncurrent user is always identified by the header `X-Grafana-User` in\nthe request.\n\nThe user who is making the request must be in the list of admin users\nconfigured for the server.\n\nIf multiple query parameters are passed, for instance, `?uuid=\u003cuuid\u003e\u0026user=\u003cuser\u003e`,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. That means if the compute unit's `uuid` does not belong to the queried\nuser, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter `running`.\n\nIf `to` query parameter is not provided, current time will be used. If `from`\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if `to` is provided, `from` will be calculated as `to` - 24hrs.\n\nTo limit the number of fields in the response, use `field` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", + "description": "This admin endpoint will fetch compute units of _any_ user, compute unit and/or project. The\ncurrent user is always identified by the header `X-Grafana-User` in\nthe request.\n\nThe user who is making the request must be in the list of admin users\nconfigured for the server.\n\nIf multiple query parameters are passed, for instance, `?uuid=\u003cuuid\u003e\u0026user=\u003cuser\u003e`,\nthe intersection of query parameters are used to fetch compute units rather than\nthe union. 
That means if the compute unit's `uuid` does not belong to the queried\nuser, null response will be returned.\n\nIn order to return the running compute units as well, use the query parameter `running`.\n\nIf `to` query parameter is not provided, current time will be used. If `from`\nquery parameter is not used, a default query window of 24 hours will be used.\nIt means if `to` is provided, `from` will be calculated as `to` - 24hrs. If query\nparameter `timezone` is provided, the unit's created, start and end time strings\nwill be presented in that time zone.\n\nTo limit the number of fields in the response, use `field` query parameter. By default, all\nfields will be included in the response if they are _non-empty_.", "produces": [ "application/json" ], @@ -534,6 +540,12 @@ "name": "to", "in": "query" }, + { + "type": "string", + "description": "Time zone in IANA format", + "name": "timezone", + "in": "query" + }, { "type": "array", "items": { diff --git a/pkg/api/http/docs/swagger.yaml b/pkg/api/http/docs/swagger.yaml index 7ab84068..5ff3b117 100644 --- a/pkg/api/http/docs/swagger.yaml +++ b/pkg/api/http/docs/swagger.yaml @@ -747,7 +747,9 @@ paths: If `to` query parameter is not provided, current time will be used. If `from` query parameter is not used, a default query window of 24 hours will be used. - It means if `to` is provided, `from` will be calculated as `to` - 24hrs. + It means if `to` is provided, `from` will be calculated as `to` - 24hrs. If query + parameter `timezone` is provided, the unit's created, start and end time strings + will be presented in that time zone. To limit the number of fields in the response, use `field` query parameter. By default, all fields will be included in the response if they are _non-empty_. @@ -790,6 +792,10 @@ paths: in: query name: to type: string + - description: Time zone in IANA format + in: query + name: timezone + type: string - collectionFormat: multi description: Fields to return in response in: query @@ -840,7 +846,9 @@ paths: If `to` query parameter is not provided, current time will be used. If `from` query parameter is not used, a default query window of 24 hours will be used. - It means if `to` is provided, `from` will be calculated as `to` - 24hrs. + It means if `to` is provided, `from` will be calculated as `to` - 24hrs. If query + parameter `timezone` is provided, the unit's created, start and end time strings + will be presented in that time zone. To limit the number of fields in the response, use `field` query parameter. By default, all fields will be included in the response if they are _non-empty_. @@ -890,6 +898,10 @@ paths: in: query name: to type: string + - description: Time zone in IANA format + in: query + name: timezone + type: string - collectionFormat: multi description: Fields to return in response in: query diff --git a/pkg/api/http/querier.go b/pkg/api/http/querier.go index 0386dcd3..617f9eee 100644 --- a/pkg/api/http/querier.go +++ b/pkg/api/http/querier.go @@ -223,8 +223,8 @@ func Querier[T any](ctx context.Context, dbConn *sql.DB, query Query, logger *sl // Loop through rows, using Scan to assign column data to struct fields. 
logger.Debug( - "Rows", "query", queryString, "queryParams", strings.Join(queryParams, ","), - "num_rows", numRows, "error", err, + "DB query", "query", queryString, "queryParams", strings.Join(queryParams, ","), + "num_rows", numRows, ) return scanRows[T](rows, numRows) diff --git a/pkg/api/http/server.go b/pkg/api/http/server.go index 987296ac..c407133d 100644 --- a/pkg/api/http/server.go +++ b/pkg/api/http/server.go @@ -432,14 +432,29 @@ func (s *CEEMSServer) getQueriedFields(urlValues url.Values, validFieldNames []s return queriedFields } +// timeLocation returns `time.Location` based on location name. +func (s *CEEMSServer) timeLocation(l string) *time.Location { + if l == "" { + return s.dbConfig.Data.TimeLocation.Location + } else { + if loc, err := time.LoadLocation(l); err != nil { + return s.dbConfig.Data.TimeLocation.Location + } else { + return loc + } + } +} + // getQueryWindow returns `from` and `to` time stamps from query vars and // cast them into proper format. func (s *CEEMSServer) getQueryWindow(r *http.Request) (map[string]string, error) { + q := r.URL.Query() + var fromTime, toTime time.Time // Get to and from query parameters and do checks on them - if f := r.URL.Query().Get("from"); f == "" { + if f := q.Get("from"); f == "" { // If from is not present in query params, use a default query window of 1 week - fromTime = time.Now().Add(-defaultQueryWindow) + fromTime = time.Now().Add(-defaultQueryWindow).In(s.dbConfig.Data.TimeLocation.Location) } else { // Return error response if from is not a timestamp if ts, err := strconv.ParseInt(f, 10, 64); err != nil { @@ -447,13 +462,13 @@ func (s *CEEMSServer) getQueryWindow(r *http.Request) (map[string]string, error) return nil, fmt.Errorf("query parameter 'from': %w", ErrMalformedTimeStamp) } else { - fromTime = time.Unix(ts, 0) + fromTime = time.Unix(ts, 0).In(s.dbConfig.Data.TimeLocation.Location) } } - if t := r.URL.Query().Get("to"); t == "" { + if t := q.Get("to"); t == "" { // Use current time as default to - toTime = time.Now() + toTime = time.Now().In(s.dbConfig.Data.TimeLocation.Location) } else { // Return error response if to is not a timestamp if ts, err := strconv.ParseInt(t, 10, 64); err != nil { @@ -461,7 +476,7 @@ func (s *CEEMSServer) getQueryWindow(r *http.Request) (map[string]string, error) return nil, fmt.Errorf("query parameter 'to': %w", ErrMalformedTimeStamp) } else { - toTime = time.Unix(ts, 0) + toTime = time.Unix(ts, 0).In(s.dbConfig.Data.TimeLocation.Location) } } @@ -495,7 +510,12 @@ func (s *CEEMSServer) roundQueryWindow(r *http.Request) error { if f := q.Get("from"); f == "" { q.Set( "from", - strconv.FormatInt(common.Round(time.Now().Add(-defaultQueryWindow).Local().Unix(), cacheTTLSeconds), 10), + strconv.FormatInt( + common.Round( + time.Now().Add(-defaultQueryWindow).In(s.dbConfig.Data.TimeLocation.Location).Unix(), + cacheTTLSeconds, + ), 10, + ), ) } else { // Return error response if from is not a timestamp @@ -509,7 +529,15 @@ func (s *CEEMSServer) roundQueryWindow(r *http.Request) error { } if t := q.Get("to"); t == "" { - q.Set("to", strconv.FormatInt(common.Round(time.Now().Local().Unix(), cacheTTLSeconds), 10)) + q.Set( + "to", + strconv.FormatInt( + common.Round( + time.Now().In(s.dbConfig.Data.TimeLocation.Location).Unix(), + cacheTTLSeconds, + ), 10, + ), + ) } else { // Return error response if from is not a timestamp if ts, err := strconv.ParseInt(t, 10, 64); err != nil { @@ -526,6 +554,31 @@ func (s *CEEMSServer) roundQueryWindow(r *http.Request) error { return nil } +// 
inTargetTimeLocation converts the string representations of times in units to target +// time location. +func (s *CEEMSServer) inTargetTimeLocation(tz string, units []models.Unit) []models.Unit { + // If no time zone is provided, we present times stored in DB without any changes + if tz == "" { + return units + } + + // Location in which we need times to be presented + targetLoc := s.timeLocation(tz) + + // If target location is same as source, return + if s.dbConfig.Data.TimeLocation.Location.String() == targetLoc.String() { + return units + } + + for i := range units { + units[i].CreatedAt = convertTimeLocation(s.dbConfig.Data.TimeLocation.Location, targetLoc, units[i].CreatedAt) + units[i].StartedAt = convertTimeLocation(s.dbConfig.Data.TimeLocation.Location, targetLoc, units[i].StartedAt) + units[i].EndedAt = convertTimeLocation(s.dbConfig.Data.TimeLocation.Location, targetLoc, units[i].EndedAt) + } + + return units +} + // unitsQuerier queries for compute units and write response. func (s *CEEMSServer) unitsQuerier( queriedUsers []string, @@ -624,6 +677,9 @@ queryUnits: return } + // Convert times to time zone provided in the query + units = s.inTargetTimeLocation(r.URL.Query().Get("timezone"), units) + // Write response w.WriteHeader(http.StatusOK) @@ -660,7 +716,9 @@ queryUnits: // @Description // @Description If `to` query parameter is not provided, current time will be used. If `from` // @Description query parameter is not used, a default query window of 24 hours will be used. -// @Description It means if `to` is provided, `from` will be calculated as `to` - 24hrs. +// @Description It means if `to` is provided, `from` will be calculated as `to` - 24hrs. If query +// @Description parameter `timezone` is provided, the unit's created, start and end time strings +// @Description will be presented in that time zone. // @Description // @Description To limit the number of fields in the response, use `field` query parameter. By default, all // @Description fields will be included in the response if they are _non-empty_. @@ -675,6 +733,7 @@ queryUnits: // @Param running query bool false "Whether to fetch running units" // @Param from query string false "From timestamp" // @Param to query string false "To timestamp" +// @Param timezone query string false "Time zone in IANA format" // @Param field query []string false "Fields to return in response" collectionFormat(multi) // @Success 200 {object} Response[models.Unit] // @Failure 401 {object} Response[any] @@ -708,7 +767,9 @@ func (s *CEEMSServer) unitsAdmin(w http.ResponseWriter, r *http.Request) { // @Description // @Description If `to` query parameter is not provided, current time will be used. If `from` // @Description query parameter is not used, a default query window of 24 hours will be used. -// @Description It means if `to` is provided, `from` will be calculated as `to` - 24hrs. +// @Description It means if `to` is provided, `from` will be calculated as `to` - 24hrs. If query +// @Description parameter `timezone` is provided, the unit's created, start and end time strings +// @Description will be presented in that time zone. // @Description // @Description To limit the number of fields in the response, use `field` query parameter. By default, all // @Description fields will be included in the response if they are _non-empty_. 
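The `timezone` query parameter handling above boils down to a parse-and-reformat round trip: a time string stored with the DB's configured zone is re-rendered in the requested IANA zone without changing the underlying instant. A minimal, self-contained sketch of the conversion that `inTargetTimeLocation`/`convertTimeLocation` perform, not taken from the patch: the layout constant mirrors base.DatetimezoneLayout, the source zone matches the Europe/Paris value used in the test config, and the target zone is an arbitrary example.

package main

import (
	"fmt"
	"time"
)

// Same shape as base.DatetimeLayout + "-0700" (i.e. base.DatetimezoneLayout).
const datetimezoneLayout = "2006-01-02T15:04:05-0700"

func main() {
	// Zone the DB was configured with (ceems_api_server.data.time_zone).
	sourceLoc, err := time.LoadLocation("Europe/Paris")
	if err != nil {
		panic(err)
	}

	// Zone requested via `?timezone=America/New_York`.
	targetLoc, err := time.LoadLocation("America/New_York")
	if err != nil {
		panic(err)
	}

	// A unit start time as stored in the DB.
	stored := "2024-10-15T15:32:43+0200"

	// The explicit offset in the string wins; sourceLoc only matters when the
	// value carries no zone information.
	t, err := time.ParseInLocation(datetimezoneLayout, stored, sourceLoc)
	if err != nil {
		panic(err)
	}

	// Re-render in the requested zone; the instant (Unix time) is unchanged.
	fmt.Println(t.In(targetLoc).Format(datetimezoneLayout)) // 2024-10-15T09:32:43-0400
	fmt.Println(t.UnixMilli())                              // 1728999163000
}

If `time.LoadLocation` fails for the supplied name, `timeLocation` falls back to the DB's own location, so a malformed `timezone` value degrades gracefully instead of producing an error response.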
@@ -722,6 +783,7 @@ func (s *CEEMSServer) unitsAdmin(w http.ResponseWriter, r *http.Request) { // @Param running query bool false "Whether to fetch running units" // @Param from query string false "From timestamp" // @Param to query string false "To timestamp" +// @Param timezone query string false "Time zone in IANA format" // @Param field query []string false "Fields to return in response" collectionFormat(multi) // @Success 200 {object} Response[models.Unit] // @Failure 401 {object} Response[any] @@ -1904,3 +1966,12 @@ func (s *CEEMSServer) demo(w http.ResponseWriter, r *http.Request) { } } } + +// convertTimeLocation converts time from source location to target location. +func convertTimeLocation(sourceLoc *time.Location, targetLoc *time.Location, val string) string { + if t, err := time.ParseInLocation(base.DatetimezoneLayout, val, sourceLoc); err == nil { + return t.In(targetLoc).Format(base.DatetimezoneLayout) + } + + return val +} diff --git a/pkg/api/http/server_test.go b/pkg/api/http/server_test.go index f7633a21..b1a3d9b5 100644 --- a/pkg/api/http/server_test.go +++ b/pkg/api/http/server_test.go @@ -72,7 +72,12 @@ func setupServer(d string) *CEEMSServer { server, _, _ := New( &Config{ Logger: logger, - DB: db.Config{Data: db.DataConfig{Path: d}}, + DB: db.Config{ + Data: db.DataConfig{ + Path: d, + TimeLocation: db.TimeLocation{Location: time.UTC}, + }, + }, Web: WebConfig{ Addresses: []string{"localhost:9020"}, // dummy address RequestsLimit: 10, diff --git a/pkg/api/resource/manager.go b/pkg/api/resource/manager.go index 4c3fcd6c..64a6f622 100644 --- a/pkg/api/resource/manager.go +++ b/pkg/api/resource/manager.go @@ -142,7 +142,7 @@ func New(logger *slog.Logger) (*Manager, error) { } // Loop over factories and create new instances - var keepPrivs bool + var dropPrivs bool for key, factory := range factories { for _, config := range configMap[key] { @@ -155,9 +155,9 @@ func New(logger *slog.Logger) (*Manager, error) { fetchers = append(fetchers, fetcher) - // If manager is SLURM and CLI is configured, we MUST keep privileges - if config.Manager == "slurm" && config.CLI.Path != "" { - keepPrivs = true + // If manager is SLURM and web is configured, we MUST DROP privileges + if config.Manager == "slurm" && config.Web.URL != "" { + dropPrivs = true } } } @@ -180,7 +180,7 @@ func New(logger *slog.Logger) (*Manager, error) { } // If we dont need to keep any privileges, drop any existing capabilities - if !keepPrivs { + if dropPrivs { if err := security.DropCapabilities(); err != nil { logger.Warn("Failed to drop capabilities", "err", err) } diff --git a/pkg/api/resource/openstack/compute.go b/pkg/api/resource/openstack/compute.go index 5f1890c3..8548c44e 100644 --- a/pkg/api/resource/openstack/compute.go +++ b/pkg/api/resource/openstack/compute.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/mahendrapaipuri/ceems/pkg/api/base" "github.com/mahendrapaipuri/ceems/pkg/api/models" ) @@ -56,6 +57,9 @@ func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, return nil, err } + // Get current time location + loc := end.Location() + // Start a wait group wg := sync.WaitGroup{} @@ -132,6 +136,12 @@ func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, var iServer int for _, server := range allServers { + // Convert CreatedAt, UpdatedAt, LaunchedAt, TerminatedAt to current time location + server.CreatedAt = server.CreatedAt.In(loc) + server.LaunchedAt = server.LaunchedAt.In(loc) + server.UpdatedAt = server.UpdatedAt.In(loc) + 
server.TerminatedAt = server.TerminatedAt.In(loc) + // Get elapsed time of instance including shutdowns, suspended states elapsedTime := Timespan(end.Sub(server.LaunchedAt)).Format("15:04:05") @@ -149,7 +159,7 @@ func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, elapsedTime = Timespan(server.TerminatedAt.Sub(server.LaunchedAt)).Format("15:04:05") // Get instance termination time - endedAt = server.TerminatedAt.Format(osTimeFormat) + endedAt = server.TerminatedAt.Format(base.DatetimezoneLayout) endedAtTS = server.TerminatedAt.UnixMilli() // If the instance has been terminated in this update interval @@ -223,8 +233,8 @@ func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, Name: server.Name, Project: o.userProjectsCache.projectIDNameMap[server.TenantID], User: o.userProjectsCache.userIDNameMap[server.UserID], - CreatedAt: server.CreatedAt.Format(osTimeFormat), - StartedAt: server.LaunchedAt.Format(osTimeFormat), + CreatedAt: server.CreatedAt.Format(base.DatetimezoneLayout), + StartedAt: server.LaunchedAt.Format(base.DatetimezoneLayout), EndedAt: endedAt, CreatedAtTS: server.CreatedAt.UnixMilli(), StartedAtTS: server.LaunchedAt.UnixMilli(), @@ -269,8 +279,8 @@ func (o *openstackManager) fetchInstances(ctx context.Context, start time.Time, if deleted { q.Add("deleted", "true") - q.Add("changes-since", start.Format(osTimeFormat)) - q.Add("changes-before", end.Format(osTimeFormat)) + q.Add("changes-since", start.Format(base.DatetimezoneLayout)) + q.Add("changes-before", end.Format(base.DatetimezoneLayout)) } req.URL.RawQuery = q.Encode() diff --git a/pkg/api/resource/openstack/identity.go b/pkg/api/resource/openstack/identity.go index b447b829..98d4e122 100644 --- a/pkg/api/resource/openstack/identity.go +++ b/pkg/api/resource/openstack/identity.go @@ -10,6 +10,7 @@ import ( "sync" "time" + "github.com/mahendrapaipuri/ceems/pkg/api/base" "github.com/mahendrapaipuri/ceems/pkg/api/helper" "github.com/mahendrapaipuri/ceems/pkg/api/models" ) @@ -121,7 +122,7 @@ func (o *openstackManager) usersProjectsAssoc(ctx context.Context, current time. 
} // Current time string - currentTime := current.Format(osTimeFormat) + currentTime := current.Format(base.DatetimezoneLayout) // First get all users users, err := o.fetchUsers(ctx) diff --git a/pkg/api/resource/openstack/manager.go b/pkg/api/resource/openstack/manager.go index ce564975..a68b20c0 100644 --- a/pkg/api/resource/openstack/manager.go +++ b/pkg/api/resource/openstack/manager.go @@ -15,7 +15,6 @@ import ( "time" "github.com/mahendrapaipuri/ceems/internal/common" - "github.com/mahendrapaipuri/ceems/pkg/api/base" "github.com/mahendrapaipuri/ceems/pkg/api/models" "github.com/mahendrapaipuri/ceems/pkg/api/resource" config_util "github.com/prometheus/common/config" @@ -31,7 +30,6 @@ var ( "X-OpenStack-Nova-API-Version", "OpenStack-API-Version", } - osTimeFormat = base.DatetimeLayout + "-0700" tokenExpiryDuration = 1 * time.Hour // Openstack tokens are valid for 1 hour ) @@ -167,7 +165,7 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) return nil, err } - logger.Info("Fetching VM instances from Openstack cluster", "id", cluster.ID) + logger.Info("VM instances from Openstack cluster will be fetched", "id", cluster.ID) return openstackManager, nil } diff --git a/pkg/api/resource/openstack/manager_test.go b/pkg/api/resource/openstack/manager_test.go index 572bec3b..1526bac6 100644 --- a/pkg/api/resource/openstack/manager_test.go +++ b/pkg/api/resource/openstack/manager_test.go @@ -13,6 +13,7 @@ import ( "testing" "time" + "github.com/mahendrapaipuri/ceems/pkg/api/base" "github.com/mahendrapaipuri/ceems/pkg/api/models" config_util "github.com/prometheus/common/config" "github.com/stretchr/testify/assert" @@ -21,9 +22,9 @@ import ( ) var ( - start, _ = time.Parse(osTimeFormat, "2024-10-15T15:00:00+0200") - end, _ = time.Parse(osTimeFormat, "2024-10-15T15:15:00+0200") - current, _ = time.Parse(osTimeFormat, "2024-10-15T15:15:00+0200") + start, _ = time.Parse(base.DatetimezoneLayout, "2024-10-15T16:15:00+0200") + end, _ = time.Parse(base.DatetimezoneLayout, "2024-10-15T16:45:00+0200") + current, _ = time.Parse(base.DatetimezoneLayout, "2024-10-15T16:45:00+0200") expectedUnits = map[string]models.Unit{ "d0d60434-4bf1-4eb1-9469-d7b38083a88f": { @@ -32,12 +33,12 @@ var ( Name: "new-vgpu-3", Project: "admin", User: "admin", - CreatedAt: "2024-10-15T13:32:25+0200", - StartedAt: "2024-10-15T13:32:43+0200", - EndedAt: "2024-10-15T14:32:09+0200", - CreatedAtTS: 1728991945000, - StartedAtTS: 1728991963000, - EndedAtTS: 1728995529000, + CreatedAt: "2024-10-15T15:32:25+0200", + StartedAt: "2024-10-15T15:32:43+0200", + EndedAt: "2024-10-15T16:32:09+0200", + CreatedAtTS: 1728999145000, + StartedAtTS: 1728999163000, + EndedAtTS: 1729002729000, Elapsed: "00:59:26", State: "DELETED", Allocation: models.Generic{ @@ -49,11 +50,11 @@ var ( "vcpus": 8, }, TotalTime: models.MetricMap{ - "alloc_cpumemtime": 0, - "alloc_cputime": 0, - "alloc_gpumemtime": 0, - "alloc_gputime": 0, - "walltime": 0, + "alloc_cpumemtime": 8.429568e+06, + "alloc_cputime": 8232, + "alloc_gpumemtime": 1029, + "alloc_gputime": 1029, + "walltime": 1029, }, Tags: models.Generic{ "az": "nova", @@ -71,13 +72,13 @@ var ( Name: "newer-2", Project: "admin", User: "admin", - CreatedAt: "2024-10-15T14:29:18+0200", - StartedAt: "2024-10-15T14:29:34+0200", + CreatedAt: "2024-10-15T16:29:18+0200", + StartedAt: "2024-10-15T16:29:34+0200", EndedAt: "N/A", - CreatedAtTS: 1728995358000, - StartedAtTS: 1728995374000, + CreatedAtTS: 1729002558000, + StartedAtTS: 1729002574000, EndedAtTS: 0, - Elapsed: "00:45:26", + 
Elapsed: "00:15:26", State: "ACTIVE", Allocation: models.Generic{ "disk": 1, @@ -88,11 +89,11 @@ var ( "vcpus": 1, }, TotalTime: models.MetricMap{ - "alloc_cpumemtime": 230400, - "alloc_cputime": 900, + "alloc_cpumemtime": 237056, + "alloc_cputime": 926, "alloc_gpumemtime": 0, "alloc_gputime": 0, - "walltime": 900, + "walltime": 926, }, Tags: models.Generic{ "az": "nova", @@ -110,13 +111,13 @@ var ( Name: "tp-21", Project: "test-project-2", User: "test-user-2", - CreatedAt: "2024-10-15T13:16:44+0200", - StartedAt: "2024-10-15T13:16:55+0200", + CreatedAt: "2024-10-15T15:16:44+0200", + StartedAt: "2024-10-15T15:16:55+0200", EndedAt: "N/A", - CreatedAtTS: 1728991004000, - StartedAtTS: 1728991015000, + CreatedAtTS: 1728998204000, + StartedAtTS: 1728998215000, EndedAtTS: 0, - Elapsed: "01:58:05", + Elapsed: "01:28:05", State: "ACTIVE", Allocation: models.Generic{ "disk": 1, @@ -127,11 +128,11 @@ var ( "vcpus": 128, }, TotalTime: models.MetricMap{ - "alloc_cpumemtime": 4.6848e+07, - "alloc_cputime": 31232, + "alloc_cpumemtime": 3.456e+08, + "alloc_cputime": 230400, "alloc_gpumemtime": 0, "alloc_gputime": 0, - "walltime": 244, + "walltime": 1800, }, Tags: models.Generic{ "az": "nova", diff --git a/pkg/api/resource/openstack/types.go b/pkg/api/resource/openstack/types.go index 1c2c1a26..7e3698c5 100644 --- a/pkg/api/resource/openstack/types.go +++ b/pkg/api/resource/openstack/types.go @@ -2,25 +2,12 @@ package openstack import ( "encoding/json" - "os" "strconv" "time" ) -func init() { - // If we are in CI env, use fixed time location - // for e2e tests - if os.Getenv("CI") != "" { - currentLocation, _ = time.LoadLocation("CET") - } else { - currentLocation = time.Now().Location() - } -} - const RFC3339MilliNoZ = "2006-01-02T15:04:05.999999" -var currentLocation *time.Location - type JSONRFC3339MilliNoZ time.Time func (jt *JSONRFC3339MilliNoZ) UnmarshalJSON(data []byte) error { @@ -38,7 +25,9 @@ func (jt *JSONRFC3339MilliNoZ) UnmarshalJSON(data []byte) error { return err } - // Convert the UTC time to local + // Make times in UTC + // IMPORTANT: We checked quickly with source code of Openstack + // API and it ALWAYS returns times in UTC *jt = JSONRFC3339MilliNoZ( time.Date( t.Year(), @@ -48,7 +37,7 @@ func (jt *JSONRFC3339MilliNoZ) UnmarshalJSON(data []byte) error { t.Minute(), t.Second(), t.Nanosecond(), - currentLocation, + time.UTC, ), ) @@ -154,8 +143,9 @@ func (r *Server) UnmarshalJSON(b []byte) error { r.LaunchedAt = time.Time(s.LaunchedAt) r.TerminatedAt = time.Time(s.TerminatedAt) - // Convert CreatedAt and UpdatedAt to local times - // Seems like returned values are always in UTC + // Ensure that CreatedAt and UpdatedAt are in UTC + // IMPORTANT: We checked quickly with source code of Openstack + // API and it ALWAYS returns times in UTC r.CreatedAt = time.Date( r.CreatedAt.Year(), r.CreatedAt.Month(), @@ -164,7 +154,7 @@ func (r *Server) UnmarshalJSON(b []byte) error { r.CreatedAt.Minute(), r.CreatedAt.Second(), r.CreatedAt.Nanosecond(), - currentLocation, + time.UTC, ) r.UpdatedAt = time.Date( r.UpdatedAt.Year(), @@ -174,10 +164,10 @@ func (r *Server) UnmarshalJSON(b []byte) error { r.UpdatedAt.Minute(), r.UpdatedAt.Second(), r.UpdatedAt.Nanosecond(), - currentLocation, + time.UTC, ) - return err + return nil } type AttachedVolume struct { diff --git a/pkg/api/resource/slurm/cli.go b/pkg/api/resource/slurm/cli.go index d50fed78..23cfcbd9 100644 --- a/pkg/api/resource/slurm/cli.go +++ b/pkg/api/resource/slurm/cli.go @@ -34,6 +34,9 @@ var ( "T": 1024 * 1024 * 1024 * 1024, "Z": 1024 * 1024 
* 1024 * 1024 * 1024, } + + // Required capabilities to execute SLURM commands. + requiredCaps = []string{"cap_setuid", "cap_setgid"} ) // Run preflights for CLI execution mode. @@ -42,7 +45,7 @@ func preflightsCLI(slurm *slurmScheduler) error { // Assume execMode is always native slurm.fetchMode = cliMode slurm.cmdExecMode = "native" - slurm.logger.Debug("SLURM jobs will be fetched using CLI commands") + slurm.logger.Debug("Using SLURM CLI commands") // If no sacct path is provided, assume it is available on PATH if slurm.cluster.CLI.Path == "" { @@ -63,31 +66,28 @@ func preflightsCLI(slurm *slurmScheduler) error { } } - // sacct path - sacctPath := filepath.Join(slurm.cluster.CLI.Path, "sacct") + // Check if current capabilities have required caps + haveCaps := true - // If current user root pass checks - if currentUser, err := user.Current(); err == nil && currentUser.Uid == "0" { - slurm.cmdExecMode = capabilityMode - slurm.logger.Info("Current user have enough privileges to execute SLURM commands", "user", currentUser.Username) + currentCaps := cap.GetProc().String() + for _, cap := range requiredCaps { + if !strings.Contains(currentCaps, cap) { + haveCaps = false - goto secu_context + break + } } - // Check if current process has necessary caps - if currentCaps := cap.GetProc().String(); strings.Contains(currentCaps, "cap_setuid") && strings.Contains(currentCaps, "cap_setgid") { + // If current user is root or if current process has necessary caps setup security context + if currentUser, err := user.Current(); err == nil && currentUser.Uid == "0" || haveCaps { slurm.cmdExecMode = capabilityMode - slurm.logger.Info("Linux capabilities will be used to execute SLURM commands as slurm user") - } + slurm.logger.Info("Current user/process have enough privileges to execute SLURM commands", "user", currentUser.Username) -secu_context: - // If using capability mode, setup security context - if slurm.cmdExecMode == capabilityMode { var caps []cap.Value var err error - for _, name := range []string{"cap_setuid", "cap_setgid"} { + for _, name := range requiredCaps { value, err := cap.FromName(name) if err != nil { slurm.logger.Error("Error parsing capability %s: %w", name, err) @@ -115,6 +115,9 @@ secu_context: return nil } + // sacct path + sacctPath := filepath.Join(slurm.cluster.CLI.Path, "sacct") + // Last attempt to run sacct with sudo if _, err := internal_osexec.ExecuteWithTimeout("sudo", []string{sacctPath, "--help"}, 5, nil); err == nil { slurm.cmdExecMode = sudoMode @@ -135,8 +138,11 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] sacctOutputLines := strings.Split(sacctOutput, "\n") // Update period - intStartTS := start.Local().UnixMilli() - intEndTS := end.Local().UnixMilli() + intStartTS := start.UnixMilli() + intEndTS := end.UnixMilli() + + // Get current location + loc := end.Location() numJobs := 0 @@ -179,10 +185,16 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] uidInt, _ = strconv.ParseInt(components[sacctFieldMap["uid"]], 10, 64) // elapsedSeconds, _ = strconv.ParseInt(components[sacctFieldMap["elapsedraw"]], 10, 64) - // Get job submit, start and end times - jobSubmitTS := helper.TimeToTimestamp(slurmTimeFormat, components[8]) - jobStartTS := helper.TimeToTimestamp(slurmTimeFormat, components[9]) - jobEndTS := helper.TimeToTimestamp(slurmTimeFormat, components[10]) + // Convert time strings to configured time location + eventTS := make(map[string]int64, 3) + + for _, c := range []string{"submit", 
"start", "end"} { + if t, err := time.Parse(base.DatetimezoneLayout, components[sacctFieldMap[c]]); err == nil { + components[sacctFieldMap[c]] = t.In(loc).Format(base.DatetimezoneLayout) + } + + eventTS[c] = helper.TimeToTimestamp(base.DatetimezoneLayout, components[sacctFieldMap[c]]) + } // Parse alloctres to get billing, nnodes, ncpus, ngpus and mem var billing, nnodes, ncpus, ngpus int64 @@ -241,7 +253,7 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] // If job has not started between interval's start and end time, // elapsedTime should be zero. This can happen when job is in pending state // after submission - if jobStartTS == 0 { + if eventTS["start"] == 0 { endMark = startMark goto elapsed @@ -251,23 +263,23 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] // job's start and end time. This case should not arrive in production as // there is no reason SLURM gives us the jobs that have finished in the past // that do not overlap with interval boundaries - if jobEndTS > 0 && jobEndTS < intStartTS { - startMark = jobStartTS - endMark = jobEndTS + if eventTS["end"] > 0 && eventTS["end"] < intStartTS { + startMark = eventTS["start"] + endMark = eventTS["end"] goto elapsed } // If job has started **after** start of interval, we should mark job's start // time as start of elapsed time - if jobStartTS > intStartTS { - startMark = jobStartTS + if eventTS["start"] > intStartTS { + startMark = eventTS["start"] } // If job has ended before end of interval, we should mark job's end time // as elapsed end time. - if jobEndTS > 0 && jobEndTS < intEndTS { - endMark = jobEndTS + if eventTS["end"] > 0 && eventTS["end"] < intEndTS { + endMark = eventTS["end"] } elapsed: @@ -332,9 +344,9 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] CreatedAt: components[sacctFieldMap["submit"]], StartedAt: components[sacctFieldMap["start"]], EndedAt: components[sacctFieldMap["end"]], - CreatedAtTS: jobSubmitTS, - StartedAtTS: jobStartTS, - EndedAtTS: jobEndTS, + CreatedAtTS: eventTS["submit"], + StartedAtTS: eventTS["start"], + EndedAtTS: eventTS["end"], Elapsed: components[sacctFieldMap["elapsed"]], State: components[sacctFieldMap["state"]], Allocation: allocation, @@ -474,17 +486,16 @@ func parseSacctMgrCmdOutput(sacctMgrOutput string, currentTime string) ([]models } // runSacctCmd executes sacct command and return output. -func (s *slurmScheduler) runSacctCmd(ctx context.Context, startTime string, endTime string) ([]byte, error) { +func (s *slurmScheduler) runSacctCmd(ctx context.Context, start, end time.Time) ([]byte, error) { // If we are fetching historical data, do not use RUNNING state as it can report // same job twice once when it was still in running state and once it is in completed // state. - endTimeParsed, _ := time.Parse(base.DatetimeLayout, endTime) - + // endTimeParsed, _ := time.Parse(base.DatetimeLayout, endTime) var states []string // When fetching current jobs, endTime should be very close to current time. 
Here we // assume that if current time is more than 5 sec than end time, we are fetching // historical data - if time.Since(endTimeParsed) > 5*time.Second { + if time.Now().In(end.Location()).Sub(end) > 5*time.Second { // Strip RUNNING state from slice states = slurmStates[:len(slurmStates)-1] } else { @@ -505,8 +516,8 @@ func (s *slurmScheduler) runSacctCmd(ctx context.Context, startTime string, endT "-D", "-X", "--noheader", "--allusers", "--parsable2", "--format", strings.Join(sacctFields, ","), "--state", strings.Join(states, ","), - "--starttime", startTime, - "--endtime", endTime, + "--starttime", start.Format(base.DatetimeLayout), + "--endtime", end.Format(base.DatetimeLayout), } // Run command as slurm user diff --git a/pkg/api/resource/slurm/cli_test.go b/pkg/api/resource/slurm/cli_test.go index ecb3ee07..a08c74f2 100644 --- a/pkg/api/resource/slurm/cli_test.go +++ b/pkg/api/resource/slurm/cli_test.go @@ -8,6 +8,7 @@ import ( "path/filepath" "testing" + "github.com/mahendrapaipuri/ceems/pkg/api/base" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -65,7 +66,7 @@ func TestParseSacctCmdOutput(t *testing.T) { } func TestParseSacctMgrCmdOutput(t *testing.T) { - users, projects := parseSacctMgrCmdOutput(sacctMgrCmdOutput, current.Format(slurmTimeFormat)) + users, projects := parseSacctMgrCmdOutput(sacctMgrCmdOutput, current.Format(base.DatetimezoneLayout)) require.ElementsMatch(t, expectedUsers, users) require.ElementsMatch(t, expectedProjects, projects) } diff --git a/pkg/api/resource/slurm/manager.go b/pkg/api/resource/slurm/manager.go index 9c1d83cd..3c7228ed 100644 --- a/pkg/api/resource/slurm/manager.go +++ b/pkg/api/resource/slurm/manager.go @@ -43,10 +43,9 @@ type slurmScheduler struct { const slurmBatchScheduler = "slurm" var ( - slurmTimeFormat = base.DatetimeLayout + "-0700" - jobLock = sync.RWMutex{} - assocLock = sync.RWMutex{} - sacctFields = []string{ + jobLock = sync.RWMutex{} + assocLock = sync.RWMutex{} + sacctFields = []string{ "jobidraw", "partition", "qos", "account", "group", "gid", "user", "uid", "submit", "start", "end", "elapsed", "elapsedraw", "exitcode", "state", "alloctres", "nodelist", "jobname", "workdir", @@ -81,7 +80,7 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) return nil, err } - logger.Info("Fetching batch jobs from SLURM clusters", "id", cluster.ID) + logger.Info("Batch jobs from SLURM cluster will be fetched", "id", cluster.ID) return &slurmScheduler, nil } @@ -139,11 +138,10 @@ func (s *slurmScheduler) FetchUsersProjects( // Get jobs from slurm sacct command. 
func (s *slurmScheduler) fetchFromSacct(ctx context.Context, start time.Time, end time.Time) ([]models.Unit, error) { - startTime := start.Format(base.DatetimeLayout) - endTime := end.Format(base.DatetimeLayout) - + // startTime := start.Format(base.DatetimeLayout) + // endTime := end.Format(base.DatetimeLayout) // Execute sacct command between start and end times - sacctOutput, err := s.runSacctCmd(ctx, startTime, endTime) + sacctOutput, err := s.runSacctCmd(ctx, start, end) if err != nil { s.logger.Error("Failed to run sacct command", "cluster_id", s.cluster.ID, "err", err) diff --git a/pkg/api/resource/slurm/manager_test.go b/pkg/api/resource/slurm/manager_test.go index b310e1b5..cd2388ba 100644 --- a/pkg/api/resource/slurm/manager_test.go +++ b/pkg/api/resource/slurm/manager_test.go @@ -10,14 +10,15 @@ import ( "testing" "time" + "github.com/mahendrapaipuri/ceems/pkg/api/base" "github.com/mahendrapaipuri/ceems/pkg/api/models" "github.com/stretchr/testify/require" ) var ( - start, _ = time.Parse(slurmTimeFormat, "2023-02-21T15:00:00+0100") - end, _ = time.Parse(slurmTimeFormat, "2023-02-21T15:15:00+0100") - current, _ = time.Parse(slurmTimeFormat, "2023-02-21T15:15:00+0100") + start, _ = time.Parse(base.DatetimezoneLayout, "2023-02-21T15:00:00+0100") + end, _ = time.Parse(base.DatetimezoneLayout, "2023-02-21T15:15:00+0100") + current, _ = time.Parse(base.DatetimezoneLayout, "2023-02-21T15:15:00+0100") sacctCmdOutput = `1479763|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T14:37:02+0100|2023-02-21T14:37:07+0100|NA|01:49:22|3000|0:0|RUNNING|billing=80,cpu=160,energy=1439089,gres/gpu=8,mem=320.5G,node=2|compute-0|test_script1|/home/usr 1481508|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T13:49:20+0100|2023-02-21T13:49:06+0100|2023-02-21T15:10:23+0100|00:08:17|4920|0:0|COMPLETED|billing=1,cpu=2,mem=4M,node=1|compute-[0-2]|test_script2|/home/usr` sacctMgrCmdOutput = `root| @@ -176,9 +177,9 @@ printf """%s"""`, sacctMgrCmdOutput) }, } - start, _ = time.Parse(slurmTimeFormat, "2023-02-21T15:00:00+0100") - end, _ = time.Parse(slurmTimeFormat, "2023-02-21T15:15:00+0100") - current, _ = time.Parse(slurmTimeFormat, "2023-02-21T15:15:00+0100") + start, _ = time.Parse(base.DatetimezoneLayout, "2023-02-21T15:00:00+0100") + end, _ = time.Parse(base.DatetimezoneLayout, "2023-02-21T15:15:00+0100") + current, _ = time.Parse(base.DatetimezoneLayout, "2023-02-21T15:15:00+0100") ctx := context.Background() for _, cluster := range clusters { diff --git a/pkg/api/testdata/config.yml b/pkg/api/testdata/config.yml index 92fad79b..9ed90ce7 100644 --- a/pkg/api/testdata/config.yml +++ b/pkg/api/testdata/config.yml @@ -4,6 +4,7 @@ ceems_api_server: data: path: TO_REPLACE backup_path: TO_REPLACE + time_zone: 'Europe/Paris' # backup_interval: 10s # update_from: '2024-09-08T00:00:00Z' admin: diff --git a/pkg/api/testdata/openstack/compute/servers.json b/pkg/api/testdata/openstack/compute/servers.json index 2bf8f428..da23e87b 100644 --- a/pkg/api/testdata/openstack/compute/servers.json +++ b/pkg/api/testdata/openstack/compute/servers.json @@ -614,7 +614,7 @@ } }, "created": "2024-10-15T13:16:44Z", - "updated": "2024-10-15T15:10:56Z", + "updated": "2024-10-15T14:10:56Z", "addresses": { "shared": [ { diff --git a/pkg/api/testdata/output/e2e-test-api-server-admin-query-all-selected-fields.txt b/pkg/api/testdata/output/e2e-test-api-server-admin-query-all-selected-fields.txt index afc70bd2..3a4bdf7d 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-admin-query-all-selected-fields.txt +++ 
b/pkg/api/testdata/output/e2e-test-api-server-admin-query-all-selected-fields.txt @@ -1 +1 @@ -{"status":"success","data":[{"uuid":"0687859c-b7b8-47ea-aa4c-74162f52fbfc","started_at":"2024-10-15T14:29:34+0200","ended_at":"N/A"},{"uuid":"16af784a-4fa1-429e-953f-4ef5dc462960","started_at":"2024-10-15T13:32:45+0200","ended_at":"2024-10-15T14:21:29+0200"},{"uuid":"1c8ad46b-c4a5-42c5-81a0-194aa592f1e1","started_at":"2024-10-15T13:18:13+0200","ended_at":"2024-10-15T14:25:30+0200"},{"uuid":"1cef0381-0a5a-42e6-9e9b-3d88f84be971","started_at":"2024-10-15T14:28:50+0200","ended_at":"N/A"},{"uuid":"1e3b7f2c-a648-41a8-b53e-4fa5bd2ae73c","started_at":"2024-10-15T13:15:42+0200","ended_at":"N/A"},{"uuid":"242760b7-756f-4f13-a64f-e1a6e012f708","started_at":"2024-10-15T13:17:15+0200","ended_at":"N/A"},{"uuid":"38c14d24-2e8a-4cb6-ad5b-851612e800ab","started_at":"2024-10-15T13:18:08+0200","ended_at":"2024-10-15T14:21:57+0200"},{"uuid":"3bc984e2-ff73-417c-b123-fdb365ddf241","started_at":"2024-10-15T13:32:40+0200","ended_at":"N/A"},{"uuid":"43c81538-d7ea-479f-b71f-934521a6f7bf","started_at":"2024-10-15T13:18:10+0200","ended_at":"N/A"},{"uuid":"6330d17c-6fe8-419c-a044-58e590480e18","started_at":"2024-10-15T13:18:08+0200","ended_at":"2024-10-15T14:21:57+0200"},{"uuid":"66c3eff0-52eb-45e2-a5da-5fe21c0ef3f3","started_at":"2024-10-15T13:16:55+0200","ended_at":"N/A"},{"uuid":"7fe4fa04-e4ea-4b92-84f4-45c9e78b9520","started_at":"2024-10-15T13:15:11+0200","ended_at":"N/A"},{"uuid":"83b7be64-daff-477c-8f3d-2ce880c44a53","started_at":"2024-10-15T13:14:34+0200","ended_at":"N/A"},{"uuid":"b6eafae3-5c24-4f25-b297-5ef291d9487d","started_at":"2024-10-15T13:16:00+0200","ended_at":"N/A"},{"uuid":"c98235c9-d9c0-4b54-8782-0fd8d6312539","started_at":"2024-10-15T14:29:32+0200","ended_at":"N/A"},{"uuid":"d0d60434-4bf1-4eb1-9469-d7b38083a88f","started_at":"2024-10-15T13:32:43+0200","ended_at":"2024-10-15T14:32:09+0200"},{"uuid":"d8af1245-4639-4981-95db-ae097021401d","started_at":"2024-10-15T13:18:08+0200","ended_at":"2024-10-15T14:21:57+0200"},{"uuid":"e119caae-1424-47de-9f64-11ac73ae0e75","started_at":"2024-10-15T13:14:33+0200","ended_at":"N/A"}]} 
+{"status":"success","data":[{"uuid":"0687859c-b7b8-47ea-aa4c-74162f52fbfc","started_at":"2024-10-15T16:29:34+0200","ended_at":"N/A"},{"uuid":"16af784a-4fa1-429e-953f-4ef5dc462960","started_at":"2024-10-15T15:32:45+0200","ended_at":"2024-10-15T16:21:29+0200"},{"uuid":"1c8ad46b-c4a5-42c5-81a0-194aa592f1e1","started_at":"2024-10-15T15:18:13+0200","ended_at":"2024-10-15T16:25:30+0200"},{"uuid":"1cef0381-0a5a-42e6-9e9b-3d88f84be971","started_at":"2024-10-15T16:28:50+0200","ended_at":"N/A"},{"uuid":"1e3b7f2c-a648-41a8-b53e-4fa5bd2ae73c","started_at":"2024-10-15T15:15:42+0200","ended_at":"N/A"},{"uuid":"242760b7-756f-4f13-a64f-e1a6e012f708","started_at":"2024-10-15T15:17:15+0200","ended_at":"N/A"},{"uuid":"38c14d24-2e8a-4cb6-ad5b-851612e800ab","started_at":"2024-10-15T15:18:08+0200","ended_at":"2024-10-15T16:21:57+0200"},{"uuid":"3bc984e2-ff73-417c-b123-fdb365ddf241","started_at":"2024-10-15T15:32:40+0200","ended_at":"N/A"},{"uuid":"43c81538-d7ea-479f-b71f-934521a6f7bf","started_at":"2024-10-15T15:18:10+0200","ended_at":"N/A"},{"uuid":"6330d17c-6fe8-419c-a044-58e590480e18","started_at":"2024-10-15T15:18:08+0200","ended_at":"2024-10-15T16:21:57+0200"},{"uuid":"66c3eff0-52eb-45e2-a5da-5fe21c0ef3f3","started_at":"2024-10-15T15:16:55+0200","ended_at":"N/A"},{"uuid":"7fe4fa04-e4ea-4b92-84f4-45c9e78b9520","started_at":"2024-10-15T15:15:11+0200","ended_at":"N/A"},{"uuid":"83b7be64-daff-477c-8f3d-2ce880c44a53","started_at":"2024-10-15T15:14:34+0200","ended_at":"N/A"},{"uuid":"b6eafae3-5c24-4f25-b297-5ef291d9487d","started_at":"2024-10-15T15:16:00+0200","ended_at":"N/A"},{"uuid":"c98235c9-d9c0-4b54-8782-0fd8d6312539","started_at":"2024-10-15T16:29:32+0200","ended_at":"N/A"},{"uuid":"d0d60434-4bf1-4eb1-9469-d7b38083a88f","started_at":"2024-10-15T15:32:43+0200","ended_at":"2024-10-15T16:32:09+0200"},{"uuid":"d8af1245-4639-4981-95db-ae097021401d","started_at":"2024-10-15T15:18:08+0200","ended_at":"2024-10-15T16:21:57+0200"},{"uuid":"e119caae-1424-47de-9f64-11ac73ae0e75","started_at":"2024-10-15T15:14:33+0200","ended_at":"N/A"}]} diff --git a/pkg/api/testdata/output/e2e-test-api-server-running-query.txt b/pkg/api/testdata/output/e2e-test-api-server-running-query.txt index 6b0c8169..447018b4 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-running-query.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-running-query.txt @@ -1 +1 @@ 
-{"status":"success","data":[{"uuid":"1cef0381-0a5a-42e6-9e9b-3d88f84be971","started_at":"2024-10-15T14:28:50+0200","state":"ACTIVE","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True","resources:VGPU":"1"},"mem":8192,"name":"m10.vgpu","swap":0,"vcpus":8},"tags":{"az":"nova","hypervisor":"gpu-node-4","metadata":{},"power_state":"RUNNING","reservation_id":"r-ct4kh3w1","server_groups":"","tags":[]}},{"uuid":"1e3b7f2c-a648-41a8-b53e-4fa5bd2ae73c","started_at":"2024-10-15T13:15:42+0200","state":"ACTIVE","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True"},"mem":256,"name":"cirros256","swap":0,"vcpus":1},"tags":{"az":"nova","hypervisor":"cpu-node-4","metadata":{},"power_state":"RUNNING","reservation_id":"r-tk530ak6","server_groups":"","tags":[]}},{"uuid":"7fe4fa04-e4ea-4b92-84f4-45c9e78b9520","started_at":"2024-10-15T13:15:11+0200","state":"ACTIVE","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True"},"mem":192,"name":"m1.micro","swap":0,"vcpus":1},"tags":{"az":"nova","hypervisor":"cpu-node-4","metadata":{},"power_state":"RUNNING","reservation_id":"r-ztao3fbf","server_groups":"","tags":[]}},{"uuid":"b6eafae3-5c24-4f25-b297-5ef291d9487d","started_at":"2024-10-15T13:16:00+0200","state":"SUSPENDED","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True"},"mem":128,"name":"m1.nano","swap":0,"vcpus":1},"tags":{"az":"nova","hypervisor":"cpu-node-4","metadata":{},"power_state":"SHUTDOWN","reservation_id":"r-ks8nrkb2","server_groups":"","tags":[]}}]} +{"status":"success","data":[{"uuid":"1cef0381-0a5a-42e6-9e9b-3d88f84be971","started_at":"2024-10-15T17:28:50+0300","state":"ACTIVE","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True","resources:VGPU":"1"},"mem":8192,"name":"m10.vgpu","swap":0,"vcpus":8},"tags":{"az":"nova","hypervisor":"gpu-node-4","metadata":{},"power_state":"RUNNING","reservation_id":"r-ct4kh3w1","server_groups":"","tags":[]}},{"uuid":"1e3b7f2c-a648-41a8-b53e-4fa5bd2ae73c","started_at":"2024-10-15T16:15:42+0300","state":"ACTIVE","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True"},"mem":256,"name":"cirros256","swap":0,"vcpus":1},"tags":{"az":"nova","hypervisor":"cpu-node-4","metadata":{},"power_state":"RUNNING","reservation_id":"r-tk530ak6","server_groups":"","tags":[]}},{"uuid":"7fe4fa04-e4ea-4b92-84f4-45c9e78b9520","started_at":"2024-10-15T16:15:11+0300","state":"ACTIVE","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True"},"mem":192,"name":"m1.micro","swap":0,"vcpus":1},"tags":{"az":"nova","hypervisor":"cpu-node-4","metadata":{},"power_state":"RUNNING","reservation_id":"r-ztao3fbf","server_groups":"","tags":[]}},{"uuid":"b6eafae3-5c24-4f25-b297-5ef291d9487d","started_at":"2024-10-15T16:16:00+0300","state":"SUSPENDED","allocation":{"disk":1,"extra_specs":{"hw_rng:allowed":"True"},"mem":128,"name":"m1.nano","swap":0,"vcpus":1},"tags":{"az":"nova","hypervisor":"cpu-node-4","metadata":{},"power_state":"SHUTDOWN","reservation_id":"r-ks8nrkb2","server_groups":"","tags":[]}}]} diff --git a/pkg/lb/backend/pyro.go b/pkg/lb/backend/pyro.go index 8c573ad2..7a2d1f51 100644 --- a/pkg/lb/backend/pyro.go +++ b/pkg/lb/backend/pyro.go @@ -62,7 +62,7 @@ func (b *pyroServer) RetentionPeriod() time.Duration { // String returns name/web URL backend Pyroscope server. 
 func (b *pyroServer) String() string {
 	if b.url != nil {
-		return b.url.Redacted()
+		return "url: " + b.url.Redacted()
 	}

 	return "No backend found"
diff --git a/pkg/lb/backend/tsdb.go b/pkg/lb/backend/tsdb.go
index 900768d0..e274e8a1 100644
--- a/pkg/lb/backend/tsdb.go
+++ b/pkg/lb/backend/tsdb.go
@@ -51,7 +51,8 @@ func NewTSDB(webURL *url.URL, p *httputil.ReverseProxy, logger *slog.Logger) Ser
 		logger.Debug("Basic auth configured for backend TSDB server", "backend", webURL.Redacted())
 	}

-	return &tsdbServer{
+	// Make server struct
+	server := &tsdbServer{
 		url:          webURL,
 		alive:        true,
 		reverseProxy: p,
@@ -60,12 +61,17 @@ func NewTSDB(webURL *url.URL, p *httputil.ReverseProxy, logger *slog.Logger) Ser
 		client:       tsdbClient,
 		logger:       logger,
 	}
+
+	// Update retention period
+	server.RetentionPeriod()
+
+	return server
 }

 // String returns name/web URL backend TSDB server.
 func (b *tsdbServer) String() string {
 	if b.url != nil {
-		return b.url.Redacted()
+		return fmt.Sprintf("url: %s; retention: %s", b.url.Redacted(), b.retentionPeriod)
 	}

 	return "No backend found"
diff --git a/pkg/lb/cli/cli.go b/pkg/lb/cli/cli.go
index de97e50e..56508fca 100644
--- a/pkg/lb/cli/cli.go
+++ b/pkg/lb/cli/cli.go
@@ -341,7 +341,7 @@ func (lb *CEEMSLoadBalancer) Main() error {
 	go func() {
 		defer wg.Done()

-		frontend.Monitor(ctx, managers[lbType], logger)
+		frontend.Monitor(ctx, managers[lbType], logger.With("backend_type", lbType))
 	}()

 	// Initializing the server in a goroutine so that
diff --git a/pkg/lb/frontend/helpers.go b/pkg/lb/frontend/helpers.go
index 18797a09..32a8461d 100644
--- a/pkg/lb/frontend/helpers.go
+++ b/pkg/lb/frontend/helpers.go
@@ -299,7 +299,7 @@ func healthCheck(ctx context.Context, manager serverpool.Manager, logger *slog.L
 				status = "down"
 			}
 		}
-		logger.Debug("Health check", "id", id, "url", backend.URL().Redacted(), "status", status)
+		logger.Debug("Health check", "id", id, "backend", backend.String(), "status", status)
 	}
 }
diff --git a/pkg/lb/serverpool/leastconn.go b/pkg/lb/serverpool/leastconn.go
index a6f91627..0f3b49b5 100644
--- a/pkg/lb/serverpool/leastconn.go
+++ b/pkg/lb/serverpool/leastconn.go
@@ -41,7 +41,7 @@ func (s *leastConn) Target(id string, _ time.Duration) backend.Server {
 	}

 	if targetBackend != nil {
-		s.logger.Debug("Least connection strategy", "selected_backend", targetBackend.String())
+		s.logger.Debug("Least connection strategy", "cluster_id", id, "selected_backend", targetBackend.String())

 		return targetBackend
 	}
@@ -50,6 +50,8 @@
 }

 func (s *leastConn) Add(id string, b backend.Server) {
+	s.logger.Debug("Backend added", "strategy", "least-connection", "cluster_id", id, "backend", b.String())
+
 	s.backends[id] = append(s.backends[id], b)
 }
diff --git a/pkg/lb/serverpool/resource.go b/pkg/lb/serverpool/resource.go
index ac62fcd2..e6009e25 100644
--- a/pkg/lb/serverpool/resource.go
+++ b/pkg/lb/serverpool/resource.go
@@ -54,12 +54,12 @@ func (s *resourceBased) Target(id string, d time.Duration) backend.Server {
 	// If no eligible servers found return
 	if len(targetBackends) == 0 {
-		s.logger.Debug("Resourced based strategy. No eligible backends found")
+		s.logger.Debug("Resource based strategy. No eligible backends found", "cluster_id", id, "duration", d)

 		return targetBackend
 	} else if len(targetBackends) == 1 {
 		targetBackend = targetBackends[0]
-		s.logger.Debug("Resourced based strategy", "selected_backend", targetBackend.String())
+		s.logger.Debug("Resource based strategy", "cluster_id", id, "selected_backend", targetBackend.String())

 		return targetBackend
 	}
@@ -86,7 +86,7 @@
 	}

 	if targetBackend != nil {
-		s.logger.Debug("Resourced based strategy", "selected_backend", targetBackend.String())
+		s.logger.Debug("Resource based strategy", "cluster_id", id, "selected_backend", targetBackend.String())

 		return targetBackend
 	}
@@ -101,6 +101,8 @@ func (s *resourceBased) Backends() map[string][]backend.Server {

 // Add a backend server to pool.
 func (s *resourceBased) Add(id string, b backend.Server) {
+	s.logger.Debug("Backend added", "strategy", "resource", "cluster_id", id, "backend", b.String())
+
 	s.backends[id] = append(s.backends[id], b)
 }
diff --git a/pkg/lb/serverpool/roundrobin.go b/pkg/lb/serverpool/roundrobin.go
index 2c032152..c3b94bce 100644
--- a/pkg/lb/serverpool/roundrobin.go
+++ b/pkg/lb/serverpool/roundrobin.go
@@ -38,7 +38,7 @@ func (s *roundRobin) Target(id string, _ time.Duration) backend.Server {
 	for range s.Size(id) {
 		nextPeer := s.Rotate(id)
 		if nextPeer.IsAlive() {
-			s.logger.Debug("Round Robin strategy", "selected_backend", nextPeer.String())
+			s.logger.Debug("Round Robin strategy", "cluster_id", id, "selected_backend", nextPeer.String())

 			return nextPeer
 		}
@@ -54,6 +54,8 @@ func (s *roundRobin) Backends() map[string][]backend.Server {

 // Add a backend server to pool.
 func (s *roundRobin) Add(id string, b backend.Server) {
+	s.logger.Debug("Backend added", "strategy", "roundrobin", "cluster_id", id, "backend", b.String())
+
 	s.backends[id] = append(s.backends[id], b)
 }
diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh
index 0a98ae92..7369f6f5 100755
--- a/scripts/e2e-test.sh
+++ b/scripts/e2e-test.sh
@@ -673,6 +673,7 @@ then
    # Usage from and to timestamps
    usage_from=$(date +%s --date='86400 seconds ago')
    usage_to=$(date +%s --date='1800 seconds')
+   timezone="Europe%2FAthens"

    if [ "${scenario}" = "api-project-query" ]
    then
@@ -703,7 +704,7 @@ then
        get -H "X-Grafana-User: usr2" "127.0.0.1:${port}/api/${api_version}/units?cluster_id=slurm-0&from=1676934000&to=1677538800&field=uuiid" > "${fixture_output}"
    elif [ "${scenario}" = "api-running-query" ]
    then
-       get -H "X-Grafana-User: test-user-1" "127.0.0.1:${port}/api/${api_version}/units?running&cluster_id=os-1&field=uuid&field=state&field=started_at&field=allocation&field=tags" > "${fixture_output}"
+       get -H "X-Grafana-User: test-user-1" "127.0.0.1:${port}/api/${api_version}/units?running&cluster_id=os-1&field=uuid&field=state&field=started_at&field=allocation&field=tags&timezone=${timezone}" > "${fixture_output}"
    elif [ "${scenario}" = "api-admin-query" ]
    then
        get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: usr3" "127.0.0.1:${port}/api/${api_version}/units?cluster_id=slurm-0&project=acc3&from=1676934000&to=1677538800" > "${fixture_output}"
diff --git a/website/docs/configuration/config-reference.md b/website/docs/configuration/config-reference.md
index 108f7297..b6c9deef 100644
--- a/website/docs/configuration/config-reference.md
+++ b/website/docs/configuration/config-reference.md
@@ -153,6 +153,14 @@ A `data_config` allows configuring the DB settings of CEEMS API server.
 # [ retention_period: | default = 30d ]

+# Time zone to be used when storing times of different events in the DB.
+# It takes a value defined in IANA (https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
+# like `Europe/Paris`
+#
+# A special value `Local` can be used to use server local time zone.
+#
+[ time_zone: | default = Local ]
+
 # CEEMS API server is capable of creating DB backups using SQLite backup API. Created
 # DB backups will be saved to this path. NOTE that for huge DBs, this backup can take
 # a considerable amount of time.
@@ -308,19 +316,17 @@ updaters:
 # When SLURM resource manager is configured to fetch job data using `sacct` command,
 # execution mode of the command will be decided as follows:
 #
-# - If the current user running `ceems_api_server` is `root` or `slurm` user, `sacct`
-#   command will be executed natively as that user.
-#
-# - If above check fails, `sacct` command will be attempted to execute as `slurm` user.
-#   If the `ceems_api_server` process have enough privileges setup using Linux capabilities
-#   in the systemd unit file, this will succeed and `sacct` will be always executed
-#   as `slurm` user.
-#
-# - If above check fails as well, we attempt to execute `sacct` with `sudo` prefix. If
+# - If the current user running `ceems_api_server` is `root`, `sacct`
+#   command will be executed as that user in a security context.
+#
+# - If the `ceems_api_server` process has `CAP_SETUID` and `CAP_SETGID` capabilities, `sacct`
+#   command will be executed as `root` user in a security context.
+#
+# - As a last attempt, we attempt to execute `sacct` with `sudo` prefix. If
 #   the current user running `ceems_api_server` is in the list of sudoers, this check
 #   will pass and `sacct` will be always executed as `sudo sacct ` to fetch jobs.
 #
-# If none of the above checks, pass, `sacct` will be executed as the current user
+# If none of the above conditions are true, `sacct` will be executed as the current user
 # which might not give job data of _all_ users in the cluster.
 #
 # If the operators are unsure which method to use, there is a default systemd