Skip to content

Commit

Permalink
Merge pull request #2172 from xueqzhan/flake-fail
Browse files Browse the repository at this point in the history
TRT-1912: Treat flake as failures in component readiness
  • Loading branch information
openshift-merge-bot[bot] authored Jan 8, 2025
2 parents 57cbc0a + b218d0b commit 46b1d0d
Show file tree
Hide file tree
Showing 11 changed files with 263 additions and 48 deletions.
10 changes: 10 additions & 0 deletions config/views.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_new_tests: 95
include_multi_release_analysis: true
metrics:
Expand Down Expand Up @@ -122,6 +123,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -182,6 +184,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -241,6 +244,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_all_tests: 90
metrics:
enabled: false
Expand Down Expand Up @@ -302,6 +306,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_new_tests: 95
metrics:
enabled: true
Expand Down Expand Up @@ -365,6 +370,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -427,6 +433,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: false
regression_tracking:
Expand Down Expand Up @@ -488,6 +495,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
pass_rate_required_all_tests: 90
metrics:
enabled: false
Expand Down Expand Up @@ -548,6 +556,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: true
regression_tracking:
Expand Down Expand Up @@ -605,6 +614,7 @@ component_readiness:
pity_factor: 5
ignore_missing: false
ignore_disruption: true
flake_as_failure: false
metrics:
enabled: true
regression_tracking:
Expand Down
58 changes: 32 additions & 26 deletions pkg/api/componentreadiness/component_report.go
Original file line number Diff line number Diff line change
Expand Up @@ -627,8 +627,7 @@ func (c *componentReportGenerator) normalizeProwJobName(prowName string) string
}

func (c *componentReportGenerator) fetchJobRunTestStatusResults(ctx context.Context,
query *bigquery.Query) (map[string][]crtype.
JobRunTestStatusRow, []error) {
query *bigquery.Query) (map[string][]crtype.JobRunTestStatusRow, []error) {
errs := []error{}
status := map[string][]crtype.JobRunTestStatusRow{}
log.Infof("Fetching job run test details with:\n%s\nParameters:\n%+v\n", query.Q, query.Parameters)
Expand Down Expand Up @@ -911,9 +910,7 @@ type triagedIncidentsGenerator struct {
SampleRelease crtype.RequestReleaseOptions
}

func (t *triagedIncidentsGenerator) generateTriagedIssuesFor(ctx context.Context) (resolvedissues.
TriagedIncidentsForRelease,
[]error) {
func (t *triagedIncidentsGenerator) generateTriagedIssuesFor(ctx context.Context) (resolvedissues.TriagedIncidentsForRelease, []error) {
before := time.Now()
incidents, errs := t.queryTriagedIssues(ctx)

Expand Down Expand Up @@ -1102,7 +1099,7 @@ func (c *componentReportGenerator) matchBaseRegression(testID crtype.ReportTestI
baseRegression = regressionallowances.IntentionalRegressionFor(baseRelease, testID.ColumnIdentification, testID.TestID)

// This could go away if we remove the option for ignoring fallback
if baseRegression != nil && baseRegression.PreviousPassPercentage() > getTestStatusSuccessRate(baseStats) {
if baseRegression != nil && baseRegression.PreviousPassPercentage(c.FlakeAsFailure) > c.getTestStatusPassRate(baseStats) {
// override with the basis regression previous values
// testStats will reflect the expected threshold, not the computed values from the release with the allowed regression
baseRegressionPreviousRelease, err := previousRelease(c.BaseRelease.Release)
Expand Down Expand Up @@ -1466,15 +1463,18 @@ func getFailureCount(status crtype.JobRunTestStatusRow) int {
return failure
}

func getTestStatusSuccessRate(testStatus crtype.TestStatus) float64 {
return getSuccessRate(testStatus.SuccessCount, testStatus.TotalCount-testStatus.SuccessCount-testStatus.FlakeCount, testStatus.FlakeCount)
func (c *componentReportGenerator) getTestStatusPassRate(testStatus crtype.TestStatus) float64 {
return c.getPassRate(testStatus.SuccessCount, testStatus.TotalCount-testStatus.SuccessCount-testStatus.FlakeCount, testStatus.FlakeCount)
}

func getSuccessRate(success, failure, flake int) float64 {
func (c *componentReportGenerator) getPassRate(success, failure, flake int) float64 {
total := success + failure + flake
if total == 0 {
return 0.0
}
if c.FlakeAsFailure {
return float64(success) / float64(total)
}
return float64(success+flake) / float64(total)
}

Expand All @@ -1494,7 +1494,7 @@ func getRegressionStatus(basisPassPercentage, samplePassPercentage float64, isTr

func (c *componentReportGenerator) getEffectivePityFactor(basisPassPercentage float64, approvedRegression *regressionallowances.IntentionalRegression) int {
if approvedRegression != nil && approvedRegression.RegressedFailures > 0 {
regressedPassPercentage := approvedRegression.RegressedPassPercentage()
regressedPassPercentage := approvedRegression.RegressedPassPercentage(c.FlakeAsFailure)
if regressedPassPercentage < basisPassPercentage {
// product owner chose a required pass percentage, so we allow pity to cover that approved pass percent
// plus the existing pity factor to limit, "well, it's just *barely* lower" arguments.
Expand Down Expand Up @@ -1569,7 +1569,7 @@ func (c *componentReportGenerator) assessComponentStatus(
Start: baseStart,
End: baseEnd,
TestDetailsTestStats: crtype.TestDetailsTestStats{
SuccessRate: getSuccessRate(baseSuccess, baseFailure, baseFlake),
SuccessRate: c.getPassRate(baseSuccess, baseFailure, baseFlake),
SuccessCount: baseSuccess,
FailureCount: baseFailure,
FlakeCount: baseFlake,
Expand All @@ -1592,7 +1592,7 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
Start: baseStart,
End: baseEnd,
TestDetailsTestStats: crtype.TestDetailsTestStats{
SuccessRate: getSuccessRate(baseSuccess, baseFailure, baseFlake),
SuccessRate: c.getPassRate(baseSuccess, baseFailure, baseFlake),
SuccessCount: baseSuccess,
FailureCount: baseFailure,
FlakeCount: baseFlake,
Expand All @@ -1606,7 +1606,7 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
Start: &c.SampleRelease.Start,
End: &c.SampleRelease.End,
TestDetailsTestStats: crtype.TestDetailsTestStats{
SuccessRate: getSuccessRate(sampleSuccess, sampleFailure, sampleFlake),
SuccessRate: c.getPassRate(sampleSuccess, sampleFailure, sampleFlake),
SuccessCount: sampleSuccess,
FailureCount: sampleFailure,
FlakeCount: sampleFlake,
Expand All @@ -1625,16 +1625,22 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
}
} else {
// see if we had a significant regression prior to adjusting
basisPassPercentage := float64(baseSuccess+baseFlake) / float64(baseTotal)
initialPassPercentage := float64(sampleSuccess+sampleFlake) / float64(initialSampleTotal)
basePass := baseSuccess + baseFlake
samplePass := sampleSuccess + sampleFlake
if c.FlakeAsFailure {
basePass = baseSuccess
samplePass = sampleSuccess
}
basisPassPercentage := float64(basePass) / float64(baseTotal)
initialPassPercentage := float64(samplePass) / float64(initialSampleTotal)
effectivePityFactor := c.getEffectivePityFactor(basisPassPercentage, approvedRegression)

wasSignificant := false
// only consider wasSignificant if the sampleTotal has been changed and our sample
// pass percentage is below the basis
if initialSampleTotal > sampleTotal && initialPassPercentage < basisPassPercentage {
if basisPassPercentage-initialPassPercentage > float64(c.PityFactor)/100 {
wasSignificant, _ = c.fischerExactTest(requiredConfidence, initialSampleTotal, sampleSuccess, sampleFlake, baseTotal, baseSuccess, baseFlake)
wasSignificant, _ = c.fischerExactTest(requiredConfidence, initialSampleTotal-samplePass, samplePass, baseTotal-basePass, basePass)
}
// if it was significant without the adjustment use
// ExtremeTriagedRegression or SignificantTriagedRegression
Expand Down Expand Up @@ -1666,10 +1672,10 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
}

// now that we know sampleTotal is non zero
samplePassPercentage := float64(sampleSuccess+sampleFlake) / float64(sampleTotal)
samplePassPercentage := float64(samplePass) / float64(sampleTotal)

// did we remove enough failures that we are below the MinimumFailure threshold?
if c.MinimumFailure != 0 && (sampleTotal-sampleSuccess-sampleFlake) < c.MinimumFailure {
if c.MinimumFailure != 0 && (sampleTotal-samplePass) < c.MinimumFailure {
testStats.ReportStatus = status
testStats.FisherExact = thrift.Float64Ptr(0.0)
return testStats
Expand All @@ -1679,9 +1685,9 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,

if improved {
// flip base and sample when improved
significant, fisherExact = c.fischerExactTest(requiredConfidence, baseTotal, baseSuccess, baseFlake, sampleTotal, sampleSuccess, sampleFlake)
significant, fisherExact = c.fischerExactTest(requiredConfidence, baseTotal-basePass, basePass, sampleTotal-samplePass, samplePass)
} else if basisPassPercentage-samplePassPercentage > float64(effectivePityFactor)/100 {
significant, fisherExact = c.fischerExactTest(requiredConfidence, sampleTotal, sampleSuccess, sampleFlake, baseTotal, baseSuccess, baseFlake)
significant, fisherExact = c.fischerExactTest(requiredConfidence, sampleTotal-samplePass, samplePass, baseTotal-basePass, basePass)
}
if significant {
if improved {
Expand Down Expand Up @@ -1716,7 +1722,7 @@ func (c *componentReportGenerator) buildFisherExactTestStats(requiredConfidence,
}

func (c *componentReportGenerator) buildPassRateTestStats(sampleSuccess, sampleFailure, sampleFlake int, requiredSuccessRate float64) crtype.ReportTestStats {
successRate := getSuccessRate(sampleSuccess, sampleFailure, sampleFlake)
successRate := c.getPassRate(sampleSuccess, sampleFailure, sampleFlake)

// Assume 2x our allowed failure rate = an extreme regression.
// i.e. if we require 90%, extreme is anything below 80%
Expand Down Expand Up @@ -1756,11 +1762,11 @@ func (c *componentReportGenerator) buildPassRateTestStats(sampleSuccess, sampleF
}
}

func (c *componentReportGenerator) fischerExactTest(confidenceRequired, sampleTotal, sampleSuccess, sampleFlake, baseTotal, baseSuccess, baseFlake int) (bool, float64) {
_, _, r, _ := fischer.FisherExactTest(sampleTotal-sampleSuccess-sampleFlake,
sampleSuccess+sampleFlake,
baseTotal-baseSuccess-baseFlake,
baseSuccess+baseFlake)
func (c *componentReportGenerator) fischerExactTest(confidenceRequired, sampleFailure, sampleSuccess, baseFailure, baseSuccess int) (bool, float64) {
_, _, r, _ := fischer.FisherExactTest(sampleFailure,
sampleSuccess,
baseFailure,
baseSuccess)
return r < 1-float64(confidenceRequired)/100, r
}

Expand Down
Loading

0 comments on commit 46b1d0d

Please sign in to comment.