diff --git a/docs/openapi.yml b/docs/openapi.yml
index a819902cb6..9728f8f31a 100644
--- a/docs/openapi.yml
+++ b/docs/openapi.yml
@@ -183,12 +183,13 @@ info:
     that the Metaculus api will accept. But wait! Just because we have a cdf that represents our beliefs, it doesn't
     mean Metaculus will accept it. We'll have to make sure it obeys a few rules, lest it be rejected as invalid.
 
-    1. The cdf must be strictly increasing by at least 0.00005 per step. This is because Metaculus evaluates continuous forecasts
-       by their PDF (technically a PMF) dervied as the set of differences between consecutive CDF points, and 0.00005 is the
-       minimum value allowed to avoid scores getting too arbitrarily negative.
+    1. The cdf must be strictly increasing by at least 0.00005 per step (1% / inbound_outcome_count). This is because Metaculus
+       evaluates continuous forecasts by their PDF (technically a PMF) derived as the set of differences between consecutive
+       CDF points, and 0.00005 is the minimum value allowed to keep scores from becoming arbitrarily negative. Note that if the
+       inbound_outcome_count is less than the normal 200, this threshold will be larger.
 
-    2. The cdf must not increase by more than 0.59 at any step, as this is the maximum value attainable via the sliders in the UI.
-       This threshold may be lowered in the future.
+    2. The cdf must not increase by more than 0.2 at any step if the question has the normal inbound_outcome_count of 200.
+       Otherwise, that threshold is scaled as follows: 0.2 * (200 / inbound_outcome_count). This prevents overly extreme
+       spikes in the distribution.
 
     3. The cdf must obey bounds. If a boundary is open, at least 0.1% of probability mass must be assigned outside of it;
       if it is closed, no probability mass may be outside of it.
@@ -197,19 +198,27 @@ info:
     so those with an abundance of precision in their forecasts may want to skip it. The cdfs (and thus their derived
     pdfs) you see on the website have been standardized in this way.
 
     ```python
+    import numpy as np
+
-    def standardize_cdf(cdf: list[float], question_data: dict) -> list[float]:
+    def standardize_cdf(cdf, question_data: dict) -> list[float]:
         """
         Takes a cdf and returns a standardized version of it
         - assigns no mass outside of closed bounds (scales accordingly)
         - assigns at least a minimum amount of mass outside of open bounds
         - increasing by at least the minimum amount (0.01 / 200 = 0.00005)
+        - caps the maximum growth to 0.2
-        - TODO: add smoothing over cdfs that spike too heavily (exceed a change of 0.59)
+        Note: thresholds change with different `inbound_outcome_count`s
         """
         lower_open = question_data["open_lower_bound"]
         upper_open = question_data["open_upper_bound"]
+        inbound_outcome_count = question_data["inbound_outcome_count"]
+        default_inbound_outcome_count = 200
+        cdf = np.asarray(cdf, dtype=float)
+        if not cdf.size:
+            return []
+
+        # rescale mass to obey bounds and enforce the minimum step size
         scale_lower_to = 0 if lower_open else cdf[0]
         scale_upper_to = 1.0 if upper_open else cdf[-1]
         rescaled_inbound_mass = scale_upper_to - scale_lower_to
@@ -227,13 +236,44 @@ info:
                 return 0.989 * rescaled_F + 0.01 * location
             return 0.99 * rescaled_F + 0.01 * location
 
-        standardized_cdf = []
-        for i, F in enumerate(cdf):
-            standardized_F = standardize(F, i / (len(cdf) - 1))
-            # round to avoid floating point errors
-            standardized_cdf.append(round(standardized_F, 10))
+        for i, value in enumerate(cdf):
+            cdf[i] = standardize(value, i / (len(cdf) - 1))
+
+        # cap the maximum step size: operate in PMF space
+        pmf = np.diff(cdf, prepend=0, append=1)
+        # cap depends on inbound_outcome_count (0.2 if it is the default 200)
+        cap = 0.2 * (default_inbound_outcome_count / inbound_outcome_count)
+
+        def cap_pmf(scale: float) -> np.ndarray:
+            return np.concatenate([pmf[:1], np.minimum(cap, scale * pmf[1:-1]), pmf[-1:]])
+
+        def capped_sum(scale: float) -> float:
+            return float(cap_pmf(scale).sum())
+
+        # find the appropriate scale search space
+        lo = hi = scale = 1.0
+        while capped_sum(hi) < 1.0:
+            hi *= 1.2
+        # home in on the scale value that makes the capped sum 1
+        for _ in range(100):
+            scale = 0.5 * (lo + hi)
+            s = capped_sum(scale)
+            if s < 1.0:
+                lo = scale
+            else:
+                hi = scale
+            if s == 1.0 or (hi - lo) < 2e-5:
+                break
+        # apply scale and renormalize the inbound mass
+        pmf = cap_pmf(scale)
+        pmf[1:-1] *= (cdf[-1] - cdf[0]) / pmf[1:-1].sum()
+        # back to CDF space
+        cdf = np.cumsum(pmf)[:-1]
 
-        return standardized_cdf
+        # round to minimize floating point errors
+        cdf = np.round(cdf, 10)
+        return cdf.tolist()
     ```
 
     With this tiny guide, you'll be well on your way to submitting continuous forecasts to Metaculus.
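As a quick sanity check of the documented function, the sketch below exercises it against the three acceptance rules. It is illustrative, not part of the patch: it assumes the complete `standardize_cdf` from the updated docs (the hunks above elide part of the function body), numpy, and a made-up `question_data` dict.

```python
import numpy as np

# hypothetical question: open lower bound, closed upper bound, default resolution
question_data = {
    "open_lower_bound": True,
    "open_upper_bound": False,
    "inbound_outcome_count": 200,
}

# any increasing curve from ~0 to ~1 on 201 points works as a raw cdf
x = np.linspace(0, 1, 201)
raw_cdf = 1 / (1 + np.exp(-12 * (x - 0.4)))

cdf = np.array(standardize_cdf(raw_cdf.tolist(), question_data))

# the three acceptance rules from the docs
steps = np.diff(cdf)
assert np.all(steps >= 0.00005 - 1e-9)  # rule 1: minimum step
assert np.all(steps <= 0.2 + 1e-9)      # rule 2: maximum step
assert cdf[0] >= 0.001                  # rule 3: mass outside the open lower bound
assert abs(cdf[-1] - 1.0) < 1e-9        # rule 3: no mass outside the closed upper bound
```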
diff --git a/front_end/src/utils/forecasts/dataset.ts b/front_end/src/utils/forecasts/dataset.ts
index 0895c5eb4c..7e8c9d4e9b 100644
--- a/front_end/src/utils/forecasts/dataset.ts
+++ b/front_end/src/utils/forecasts/dataset.ts
@@ -15,6 +15,82 @@ import {
   nominalLocationToCdfLocation,
 } from "@/utils/math";
 
+function standardizeCdf(
+  cdf: number[],
+  lowerOpen: boolean,
+  upperOpen: boolean,
+  inboundOutcomeCount: number
+): number[] {
+  if (cdf.length === 0) {
+    return [];
+  }
+
+  // rescale to obey bounds and enforce the minimum step size
+  const cdfOffset =
+    lowerOpen && upperOpen
+      ? (F: number, x: number) => 0.988 * F + 0.01 * x + 0.001
+      : lowerOpen
+        ? (F: number, x: number) => 0.989 * F + 0.01 * x + 0.001
+        : upperOpen
+          ? (F: number, x: number) => 0.989 * F + 0.01 * x
+          : (F: number, x: number) => 0.99 * F + 0.01 * x;
+  cdf = cdf.map((F, index) => cdfOffset(F, index / (cdf.length - 1)));
+
+  // cap the maximum step size: operate in PMF space
+  let pmf: number[] = [];
+  pmf.push(cdf[0] ?? 0);
+  for (let i = 1; i < cdf.length; i++) {
+    pmf.push((cdf[i] ?? 0) - (cdf[i - 1] ?? 0));
+  }
+  pmf.push(1 - (cdf[cdf.length - 1] ?? 1));
+  // cap depends on inboundOutcomeCount (0.2 if it is the default 200)
+  const cap = 0.2 * (DefaultInboundOutcomeCount / inboundOutcomeCount);
+  const capPmf = (scale: number) =>
+    pmf.map((value, i) =>
+      i === 0 || i === pmf.length - 1 ? value : Math.min(cap, scale * value)
+    );
+  const cappedSum = (scale: number) =>
+    capPmf(scale).reduce((acc, value) => acc + value, 0);
+
+  // find the appropriate scale search space
+  let lo = 1;
+  let hi = 1;
+  let scale = 1;
+  while (cappedSum(hi) < 1) hi *= 1.2;
+  // home in on the scale value that makes the capped sum 1
+  for (let i = 0; i < 100; i++) {
+    scale = 0.5 * (lo + hi);
+    const s = cappedSum(scale);
+    if (s < 1) {
+      lo = scale;
+    } else {
+      hi = scale;
+    }
+    if (s === 1 || hi - lo < 2e-5) {
+      break;
+    }
+  }
+  // apply scale and renormalize the inbound mass
+  pmf = capPmf(scale);
+  const inboundScaleFactor =
+    ((cdf[cdf.length - 1] ?? 1) - (cdf[0] ?? 0)) /
+    pmf.slice(1, pmf.length - 1).reduce((acc, value) => acc + value, 0);
+  pmf = pmf.map((value, i) =>
+    i === 0 || i === pmf.length - 1 ? value : value * inboundScaleFactor
+  );
+  // back to CDF space
+  cdf = [];
+  let cumulative = 0;
+  for (let i = 0; i < pmf.length - 1; i++) {
+    cumulative += pmf[i] ?? 0;
+    cdf.push(cumulative);
+  }
+
+  // finally round to 10 decimal places
+  cdf = cdf.map((value) => Math.round(value * 1e10) / 1e10);
+  return cdf;
+}
+
 /**
  * Get chart data from slider input
  */
@@ -51,19 +127,7 @@ export function getSliderNumericForecastDataset(
   }, Array(componentCdfs[0]!.length).fill(0));
   cdf = cdf.map((F) => Number(F));
 
-  // standardize cdf
-  const cdfOffset =
-    lowerOpen && upperOpen
-      ? (F: number, x: number) => 0.988 * F + 0.01 * x + 0.001
-      : lowerOpen
-        ? (F: number, x: number) => 0.989 * F + 0.01 * x + 0.001
-        : upperOpen
-          ? (F: number, x: number) => 0.989 * F + 0.01 * x
-          : (F: number, x: number) => 0.99 * F + 0.01 * x;
-  cdf = cdf.map(
-    (F, index) =>
-      Math.round(cdfOffset(F, index / (cdf.length - 1)) * 1e10) / 1e10
-  );
+  cdf = standardizeCdf(cdf, lowerOpen, upperOpen, inboundOutcomeCount);
 
   return {
     cdf: cdf,
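The TypeScript port mirrors the Python capping scheme step for step. To make the bracket-and-bisect logic concrete, here is a standalone sketch in plain Python with made-up numbers (independent of either codebase) showing how a spiked pmf gets redistributed:

```python
import numpy as np

# made-up 7-point pmf: out-of-bound mass at each end, one heavy spike inbound
pmf = np.array([0.001, 0.05, 0.6, 0.2, 0.1, 0.04, 0.009])
cap = 0.2 * (200 / 200)  # default inbound_outcome_count

def capped_sum(scale: float) -> float:
    # end bins (out-of-bound mass) are never scaled or capped
    inner = np.minimum(cap, scale * pmf[1:-1])
    return float(pmf[0] + inner.sum() + pmf[-1])

# grow the bracket until the capped sum reaches 1, then bisect
lo, hi = 1.0, 1.0
while capped_sum(hi) < 1.0:
    hi *= 1.2
for _ in range(100):
    scale = 0.5 * (lo + hi)
    if capped_sum(scale) < 1.0:
        lo = scale
    else:
        hi = scale
    if hi - lo < 2e-5:
        break

print(round(scale, 3), round(capped_sum(scale), 6))
# ~4.75 1.0: the 0.6 spike is clipped to the 0.2 cap and the smaller
# bins are scaled up until total probability is restored
```

After this search, both implementations rescale the inbound bins so their sum exactly equals cdf[-1] - cdf[0], then cumulatively sum back to a cdf.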
diff --git a/questions/serializers/common.py b/questions/serializers/common.py
index cef47870fe..8ce2158ef6 100644
--- a/questions/serializers/common.py
+++ b/questions/serializers/common.py
@@ -486,11 +486,10 @@ def continuous_validation(self, continuous_cdf, question: Question):
                 "continuous_cdf must be increasing by at least "
                 f"{min_diff} at every step.\n"
             )
-        # max diff for default CDF is derived empirically from slider positions
-        # TODO: make this lower and scale with inbound_outcome_count
-        max_diff = (
-            0.59 if len(continuous_cdf) == DEFAULT_INBOUND_OUTCOME_COUNT + 1 else 1
-        )
+        # Check whether the maximum difference between cdf points is acceptable
+        # (0.59 if the inbound outcome count is the default 200)
+        # TODO: switch this value to 0.2 after coordinating
+        max_diff = 0.59 * DEFAULT_INBOUND_OUTCOME_COUNT / inbound_outcome_count
         if not all(inbound_pmf <= max_diff):
             errors += (
                 "continuous_cdf must be increasing by no more than "
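One consequence of the serializer change worth spelling out: both validation thresholds now scale inversely with inbound_outcome_count. A sketch of the arithmetic (DEFAULT_INBOUND_OUTCOME_COUNT copied from the serializer; the min_diff formula follows the documented 1% / inbound_outcome_count rule and is an assumption about code not shown in this hunk):

```python
DEFAULT_INBOUND_OUTCOME_COUNT = 200  # as in questions/serializers/common.py

def validation_thresholds(inbound_outcome_count: int) -> tuple[float, float]:
    # minimum per-step increase: documented as 1% / inbound_outcome_count
    min_diff = 0.01 / inbound_outcome_count
    # maximum per-step increase: 0.59 today (per the TODO, slated to drop to 0.2)
    max_diff = 0.59 * DEFAULT_INBOUND_OUTCOME_COUNT / inbound_outcome_count
    return min_diff, max_diff

print(validation_thresholds(200))  # (5e-05, 0.59) -- the defaults
print(validation_thresholds(100))  # (0.0001, 1.18) -- coarser grid, looser cap
```

Note the transitional gap this leaves: the docs already describe the stricter 0.2 cap, while the server keeps accepting steps up to 0.59 until the TODO above is resolved.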