diff --git a/compiler/rustc_query_system/src/ich/hcx.rs b/compiler/rustc_query_system/src/ich/hcx.rs index 397ec7994f181..2e118dc3359fa 100644 --- a/compiler/rustc_query_system/src/ich/hcx.rs +++ b/compiler/rustc_query_system/src/ich/hcx.rs @@ -1,10 +1,12 @@ -use rustc_data_structures::stable_hasher::HashingControls; +use std::hash::Hash; + +use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir::definitions::DefPathHash; use rustc_session::Session; use rustc_session::cstore::Untracked; use rustc_span::source_map::SourceMap; -use rustc_span::{BytePos, CachingSourceMapView, DUMMY_SP, SourceFile, Span, SpanData}; +use rustc_span::{CachingSourceMapView, DUMMY_SP, Pos, Span}; // Very often, we are hashing something that does not need the `CachingSourceMapView`, so we // initialize it lazily. @@ -60,6 +62,11 @@ impl<'a> StableHashingContext<'a> { } } + #[inline] + fn def_span(&self, def_id: LocalDefId) -> Span { + self.untracked.source_span.get(def_id).unwrap_or(DUMMY_SP) + } + #[inline] pub fn hashing_controls(&self) -> HashingControls { self.hashing_controls @@ -67,9 +74,88 @@ impl<'a> StableHashingContext<'a> { } impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> { + /// Hashes a span in a stable way. We can't directly hash the span's `BytePos` fields (that + /// would be similar to hashing pointers, since those are just offsets into the `SourceMap`). + /// Instead, we hash the (file name, line, column) triple, which stays the same even if the + /// containing `SourceFile` has moved within the `SourceMap`. + /// + /// Also note that we are hashing byte offsets for the column, not unicode codepoint offsets. + /// For the purpose of the hash that's sufficient. Also, hashing filenames is expensive so we + /// avoid doing it twice when the span starts and ends in the same file, which is almost always + /// the case. + /// + /// IMPORTANT: changes to this method should be reflected in implementations of `SpanEncoder`. #[inline] - fn unstable_opts_incremental_ignore_spans(&self) -> bool { - self.incremental_ignore_spans + fn span_hash_stable(&mut self, span: Span, hasher: &mut StableHasher) { + const TAG_VALID_SPAN: u8 = 0; + const TAG_INVALID_SPAN: u8 = 1; + const TAG_RELATIVE_SPAN: u8 = 2; + + if !self.hashing_controls().hash_spans { + return; + } + + let span = span.data_untracked(); + span.ctxt.hash_stable(self, hasher); + span.parent.hash_stable(self, hasher); + + if span.is_dummy() { + Hash::hash(&TAG_INVALID_SPAN, hasher); + return; + } + + let parent = span.parent.map(|parent| self.def_span(parent).data_untracked()); + if let Some(parent) = parent + && parent.contains(span) + { + // This span is enclosed in a definition: only hash the relative position. This catches + // a subset of the cases from the `file.contains(parent.lo)`. But we can do this check + // cheaply without the expensive `span_data_to_lines_and_cols` query. + Hash::hash(&TAG_RELATIVE_SPAN, hasher); + (span.lo - parent.lo).to_u32().hash_stable(self, hasher); + (span.hi - parent.lo).to_u32().hash_stable(self, hasher); + return; + } + + // If this is not an empty or invalid span, we want to hash the last position that belongs + // to it, as opposed to hashing the first position past it. + let Some((file, line_lo, col_lo, line_hi, col_hi)) = + self.source_map().span_data_to_lines_and_cols(&span) + else { + Hash::hash(&TAG_INVALID_SPAN, hasher); + return; + }; + + if let Some(parent) = parent + && file.contains(parent.lo) + { + // This span is relative to another span in the same file, + // only hash the relative position. + Hash::hash(&TAG_RELATIVE_SPAN, hasher); + Hash::hash(&(span.lo.0.wrapping_sub(parent.lo.0)), hasher); + Hash::hash(&(span.hi.0.wrapping_sub(parent.lo.0)), hasher); + return; + } + + Hash::hash(&TAG_VALID_SPAN, hasher); + Hash::hash(&file.stable_id, hasher); + + // Hash both the length and the end location (line/column) of a span. If we hash only the + // length, for example, then two otherwise equal spans with different end locations will + // have the same hash. This can cause a problem during incremental compilation wherein a + // previous result for a query that depends on the end location of a span will be + // incorrectly reused when the end location of the span it depends on has changed (see + // issue #74890). A similar analysis applies if some query depends specifically on the + // length of the span, but we only hash the end location. So hash both. + + let col_lo_trunc = (col_lo.0 as u64) & 0xFF; + let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8; + let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32; + let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40; + let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc; + let len = (span.hi - span.lo).0; + Hash::hash(&col_line, hasher); + Hash::hash(&len, hasher); } #[inline] @@ -81,22 +167,26 @@ impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> { } } + /// Assert that the provided `HashStableContext` is configured with the default + /// `HashingControls`. We should always have bailed out before getting to here with a + /// non-default mode. With this check in place, we can avoid the need to maintain separate + /// versions of `ExpnData` hashes for each permutation of `HashingControls` settings. #[inline] - fn def_span(&self, def_id: LocalDefId) -> Span { - self.untracked.source_span.get(def_id).unwrap_or(DUMMY_SP) - } - - #[inline] - fn span_data_to_lines_and_cols( - &mut self, - span: &SpanData, - ) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)> { - self.source_map().span_data_to_lines_and_cols(span) - } + fn assert_default_hashing_controls(&self, msg: &str) { + let hashing_controls = self.hashing_controls; + let HashingControls { hash_spans } = hashing_controls; - #[inline] - fn hashing_controls(&self) -> HashingControls { - self.hashing_controls + // Note that we require that `hash_spans` be the inverse of the global `-Z + // incremental-ignore-spans` option. Normally, this option is disabled, in which case + // `hash_spans` must be true. + // + // Span hashing can also be disabled without `-Z incremental-ignore-spans`. This is the + // case for instance when building a hash for name mangling. Such configuration must not be + // used for metadata. + assert_eq!( + hash_spans, !self.incremental_ignore_spans, + "Attempted hashing of {msg} with non-default HashingControls: {hashing_controls:?}" + ); } } diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 07a85ad75daa6..25834f03af5e7 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -30,7 +30,7 @@ use std::{fmt, iter, mem}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::Lock; use rustc_data_structures::unhash::UnhashMap; use rustc_hashes::Hash64; @@ -126,29 +126,6 @@ rustc_index::newtype_index! { impl !Ord for LocalExpnId {} impl !PartialOrd for LocalExpnId {} -/// Assert that the provided `HashStableContext` is configured with the 'default' -/// `HashingControls`. We should always have bailed out before getting to here -/// with a non-default mode. With this check in place, we can avoid the need -/// to maintain separate versions of `ExpnData` hashes for each permutation -/// of `HashingControls` settings. -fn assert_default_hashing_controls(ctx: &impl HashStableContext, msg: &str) { - let hashing_controls = ctx.hashing_controls(); - let HashingControls { hash_spans } = hashing_controls; - - // Note that we require that `hash_spans` be the inverse of the global - // `-Z incremental-ignore-spans` option. Normally, this option is disabled, - // in which case `hash_spans` must be true. - // - // Span hashing can also be disabled without `-Z incremental-ignore-spans`. - // This is the case for instance when building a hash for name mangling. - // Such configuration must not be used for metadata. - assert_eq!( - hash_spans, - !ctx.unstable_opts_incremental_ignore_spans(), - "Attempted hashing of {msg} with non-default HashingControls: {hashing_controls:?}" - ); -} - /// A unique hash value associated to an expansion. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Encodable, Decodable, HashStable_Generic)] pub struct ExpnHash(Fingerprint); @@ -1508,7 +1485,7 @@ pub fn raw_encode_syntax_context( fn update_disambiguator(expn_data: &mut ExpnData, mut ctx: impl HashStableContext) -> ExpnHash { // This disambiguator should not have been set yet. assert_eq!(expn_data.disambiguator, 0, "Already set disambiguator for ExpnData: {expn_data:?}"); - assert_default_hashing_controls(&ctx, "ExpnData (disambiguator)"); + ctx.assert_default_hashing_controls("ExpnData (disambiguator)"); let mut expn_hash = expn_data.hash_expn(&mut ctx); let disambiguator = HygieneData::with(|data| { @@ -1558,7 +1535,7 @@ impl HashStable for SyntaxContext { impl HashStable for ExpnId { fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { - assert_default_hashing_controls(ctx, "ExpnId"); + ctx.assert_default_hashing_controls("ExpnId"); let hash = if *self == ExpnId::root() { // Avoid fetching TLS storage for a trivial often-used value. Fingerprint::ZERO diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 0ad80b9d7879a..bececc548494e 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -54,7 +54,6 @@ use hygiene::Transparency; pub use hygiene::{ DesugaringKind, ExpnData, ExpnHash, ExpnId, ExpnKind, LocalExpnId, MacroKind, SyntaxContext, }; -use rustc_data_structures::stable_hasher::HashingControls; pub mod def_id; use def_id::{CrateNum, DefId, DefIndex, DefPathHash, LOCAL_CRATE, LocalDefId, StableCrateId}; pub mod edit_distance; @@ -2798,105 +2797,24 @@ impl InnerSpan { /// This is a hack to allow using the [`HashStable_Generic`] derive macro /// instead of implementing everything in rustc_middle. pub trait HashStableContext { + /// The main event: stable hashing of a span. + fn span_hash_stable(&mut self, span: Span, hasher: &mut StableHasher); + + /// Compute a `DefPathHash`. fn def_path_hash(&self, def_id: DefId) -> DefPathHash; - /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since - /// we don't have easy access to a `Session` - fn unstable_opts_incremental_ignore_spans(&self) -> bool; - fn def_span(&self, def_id: LocalDefId) -> Span; - fn span_data_to_lines_and_cols( - &mut self, - span: &SpanData, - ) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)>; - fn hashing_controls(&self) -> HashingControls; + + /// Assert that the provided `HashStableContext` is configured with the default + /// `HashingControls`. We should always have bailed out before getting to here with a + fn assert_default_hashing_controls(&self, msg: &str); } impl HashStable for Span where CTX: HashStableContext, { - /// Hashes a span in a stable way. We can't directly hash the span's `BytePos` - /// fields (that would be similar to hashing pointers, since those are just - /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column) - /// triple, which stays the same even if the containing `SourceFile` has moved - /// within the `SourceMap`. - /// - /// Also note that we are hashing byte offsets for the column, not unicode - /// codepoint offsets. For the purpose of the hash that's sufficient. - /// Also, hashing filenames is expensive so we avoid doing it twice when the - /// span starts and ends in the same file, which is almost always the case. - /// - /// IMPORTANT: changes to this method should be reflected in implementations of `SpanEncoder`. fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { - const TAG_VALID_SPAN: u8 = 0; - const TAG_INVALID_SPAN: u8 = 1; - const TAG_RELATIVE_SPAN: u8 = 2; - - if !ctx.hashing_controls().hash_spans { - return; - } - - let span = self.data_untracked(); - span.ctxt.hash_stable(ctx, hasher); - span.parent.hash_stable(ctx, hasher); - - if span.is_dummy() { - Hash::hash(&TAG_INVALID_SPAN, hasher); - return; - } - - let parent = span.parent.map(|parent| ctx.def_span(parent).data_untracked()); - if let Some(parent) = parent - && parent.contains(span) - { - // This span is enclosed in a definition: only hash the relative position. - // This catches a subset of the cases from the `file.contains(parent.lo)`, - // But we can do this check cheaply without the expensive `span_data_to_lines_and_cols` query. - Hash::hash(&TAG_RELATIVE_SPAN, hasher); - (span.lo - parent.lo).to_u32().hash_stable(ctx, hasher); - (span.hi - parent.lo).to_u32().hash_stable(ctx, hasher); - return; - } - - // If this is not an empty or invalid span, we want to hash the last - // position that belongs to it, as opposed to hashing the first - // position past it. - let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span) - else { - Hash::hash(&TAG_INVALID_SPAN, hasher); - return; - }; - - if let Some(parent) = parent - && file.contains(parent.lo) - { - // This span is relative to another span in the same file, - // only hash the relative position. - Hash::hash(&TAG_RELATIVE_SPAN, hasher); - Hash::hash(&(span.lo.0.wrapping_sub(parent.lo.0)), hasher); - Hash::hash(&(span.hi.0.wrapping_sub(parent.lo.0)), hasher); - return; - } - - Hash::hash(&TAG_VALID_SPAN, hasher); - Hash::hash(&file.stable_id, hasher); - - // Hash both the length and the end location (line/column) of a span. If we - // hash only the length, for example, then two otherwise equal spans with - // different end locations will have the same hash. This can cause a problem - // during incremental compilation wherein a previous result for a query that - // depends on the end location of a span will be incorrectly reused when the - // end location of the span it depends on has changed (see issue #74890). A - // similar analysis applies if some query depends specifically on the length - // of the span, but we only hash the end location. So hash both. - - let col_lo_trunc = (col_lo.0 as u64) & 0xFF; - let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8; - let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32; - let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40; - let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc; - let len = (span.hi - span.lo).0; - Hash::hash(&col_line, hasher); - Hash::hash(&len, hasher); + // `span_hash_stable` does all the work. + ctx.span_hash_stable(*self, hasher) } }