Skip to content

Commit 42dc36e

Browse files
authored
Merge pull request #412 from dtolnay/sourcetext
Cache and lazily build the mapping from char index to byte offset
2 parents 90b8e1e + 6461c2d commit 42dc36e

File tree

2 files changed

+50
-11
lines changed

2 files changed

+50
-11
lines changed

src/fallback.rs

Lines changed: 43 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@ use crate::parse::{self, Cursor};
44
use crate::rcvec::{RcVec, RcVecBuilder, RcVecIntoIter, RcVecMut};
55
use crate::{Delimiter, Spacing, TokenTree};
66
#[cfg(all(span_locations, not(fuzzing)))]
7+
use alloc::collections::BTreeMap;
8+
#[cfg(all(span_locations, not(fuzzing)))]
79
use core::cell::RefCell;
810
#[cfg(span_locations)]
911
use core::cmp;
@@ -327,6 +329,7 @@ thread_local! {
327329
source_text: String::new(),
328330
span: Span { lo: 0, hi: 0 },
329331
lines: vec![0],
332+
char_index_to_byte_offset: BTreeMap::new(),
330333
}],
331334
});
332335
}
@@ -336,6 +339,7 @@ struct FileInfo {
336339
source_text: String,
337340
span: Span,
338341
lines: Vec<usize>,
342+
char_index_to_byte_offset: BTreeMap<usize, usize>,
339343
}
340344

341345
#[cfg(all(span_locations, not(fuzzing)))]
@@ -362,12 +366,34 @@ impl FileInfo {
362366
span.lo >= self.span.lo && span.hi <= self.span.hi
363367
}
364368

365-
fn source_text(&self, span: Span) -> String {
366-
let lo = (span.lo - self.span.lo) as usize;
367-
let trunc_lo = match self.source_text.char_indices().nth(lo) {
368-
Some((offset, _ch)) => &self.source_text[offset..],
369-
None => return String::new(),
369+
fn source_text(&mut self, span: Span) -> String {
370+
let lo_char = (span.lo - self.span.lo) as usize;
371+
372+
// Look up offset of the largest already-computed char index that is
373+
// less than or equal to the current requested one. We resume counting
374+
// chars from that point.
375+
let (&last_char_index, &last_byte_offset) = self
376+
.char_index_to_byte_offset
377+
.range(..=lo_char)
378+
.next_back()
379+
.unwrap_or((&0, &0));
380+
381+
let lo_byte = if last_char_index == lo_char {
382+
last_byte_offset
383+
} else {
384+
let total_byte_offset = match self.source_text[last_byte_offset..]
385+
.char_indices()
386+
.nth(lo_char - last_char_index)
387+
{
388+
Some((additional_offset, _ch)) => last_byte_offset + additional_offset,
389+
None => self.source_text.len(),
390+
};
391+
self.char_index_to_byte_offset
392+
.insert(lo_char, total_byte_offset);
393+
total_byte_offset
370394
};
395+
396+
let trunc_lo = &self.source_text[lo_byte..];
371397
let char_len = (span.hi - span.lo) as usize;
372398
let source_text = match trunc_lo.char_indices().nth(char_len) {
373399
Some((offset, _ch)) => &trunc_lo[..offset],
@@ -421,6 +447,8 @@ impl SourceMap {
421447
source_text: src.to_owned(),
422448
span,
423449
lines,
450+
// Populated lazily by source_text().
451+
char_index_to_byte_offset: BTreeMap::new(),
424452
});
425453

426454
span
@@ -448,6 +476,15 @@ impl SourceMap {
448476
}
449477
unreachable!("Invalid span with no related FileInfo!");
450478
}
479+
480+
fn fileinfo_mut(&mut self, span: Span) -> &mut FileInfo {
481+
for file in &mut self.files {
482+
if file.span_within(span) {
483+
return file;
484+
}
485+
}
486+
unreachable!("Invalid span with no related FileInfo!");
487+
}
451488
}
452489

453490
#[derive(Clone, Copy, PartialEq, Eq)]
@@ -572,7 +609,7 @@ impl Span {
572609
if self.is_call_site() {
573610
None
574611
} else {
575-
Some(SOURCE_MAP.with(|cm| cm.borrow().fileinfo(*self).source_text(*self)))
612+
Some(SOURCE_MAP.with(|cm| cm.borrow_mut().fileinfo_mut(*self).source_text(*self)))
576613
}
577614
}
578615
}

tests/test.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -328,17 +328,19 @@ fn literal_span() {
328328
#[cfg(span_locations)]
329329
#[test]
330330
fn source_text() {
331-
let input = " 𓀕 c ";
331+
let input = " 𓀕 a z ";
332332
let mut tokens = input
333333
.parse::<proc_macro2::TokenStream>()
334334
.unwrap()
335335
.into_iter();
336336

337-
let ident = tokens.next().unwrap();
338-
assert_eq!("𓀕", ident.span().source_text().unwrap());
337+
let first = tokens.next().unwrap();
338+
assert_eq!("𓀕", first.span().source_text().unwrap());
339339

340-
let ident = tokens.next().unwrap();
341-
assert_eq!("c", ident.span().source_text().unwrap());
340+
let second = tokens.next().unwrap();
341+
let third = tokens.next().unwrap();
342+
assert_eq!("z", third.span().source_text().unwrap());
343+
assert_eq!("a", second.span().source_text().unwrap());
342344
}
343345

344346
#[test]

0 commit comments

Comments
 (0)