Skip to content

Commit f53f327

Browse files
authored May 10, 2023
fix: replace source with unicode (#58)
1 parent 28ff40c commit f53f327

File tree

5 files changed

+154
-56
lines changed

5 files changed

+154
-56
lines changed
 

‎Cargo.toml

+2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,9 @@ dashmap = "5"
2424
substring = "1"
2525
smallvec = "1.10.0"
2626
memchr = "2.5.0"
27+
str_indices = "0.4.1"
2728

2829
[dev-dependencies]
2930
twox-hash = "1"
3031
base64-simd = "0.7"
32+
regex = "1.8.1"

‎src/helpers.rs

+8-10
Original file line number | Diff line number | Diff line change
@@ -2,16 +2,16 @@ use rustc_hash::FxHashMap as HashMap;
22
use std::{borrow::BorrowMut, cell::RefCell, sync::Arc};
33

44
use crate::{
5-
line_with_indices_index::LineWithIndicesArray,
65
source::{Mapping, OriginalLocation},
76
vlq::{decode, encode},
7+
with_indices::WithIndices,
88
MapOptions, Source, SourceMap,
99
};
1010

1111
type ArcStr = Arc<str>;
1212
// Adding this type because sourceContentLine not happy
1313
type InnerSourceContentLine =
14-
RefCell<HashMap<i64, Option<Arc<Vec<LineWithIndicesArray<ArcStr>>>>>>;
14+
RefCell<HashMap<i64, Option<Arc<Vec<WithIndices<ArcStr>>>>>>;
1515

1616
pub fn get_map<S: StreamChunks>(
1717
stream: &S,
@@ -625,10 +625,8 @@ fn stream_chunks_of_source_map_full(
625625
on_name: OnName,
626626
) -> GeneratedInfo {
627627
let lines = split_into_lines(source);
628-
let line_with_indices_list = lines
629-
.into_iter()
630-
.map(LineWithIndicesArray::new)
631-
.collect::<Vec<_>>();
628+
let line_with_indices_list =
629+
lines.into_iter().map(WithIndices::new).collect::<Vec<_>>();
632630

633631
if line_with_indices_list.is_empty() {
634632
return GeneratedInfo {
@@ -899,7 +897,7 @@ struct SourceMapLineData {
899897
#[derive(Debug)]
900898
struct SourceMapLineChunk {
901899
content: ArcStr,
902-
cached: once_cell::sync::OnceCell<LineWithIndicesArray<ArcStr>>,
900+
cached: once_cell::sync::OnceCell<WithIndices<ArcStr>>,
903901
}
904902

905903
impl SourceMapLineChunk {
@@ -913,7 +911,7 @@ impl SourceMapLineChunk {
913911
pub fn substring(&self, start_index: usize, end_index: usize) -> &str {
914912
let cached = self
915913
.cached
916-
.get_or_init(|| LineWithIndicesArray::new(self.content.clone()));
914+
.get_or_init(|| WithIndices::new(self.content.clone()));
917915
cached.substring(start_index, end_index)
918916
}
919917
}
@@ -1039,7 +1037,7 @@ pub fn stream_chunks_of_combined_source_map(
10391037
Some(Arc::new(
10401038
split_into_lines(original_source)
10411039
.into_iter()
1042-
.map(|s| LineWithIndicesArray::new(s.into()))
1040+
.map(|s| WithIndices::new(s.into()))
10431041
.collect(),
10441042
))
10451043
} else {
@@ -1142,7 +1140,7 @@ pub fn stream_chunks_of_combined_source_map(
11421140
Arc::new(
11431141
lines
11441142
.into_iter()
1145-
.map(|s| LineWithIndicesArray::new(s.into()))
1143+
.map(|s| WithIndices::new(s.into()))
11461144
.collect::<Vec<_>>(),
11471145
)
11481146
})

‎src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ mod cached_source;
88
mod concat_source;
99
mod error;
1010
mod helpers;
11-
mod line_with_indices_index;
1211
mod original_source;
1312
mod raw_source;
1413
mod replace_source;
1514
mod source;
1615
mod source_map_source;
1716
mod vlq;
17+
mod with_indices;
1818
pub use cached_source::CachedSource;
1919
pub use concat_source::ConcatSource;
2020
pub use error::{Error, Result};

‎src/replace_source.rs

+134-33
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ use std::{
55
sync::Arc,
66
};
77

8+
use once_cell::sync::OnceCell;
89
use parking_lot::Mutex;
910
use rustc_hash::FxHashMap as HashMap;
10-
use substring::Substring;
1111

1212
use crate::{
1313
helpers::{get_map, split_into_lines, GeneratedInfo, StreamChunks},
14+
with_indices::WithIndices,
1415
MapOptions, Mapping, OriginalLocation, Source, SourceMap,
1516
};
1617

@@ -36,22 +37,76 @@ use crate::{
3637
#[derive(Debug)]
3738
pub struct ReplaceSource<T> {
3839
inner: Arc<T>,
40+
inner_source_code: OnceCell<Box<str>>,
3941
replacements: Mutex<Vec<Replacement>>,
4042
}
4143

42-
#[derive(Debug, Hash, Clone, PartialEq, Eq)]
44+
#[derive(Debug, Clone, Eq)]
4345
struct Replacement {
4446
start: u32,
4547
end: u32,
48+
char_start: OnceCell<u32>,
49+
char_end: OnceCell<u32>,
4650
content: String,
4751
name: Option<String>,
4852
}
4953

54+
impl Hash for Replacement {
55+
fn hash<H: Hasher>(&self, state: &mut H) {
56+
self.start.hash(state);
57+
self.end.hash(state);
58+
self.content.hash(state);
59+
self.name.hash(state);
60+
}
61+
}
62+
63+
impl PartialEq for Replacement {
64+
fn eq(&self, other: &Self) -> bool {
65+
self.start == other.start
66+
&& self.end == other.end
67+
&& self.content == other.content
68+
&& self.name == other.name
69+
}
70+
}
71+
72+
impl Replacement {
73+
pub fn new(
74+
start: u32,
75+
end: u32,
76+
content: String,
77+
name: Option<String>,
78+
) -> Self {
79+
Self {
80+
start,
81+
end,
82+
char_start: OnceCell::new(),
83+
char_end: OnceCell::new(),
84+
content,
85+
name,
86+
}
87+
}
88+
89+
pub fn char_start(&self, inner_source_code: &str) -> u32 {
90+
*self.char_start.get_or_init(|| {
91+
str_indices::chars::from_byte_idx(inner_source_code, self.start as usize)
92+
as u32
93+
})
94+
}
95+
96+
pub fn char_end(&self, inner_source_code: &str) -> u32 {
97+
*self.char_end.get_or_init(|| {
98+
str_indices::chars::from_byte_idx(inner_source_code, self.end as usize)
99+
as u32
100+
})
101+
}
102+
}
103+
50104
impl<T> ReplaceSource<T> {
51105
/// Create a [ReplaceSource].
52106
pub fn new(source: T) -> Self {
53107
Self {
54108
inner: Arc::new(source),
109+
inner_source_code: OnceCell::new(),
55110
replacements: Mutex::new(Vec::new()),
56111
}
57112
}
@@ -61,14 +116,24 @@ impl<T> ReplaceSource<T> {
61116
&self.inner
62117
}
63118

119+
fn sort_replacement(&self) {
120+
self
121+
.replacements
122+
.lock()
123+
.sort_by(|a, b| (a.start, a.end).cmp(&(b.start, b.end)));
124+
}
125+
}
126+
127+
impl<T: Source> ReplaceSource<T> {
128+
fn get_inner_source_code(&self) -> &str {
129+
self
130+
.inner_source_code
131+
.get_or_init(|| Box::from(self.inner.source()))
132+
}
133+
64134
/// Insert a content at start.
65135
pub fn insert(&mut self, start: u32, content: &str, name: Option<&str>) {
66-
self.replacements.lock().push(Replacement {
67-
start,
68-
end: start,
69-
content: content.into(),
70-
name: name.map(|s| s.into()),
71-
});
136+
self.replace(start, start, content, name)
72137
}
73138

74139
/// Create a replacement with content at `[start, end)`.
@@ -79,48 +144,45 @@ impl<T> ReplaceSource<T> {
79144
content: &str,
80145
name: Option<&str>,
81146
) {
82-
self.replacements.lock().push(Replacement {
147+
self.replacements.lock().push(Replacement::new(
83148
start,
84149
end,
85-
content: content.into(),
86-
name: name.map(|s| s.into()),
87-
});
88-
}
89-
90-
fn sort_replacement(&self) {
91-
self
92-
.replacements
93-
.lock()
94-
.sort_by(|a, b| (a.start, a.end).cmp(&(b.start, b.end)));
150+
content.into(),
151+
name.map(|s| s.into()),
152+
));
95153
}
96154
}
97155

98156
impl<T: Source + Hash + PartialEq + Eq + 'static> Source for ReplaceSource<T> {
99157
fn source(&self) -> Cow<str> {
100158
self.sort_replacement();
101159

102-
let inner_source_code = self.inner.source();
160+
let inner_source_code = self.get_inner_source_code();
161+
let inner_source_code_with_indices = WithIndices::new(inner_source_code);
103162

104163
// mut_string_push_str is faster than vec join
105164
// concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs
106165
let mut source_code = String::new();
107166
let mut inner_pos = 0;
108167
for replacement in self.replacements.lock().iter() {
109-
if inner_pos < replacement.start {
110-
let end_pos = (replacement.start as usize).min(inner_source_code.len());
111-
source_code
112-
.push_str(inner_source_code.substring(inner_pos as usize, end_pos));
168+
if inner_pos < replacement.char_start(inner_source_code) {
169+
let end_pos = (replacement.char_start(inner_source_code) as usize)
170+
.min(inner_source_code.len());
171+
source_code.push_str(
172+
inner_source_code_with_indices.substring(inner_pos as usize, end_pos),
173+
);
113174
}
114175
source_code.push_str(&replacement.content);
115176
#[allow(clippy::manual_clamp)]
116177
{
117178
inner_pos = inner_pos
118-
.max(replacement.end)
179+
.max(replacement.char_end(inner_source_code))
119180
.min(inner_source_code.len() as u32);
120181
}
121182
}
122-
source_code
123-
.push_str(inner_source_code.substring(inner_pos as usize, usize::MAX));
183+
source_code.push_str(
184+
inner_source_code_with_indices.substring(inner_pos as usize, usize::MAX),
185+
);
124186

125187
source_code.into()
126188
}
@@ -166,7 +228,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
166228
let mut generated_line_offset: i64 = 0;
167229
let mut generated_column_offset: i64 = 0;
168230
let mut generated_column_offset_line = 0;
169-
let source_content_lines: RefCell<Vec<Option<Vec<String>>>> =
231+
let source_content_lines: RefCell<Vec<Option<Vec<WithIndices<String>>>>> =
170232
RefCell::new(Vec::new());
171233
let name_mapping: RefCell<HashMap<String, u32>> =
172234
RefCell::new(HashMap::default());
@@ -224,6 +286,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
224286
&mut |chunk, mut mapping| {
225287
// SAFETY: final_source is false in ReplaceSource
226288
let chunk = chunk.unwrap();
289+
let chunk_with_indices = WithIndices::new(chunk);
227290
let mut chunk_pos = 0;
228291
let end_pos = pos + chunk.len() as u32;
229292
// Skip over when it has been replaced
@@ -253,7 +316,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
253316
original.source_index,
254317
original.original_line,
255318
original.original_column,
256-
chunk.substring(0, chunk_pos as usize),
319+
chunk_with_indices.substring(0, chunk_pos as usize),
257320
) {
258321
original.original_column += chunk_pos;
259322
}
@@ -274,7 +337,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
274337
if next_replacement_pos > pos {
275338
// Emit chunk until replacement
276339
let offset = next_replacement_pos - pos;
277-
let chunk_slice = chunk.substring(chunk_pos as usize, (chunk_pos + offset) as usize);
340+
let chunk_slice = chunk_with_indices.substring(chunk_pos as usize, (chunk_pos + offset) as usize);
278341
on_chunk(Some(chunk_slice), Mapping {
279342
generated_line: line as u32,
280343
generated_column: mapping.generated_column + if line == generated_column_offset_line {generated_column_offset} else {0} as u32,
@@ -367,7 +430,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
367430

368431
// Partially skip over chunk
369432
let line = mapping.generated_line as i64 + generated_line_offset;
370-
if let Some(original) = &mut mapping.original && check_original_content(original.source_index, original.original_line, original.original_column, chunk.substring(chunk_pos as usize, (chunk_pos + offset as u32) as usize)) {
433+
if let Some(original) = &mut mapping.original && check_original_content(original.source_index, original.original_line, original.original_column, chunk_with_indices.substring(chunk_pos as usize, (chunk_pos + offset as u32) as usize)) {
371434
original.original_column += offset as u32;
372435
}
373436
chunk_pos += offset as u32;
@@ -384,7 +447,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
384447

385448
// Emit remaining chunk
386449
if (chunk_pos as usize) < chunk.len() {
387-
let chunk_slice = if chunk_pos == 0 {chunk} else {chunk.substring(chunk_pos as usize, usize::MAX)};
450+
let chunk_slice = if chunk_pos == 0 {chunk} else {chunk_with_indices.substring(chunk_pos as usize, usize::MAX)};
388451
let line = mapping.generated_line as i64 + generated_line_offset;
389452
on_chunk(Some(chunk_slice), Mapping {
390453
generated_line: line as u32,
@@ -400,7 +463,7 @@ impl<T: Source> StreamChunks for ReplaceSource<T> {
400463
source_content_lines.push(None);
401464
}
402465
source_content_lines[source_index as usize] = source_content.map(|source_content| {
403-
split_into_lines(source_content).into_iter().map(Into::into).collect()
466+
split_into_lines(source_content).into_iter().map(|line| WithIndices::new(line.into())).collect()
404467
});
405468
on_source(source_index, source, source_content);
406469
},
@@ -473,6 +536,7 @@ impl<T: Source> Clone for ReplaceSource<T> {
473536
fn clone(&self) -> Self {
474537
Self {
475538
inner: self.inner.clone(),
539+
inner_source_code: self.inner_source_code.clone(),
476540
replacements: Mutex::new(self.replacements.lock().clone()),
477541
}
478542
}
@@ -900,4 +964,41 @@ return <div>{data.foo}</div>
900964
source.hash(&mut hasher);
901965
assert_eq!(format!("{:x}", hasher.finish()), "ab891b4c45dc95b4");
902966
}
967+
968+
#[test]
969+
fn should_replace_correctly_with_unicode() {
970+
let content = r#"
971+
"abc"; url(__PUBLIC_PATH__logo.png);
972+
"ヒラギノ角ゴ"; url(__PUBLIC_PATH__logo.png);
973+
"游ゴシック体"; url(__PUBLIC_PATH__logo.png);
974+
"🤪"; url(__PUBLIC_PATH__logo.png);
975+
"👨‍👩‍👧‍👧"; url(__PUBLIC_PATH__logo.png);
976+
"#;
977+
let mut source =
978+
ReplaceSource::new(OriginalSource::new(content, "file.css").boxed());
979+
for mat in regex::Regex::new("__PUBLIC_PATH__")
980+
.unwrap()
981+
.find_iter(content)
982+
{
983+
source.replace(mat.start() as u32, mat.end() as u32, "../", None);
984+
}
985+
assert_eq!(
986+
source.source(),
987+
r#"
988+
"abc"; url(../logo.png);
989+
"ヒラギノ角ゴ"; url(../logo.png);
990+
"游ゴシック体"; url(../logo.png);
991+
"🤪"; url(../logo.png);
992+
"👨‍👩‍👧‍👧"; url(../logo.png);
993+
"#
994+
);
995+
assert_eq!(
996+
source
997+
.map(&MapOptions::default())
998+
.unwrap()
999+
.to_json()
1000+
.unwrap(),
1001+
r#"{"version":3,"sources":["file.css"],"sourcesContent":["\n\"abc\"; url(__PUBLIC_PATH__logo.png);\n\"ヒラギノ角ゴ\"; url(__PUBLIC_PATH__logo.png);\n\"游ゴシック体\"; url(__PUBLIC_PATH__logo.png);\n\"🤪\"; url(__PUBLIC_PATH__logo.png);\n\"👨‍👩‍👧‍👧\"; url(__PUBLIC_PATH__logo.png);\n"],"names":[],"mappings":";AACA,OAAO,IAAI,GAAe;AAC1B,sBAAsB;AACtB,sBAAsB;AACtB,QAAQ;AACR,6BAA6B"}"#,
1002+
);
1003+
}
9031004
}

‎src/line_with_indices_index.rs ‎src/with_indices.rs

+9-12
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#[derive(Debug, Clone)]
2-
pub struct LineWithIndicesArray<T: AsRef<str>> {
2+
pub struct WithIndices<T: AsRef<str>> {
33
/// line is a string reference
44
pub line: T,
55
/// the byte position of each `char` in the `line` string slice.
66
pub indices_indexes: Box<[u32]>,
77
}
88

9-
impl<T: AsRef<str>> LineWithIndicesArray<T> {
9+
impl<T: AsRef<str>> WithIndices<T> {
1010
pub fn new(line: T) -> Self {
1111
Self {
1212
indices_indexes: line
@@ -44,33 +44,30 @@ impl<T: AsRef<str>> LineWithIndicesArray<T> {
4444
/// tests are copied from the `substring` crate
4545
#[cfg(test)]
4646
mod tests {
47-
use super::LineWithIndicesArray;
47+
use super::WithIndices;
4848
#[test]
4949
fn test_substring() {
50-
assert_eq!(LineWithIndicesArray::new("foobar").substring(0, 3), "foo");
50+
assert_eq!(WithIndices::new("foobar").substring(0, 3), "foo");
5151
}
5252

5353
#[test]
5454
fn test_out_of_bounds() {
55-
assert_eq!(
56-
LineWithIndicesArray::new("foobar").substring(0, 10),
57-
"foobar"
58-
);
59-
assert_eq!(LineWithIndicesArray::new("foobar").substring(6, 10), "");
55+
assert_eq!(WithIndices::new("foobar").substring(0, 10), "foobar");
56+
assert_eq!(WithIndices::new("foobar").substring(6, 10), "");
6057
}
6158

6259
#[test]
6360
fn test_start_less_than_end() {
64-
assert_eq!(LineWithIndicesArray::new("foobar").substring(3, 2), "");
61+
assert_eq!(WithIndices::new("foobar").substring(3, 2), "");
6562
}
6663

6764
#[test]
6865
fn test_start_and_end_equal() {
69-
assert_eq!(LineWithIndicesArray::new("foobar").substring(3, 3), "");
66+
assert_eq!(WithIndices::new("foobar").substring(3, 3), "");
7067
}
7168

7269
#[test]
7370
fn test_multiple_byte_characters() {
74-
assert_eq!(LineWithIndicesArray::new("fõøbα®").substring(2, 5), "øbα");
71+
assert_eq!(WithIndices::new("fõøbα®").substring(2, 5), "øbα");
7572
}
7673
}

1 commit comment

Comments
 (1)

github-actions[bot] commented on May 10, 2023

@github-actions[bot]

Benchmark

| Benchmark suite | Current: f53f327 | Previous: 5da1d41 | Ratio |
| --- | --- | --- | --- |
| benchmark_concat_generate_base64 | 30364 ns/iter (± 4689) | 28132 ns/iter (± 2351) | 1.08 |
| benchmark_concat_generate_base64_with_cache | 20069 ns/iter (± 3000) | 18107 ns/iter (± 389) | 1.11 |
| benchmark_concat_generate_string | 11303 ns/iter (± 2149) | 14078 ns/iter (± 709) | 0.80 |
| benchmark_concat_generate_string_with_cache | 3567 ns/iter (± 680) | 4018 ns/iter (± 194) | 0.89 |

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.