-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix f-string formatting in assignment statement #14454
base: dhruv/f-string-assignment-1
Are you sure you want to change the base?
Fix f-string formatting in assignment statement #14454
Conversation
4f24b97
to
9f968a5
Compare
|
This is looking good! |
This comment was marked as resolved.
This comment was marked as resolved.
b7aa489
to
c2d8b38
Compare
19e6a99
to
4c71b5d
Compare
c2d8b38
to
55574a9
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This overall looks good, but I think we're now using the BestFit
layout in too many places, which leads to bad formatting in clause headers (see my inline comment).
I think we should only use the BestFit
layout if the FString
uses the flat layout. In all other cases, we should use the Multiline
layout.
We should add some more tests that cover BestFit
layout usages in non assignment positions to verify that we got the needs_parentheses
changes correct.
See
Lines 296 to 321 in 73ee72b
############################################################################## | |
# Regressions | |
############################################################################## | |
LEEEEEEEEEEEEEEEEEEEEEEFT = RRRRRRRRIIIIIIIIIIIIGGGGGHHHT | { | |
"entityNameeeeeeeeeeeeeeeeee", # comment must be long enough to | |
"some long implicit concatenated string" "that should join" | |
} | |
# Ensure that flipping between Multiline and BestFit layout results in stable formatting | |
# when using IfBreaksParenthesized layout. | |
assert False, "Implicit concatenated string" "uses {} layout on {} format".format( | |
"Multiline", "first" | |
) | |
assert False, await "Implicit concatenated string" "uses {} layout on {} format".format( | |
"Multiline", "first" | |
) | |
assert False, "Implicit concatenated stringuses {} layout on {} format"[ | |
aaaaaaaaa, bbbbbb | |
] | |
assert False, +"Implicit concatenated string" "uses {} layout on {} format".format( | |
"Multiline", "first" | |
) |
and
Lines 128 to 224 in 73ee72b
# Fits | |
with "aa" "bbb" "cccccccccccccccccccccccccccccccccccccccccccccc": | |
pass | |
# Parenthesize single-line | |
with "aa" "bbb" "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc": | |
pass | |
# Multiline | |
with "aa" "bbb" "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc": | |
pass | |
with f"aaaaaaa{expression}bbbb" f"ccc {20999}" "more": | |
pass | |
############################################################################## | |
# For loops | |
############################################################################## | |
# Flat | |
for a in "aaaaaaaaa" "bbbbbbbbb" "ccccccccc" "dddddddddd": | |
pass | |
# Parenthesize single-line | |
for a in "aaaaaaaaa" "bbbbbbbbb" "ccccccccc" "dddddddddd" "eeeeeeeeeeeeeee" "fffffffffffff" "ggggggggggggggg" "hh": | |
pass | |
# Multiline | |
for a in "aaaaaaaaa" "bbbbbbbbb" "ccccccccc" "dddddddddd" "eeeeeeeeeeeeeee" "fffffffffffff" "ggggggggggggggg" "hhhh": | |
pass | |
############################################################################## | |
# Assert statement | |
############################################################################## | |
# Fits | |
assert "aaaaaaaaa" "bbbbbbbbbbbb", "cccccccccccccccc" "dddddddddddddddd" | |
# Wrap right | |
assert "aaaaaaaaa" "bbbbbbbbbbbb", "cccccccccccccccc" "dddddddddddddddd" "eeeeeeeeeeeee" "fffffffffff" | |
# Right multiline | |
assert "aaaaaaaaa" "bbbbbbbbbbbb", "cccccccccccccccc" "dddddddddddddddd" "eeeeeeeeeeeee" "fffffffffffffff" "ggggggggggggg" "hhhhhhhhhhh" | |
# Wrap left | |
assert "aaaaaaaaa" "bbbbbbbbbbbb" "cccccccccccccccc" "dddddddddddddddd" "eeeeeeeeeeeee" "fffffffffffffff", "ggggggggggggg" "hhhhhhhhhhh" | |
# Left multiline | |
assert "aaaaaaaaa" "bbbbbbbbbbbb" "cccccccccccccccc" "dddddddddddddddd" "eeeeeeeeeeeee" "fffffffffffffff" "ggggggggggggg", "hhhhhhhhhhh" | |
# wrap both | |
assert "aaaaaaaaa" "bbbbbbbbbbbb" "cccccccccccccccc" "dddddddddddddddd" "eeeeeeeeeeeee" "fffffffffffffff", "ggggggggggggg" "hhhhhhhhhhh" "iiiiiiiiiiiiiiiiii" "jjjjjjjjjjjjj" "kkkkkkkkkkkkkkkkk" "llllllllllll" | |
# both multiline | |
assert "aaaaaaaaa" "bbbbbbbbbbbb" "cccccccccccccccc" "dddddddddddddddd" "eeeeeeeeeeeee" "fffffffffffffff" "ggggggggggggg", "hhhhhhhhhhh" "iiiiiiiiiiiiiiiiii" "jjjjjjjjjjjjj" "kkkkkkkkkkkkkkkkk" "llllllllllll" "mmmmmmmmmmmmmm" | |
############################################################################## | |
# In clause headers (can_omit_optional_parentheses) | |
############################################################################## | |
# Use can_omit_optional_parentheses layout to avoid an instability where the formatter | |
# picks the can_omit_optional_parentheses layout when the strings are joined. | |
if ( | |
f"implicit" | |
"concatenated" | |
"string" + f"implicit" | |
"concaddddddddddded" | |
"ring" | |
* len([aaaaaa, bbbbbbbbbbbbbbbb, cccccccccccccccccc, ddddddddddddddddddddddddddd]) | |
): | |
pass | |
# Keep parenthesizing multiline - implicit concatenated strings | |
if ( | |
f"implicit" | |
"""concatenate | |
d""" | |
"string" + f"implicit" | |
"concaddddddddddded" | |
"ring" | |
* len([aaaaaa, bbbbbbbbbbbbbbbb, cccccccccccccccccc, ddddddddddddddddddddddddddd]) | |
): | |
pass | |
if ( | |
[ | |
aaaaaa, | |
bbbbbbbbbbbbbbbb, | |
cccccccccccccccccc, | |
ddddddddddddddddddddddddddd, | |
] | |
+ "implicitconcat" | |
"enatedstriiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiing" | |
): | |
pass |
struct FStringMultilineVisitor<'a> { | ||
context: &'a PyFormatContext<'a>, | ||
is_multiline: bool, | ||
} | ||
|
||
impl<'a> FStringMultilineVisitor<'a> { | ||
fn new(context: &'a PyFormatContext<'a>) -> Self { | ||
Self { | ||
context, | ||
is_multiline: false, | ||
} | ||
} | ||
} | ||
|
||
impl<'a> SourceOrderVisitor<'a> for FStringMultilineVisitor<'a> { | ||
fn enter_node(&mut self, _node: ast::AnyNodeRef<'a>) -> TraversalSignal { | ||
if self.is_multiline { | ||
TraversalSignal::Skip | ||
} else { | ||
TraversalSignal::Traverse | ||
} | ||
} | ||
|
||
fn visit_string_literal(&mut self, string_literal: &'a ast::StringLiteral) { | ||
if string_literal.flags.is_triple_quoted() | ||
&& self | ||
.context | ||
.source() | ||
.contains_line_break(string_literal.range()) | ||
{ | ||
self.is_multiline = true; | ||
} | ||
} | ||
|
||
fn visit_f_string_element(&mut self, f_string_element: &'a ast::FStringElement) { | ||
let is_multiline = match f_string_element { | ||
ast::FStringElement::Literal(literal) => { | ||
self.context.source().contains_line_break(literal.range()) | ||
} | ||
ast::FStringElement::Expression(expression) => { | ||
if is_f_string_formatting_enabled(self.context) { | ||
// Expressions containing comments can't be joined. | ||
self.context.comments().contains_comments(expression.into()) | ||
} else { | ||
// Multiline f-string expressions can't be joined if the f-string formatting is | ||
// disabled because the string gets inserted in verbatim preserving the | ||
// newlines. | ||
self.context | ||
.source() | ||
.contains_line_break(expression.range()) | ||
} | ||
} | ||
}; | ||
if is_multiline { | ||
self.is_multiline = true; | ||
} else { | ||
// Continue walking the f-string elements to visit the ones in format specifiers. | ||
// | ||
// For example, the following should be considered multiline because the literal part | ||
// of the format specifier contains a newline at the end (`.3f\n`): | ||
// ```py | ||
// x = f"hello {a + b + c + d:.3f | ||
// } world" | ||
// ``` | ||
walk_f_string_element(self, f_string_element); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's unclear to me why this needs a visitor, considering that we only visit f-string elements. It may need to be recursive but I think that would be easier to understood than a full visitor (where it's difficult to understand where and how it recurses)
I see that your implementation differs from
ruff/crates/ruff_python_formatter/src/string/implicit.rs
Lines 190 to 207 in 3a681eb
if fstring.elements.iter().any(|element| match element { | |
// Same as for other literals. Multiline literals can't fit on a single line. | |
FStringElement::Literal(literal) => { | |
context.source().contains_line_break(literal.range()) | |
} | |
FStringElement::Expression(expression) => { | |
if is_f_string_formatting_enabled(context) { | |
// Expressions containing comments can't be joined. | |
context.comments().contains_comments(expression.into()) | |
} else { | |
// Multiline f-string expressions can't be joined if the f-string formatting is disabled because | |
// the string gets inserted in verbatim preserving the newlines. | |
context.source().contains_line_break(expression.range()) | |
} | |
} | |
}) { | |
return None; | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I can use recursion instead, I think that might be simpler.
in that it also covers format spec. Do we need to update the implicit concation formatting too? Can we share the logic
I think so although I'll have to check. I can make it a shared function
// This isn't decided yet, refer to the relevant discussion: | ||
// https://github.com/astral-sh/ruff/discussions/9785 | ||
else if StringLike::FString(self).is_multiline(context.source()) { | ||
} else if StringLike::FString(self).is_multiline(context) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think using BestFit
for all f-strings that contain multiline expressions isn't correct. For example. It results in
if f"aaaaaaaaaaa { ttttteeeeeeeeest} more {
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
}": pass
being formatted as
if f"aaaaaaaaaaa {ttttteeeeeeeeest} more {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}":
pass
Which seems worse than before.
I think we should keep using Multiline
if the f-string has any multiline expression to avoid collapsing them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the reason it was formatted like that previously is because the needs_parentheses
would return OptionalParentheses::Never
. If we would return Multiline
then I think parentheses will be added making it:
if (
f"aaaaaaaaaaa {ttttteeeeeeeeest} more {
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
}"
):
pass
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can also decide to return Never
. It's not entirely clear to me what the better and more consistent layout is. I would have to play with a few layouts but I don't think it should be what it is now.
Using Never has the advantage that it avoids unnecessary parentheses and is closer to what we had today (and no one complained?). Adding parentheses is similar to having "aaaaa" + tttttt + "more(aaaaaaaaaaaaaaaaaaaaaaaaaa)
. It might be good to play with the formatting in other positions where we use BestFit
to decide what the best layout is
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that's a good idea, I can do that.
@@ -28,7 +28,7 @@ impl NeedsParentheses for ExprBinOp { | |||
} else if let Ok(string) = StringLike::try_from(&*self.left) { | |||
// Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add an example covering this path with an expression that starts as a mulitline f-string but then gets formatted as a flat f-string?
if string.is_implicit_concatenated() || !string.is_multiline(context) { | ||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Your change is an improvement to what we had before.
For example, this was correctly formatted code before
call(f"{
testeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
}")
But I don't think it's correct to use the "hug" layout if an inner expression is multiline
call(f"{
aaaaaa
+ '''test
more'''
}")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I follow here. Are you saying that both the code snippet should result in the formatting where the f-string is on the same line as the call expression (call(f"{
)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not directly related to your PR, but I noticed it when reviewing it because you changed is_multiline
. We have to make a decision on the idiomatic formatting for "hugging multiline strings" for f-strings.
I'm leaning towards that we should only "hug" if the f-string itself contains any multiline string literal, but not if any inner-expression is multiline. Similar to prettier:
call(
`${
aaaaaaaaaaaaaaaaaaaaaaaaaaaa +
`test more
aaa` +
morrrrrrrrrrrrrrrrrrrrrr
}`,
);
So what I'm saying is that we should probably not hug for the both cases above.
} | ||
|
||
impl<'a> FormatFStringAssignment<'a> { | ||
fn new(string: StringLike<'a>, context: &PyFormatContext) -> Option<Self> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we instead return a FormatFString
instead of having a new FormatFStringAssignment
, considering that we just forward the call?
// ``` | ||
|
||
// Keep the target flat, and use the regular f-string formatting. | ||
// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// ``` | |
// Keep the target flat, and use the regular f-string formatting. | |
// | |
// ``` | |
// | |
// Keep the target flat, and use the regular f-string formatting. | |
// |
@@ -154,7 +154,7 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> { | |||
} | |||
|
|||
// Multiline strings can never fit on a single line. | |||
if !string.is_fstring() && string.is_multiline(context.source()) { | |||
if !string.is_fstring() && string.is_multiline(context) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we still need the check on line 190-204 considering that it's now covered by string.is_multiline
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think not, I can remove it. Good catch.
} | ||
|
||
// This checks whether the f-string is multi-line and it can *never* be flattened. Thus, | ||
// we cannot try the flattened layout. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// we cannot try the flattened layout. | |
// it's useless to try the flattened layout. |
}; | ||
if is_multiline { | ||
self.is_multiline = true; | ||
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to consider the debug text as well?
aaaaaaaaaaaa["bbbbbbbbbbbbbbbb"] = ( | ||
f"aaaaaaaaaaaaaaaaaaa { | ||
aaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbb + cccccccccccccccccccccc + dddddddddddddddddddddddddddd} ddddddddddddddddddd" | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add some examples with inner comments or debug expressions (and format specs)? See
Lines 236 to 293 in 73ee72b
# Don't inline f-strings that contain expressions that are guaranteed to split, e.b. because of a magic trailing comma | |
aaaaaaaaaaaaaaaaaa = f"testeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee{ | |
[a,] | |
}" "moreeeeeeeeeeeeeeeeeeee" "test" # comment | |
aaaaaaaaaaaaaaaaaa = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee{ | |
[a,] | |
}" "moreeeeeeeeeeeeeeeeeeee" "test" # comment | |
) | |
aaaaa[aaaaaaaaaaa] = f"testeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee{ | |
[a,] | |
}" "moreeeeeeeeeeeeeeeeeeee" "test" # comment | |
aaaaa[aaaaaaaaaaa] = (f"testeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee{ | |
[a,] | |
}" "moreeeeeeeeeeeeeeeeeeee" "test" # comment | |
) | |
# Don't inline f-strings that contain commented expressions | |
aaaaaaaaaaaaaaaaaa = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{[ | |
a # comment | |
]}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) | |
aaaaa[aaaaaaaaaaa] = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{[ | |
a # comment | |
]}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) | |
# Don't inline f-strings with multiline debug expressions: | |
aaaaaaaaaaaaaaaaaa = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{ | |
a=}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) | |
aaaaaaaaaaaaaaaaaa = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{a + | |
b=}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) | |
aaaaaaaaaaaaaaaaaa = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{a | |
=}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) | |
aaaaa[aaaaaaaaaaa] = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{ | |
a=}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) | |
aaaaa[aaaaaaaaaaa] = ( | |
f"testeeeeeeeeeeeeeeeeeeeeeeeee{a | |
=}" "moreeeeeeeeeeeeeeeeeetest" # comment | |
) |
Summary
fixes: #13813
This PR fixes a bug in the formatting assignment statement when the value is an f-string.
This is resolved by using custom best fit layouts if the f-string is (a) not already a flat f-string (thus, cannot be multiline) and (b) is not a multiline string (thus, cannot be flattened). So, it is used in cases like the following:
Which is (a)
FStringLayout::Multiline
and (b) not a multiline.There are various other examples in the PR diff along with additional explanation and context as code comments.
Test Plan
Add multiple test cases for various scenarios.