Skip to content

Commit

Permalink
bash: Fix CR handling
Browse files Browse the repository at this point in the history
Fixes #1839

`0005-bash-4.3-msys2-fix-lineendings.patch` adds CRLF support.
However, `0001-bash-4.4-cygwin.patch` had already added the `igncr` option
to Bash to support CRLF.

I confirmed that the Cygwin version of Bash also has the same issue
as #1839 when the `igncr` option is set.

After debugging, I found that `rewind_input_string()` in `parse.y`
does not take the CR into account when rewinding the input.

This PR adds the following changes:
* Modify `rewind_input_string()` to take the CR into account.
  (It might be better to apply a similar change to the Cygwin version of
  Bash.)
* Remove all the changes from `y.tab.c`. This file should be
  automatically generated from `parse.y`.
* Set `LC_ALL=C.UTF-8` when running `make check` so that it also works
  on non-English locales.
* Add two tests: `run-ps1lf` and `run-crlf`.

Note: This patch contains a line with CRLF. So, .gitattributes is also
updated to keep the CRLF.
  • Loading branch information
k-takata committed Feb 10, 2025
1 parent 7c7d154 commit 426a896
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 34 deletions.
1 change: 1 addition & 0 deletions bash/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*-bash-*-msys2-fix-lineendings.patch -text
106 changes: 75 additions & 31 deletions bash/0005-bash-4.3-msys2-fix-lineendings.patch
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ index c1135ec..b388af6 100644
*niflp = invfl;
if (vlp)
diff --git a/parse.y b/parse.y
index 0ae3458..c6e9520 100644
index 8fd24a1c..232a33dc 100644
--- a/parse.y
+++ b/parse.y
@@ -1459,7 +1459,13 @@ yy_input_name ()
Expand All @@ -99,6 +99,23 @@ index 0ae3458..c6e9520 100644
}

/* Call this to unget C. That is, to make C the next character
@@ -1684,7 +1690,15 @@ rewind_input_string ()
into account, e.g., $(...\n) */
xchars = shell_input_line_len - shell_input_line_index;
if (bash_input.location.string[-1] == '\n')
- xchars++;
+ {
+ xchars++;
+#ifdef __MSYS__
+ {
+ if (bash_input.location.string[-2] == '\r')
+ xchars++;
+ }
+#endif
+ }

/* XXX - how to reflect bash_input.location.string back to string passed to
parse_and_execute or xparse_dolparen? xparse_dolparen needs to know how
diff --git a/shell.c b/shell.c
index ee9d445..8f25726 100644
--- a/shell.c
Expand Down Expand Up @@ -156,6 +173,63 @@ index 2001b4e..3ba2029 100644
}
else
break;
diff --git a/tests/crlf.right b/tests/crlf.right
new file mode 100644
index 0000000..d7fd195
--- /dev/null
+++ b/tests/crlf.right
@@ -0,0 +1,8 @@
+Line with LF
+Line with CR
+Line with CRLF
+Line with
+ LF
+Line with CR
+Line with
+ CRLF
diff --git a/tests/crlf.tests b/tests/crlf.tests
new file mode 100644
index 0000000..b2c4da7
--- /dev/null
+++ b/tests/crlf.tests
@@ -0,0 +1,6 @@
+echo $(echo -e "Line with\n LF")
+echo $(echo -e "Line with\r CR")
+echo $(echo -e "Line with\r\n CRLF")
+echo "$(echo -e "Line with\n LF")"
+echo "$(echo -e "Line with\r CR")"
+echo "$(echo -e "Line with\r\n CRLF")"
diff --git a/tests/ps1lf.right b/tests/ps1lf.right
new file mode 100644
index 0000000..ee83131
--- /dev/null
+++ b/tests/ps1lf.right
@@ -0,0 +1,2 @@
+foo
+$ exit
diff --git a/tests/ps1lf.tests b/tests/ps1lf.tests
new file mode 100644
index 0000000..01a2265
--- /dev/null
+++ b/tests/ps1lf.tests
@@ -0,0 +1 @@
+PS1='$(echo foo)\n\$ '
diff --git a/tests/run-crlf b/tests/run-crlf
new file mode 100644
index 0000000..3f6037c
--- /dev/null
+++ b/tests/run-crlf
@@ -0,0 +1,2 @@
+${THIS_SH} ./crlf.tests > ${BASH_TSTOUT}
+diff ${BASH_TSTOUT} crlf.right && rm -f ${BASH_TSTOUT}
diff --git a/tests/run-ps1lf b/tests/run-ps1lf
new file mode 100644
index 0000000..1617108
--- /dev/null
+++ b/tests/run-ps1lf
@@ -0,0 +1,2 @@
+${THIS_SH} --rcfile ./ps1lf.tests -i < /dev/null 2>&1 | tr -d '\r' > ${BASH_TSTOUT}
+diff ${BASH_TSTOUT} ps1lf.right && rm -f ${BASH_TSTOUT}
diff --git a/variables.c b/variables.c
index 028667c..a10594d 100644
--- a/variables.c
Expand All @@ -181,33 +255,3 @@ index 028667c..a10594d 100644
if (shell_variables == 0)
create_variable_tables ();

diff --git a/y.tab.c b/y.tab.c
index 32b4c7c..ac70820 100644
--- a/y.tab.c
+++ b/y.tab.c
@@ -3770,7 +3770,13 @@ yy_input_name ()
static int
yy_getc ()
{
- return (*(bash_input.getter)) ();
+#ifdef __MSYS__
+ int c;
+ while ((c = (*(bash_input.getter)) ()) == '\r');
+ return c;
+#else
+ return (*(bash_input.getter)) ();
+#endif
}

/* Call this to unget C. That is, to make C the next character
@@ -4746,6 +4752,10 @@ shell_getc (remove_quoted_newline)
else
RESIZE_MALLOCED_BUFFER (shell_input_line, i, 2, shell_input_line_size, 256);

+#ifdef __MSYS__
+ if (c == '\r')
+ continue;
+#endif
if (c == EOF)
{
if (bash_input.type == st_stream)
Expand Down
6 changes: 3 additions & 3 deletions bash/PKGBUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pkgname=('bash' 'bash-devel')
_basever=5.2
_patchlevel=037 #prepare for some patches
pkgver=${_basever}.${_patchlevel}
pkgrel=2
pkgrel=3
pkgdesc="The GNU Bourne Again shell"
arch=('i686' 'x86_64')
license=('GPL')
Expand Down Expand Up @@ -82,7 +82,7 @@ build() {

check() {
cd ${srcdir}/${pkgname}-$_basever
make check
LC_ALL=C.UTF-8 make check
}

package_bash() {
Expand Down Expand Up @@ -129,7 +129,7 @@ sha256sums=('a139c166df7ff4471c5e0733051642ee5556c1cc8a4a78f145583c5c81ab32fb'
'SKIP'
'948b8b5401dcb4e5eb577cfa6543e740e2e3bd0690939d8e77d078d75d110097'
'16584e119db9418030912171f89aecae319858ecd357d3e56c95eba83667dae7'
'c55c24110fbe90a2000411239e6399c1baed2843a61220b4e8a7a036f4a7436a'
'b598a3dcfab16eb2bd0cee4228a7f0041fc6f10892e5ffc9da1ae1c2e1d7570e'
'500c75c64593a70276585345a55c807226c0cc220d08b7cccece2ab005b3bcea'
'cbae1aa81d56eba4e916bdaf2b2983731d6e2537dd8d606a3b378e49bcb81e79'
'f42f2fee923bc2209f406a1892772121c467f44533bedfe00a176139da5d310a'
Expand Down

0 comments on commit 426a896

Please sign in to comment.