From b0fa52c3afd4ed33899473c0f6c69aad7a202b5b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 23 Jun 2026 14:39:06 -0400 Subject: [PATCH] Fix up regex delimited by \r\n --- src/prism.c | 10 +++++++++- test/prism/regexp_test.rb | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/prism.c b/src/prism.c index 0960e39747..e5e7a0005d 100644 --- a/src/prism.c +++ b/src/prism.c @@ -2249,7 +2249,15 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin if (closing->type == PM_TOKEN_REGEXP_END) { pm_buffer_t unknown_flags = { 0 }; - for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) { + // The closing delimiter is normally a single byte, so the options + // follow it. A `\r\n` newline delimiter is two bytes, however, so we + // skip past it to avoid misreading the trailing `\n` as an option. + const uint8_t *flag = closing->start + 1; + if ((closing->end - closing->start) >= 2 && closing->start[0] == '\r' && closing->start[1] == '\n') { + flag++; + } + + for (; flag < closing->end; flag++) { switch (*flag) { case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break; case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break; diff --git a/test/prism/regexp_test.rb b/test/prism/regexp_test.rb index cde0c23f97..41c51a111c 100644 --- a/test/prism/regexp_test.rb +++ b/test/prism/regexp_test.rb @@ -237,8 +237,27 @@ def test_last_encoding_option_wins assert_equal Regexp::NOENCODING, option end + # A `%r` regexp can use a newline (LF or CRLF) as its delimiter. The closing + # delimiter terminates the regexp and must not be misread as an option -- in + # particular the trailing `\n` of a `\r\n` delimiter. + def test_newline_delimiter + assert_newline_regexp("%r\nfoo\n", "foo", 0) + assert_newline_regexp("%r\r\nfoo\r\n", "foo", 0) + assert_newline_regexp("%r\r\nfoo\r\ni", "foo", Regexp::IGNORECASE) + assert_newline_regexp("%r\r\nfoo\r\nmix", "foo", Regexp::IGNORECASE | Regexp::MULTILINE | Regexp::EXTENDED) + assert_newline_regexp("%r\r\n\r\n", "", 0) + end + private + def assert_newline_regexp(source, content, options) + node = Prism.parse_statement(source) + + assert_kind_of RegularExpressionNode, node + assert_equal content, node.content + assert_equal options, node.options + end + def assert_valid_regexp(source) assert Prism.parse_success?("/#{source}/ =~ \"\"") end