diff --git a/snapshots/percent_array_newline_delimiter.txt b/snapshots/percent_array_newline_delimiter.txt new file mode 100644 index 0000000000..e99c63e17c --- /dev/null +++ b/snapshots/percent_array_newline_delimiter.txt @@ -0,0 +1,75 @@ +@ ProgramNode (location: (1,0)-(12,0)) +├── flags: ∅ +├── locals: [] +└── statements: + @ StatementsNode (location: (1,0)-(12,0)) + ├── flags: ∅ + └── body: (length: 4) + ├── @ ArrayNode (location: (1,0)-(3,0)) + │ ├── flags: newline + │ ├── elements: (length: 2) + │ │ ├── @ StringNode (location: (2,0)-(2,3)) + │ │ │ ├── flags: ∅ + │ │ │ ├── opening_loc: ∅ + │ │ │ ├── content_loc: (2,0)-(2,3) = "foo" + │ │ │ ├── closing_loc: ∅ + │ │ │ └── unescaped: "foo" + │ │ └── @ StringNode (location: (2,4)-(2,7)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: ∅ + │ │ ├── content_loc: (2,4)-(2,7) = "bar" + │ │ ├── closing_loc: ∅ + │ │ └── unescaped: "bar" + │ ├── opening_loc: (1,0)-(2,0) = "%w\n" + │ └── closing_loc: (2,7)-(3,0) = "\n" + ├── @ ArrayNode (location: (4,0)-(6,0)) + │ ├── flags: newline, static_literal + │ ├── elements: (length: 2) + │ │ ├── @ SymbolNode (location: (5,0)-(5,3)) + │ │ │ ├── flags: static_literal, forced_us_ascii_encoding + │ │ │ ├── opening_loc: ∅ + │ │ │ ├── value_loc: (5,0)-(5,3) = "baz" + │ │ │ ├── closing_loc: ∅ + │ │ │ └── unescaped: "baz" + │ │ └── @ SymbolNode (location: (5,4)-(5,7)) + │ │ ├── flags: static_literal, forced_us_ascii_encoding + │ │ ├── opening_loc: ∅ + │ │ ├── value_loc: (5,4)-(5,7) = "qux" + │ │ ├── closing_loc: ∅ + │ │ └── unescaped: "qux" + │ ├── opening_loc: (4,0)-(5,0) = "%i\n" + │ └── closing_loc: (5,7)-(6,0) = "\n" + ├── @ ArrayNode (location: (7,0)-(9,0)) + │ ├── flags: newline + │ ├── elements: (length: 2) + │ │ ├── @ StringNode (location: (8,0)-(8,1)) + │ │ │ ├── flags: ∅ + │ │ │ ├── opening_loc: ∅ + │ │ │ ├── content_loc: (8,0)-(8,1) = "a" + │ │ │ ├── closing_loc: ∅ + │ │ │ └── unescaped: "a" + │ │ └── @ StringNode (location: (8,2)-(8,3)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: ∅ + │ │ ├── content_loc: (8,2)-(8,3) = "b" + │ │ ├── closing_loc: ∅ + │ │ └── unescaped: "b" + │ ├── opening_loc: (7,0)-(8,0) = "%W\n" + │ └── closing_loc: (8,3)-(9,0) = "\n" + └── @ ArrayNode (location: (10,0)-(12,0)) + ├── flags: newline, static_literal + ├── elements: (length: 2) + │ ├── @ SymbolNode (location: (11,0)-(11,1)) + │ │ ├── flags: static_literal, forced_us_ascii_encoding + │ │ ├── opening_loc: ∅ + │ │ ├── value_loc: (11,0)-(11,1) = "c" + │ │ ├── closing_loc: ∅ + │ │ └── unescaped: "c" + │ └── @ SymbolNode (location: (11,2)-(11,3)) + │ ├── flags: static_literal, forced_us_ascii_encoding + │ ├── opening_loc: ∅ + │ ├── value_loc: (11,2)-(11,3) = "d" + │ ├── closing_loc: ∅ + │ └── unescaped: "d" + ├── opening_loc: (10,0)-(11,0) = "%I\n" + └── closing_loc: (11,3)-(12,0) = "\n" diff --git a/src/prism.c b/src/prism.c index 0512afae41..bb9f97664a 100644 --- a/src/prism.c +++ b/src/prism.c @@ -11373,6 +11373,8 @@ parser_lex(pm_parser_t *parser) { // First we'll set the beginning of the token. parser->current.start = parser->current.end; + pm_lex_mode_t *lex_mode = parser->lex_modes.current; + // If there's any whitespace at the start of the list, then we're // going to trim it off the beginning and create a new token. size_t whitespace; @@ -11382,6 +11384,12 @@ parser_lex(pm_parser_t *parser) { if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') { whitespace += 1; } + } else if (lex_mode->as.list.terminator == '\n') { + // When the list delimiter is a newline (e.g. `%w` followed by a + // newline), the newline is the terminator rather than a word + // separator. We only trim inline whitespace here so that the + // terminating newline is left for the terminator handling below. + whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end); } else { whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } @@ -11403,7 +11411,6 @@ parser_lex(pm_parser_t *parser) { // Here we'll get a list of the places where strpbrk should break, // and then find the first one. - pm_lex_mode_t *lex_mode = parser->lex_modes.current; const uint8_t *breakpoints = lex_mode->as.list.breakpoints; const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); @@ -11413,8 +11420,10 @@ parser_lex(pm_parser_t *parser) { while (breakpoint != NULL) { // If we hit whitespace, then we must have received content by - // now, so we can return an element of the list. - if (pm_char_is_whitespace(*breakpoint)) { + // now, so we can return an element of the list. A whitespace + // character that is also the terminator (e.g. a newline + // delimiter) is handled by the terminator check below, not here. + if (pm_char_is_whitespace(*breakpoint) && *breakpoint != lex_mode->as.list.terminator) { parser->current.end = breakpoint; pm_token_buffer_flush(parser, &token_buffer); LEX(PM_TOKEN_STRING_CONTENT); @@ -11443,6 +11452,14 @@ parser_lex(pm_parser_t *parser) { // Otherwise, switch back to the default state and return // the end of the list. parser->current.end = breakpoint + 1; + + // If the terminator is a newline (i.e. the list delimiter + // was a newline), then we need to record it so that line + // numbers after the list remain accurate. + if (*breakpoint == '\n') { + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + } + lex_mode_pop(parser); lex_state_set(parser, PM_LEX_STATE_END); LEX(PM_TOKEN_STRING_END); diff --git a/test/prism/fixtures/percent_array_newline_delimiter.txt b/test/prism/fixtures/percent_array_newline_delimiter.txt new file mode 100644 index 0000000000..27096a5fef --- /dev/null +++ b/test/prism/fixtures/percent_array_newline_delimiter.txt @@ -0,0 +1,11 @@ +%w +foo bar + +%i +baz qux + +%W +a b + +%I +c d diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb index bc89bdae72..0a89e784f6 100644 --- a/test/prism/ruby/ruby_parser_test.rb +++ b/test/prism/ruby/ruby_parser_test.rb @@ -46,6 +46,7 @@ class RubyParserTest < TestCase "multi_write.txt", "not.txt", "patterns.txt", + "percent_array_newline_delimiter.txt", "regex.txt", "seattlerb/and_multi.txt", "seattlerb/heredoc__backslash_dos_format.txt", diff --git a/test/prism/snippets_test.rb b/test/prism/snippets_test.rb index 3c28d27a25..5e667c48a0 100644 --- a/test/prism/snippets_test.rb +++ b/test/prism/snippets_test.rb @@ -7,6 +7,7 @@ class SnippetsTest < TestCase except = [ "encoding_binary.txt", "newline_terminated.txt", + "percent_array_newline_delimiter.txt", "seattlerb/begin_rescue_else_ensure_no_bodies.txt", "seattlerb/case_in.txt", "seattlerb/parse_line_defn_no_parens.txt",