1
0
Fork 0
mirror of https://github.com/pnx/tree-sitter-dotenv synced 2026-06-16 01:54:56 +02:00

Update grammar to correctly produce value token

Previously, an input such as [0x000KKK] would produce both a hexadecimal
token and a value token (one for each respective part of the input).

However, the more correct behaviour is to identify the whole input as a
single value token. Basically, if the whole input between "=" and a special
"end-of-assignment" token can't be identified as a string, boolean, integer,
or any other typed token, it should be identified as a value token.
This commit is contained in:
Henrik Hautakoski 2024-12-12 10:55:11 +01:00
parent e7a97e884a
commit d0b54d61aa
8 changed files with 427 additions and 539 deletions

View file

@ -4,12 +4,8 @@
module.exports = grammar({ module.exports = grammar({
name: "dotenv", name: "dotenv",
extras: _ => [
/\s/
],
externals: $ => [ externals: $ => [
$._empty_value, $._end_of_assignment,
], ],
rules: { rules: {
@ -21,12 +17,13 @@ module.exports = grammar({
assignment: $ => seq( assignment: $ => seq(
field("key", $.identifier), field("key", $.identifier),
"=", "=",
field("value", $._value), optional(field("value", $._value)),
$._end_of_assignment,
), ),
comment: _ => seq('#', /.*/), comment: _ => /\#[^\n]*/,
identifier: _ => token(/[A-Za-z_][A-Za-z0-9_]*/), identifier: _ => /[A-Za-z_][A-Za-z0-9_]*/,
_value: $ => choice( _value: $ => choice(
$.string, $.string,
@ -34,7 +31,6 @@ module.exports = grammar({
$.number, $.number,
$.boolean, $.boolean,
$.value, $.value,
alias($._empty_value, $.value),
), ),
string: $ => seq( string: $ => seq(
@ -51,8 +47,8 @@ module.exports = grammar({
// Strings // Strings
string_content: _ => token(/[^']*/), string_content: _ => /[^']*/,
string_interpolation_content: _ => token(/[^"]*/), string_interpolation_content: _ => /[^"]*/,
// Numbers // Numbers
@ -62,12 +58,12 @@ module.exports = grammar({
$.hexadecimal, $.hexadecimal,
), ),
integer: _ => token(/(\-)?\d+/), integer: _ => /(\-)?[1-9]\d*/,
float: _ => seq(/(\-)?\d+/, '.', /\d+/), hexadecimal: _ => /0[xX][0-9a-fA-F]+/,
hexadecimal: _ => seq('0x', /[0-9a-fA-F]+/), float: _ => /(\-)?[1-9]\d*\.\d+/,
boolean: _ => token(choice('true', 'false')), boolean: _ => token(choice('true', 'false')),
value: _ => token(prec(-1, /[^\#\s]+/)), value: _ => /[^\#\s\"\']+/,
}, },
}); });

121
src/grammar.json generated
View file

@ -33,34 +33,34 @@
"value": "=" "value": "="
}, },
{ {
"type": "FIELD", "type": "CHOICE",
"name": "value", "members": [
"content": { {
"type": "SYMBOL", "type": "FIELD",
"name": "_value" "name": "value",
} "content": {
"type": "SYMBOL",
"name": "_value"
}
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_end_of_assignment"
} }
] ]
}, },
"comment": { "comment": {
"type": "SEQ", "type": "PATTERN",
"members": [ "value": "\\#[^\\n]*"
{
"type": "STRING",
"value": "#"
},
{
"type": "PATTERN",
"value": ".*"
}
]
}, },
"identifier": { "identifier": {
"type": "TOKEN", "type": "PATTERN",
"content": { "value": "[A-Za-z_][A-Za-z0-9_]*"
"type": "PATTERN",
"value": "[A-Za-z_][A-Za-z0-9_]*"
}
}, },
"_value": { "_value": {
"type": "CHOICE", "type": "CHOICE",
@ -84,15 +84,6 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "value" "name": "value"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_empty_value"
},
"named": true,
"value": "value"
} }
] ]
}, },
@ -136,18 +127,12 @@
] ]
}, },
"string_content": { "string_content": {
"type": "TOKEN", "type": "PATTERN",
"content": { "value": "[^']*"
"type": "PATTERN",
"value": "[^']*"
}
}, },
"string_interpolation_content": { "string_interpolation_content": {
"type": "TOKEN", "type": "PATTERN",
"content": { "value": "[^\"]*"
"type": "PATTERN",
"value": "[^\"]*"
}
}, },
"number": { "number": {
"type": "CHOICE", "type": "CHOICE",
@ -167,41 +152,16 @@
] ]
}, },
"integer": { "integer": {
"type": "TOKEN", "type": "PATTERN",
"content": { "value": "(\\-)?[1-9]\\d*"
"type": "PATTERN",
"value": "(\\-)?\\d+"
}
},
"float": {
"type": "SEQ",
"members": [
{
"type": "PATTERN",
"value": "(\\-)?\\d+"
},
{
"type": "STRING",
"value": "."
},
{
"type": "PATTERN",
"value": "\\d+"
}
]
}, },
"hexadecimal": { "hexadecimal": {
"type": "SEQ", "type": "PATTERN",
"members": [ "value": "0[xX][0-9a-fA-F]+"
{ },
"type": "STRING", "float": {
"value": "0x" "type": "PATTERN",
}, "value": "(\\-)?[1-9]\\d*\\.\\d+"
{
"type": "PATTERN",
"value": "[0-9a-fA-F]+"
}
]
}, },
"boolean": { "boolean": {
"type": "TOKEN", "type": "TOKEN",
@ -220,15 +180,8 @@
} }
}, },
"value": { "value": {
"type": "TOKEN", "type": "PATTERN",
"content": { "value": "[^\\#\\s\\\"\\']+"
"type": "PREC",
"value": -1,
"content": {
"type": "PATTERN",
"value": "[^\\#\\=\\s]+"
}
}
} }
}, },
"extras": [ "extras": [
@ -242,7 +195,7 @@
"externals": [ "externals": [
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_empty_value" "name": "_end_of_assignment"
} }
], ],
"inline": [], "inline": [],

50
src/node-types.json generated
View file

@ -15,7 +15,7 @@
}, },
"value": { "value": {
"multiple": false, "multiple": false,
"required": true, "required": false,
"types": [ "types": [
{ {
"type": "boolean", "type": "boolean",
@ -41,11 +41,6 @@
} }
} }
}, },
{
"type": "comment",
"named": true,
"fields": {}
},
{ {
"type": "document", "type": "document",
"named": true, "named": true,
@ -65,21 +60,6 @@
] ]
} }
}, },
{
"type": "float",
"named": true,
"fields": {}
},
{
"type": "hexadecimal",
"named": true,
"fields": {}
},
{
"type": "integer",
"named": true,
"fields": {}
},
{ {
"type": "number", "type": "number",
"named": true, "named": true,
@ -137,22 +117,10 @@
"type": "\"", "type": "\"",
"named": false "named": false
}, },
{
"type": "#",
"named": false
},
{ {
"type": "'", "type": "'",
"named": false "named": false
}, },
{
"type": ".",
"named": false
},
{
"type": "0x",
"named": false
},
{ {
"type": "=", "type": "=",
"named": false "named": false
@ -161,10 +129,26 @@
"type": "boolean", "type": "boolean",
"named": true "named": true
}, },
{
"type": "comment",
"named": true
},
{
"type": "float",
"named": true
},
{
"type": "hexadecimal",
"named": true
},
{ {
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "integer",
"named": true
},
{ {
"type": "string_content", "type": "string_content",
"named": true "named": true

735
src/parser.c generated

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,7 @@
#include "tree_sitter/array.h" #include "tree_sitter/array.h"
enum TokenType { enum TokenType {
EMPTY_VALUE, END_OF_ASSIGNMENT,
}; };
void *tree_sitter_dotenv_external_scanner_create(void) { void *tree_sitter_dotenv_external_scanner_create(void) {
@ -26,7 +26,7 @@ void advanceWS(TSLexer *lexer) {
bool tree_sitter_dotenv_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { bool tree_sitter_dotenv_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[EMPTY_VALUE]) { if (valid_symbols[END_OF_ASSIGNMENT]) {
advanceWS(lexer); advanceWS(lexer);
if (lexer->lookahead == '\r') { if (lexer->lookahead == '\r') {
@ -36,10 +36,9 @@ bool tree_sitter_dotenv_external_scanner_scan(void *payload, TSLexer *lexer, con
if (lexer->eof(lexer) if (lexer->eof(lexer)
|| lexer->lookahead == '#' || lexer->lookahead == '#'
|| lexer->lookahead == '\n') { || lexer->lookahead == '\n') {
lexer->result_symbol = EMPTY_VALUE; lexer->result_symbol = END_OF_ASSIGNMENT;
return true; return true;
} }
} }
return false; return false;
} }

View file

@ -10,11 +10,9 @@ KEY_WITH_VALUE = value
(document (document
(assignment (assignment
key: (identifier) key: (identifier))
value: (value))
(assignment (assignment
key: (identifier) key: (identifier))
value: (value))
(assignment (assignment
key: (identifier) key: (identifier)
value: (value))) value: (value)))

View file

@ -11,7 +11,7 @@ Comments
EMPTY_WITH_COMMENT=# comment EMPTY_WITH_COMMENT=# comment
EMPTY_WITH_COMMENT_WHITESPACE = # comment EMPTY_WITH_COMMENT_WHITESPACE = # comment
STRING_VALUE_WITH_COMMENT = 'string' # comment STRING_VALUE_WITH_COMMENT = 'string content' # comment
STRING_VALUE_WITH_COMMENT = "string" # comment STRING_VALUE_WITH_COMMENT = "string" # comment
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@ -22,12 +22,10 @@ STRING_VALUE_WITH_COMMENT = "string" # comment
(comment) (comment)
(comment) (comment)
(assignment (assignment
key: (identifier) key: (identifier))
value: (value))
(comment) (comment)
(assignment (assignment
key: (identifier) key: (identifier))
value: (value))
(comment) (comment)
(assignment (assignment
key: (identifier) key: (identifier)

View file

@ -4,6 +4,7 @@ Number - Integer
INTEGER_VALUE = 1234 INTEGER_VALUE = 1234
NEGATIVE_INTEGER_VALUE = -1234 NEGATIVE_INTEGER_VALUE = -1234
INVALID_INTEGER = 01234
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@ -15,8 +16,10 @@ NEGATIVE_INTEGER_VALUE = -1234
(assignment (assignment
key: (identifier) key: (identifier)
value: (number value: (number
(integer)))) (integer)))
(assignment
key: (identifier)
value: (value)))
================================================================================ ================================================================================
Number - Float Number - Float
@ -42,6 +45,7 @@ Number - Hex
================================================================================ ================================================================================
HEX = 0xff00ff HEX = 0xff00ff
INVALID_HEX = 0xffUUOKJ
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@ -49,7 +53,10 @@ HEX = 0xff00ff
(assignment (assignment
key: (identifier) key: (identifier)
value: (number value: (number
(hexadecimal)))) (hexadecimal)))
(assignment
key: (identifier)
value: (value)))
================================================================================ ================================================================================
Boolean Boolean