1
0
Fork 0
mirror of https://github.com/pnx/tree-sitter-dotenv synced 2026-06-16 01:54:56 +02:00

Update grammar to correctly produce value token

Previously, an input such as [0x000KKK] would produce both a
hexadecimal token and a value token (one for each respective part).

However, the more correct behavior is to have the whole input identified as a
value token. Basically, if the whole input between "=" and a special
"end-of-assignment" token can't be identified as a string, bool, integer,
or any other specific type, it should be identified as a value token.
This commit is contained in:
Henrik Hautakoski 2024-12-12 10:55:11 +01:00
parent e7a97e884a
commit d0b54d61aa
8 changed files with 427 additions and 539 deletions

View file

@ -4,12 +4,8 @@
module.exports = grammar({
name: "dotenv",
extras: _ => [
/\s/
],
externals: $ => [
$._empty_value,
$._end_of_assignment,
],
rules: {
@ -21,12 +17,13 @@ module.exports = grammar({
assignment: $ => seq(
field("key", $.identifier),
"=",
field("value", $._value),
optional(field("value", $._value)),
$._end_of_assignment,
),
comment: _ => seq('#', /.*/),
comment: _ => /\#[^\n]*/,
identifier: _ => token(/[A-Za-z_][A-Za-z0-9_]*/),
identifier: _ => /[A-Za-z_][A-Za-z0-9_]*/,
_value: $ => choice(
$.string,
@ -34,7 +31,6 @@ module.exports = grammar({
$.number,
$.boolean,
$.value,
alias($._empty_value, $.value),
),
string: $ => seq(
@ -51,8 +47,8 @@ module.exports = grammar({
// Strings
string_content: _ => token(/[^']*/),
string_interpolation_content: _ => token(/[^"]*/),
string_content: _ => /[^']*/,
string_interpolation_content: _ => /[^"]*/,
// Numbers
@ -62,12 +58,12 @@ module.exports = grammar({
$.hexadecimal,
),
integer: _ => token(/(\-)?\d+/),
float: _ => seq(/(\-)?\d+/, '.', /\d+/),
hexadecimal: _ => seq('0x', /[0-9a-fA-F]+/),
integer: _ => /(\-)?[1-9]\d*/,
hexadecimal: _ => /0[xX][0-9a-fA-F]+/,
float: _ => /(\-)?[1-9]\d*\.\d+/,
boolean: _ => token(choice('true', 'false')),
value: _ => token(prec(-1, /[^\#\s]+/)),
value: _ => /[^\#\s\"\']+/,
},
});

121
src/grammar.json generated
View file

@ -33,34 +33,34 @@
"value": "="
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_value"
}
"type": "CHOICE",
"members": [
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_value"
}
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_end_of_assignment"
}
]
},
"comment": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#"
},
{
"type": "PATTERN",
"value": ".*"
}
]
"type": "PATTERN",
"value": "\\#[^\\n]*"
},
"identifier": {
"type": "TOKEN",
"content": {
"type": "PATTERN",
"value": "[A-Za-z_][A-Za-z0-9_]*"
}
"type": "PATTERN",
"value": "[A-Za-z_][A-Za-z0-9_]*"
},
"_value": {
"type": "CHOICE",
@ -84,15 +84,6 @@
{
"type": "SYMBOL",
"name": "value"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_empty_value"
},
"named": true,
"value": "value"
}
]
},
@ -136,18 +127,12 @@
]
},
"string_content": {
"type": "TOKEN",
"content": {
"type": "PATTERN",
"value": "[^']*"
}
"type": "PATTERN",
"value": "[^']*"
},
"string_interpolation_content": {
"type": "TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\"]*"
}
"type": "PATTERN",
"value": "[^\"]*"
},
"number": {
"type": "CHOICE",
@ -167,41 +152,16 @@
]
},
"integer": {
"type": "TOKEN",
"content": {
"type": "PATTERN",
"value": "(\\-)?\\d+"
}
},
"float": {
"type": "SEQ",
"members": [
{
"type": "PATTERN",
"value": "(\\-)?\\d+"
},
{
"type": "STRING",
"value": "."
},
{
"type": "PATTERN",
"value": "\\d+"
}
]
"type": "PATTERN",
"value": "(\\-)?[1-9]\\d*"
},
"hexadecimal": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "0x"
},
{
"type": "PATTERN",
"value": "[0-9a-fA-F]+"
}
]
"type": "PATTERN",
"value": "0[xX][0-9a-fA-F]+"
},
"float": {
"type": "PATTERN",
"value": "(\\-)?[1-9]\\d*\\.\\d+"
},
"boolean": {
"type": "TOKEN",
@ -220,15 +180,8 @@
}
},
"value": {
"type": "TOKEN",
"content": {
"type": "PREC",
"value": -1,
"content": {
"type": "PATTERN",
"value": "[^\\#\\=\\s]+"
}
}
"type": "PATTERN",
"value": "[^\\#\\s\\\"\\']+"
}
},
"extras": [
@ -242,7 +195,7 @@
"externals": [
{
"type": "SYMBOL",
"name": "_empty_value"
"name": "_end_of_assignment"
}
],
"inline": [],

50
src/node-types.json generated
View file

@ -15,7 +15,7 @@
},
"value": {
"multiple": false,
"required": true,
"required": false,
"types": [
{
"type": "boolean",
@ -41,11 +41,6 @@
}
}
},
{
"type": "comment",
"named": true,
"fields": {}
},
{
"type": "document",
"named": true,
@ -65,21 +60,6 @@
]
}
},
{
"type": "float",
"named": true,
"fields": {}
},
{
"type": "hexadecimal",
"named": true,
"fields": {}
},
{
"type": "integer",
"named": true,
"fields": {}
},
{
"type": "number",
"named": true,
@ -137,22 +117,10 @@
"type": "\"",
"named": false
},
{
"type": "#",
"named": false
},
{
"type": "'",
"named": false
},
{
"type": ".",
"named": false
},
{
"type": "0x",
"named": false
},
{
"type": "=",
"named": false
@ -161,10 +129,26 @@
"type": "boolean",
"named": true
},
{
"type": "comment",
"named": true
},
{
"type": "float",
"named": true
},
{
"type": "hexadecimal",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "integer",
"named": true
},
{
"type": "string_content",
"named": true

735
src/parser.c generated

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,7 @@
#include "tree_sitter/array.h"
enum TokenType {
EMPTY_VALUE,
END_OF_ASSIGNMENT,
};
void *tree_sitter_dotenv_external_scanner_create(void) {
@ -26,7 +26,7 @@ void advanceWS(TSLexer *lexer) {
bool tree_sitter_dotenv_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[EMPTY_VALUE]) {
if (valid_symbols[END_OF_ASSIGNMENT]) {
advanceWS(lexer);
if (lexer->lookahead == '\r') {
@ -36,10 +36,9 @@ bool tree_sitter_dotenv_external_scanner_scan(void *payload, TSLexer *lexer, con
if (lexer->eof(lexer)
|| lexer->lookahead == '#'
|| lexer->lookahead == '\n') {
lexer->result_symbol = EMPTY_VALUE;
lexer->result_symbol = END_OF_ASSIGNMENT;
return true;
}
}
return false;
}

View file

@ -10,11 +10,9 @@ KEY_WITH_VALUE = value
(document
(assignment
key: (identifier)
value: (value))
key: (identifier))
(assignment
key: (identifier)
value: (value))
key: (identifier))
(assignment
key: (identifier)
value: (value)))

View file

@ -11,7 +11,7 @@ Comments
EMPTY_WITH_COMMENT=# comment
EMPTY_WITH_COMMENT_WHITESPACE = # comment
STRING_VALUE_WITH_COMMENT = 'string' # comment
STRING_VALUE_WITH_COMMENT = 'string content' # comment
STRING_VALUE_WITH_COMMENT = "string" # comment
--------------------------------------------------------------------------------
@ -22,12 +22,10 @@ STRING_VALUE_WITH_COMMENT = "string" # comment
(comment)
(comment)
(assignment
key: (identifier)
value: (value))
key: (identifier))
(comment)
(assignment
key: (identifier)
value: (value))
key: (identifier))
(comment)
(assignment
key: (identifier)

View file

@ -4,6 +4,7 @@ Number - Integer
INTEGER_VALUE = 1234
NEGATIVE_INTEGER_VALUE = -1234
INVALID_INTEGER = 01234
--------------------------------------------------------------------------------
@ -15,8 +16,10 @@ NEGATIVE_INTEGER_VALUE = -1234
(assignment
key: (identifier)
value: (number
(integer))))
(integer)))
(assignment
key: (identifier)
value: (value)))
================================================================================
Number - Float
@ -42,6 +45,7 @@ Number - Hex
================================================================================
HEX = 0xff00ff
INVALID_HEX = 0xffUUOKJ
--------------------------------------------------------------------------------
@ -49,7 +53,10 @@ HEX = 0xff00ff
(assignment
key: (identifier)
value: (number
(hexadecimal))))
(hexadecimal)))
(assignment
key: (identifier)
value: (value)))
================================================================================
Boolean