diff options
author | Gavin Howard <gavin@yzena.com> | 2021-07-20 19:50:18 -0600 |
---|---|---|
committer | Gavin Howard <gavin@yzena.com> | 2021-07-20 19:50:18 -0600 |
commit | df956b3c36243a401857fe21f4600efae801856d (patch) | |
tree | 9e6cd3bfc6dbc0f056b849558dab9714981f63ad /src | |
parent | 8da9ac65bc2e59e12952d5f96b0bbb38fcb11291 (diff) | |
download | platform_external_bc-df956b3c36243a401857fe21f4600efae801856d.tar.gz platform_external_bc-df956b3c36243a401857fe21f4600efae801856d.tar.bz2 platform_external_bc-df956b3c36243a401857fe21f4600efae801856d.zip |
Add the ability for users to redefine keywords
This was an idea from Stefan Esser of FreeBSD. At first, I did not like
it because it seemed like it would require a deep change to the parser,
to check everywhere a name could be used.
Fortunately, I came up with the idea of using an array of booleans in
BcVm to allow the parser to mark when a keyword has been redefined,
which can only happen in one place: defining functions. This made the
change to the parser simple at the expense of another simple change to
the bc lexer (literally only one line).
Because this feature lets users make my bc parse GNU bc-compatible
scripts, as well as interoperate with scripts written for other bc
implementations, I am putting this change in.
Signed-off-by: Gavin Howard <gavin@yzena.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/bc_lex.c | 7 | ||||
-rw-r--r-- | src/bc_parse.c | 49 | ||||
-rw-r--r-- | src/data.c | 11 | ||||
-rw-r--r-- | src/vm.c | 5 |
4 files changed, 69 insertions, 3 deletions
diff --git a/src/bc_lex.c b/src/bc_lex.c index 1fc13926..717e7f04 100644 --- a/src/bc_lex.c +++ b/src/bc_lex.c @@ -61,6 +61,11 @@ static void bc_lex_identifier(BcLex *l) { if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') { + // If the keyword has been redefined, break out of the loop and use + // it as a name. This depends on the parser ensuring that only + // non-POSIX keywords get redefined. + if (BC_REDEFINE && vm.redefined_kws[i]) break; + l->t = BC_LEX_KW_AUTO + (BcLexType) i; // Warn or error, as appropriate for the mode, if the keyword is not @@ -70,6 +75,8 @@ static void bc_lex_identifier(BcLex *l) { // We minus 1 because the index has already been incremented. l->i += n - 1; + + // Already have the token; bail. return; } } diff --git a/src/bc_parse.c b/src/bc_parse.c index cc58f787..70d982f5 100644 --- a/src/bc_parse.c +++ b/src/bc_parse.c @@ -1144,6 +1144,41 @@ static void bc_parse_loopExit(BcParse *p, BcLexType type) { } /** + * Redefines a keyword, if necessary. + * @param p The parser. + */ +static void bc_parse_redefineKeyword(BcParse *p) { + + // Must have a name, or a keyword that is going to be redefined. + if (BC_ERR(p->l.t != BC_LEX_NAME)) { + + if (BC_REDEFINE && BC_PARSE_IS_KEYWORD(p->l.t)) { + + size_t idx = p->l.t - BC_LEX_KW_AUTO; + const BcLexKeyword *kw = bc_lex_kws + idx; + + // If this is true, the keyword can be redefined. We don't allow + // redefining POSIX keywords because that would be a disaster. + if (!vm.redefined_kws[idx] && !BC_LEX_KW_POSIX(kw)) { + + vm.redefined_kws[idx] = true; + + // Set the token to BC_LEX_NAME because bc_parse_func() is + // expecting that. + p->l.t = BC_LEX_NAME; + + // Set the lexer's string to the name of the keyword to be used + // later in bc_parse_func(). I mean, the function *has* to have + // a name... 
+ bc_vec_string(&p->l.str, BC_LEX_KW_LEN(kw), kw->name); + } + else bc_parse_err(p, BC_ERR_PARSE_FUNC); + } + else bc_parse_err(p, BC_ERR_PARSE_FUNC); + } +} + +/** * Parse a function (header). * @param p The parser. */ @@ -1155,22 +1190,30 @@ static void bc_parse_func(BcParse *p) { bc_lex_next(&p->l); - // Must have a name. - if (BC_ERR(p->l.t != BC_LEX_NAME)) - bc_parse_err(p, BC_ERR_PARSE_FUNC); + // Check for keyword redefinition. + bc_parse_redefineKeyword(p); // If the name is "void", and POSIX is not on, mark as void. voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME && !strcmp(p->l.str.v, "void")); + // We can safely do this because the expected token should not overwrite the + // function name. bc_lex_next(&p->l); + // Check for keyword redefinition. + if (voidfn && p->l.t != BC_LEX_LPAREN) bc_parse_redefineKeyword(p); + // If we *don't* have another name, then void is the name of the function. voidfn = (voidfn && p->l.t == BC_LEX_NAME); // With a void function, allow POSIX to complain and get a new token. if (voidfn) { + bc_parse_err(p, BC_ERR_POSIX_VOID); + + // We can safely do this because the expected token should not overwrite + // the function name. bc_lex_next(&p->l); } @@ -867,6 +867,17 @@ const BcLexKeyword bc_lex_kws[] = { /// The length of the list of bc keywords. const size_t bc_lex_kws_len = sizeof(bc_lex_kws) / sizeof(BcLexKeyword); +#if BC_C11 + +// This is here to ensure that BC_LEX_NKWS, which is needed for the +// redefined_kws in BcVm, is correct. If it's correct under C11, it will be +// correct under C99, and I did not know any other way of ensuring they remained +// synchronized. +static_assert(sizeof(bc_lex_kws) / sizeof(BcLexKeyword) == BC_LEX_NKWS, + "BC_LEX_NKWS is wrong."); + +#endif // BC_C11 + /// An array of booleans that correspond to token types. An entry is true if the /// token is valid in an expression, false otherwise. 
const uint8_t bc_parse_exprs[] = { @@ -1376,8 +1376,13 @@ void bc_vm_boot(int argc, char *argv[]) { } #if BC_ENABLED + // Disable global stacks in POSIX mode. if (BC_IS_POSIX) vm.flags &= ~(BC_FLAG_G); + + bc_vm_setenvFlag("BC_REDEFINE_KEYWORDS", BC_DEFAULT_REDEFINE_KEYWORDS, + BC_FLAG_REDEFINE_KWS); + #endif // BC_ENABLED #if BC_ENABLED |