From 921a9dd8bd32f37a278773ca5ba6c8cb4800d167 Mon Sep 17 00:00:00 2001 From: Andrew Kesterson Date: Sat, 4 May 2024 22:08:20 -0400 Subject: [PATCH] - Added string strip methods lstrip and rstrip - Fixed the tokenizer to chomp whitespace from left and right of tokens - Fixed the tokenizer so it returns reserved symbols not just constants and expressions - Added some tests for the basic tokenizer and parser - Started working on structures to allow the basic interpreter to store lines in memory --- README.md | 7 ++---- src/basic.c | 50 +++++++++++++++++++++--------------- src/basic.h | 18 +++++++++++-- src/kernel.c | 4 ++- src/string.c | 56 +++++++++++++++++++++++++++++++++++++++++ src/string.h | 3 ++- tests/basic_parser.c | 16 ++++++++++++ tests/basic_parser.deps | 5 ++++ tests/basic_tokenizer.c | 15 +++++------ tests/string_chomp.c | 19 ++++++++++++++ tests/string_chomp.deps | 2 ++ 11 files changed, 159 insertions(+), 36 deletions(-) create mode 100644 tests/basic_parser.c create mode 100644 tests/basic_parser.deps create mode 100644 tests/string_chomp.c create mode 100644 tests/string_chomp.deps diff --git a/README.md b/README.md index 6d39624..b7ab419 100644 --- a/README.md +++ b/README.md @@ -22,22 +22,19 @@ Currently the BASIC only understands simple arithmetic expressions. But this wil How can I run it? ===== -You have to build it to - run it. To build it, you need: +You have to build it to run it. To build it, you need: -* An x86 computer with a floppy drive (or the bochs emulator) * bcc (bruce's c compiler - check your OS's package repositories) * nasm * gnu make * ld86, objdump86, as86 -To run it, you can use any x86 emulator that can boot a floppy image, but the makefile assumes you have 'bochs' installed. +To run it, you need either an x86 computer with a floppy drive or the bochs emulator. For emulation you can use any x86 emulator that can boot a floppy image, but the makefile assumes you have 'bochs' installed. 
make clean run This will rebuild all of the sources and fire up the bochs emulator. Have fun. - Developing & Testing ======= diff --git a/src/basic.c b/src/basic.c index 58206ca..9c59dad 100644 --- a/src/basic.c +++ b/src/basic.c @@ -21,6 +21,7 @@ char *_tokenize(char *ptr, char *token) { char *orig = NULL; char *tokenptr = NULL; + char tmpbuff[256]; int len = 0; int numtokens = 0; int i = 0; @@ -28,6 +29,7 @@ char *_tokenize(char *ptr, char *token) if ( ptr == NULL || token == NULL ) { return NULL; } + memset((char *)&tmpbuff, 0x00, 256); if ( _tokenizer_prev == ptr ) { ptr = _tokenizer_prev_next; } @@ -39,20 +41,25 @@ char *_tokenize(char *ptr, char *token) tokenptr = token; for ( i = 0 ; i < numtokens; i++) { if ( *ptr == *(tokenptr + i)) { + if ( len == 0 ) { + len = 1; + ptr += 1; + } goto _tokenize_copy; } } ptr += 1; - len += 1; + len += 1; } _tokenize_copy: if ( len > BASIC_TOKENIZER_MAX_LENGTH ) { basic_errno = BASIC_ERR_SYNTAX_TOKEN_LENGTH; return NULL; - } else if ( len == 0 ) { - return NULL; } - memcpy((void *)&_tokenizer_value, (void *)orig, len); + memcpy((void *)&_tokenizer_value, (void *)orig, len); + lstrip((char *)&_tokenizer_value, (char *)&tmpbuff, " "); + rstrip((char *)&tmpbuff, (char *)&_tokenizer_value, " "); + _tokenizer_prev_next = (ptr + 1); return ptr; } @@ -149,6 +156,7 @@ int basic_solve_expr(struct basic_expr *expr, struct basic_variable *result) struct basic_expr *basic_parse_expr(char *expbuf) { struct basic_expr *ret = &math_expressions[0]; + char *token = NULL; char flags = 0; /*char *subptr = 0;*/ @@ -157,35 +165,37 @@ struct basic_expr *basic_parse_expr(char *expbuf) while ( *expbuf != '\0' ) { if ( *expbuf == ' ' ) { - expbuf += sizeof(char); + expbuf += 1; continue; - } else if ( isdigit(*expbuf) == 1 ) { + } + + expbuf = _tokenize(expbuf, BASIC_TOKENIZER_TOKENS); + token = _token_get(); + if ( isdigit(*token) == 1 ) { if ( (ret->type == 0) && (flags & BASIC_FOUND_LVAL) == BASIC_FOUND_LVAL ) { basic_errno = 
BASIC_ERR_SYNTAX_MULTIPLE_LVALUES; return NULL; } else if ( ret->type == 0x0 ) { - expbuf = _tokenize(expbuf, BASIC_TOKENIZER_TOKENS); - ret->lval.i = atoi(_token_get()); + ret->lval.i = atoi(token); ret->lval_type = BASIC_LVAL_CONST; - flags = (flags + BASIC_FOUND_LVAL); + flags = (flags | BASIC_FOUND_LVAL); } else if ( ret->type != 0x0 && ((flags & BASIC_FOUND_RVAL) == BASIC_FOUND_RVAL)) { basic_errno = BASIC_ERR_SYNTAX_MULTIPLE_RVALUES; return NULL; } else if ( ret->type != 0x0 ) { - expbuf = _tokenize(expbuf, BASIC_TOKENIZER_TOKENS); - ret->rval.i = atoi(_token_get()); + ret->rval.i = atoi(token); ret->rval_type = BASIC_RVAL_CONST; } - } else if ( ret->type == 0x0 ) { - if ( *expbuf == '+' ) { + } else if ( token != NULL && ret->type == 0x0 ) { + if ( *token == '+' ) { ret->type = BASIC_OPTP_ADD; - } else if ( *expbuf == '*' ) { + } else if ( *token == '*' ) { ret->type = BASIC_OPTP_MUL; - } else if ( *expbuf == '-' ) { + } else if ( *token == '-' ) { ret->type = BASIC_OPTP_SUB; - } else if ( *expbuf == '/' ) { + } else if ( *token == '/' ) { ret->type = BASIC_OPTP_DIV; - } else if ( *expbuf == '%' ) { + } else if ( *token == '%' ) { ret->type = BASIC_OPTP_MOD; } else { basic_errno = BASIC_ERR_SYNTAX_GENERAL; @@ -195,7 +205,6 @@ struct basic_expr *basic_parse_expr(char *expbuf) basic_errno = BASIC_ERR_SYNTAX_GENERAL; return NULL; } - expbuf += sizeof(char); } return ret; } @@ -216,7 +225,7 @@ void basic_print_var(struct basic_variable *var) } } -void basic_repl(void) +void basic_repl(basic_program *program) { char keybuff[512]; char outbuff[128]; @@ -244,6 +253,7 @@ void basic_repl(void) if ( _cgets((char *)&keybuff) != NULL ) { _cputs("\n"); + /* Evaluate */ expr = basic_parse_expr((char *)&keybuff); if ( expr == NULL ) { _cputs("Error: "); @@ -254,7 +264,6 @@ void basic_repl(void) continue; } - /* Evaluate */ basic_solve_expr(expr, &result); if ( basic_errno != 0 ) { _cputs("Error: "); @@ -262,6 +271,7 @@ void basic_repl(void) _cputs((char *)&decimal); 
_cputs("\n"); } else { + /* Print */ basic_print_var(&result); } } diff --git a/src/basic.h b/src/basic.h index 02b665e..0bd56fe 100644 --- a/src/basic.h +++ b/src/basic.h @@ -30,7 +30,7 @@ #define BASIC_ERR_INTERNAL_UNIMPLEMENTED 9 #define BASIC_ERR_MATH_DBZ 10 -#define BASIC_TOKENIZER_TOKENS " +-/%*=" +#define BASIC_TOKENIZER_TOKENS "+-/%*=" #define BASIC_TOKENIZER_MAX_LENGTH 512 #define BASIC_VARNAME_MAX_LENGTH 16 @@ -67,11 +67,25 @@ struct basic_variable { }; typedef struct basic_variable basic_variable; +struct basic_line { + int lineno; + char content[256]; + struct basic_line *nextline; +}; +typedef struct basic_line basic_line; + +struct basic_program { + char name[128]; + basic_line *first; +}; +typedef struct basic_program basic_program; + #define BASIC_CONST_TRUE 1 #define BASIC_CONST_FALSE 0 extern int basic_errno; -void basic_repl(void); +void basic_repl(basic_program *program); +basic_expr *basic_parse_expr(char *); #endif /* _BASIC_H_ */ diff --git a/src/kernel.c b/src/kernel.c index 9950a06..26a4b0d 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -1,8 +1,10 @@ #include "screen.h" #include "string.h" #include "conio.h" +#include "basic.h" void main(void) { - basic_repl(); + basic_program program; + basic_repl(&program); } diff --git a/src/string.c b/src/string.c index aedcef5..ed59b15 100644 --- a/src/string.c +++ b/src/string.c @@ -81,3 +81,59 @@ int strcmp(char *s1, char *s2) return 0; } + +int lstrip(char *s1, char *s2, char *strip) +{ + int stripped = 0; + char *stripptr = strip; + if ( s1 == NULL || s2 == NULL || strip == NULL ) { + return 0; + } + while ( *s1 != 0 ) { + if ( stripptr != NULL ) { + for ( stripptr = strip; *stripptr != 0; stripptr += 1) { + if ( *s1 == *stripptr ) { + stripped += 1; + goto _lstrip_outer_continue; + } + } + stripptr = NULL; + } + *s2 = *s1; + s2 += 1; +_lstrip_outer_continue: + s1 += 1; + } + return stripped; +} + +int rstrip(char *s1, char *s2, char *strip) +{ + int stripped = 0; + char *stripptr = strip; + char 
*rs1 = s1; + if ( s1 == NULL || s2 == NULL || strip == NULL ) { + return 0; + } + rs1 += strlen(s1)-1; + while ( rs1 >= s1 ) { + for ( stripptr = strip; *stripptr != 0; stripptr += 1) { + if ( *rs1 == *stripptr ) { + stripped += 1; + rs1 -= 1; + goto _rstrip_continue; + } + } + break; +_rstrip_continue: + ; + } + while (s1 <= rs1) { + *s2 = *s1; + s2 += 1; + s1 += 1; + } + *s2 = 0; + return stripped; +} + diff --git a/src/string.h b/src/string.h index 1bae3b7..c4824cf 100644 --- a/src/string.h +++ b/src/string.h @@ -8,5 +8,6 @@ int strncat(char *dest, char *src, size_t n); void *memset(void *s, char c, size_t n); void *memcpy(void *dest, void *src, size_t n); int strcmp(char *s1, char *s2); - +int lstrip(char *s1, char *s2, char *strip); +int rstrip(char *s1, char *s2, char *strip); #endif /* _STRING_H_ */ diff --git a/tests/basic_parser.c b/tests/basic_parser.c new file mode 100644 index 0000000..d1c301a --- /dev/null +++ b/tests/basic_parser.c @@ -0,0 +1,16 @@ +#include "types.h" +#include "basic.h" + + +int main(void) +{ + struct basic_expr *expr; + expr = basic_parse_expr("1 + 1"); + if ( expr == NULL ) return 1; + if ( expr->lval_type != BASIC_LVAL_CONST ) return 2; + if ( expr->rval_type != BASIC_RVAL_CONST ) return 3; + if ( expr->lval.i != 1 ) return 4; + if ( expr->rval.i != 1 ) return 5; + if ( expr->type != BASIC_OPTP_ADD ) return 6; + return 0; +} \ No newline at end of file diff --git a/tests/basic_parser.deps b/tests/basic_parser.deps new file mode 100644 index 0000000..1f29ad1 --- /dev/null +++ b/tests/basic_parser.deps @@ -0,0 +1,5 @@ +basic +stdlib +string +conio +screen diff --git a/tests/basic_tokenizer.c b/tests/basic_tokenizer.c index 8c9d4d1..86427fb 100644 --- a/tests/basic_tokenizer.c +++ b/tests/basic_tokenizer.c @@ -6,12 +6,12 @@ char *_tokenize(char *ptr, char *token); char *_token_get(void); -#define assert_lvalue(str, lval, ret_null, ret_neq) \ +#define assert_token_value(str, val, ret_null, ret_neq) \ ptr = _tokenize(str, 
BASIC_TOKENIZER_TOKENS); \ value = _token_get(); \ if ( ptr == NULL ) return ret_null; \ - rc = strcmp(value, lval); \ - printf("(value) == (lval) ? : (%s) == (%s) %d\n", value, lval, rc); \ + rc = strcmp(value, val); \ + printf("(%s) => (value) == (val) ? : (%s) == (%s) %d\n", str, value, val, rc); \ if ( rc != 0 ) return ret_neq; @@ -21,9 +21,10 @@ int main(void) char *value = NULL; int rc = 0; - assert_lvalue("1+1", "1", 1, 2); - assert_lvalue("1 + 1", "1", 2, 3); - assert_lvalue("10 + 10", "10", 4, 5); - + assert_token_value("1+1", "1", 1, 2); + assert_token_value("1 + 1", "1", 2, 3); + assert_token_value("10 + 10", "10", 4, 5); + assert_token_value("1+ 2", "1", 6, 7) + assert_token_value("+ 2", "+", 8, 9) return 0; } \ No newline at end of file diff --git a/tests/string_chomp.c b/tests/string_chomp.c new file mode 100644 index 0000000..9147da0 --- /dev/null +++ b/tests/string_chomp.c @@ -0,0 +1,19 @@ +#include "string.h" +#include "stdlib.h" +#include <stdio.h> + +#define assert_strip(method, str, strip, count, rvalue, ret_strcmp, ret_count) \ + memset((char *)&buff, 0x00, 32); \ + rc = method(str, (char *)&buff, strip); \ + printf("method(%s) => (%d, %s)\n", str, rc, (char *)&buff); \ + if ( rc != count) return ret_count; \ + if ( strcmp((char *)&buff, rvalue) != 0 ) return ret_strcmp; + +int main(void) +{ + char buff[32]; + int rc = 0; + assert_strip(lstrip, " white space", " ", 3, "white space", 1, 2) + assert_strip(rstrip, "white space ", " ", 3, "white space", 3, 4) + return 0; +} \ No newline at end of file diff --git a/tests/string_chomp.deps b/tests/string_chomp.deps new file mode 100644 index 0000000..83c4d5c --- /dev/null +++ b/tests/string_chomp.deps @@ -0,0 +1,2 @@ +string +stdlib