diff options
| author | Jack Palevich <jackpal@google.com> | 2009-05-11 14:49:29 -0700 |
|---|---|---|
| committer | Jack Palevich <jackpal@google.com> | 2009-05-11 18:49:27 -0700 |
| commit | 21a15a2416b4b138bf509186106525944e78ad08 (patch) | |
| tree | f25bdd540f38a97c8c7e025ca624718e46e15472 /libacc | |
| parent | a96930572c3ef440065a87985ed5f01724bf159b (diff) | |
| download | system_core-21a15a2416b4b138bf509186106525944e78ad08.tar.gz system_core-21a15a2416b4b138bf509186106525944e78ad08.tar.bz2 system_core-21a15a2416b4b138bf509186106525944e78ad08.zip | |
Various C++ improvements
+ Changed indentation, which causes the diffstat to report a large number of changed lines.
+ Should be able to compile multiple times with the same compiler object.
+ Create a CodeBuffer class to hold the code.
+ Create a CodeGenerator class to encapsulate knowledge of the CPU instruction set.
+ Started filling in the code generator.
Diffstat (limited to 'libacc')
| -rw-r--r-- | libacc/acc.cpp | 1336 | ||||
| -rw-r--r-- | libacc/tests/bellard.otccex.c | 126 | ||||
| -rw-r--r-- | libacc/tests/missing-main.c | 4 | ||||
| -rw-r--r-- | libacc/tests/otcc.out-orig | bin | 8591 -> 8591 bytes |
4 files changed, 924 insertions, 542 deletions
diff --git a/libacc/acc.cpp b/libacc/acc.cpp index 248c3822..0c6acd48 100644 --- a/libacc/acc.cpp +++ b/libacc/acc.cpp @@ -1,24 +1,24 @@ /* - Obfuscated Tiny C Compiler + Obfuscated Tiny C Compiler - Copyright (C) 2001-2003 Fabrice Bellard + Copyright (C) 2001-2003 Fabrice Bellard - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product and its documentation - *is* required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. -*/ + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product and its documentation + *is* required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ */ #include <ctype.h> #include <dlfcn.h> @@ -30,622 +30,874 @@ namespace acc { class compiler { -/* vars: value of variables - loc : local variable index - glo : global variable index - ind : output code ptr - rsym: return symbol - prog: output code - dstk: define stack - dptr, dch: macro state -*/ -int tok, tokc, tokl, ch, vars, rsym, prog, ind, loc, glo, sym_stk, dstk, dptr, dch, last_id; -FILE* file; - -#define ALLOC_SIZE 99999 - -/* depends on the init string */ -#define TOK_STR_SIZE 48 -#define TOK_IDENT 0x100 -#define TOK_INT 0x100 -#define TOK_IF 0x120 -#define TOK_ELSE 0x138 -#define TOK_WHILE 0x160 -#define TOK_BREAK 0x190 -#define TOK_RETURN 0x1c0 -#define TOK_FOR 0x1f8 -#define TOK_DEFINE 0x218 -#define TOK_MAIN 0x250 - -#define TOK_DUMMY 1 -#define TOK_NUM 2 - -#define LOCAL 0x200 - -#define SYM_FORWARD 0 -#define SYM_DEFINE 1 - -/* tokens in string heap */ -#define TAG_TOK ' ' -#define TAG_MACRO 2 - -void pdef(int t) -{ - *(char *)dstk++ = t; -} -void inp() -{ - if (dptr) { - ch = *(char *)dptr++; - if (ch == TAG_MACRO) { - dptr = 0; - ch = dch; - } - } else - ch = fgetc(file); - /* printf("ch=%c 0x%x\n", ch, ch); */ -} + class CodeBuf { + char* ind; + char* pProgramBase; -int isid() -{ - return isalnum(ch) | ch == '_'; -} + void release() { + if (pProgramBase != 0) { + free(pProgramBase); + pProgramBase = 0; + } + } -/* read a character constant */ -void getq() -{ - if (ch == '\\') { - inp(); - if (ch == 'n') - ch = '\n'; - } -} + public: + CodeBuf() { + pProgramBase = 0; + ind = 0; + } + + ~CodeBuf() { + release(); + } -void next() -{ - int l, a; + void init(int size) { + release(); + pProgramBase = (char*) calloc(1, size); + ind = pProgramBase; + } - while (isspace(ch) | ch == '#') { - if (ch == '#') { - inp(); - next(); - if (tok == TOK_DEFINE) { - next(); - pdef(TAG_TOK); /* fill last ident tag */ - *(int *)tok = SYM_DEFINE; - *(int *)(tok + 4) = dstk; /* define stack */ + void o(int n) { + /* cannot use unsigned, so we must do a hack */ + while 
(n && n != -1) { + *ind++ = n; + n = n >> 8; } - /* well we always save the values ! */ - while (ch != '\n') { - pdef(ch); - inp(); + } + + /* + * Output a byte. Handles all values, 0..ff. + */ + void ob(int n) { + *ind++ = n; + } + + /* output a symbol and patch all calls to it */ + void gsym(int t) { + int n; + while (t) { + n = *(int *) t; /* next value */ + *(int *) t = ((int) ind) - t - 4; + t = n; } - pdef(ch); - pdef(TAG_MACRO); } - inp(); + + /* psym is used to put an instruction with a data field which is a + reference to a symbol. It is in fact the same as oad ! */ + int psym(int n, int t) { + return oad(n, t); + } + + /* instruction + address */ + int oad(int n, int t) { + o(n); + *(int *) ind = t; + t = (int) ind; + ind = ind + 4; + return t; + } + + inline void* getBase() { + return (void*) pProgramBase; + } + + int getSize() { + return ind - pProgramBase; + } + + int getPC() { + return (int) ind; + } + }; + + class CodeGenerator { + public: + CodeGenerator() {} + virtual ~CodeGenerator() {} + + void init(CodeBuf* pCodeBuf) { + this->pCodeBuf = pCodeBuf; + } + + /* output a symbol and patch all calls to it */ + void gsym(int t) { + pCodeBuf->gsym(t); + } + + protected: + void o(int n) { + pCodeBuf->o(n); + } + + /* + * Output a byte. Handles all values, 0..ff. + */ + void ob(int n) { + pCodeBuf->ob(n); + } + + /* psym is used to put an instruction with a data field which is a + reference to a symbol. It is in fact the same as oad ! 
*/ + int psym(int n, int t) { + return oad(n, t); + } + + /* instruction + address */ + int oad(int n, int t) { + return pCodeBuf->oad(n,t); + } + + int getPC() { + return pCodeBuf->getPC(); + } + + private: + CodeBuf* pCodeBuf; + }; + + class X86CodeGenerator : public CodeGenerator { + public: + X86CodeGenerator() {} + virtual ~X86CodeGenerator() {} + + /* load immediate value */ + int li(int t) { + oad(0xb8, t); /* mov $xx, %eax */ + } + + int gjmp(int t) { + return psym(0xe9, t); + } + + /* l = 0: je, l == 1: jne */ + int gtst(int l, int t) { + o(0x0fc085); /* test %eax, %eax, je/jne xxx */ + return psym(0x84 + l, t); + } + + int gcmp(int t) { + o(0xc139); /* cmp %eax,%ecx */ + li(0); + o(0x0f); /* setxx %al */ + o(t + 0x90); + o(0xc0); + } + + void clearECX() { + oad(0xb9, 0); /* movl $0, %ecx */ + } + + void pushEAX() { + o(0x50); /* push %eax */ + } + + void storeEAXIntoPoppedLVal(bool isInt) { + o(0x59); /* pop %ecx */ + o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */ + } + + void loadEAXIndirect(bool isInt) { + if (isInt) + o(0x8b); /* mov (%eax), %eax */ + else + o(0xbe0f); /* movsbl (%eax), %eax */ + ob(0); /* add zero in code */ + } + + void leaEAX(int ea) { + gmov(10, ea); /* leal EA, %eax */ + } + + void storeEAX(int ea) { + gmov(6, ea); /* mov %eax, EA */ + } + + void loadEAX(int ea) { + gmov(8, ea); /* mov EA, %eax */ + } + + void puzzleAdd(int n, int tokc) { + /* Not sure what this does, related to variable loading with an + * operator at level 11. 
+ */ + gmov(0, n); /* 83 ADD */ + o(tokc); + } + + int allocStackSpaceForArgs() { + return oad(0xec81, 0); /* sub $xxx, %esp */ + } + + void storeEAToArg(int l) { + oad(0x248489, l); /* movl %eax, xxx(%esp) */ + } + + int callForward(int symbol) { + return psym(0xe8, symbol); /* call xxx */ + } + + void callRelative(int t) { + psym(0xe8, t); /* call xxx */ + } + + void callIndirect(int l) { + oad(0x2494ff, l); /* call *xxx(%esp) */ + } + + void adjustStackAfterCall(int l) { + oad(0xc481, l); /* add $xxx, %esp */ + } + + void oHack(int n) { + o(n); + } + + void oadHack(int n, int t) { + oad(n, t); + } + private: + + int gmov(int l, int t) { + o(l + 0x83); + oad((t < LOCAL) << 7 | 5, t); + } + }; + + /* vars: value of variables + loc : local variable index + glo : global variable index + ind : output code ptr + rsym: return symbol + prog: output code + dstk: define stack + dptr, dch: macro state + */ + int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk, + dptr, dch, last_id; + void* pSymbolBase; + void* pGlobalBase; + void* pVarsBase; + FILE* file; + + CodeBuf codeBuf; + X86CodeGenerator* pGen; + + static const int ALLOC_SIZE = 99999; + + /* depends on the init string */ + static const int TOK_STR_SIZE = 48; + static const int TOK_IDENT = 0x100; + static const int TOK_INT = 0x100; + static const int TOK_IF = 0x120; + static const int TOK_ELSE = 0x138; + static const int TOK_WHILE = 0x160; + static const int TOK_BREAK = 0x190; + static const int TOK_RETURN = 0x1c0; + static const int TOK_FOR = 0x1f8; + static const int TOK_DEFINE = 0x218; + static const int TOK_MAIN = 0x250; + + static const int TOK_DUMMY = 1; + static const int TOK_NUM = 2; + + static const int LOCAL = 0x200; + + static const int SYM_FORWARD = 0; + static const int SYM_DEFINE = 1; + + /* tokens in string heap */ + static const int TAG_TOK = ' '; + static const int TAG_MACRO = 2; + + void pdef(int t) { + *(char *) dstk++ = t; + } + + void inp() { + if (dptr) { + ch = *(char *) dptr++; + if 
(ch == TAG_MACRO) { + dptr = 0; + ch = dch; + } + } else + ch = fgetc(file); + /* printf("ch=%c 0x%x\n", ch, ch); */ + } + + int isid() { + return isalnum(ch) | ch == '_'; } - tokl = 0; - tok = ch; - /* encode identifiers & numbers */ - if (isid()) { - pdef(TAG_TOK); - last_id = dstk; - while (isid()) { - pdef(ch); + + /* read a character constant */ + void getq() { + if (ch == '\\') { inp(); + if (ch == 'n') + ch = '\n'; } - if (isdigit(tok)) { - tokc = strtol((char*) last_id, 0, 0); - tok = TOK_NUM; - } else { - *(char *)dstk = TAG_TOK; /* no need to mark end of string (we - suppose data is initied to zero */ - tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1)) - sym_stk); - *(char *)dstk = 0; /* mark real end of ident for dlsym() */ - tok = tok * 8 + TOK_IDENT; - if (tok > TOK_DEFINE) { - tok = vars + tok; - /* printf("tok=%s %x\n", last_id, tok); */ - /* define handling */ - if (*(int *)tok == SYM_DEFINE) { - dptr = *(int *)(tok + 4); - dch = ch; - inp(); + } + + void next() { + int l, a; + + while (isspace(ch) | ch == '#') { + if (ch == '#') { + inp(); + next(); + if (tok == TOK_DEFINE) { next(); + pdef(TAG_TOK); /* fill last ident tag */ + *(int *) tok = SYM_DEFINE; + *(int *) (tok + 4) = dstk; /* define stack */ + } + /* well we always save the values ! 
*/ + while (ch != '\n') { + pdef(ch); + inp(); } + pdef(ch); + pdef(TAG_MACRO); } - } - } else { - inp(); - if (tok == '\'') { - tok = TOK_NUM; - getq(); - tokc = ch; - inp(); inp(); - } else if (tok == '/' & ch == '*') { - inp(); - while (ch) { - while (ch != '*') - inp(); + } + tokl = 0; + tok = ch; + /* encode identifiers & numbers */ + if (isid()) { + pdef(TAG_TOK); + last_id = dstk; + while (isid()) { + pdef(ch); inp(); - if (ch == '/') - ch = 0; } + if (isdigit(tok)) { + tokc = strtol((char*) last_id, 0, 0); + tok = TOK_NUM; + } else { + *(char *) dstk = TAG_TOK; /* no need to mark end of string (we + suppose data is initialized to zero by calloc) */ + tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1)) + - sym_stk); + *(char *) dstk = 0; /* mark real end of ident for dlsym() */ + tok = tok * 8 + TOK_IDENT; + if (tok > TOK_DEFINE) { + tok = vars + tok; + /* printf("tok=%s %x\n", last_id, tok); */ + /* define handling */ + if (*(int *) tok == SYM_DEFINE) { + dptr = *(int *) (tok + 4); + dch = ch; + inp(); + next(); + } + } + } + } else { inp(); - next(); - } else - { - const char* t = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b"; - while (l = *t++) { - a = *t++; - tokc = 0; - while ((tokl = *t++ - 'b') < 0) - tokc = tokc * 64 + tokl + 64; - if (l == tok & (a == ch | a == '@')) { + if (tok == '\'') { + tok = TOK_NUM; + getq(); + tokc = ch; + inp(); + inp(); + } else if (tok == '/' & ch == '*') { + inp(); + while (ch) { + while (ch != '*') + inp(); + inp(); + if (ch == '/') + ch = 0; + } + inp(); + next(); + } else { + const char + * t = + "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b"; + while (l = *t++) { + a = *t++; + tokc = 0; + while ((tokl = *t++ - 'b') < 0) + tokc = tokc * 64 + tokl + 64; + if (l == tok & (a == ch | a == '@')) { #if 0 - printf("%c%c -> tokl=%d tokc=0x%x\n", - l, a, 
tokl, tokc); + printf("%c%c -> tokl=%d tokc=0x%x\n", + l, a, tokl, tokc); #endif - if (a == ch) { - inp(); - tok = TOK_DUMMY; /* dummy token for double tokens */ + if (a == ch) { + inp(); + tok = TOK_DUMMY; /* dummy token for double tokens */ + } + break; } - break; } } } - } #if 0 - { - int p; + { + int p; - printf("tok=0x%x ", tok); - if (tok >= TOK_IDENT) { - printf("'"); - if (tok > TOK_DEFINE) + printf("tok=0x%x ", tok); + if (tok >= TOK_IDENT) { + printf("'"); + if (tok> TOK_DEFINE) p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8; - else + else p = sym_stk + 1 + (tok - TOK_IDENT) / 8; - while (*(char *)p != TAG_TOK && *(char *)p) + while (*(char *)p != TAG_TOK && *(char *)p) printf("%c", *(char *)p++); - printf("'\n"); - } else if (tok == TOK_NUM) { - printf("%d\n", tokc); - } else { - printf("'%c'\n", tok); + printf("'\n"); + } else if (tok == TOK_NUM) { + printf("%d\n", tokc); + } else { + printf("'%c'\n", tok); + } } - } #endif -} + } -void error(const char *fmt,...) -{ - va_list ap; + void error(const char *fmt, ...) 
{ + va_list ap; - va_start(ap, fmt); - fprintf(stderr, "%ld: ", ftell((FILE *)file)); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - va_end(ap); - exit(1); -} + va_start(ap, fmt); + fprintf(stderr, "%ld: ", ftell((FILE *) file)); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); + } -void skip(int c) -{ - if (tok != c) { - error("'%c' expected", c); + void skip(int c) { + if (tok != c) { + error("'%c' expected", c); + } + next(); } - next(); -} -void o(int n) -{ - /* cannot use unsigned, so we must do a hack */ - while (n && n != -1) { - *(char *)ind++ = n; - n = n >> 8; + /* load immediate value */ + int li(int t) { + return pGen->li(t); } -} -/* output a symbol and patch all calls to it */ -void gsym(int t) -{ - int n; - while (t) { - n = *(int *)t; /* next value */ - *(int *)t = ind - t - 4; - t = n; + int gjmp(int t) { + return pGen->gjmp(t); } -} -/* psym is used to put an instruction with a data field which is a - reference to a symbol. It is in fact the same as oad ! 
*/ -#define psym oad - -/* instruction + address */ -int oad(int n, int t) -{ - o(n); - *(int *)ind = t; - t = ind; - ind = ind + 4; - return t; -} + /* l = 0: je, l == 1: jne */ + int gtst(int l, int t) { + return pGen->gtst(l, t); + } -/* load immediate value */ -int li(int t) -{ - oad(0xb8, t); /* mov $xx, %eax */ -} + int gcmp(int t) { + return pGen->gcmp(t); + } -int gjmp(int t) -{ - return psym(0xe9, t); -} + void clearEXC() { + pGen->clearECX(); + } -/* l = 0: je, l == 1: jne */ -int gtst(int l, int t) -{ - o(0x0fc085); /* test %eax, %eax, je/jne xxx */ - return psym(0x84 + l, t); -} + void storeEAXIntoPoppedLVal(bool isInt) { + pGen->storeEAXIntoPoppedLVal(isInt); + } -int gcmp(int t) -{ - o(0xc139); /* cmp %eax,%ecx */ - li(0); - o(0x0f); /* setxx %al */ - o(t + 0x90); - o(0xc0); -} + void loadEAXIndirect(bool isInt) { + pGen->loadEAXIndirect(isInt); + } -int gmov(int l, int t) -{ - o(l + 0x83); - oad((t < LOCAL) << 7 | 5, t); -} + void leaEAX(int ea) { + pGen->leaEAX(ea); + } -/* l is one if '=' parsing wanted (quick hack) */ -void unary(int l) -{ - int n, t, a, c; - - n = 1; /* type of expression 0 = forward, 1 = value, other = - lvalue */ - if (tok == '\"') { - li(glo); - while (ch != '\"') { - getq(); - *(char *)glo++ = ch; + /* Temporary hack for emitting x86 code directly. 
*/ + void o(int n) { + pGen->oHack(n); + } + + /* instruction + address */ + int oad(int n, int t) { + pGen->oadHack(n,t); + } + + /* instruction + address */ + int psym(int n, int t) { + pGen->oadHack(n,t); + } + + void gsym(int n) { + pGen->gsym(n); + } + + /* l is one if '=' parsing wanted (quick hack) */ + void unary(int l) { + int n, t, a, c; + + n = 1; /* type of expression 0 = forward, 1 = value, other = + lvalue */ + if (tok == '\"') { + li(glo); + while (ch != '\"') { + getq(); + *(char *) glo++ = ch; + inp(); + } + *(char *) glo = 0; + glo = glo + 4 & -4; /* align heap */ inp(); - } - *(char *)glo = 0; - glo = glo + 4 & -4; /* align heap */ - inp(); - next(); - } else { - c = tokl; - a = tokc; - t = tok; - next(); - if (t == TOK_NUM) { - li(a); - } else if (c == 2) { - /* -, +, !, ~ */ - unary(0); - oad(0xb9, 0); /* movl $0, %ecx */ - if (t == '!') - gcmp(a); - else - o(a); - } else if (t == '(') { - expr(); - skip(')'); - } else if (t == '*') { - /* parse cast */ - skip('('); - t = tok; /* get type */ - next(); /* skip int/char/void */ - next(); /* skip '*' or '(' */ - if (tok == '*') { - /* function type */ - skip('*'); + next(); + } else { + c = tokl; + a = tokc; + t = tok; + next(); + if (t == TOK_NUM) { + li(a); + } else if (c == 2) { + /* -, +, !, ~ */ + unary(0); + clearEXC(); + if (t == '!') + gcmp(a); + else + o(a); + } else if (t == '(') { + expr(); skip(')'); + } else if (t == '*') { + /* parse cast */ skip('('); + t = tok; /* get type */ + next(); /* skip int/char/void */ + next(); /* skip '*' or '(' */ + if (tok == '*') { + /* function type */ + skip('*'); + skip(')'); + skip('('); + skip(')'); + t = 0; + } skip(')'); - t = 0; - } - skip(')'); - unary(0); - if (tok == '=') { - next(); - o(0x50); /* push %eax */ - expr(); - o(0x59); /* pop %ecx */ - o(0x0188 + (t == TOK_INT)); /* movl %eax/%al, (%ecx) */ - } else if (t) { - if (t == TOK_INT) - o(0x8b); /* mov (%eax), %eax */ - else - o(0xbe0f); /* movsbl (%eax), %eax */ - ind++; /* add zero in 
code */ - } - } else if (t == '&') { - gmov(10, *(int *)tok); /* leal EA, %eax */ - next(); - } else { - n = *(int *)t; - /* forward reference: try dlsym */ - if (!n) - n = (int) dlsym(0, (char*) last_id); - if (tok == '=' & l) { - /* assignment */ + unary(0); + if (tok == '=') { + next(); + pGen->pushEAX(); + expr(); + storeEAXIntoPoppedLVal(t == TOK_INT); + } else if (t) { + loadEAXIndirect(t == TOK_INT); + } + } else if (t == '&') { + leaEAX(*(int *) tok); next(); - expr(); - gmov(6, n); /* mov %eax, EA */ - } else if (tok != '(') { - /* variable */ - gmov(8, n); /* mov EA, %eax */ - if (tokl == 11) { - gmov(0, n); - o(tokc); + } else { + n = *(int *) t; + /* forward reference: try dlsym */ + if (!n) + n = (int) dlsym(0, (char*) last_id); + if (tok == '=' & l) { + /* assignment */ next(); + expr(); + pGen->storeEAX(n); + } else if (tok != '(') { + /* variable */ + pGen->loadEAX(n); + if (tokl == 11) { + pGen->puzzleAdd(n, tokc); + next(); + } } } } - } - /* function call */ - if (tok == '(') { - if (n == 1) - o(0x50); /* push %eax */ + /* function call */ + if (tok == '(') { + if (n == 1) + pGen->pushEAX(); - /* push args and invert order */ - a = oad(0xec81, 0); /* sub $xxx, %esp */ - next(); - l = 0; - while(tok != ')') { - expr(); - oad(0x248489, l); /* movl %eax, xxx(%esp) */ - if (tok == ',') - next(); - l = l + 4; - } - *(int *)a = l; - next(); - if (!n) { - /* forward reference */ - t = t + 4; - *(int *)t = psym(0xe8, *(int *)t); - } else if (n == 1) { - oad(0x2494ff, l); /* call *xxx(%esp) */ - l = l + 4; - } else { - oad(0xe8, n - ind - 5); /* call xxx */ + /* push args and invert order */ + a = pGen->allocStackSpaceForArgs(); + next(); + l = 0; + while (tok != ')') { + expr(); + pGen->storeEAToArg(l); + if (tok == ',') + next(); + l = l + 4; + } + *(int *) a = l; + next(); + if (!n) { + /* forward reference */ + t = t + 4; + *(int *) t = pGen->callForward(*(int *) t); + } else if (n == 1) { + pGen->callIndirect(l); + l = l + 4; + } else { + 
pGen->callRelative(n - codeBuf.getPC() - 5); /* call xxx */ + } + if (l) + pGen->adjustStackAfterCall(l); } - if (l) - oad(0xc481, l); /* add $xxx, %esp */ } -} -void sum(int l) -{ - int t, n, a; - - if (l-- == 1) - unary(1); - else { - sum(l); - a = 0; - while (l == tokl) { - n = tok; - t = tokc; - next(); + void sum(int l) { + int t, n, a; - if (l > 8) { - a = gtst(t, a); /* && and || output code generation */ - sum(l); - } else { - o(0x50); /* push %eax */ - sum(l); - o(0x59); /* pop %ecx */ + if (l-- == 1) + unary(1); + else { + sum(l); + a = 0; + while (l == tokl) { + n = tok; + t = tokc; + next(); - if (l == 4 | l == 5) { - gcmp(t); + if (l > 8) { + a = gtst(t, a); /* && and || output code generation */ + sum(l); } else { - o(t); - if (n == '%') - o(0x92); /* xchg %edx, %eax */ + o(0x50); /* push %eax */ + sum(l); + o(0x59); /* pop %ecx */ + + if (l == 4 | l == 5) { + gcmp(t); + } else { + o(t); + if (n == '%') + o(0x92); /* xchg %edx, %eax */ + } } } - } - /* && and || output code generation */ - if (a && l > 8) { - a = gtst(t, a); - li(t ^ 1); - gjmp(5); /* jmp $ + 5 */ - gsym(a); - li(t); + /* && and || output code generation */ + if (a && l > 8) { + a = gtst(t, a); + li(t ^ 1); + gjmp(5); /* jmp $ + 5 */ + gsym(a); + li(t); + } } } -} - -void expr() -{ - sum(11); -} - -int test_expr() -{ - expr(); - return gtst(0, 0); -} + void expr() { + sum(11); + } + int test_expr() { + expr(); + return gtst(0, 0); + } -void block(int l) -{ - int a, n, t; + void block(int l) { + int a, n, t; - if (tok == TOK_IF) { - next(); - skip('('); - a = test_expr(); - skip(')'); - block(l); - if (tok == TOK_ELSE) { + if (tok == TOK_IF) { next(); - n = gjmp(0); /* jmp */ - gsym(a); - block(l); - gsym(n); /* patch else jmp */ - } else { - gsym(a); /* patch if test */ - } - } else if (tok == TOK_WHILE | tok == TOK_FOR) { - t = tok; - next(); - skip('('); - if (t == TOK_WHILE) { - n = ind; + skip('('); a = test_expr(); - } else { - if (tok != ';') - expr(); - skip(';'); - n = ind; - 
a = 0; - if (tok != ';') + skip(')'); + block(l); + if (tok == TOK_ELSE) { + next(); + n = gjmp(0); /* jmp */ + gsym(a); + block(l); + gsym(n); /* patch else jmp */ + } else { + gsym(a); /* patch if test */ + } + } else if (tok == TOK_WHILE | tok == TOK_FOR) { + t = tok; + next(); + skip('('); + if (t == TOK_WHILE) { + n = codeBuf.getPC(); a = test_expr(); - skip(';'); - if (tok != ')') { - t = gjmp(0); - expr(); - gjmp(n - ind - 5); - gsym(t); - n = t + 4; + } else { + if (tok != ';') + expr(); + skip(';'); + n = codeBuf.getPC(); + a = 0; + if (tok != ';') + a = test_expr(); + skip(';'); + if (tok != ')') { + t = gjmp(0); + expr(); + gjmp(n - codeBuf.getPC() - 5); + gsym(t); + n = t + 4; + } } - } - skip(')'); - block((int) &a); - gjmp(n - ind - 5); /* jmp */ - gsym(a); - } else if (tok == '{') { - next(); - /* declarations */ - decl(1); - while(tok != '}') - block(l); - next(); - } else { - if (tok == TOK_RETURN) { + skip(')'); + block((int) &a); + gjmp(n - codeBuf.getPC() - 5); /* jmp */ + gsym(a); + } else if (tok == '{') { next(); - if (tok != ';') - expr(); - rsym = gjmp(rsym); /* jmp */ - } else if (tok == TOK_BREAK) { + /* declarations */ + decl(1); + while (tok != '}') + block(l); next(); - *(int *)l = gjmp(*(int *)l); - } else if (tok != ';') - expr(); - skip(';'); + } else { + if (tok == TOK_RETURN) { + next(); + if (tok != ';') + expr(); + rsym = gjmp(rsym); /* jmp */ + } else if (tok == TOK_BREAK) { + next(); + *(int *) l = gjmp(*(int *) l); + } else if (tok != ';') + expr(); + skip(';'); + } } -} -/* 'l' is true if local declarations */ -void decl(int l) -{ - int a; + /* 'l' is true if local declarations */ + void decl(int l) { + int a; - while (tok == TOK_INT | tok != -1 & !l) { - if (tok == TOK_INT) { - next(); - while (tok != ';') { - if (l) { - loc = loc + 4; - *(int *)tok = -loc; - } else { - *(int *)tok = glo; - glo = glo + 4; - } + while (tok == TOK_INT | tok != -1 & !l) { + if (tok == TOK_INT) { next(); - if (tok == ',') + while (tok != ';') { 
+ if (l) { + loc = loc + 4; + *(int *) tok = -loc; + } else { + *(int *) tok = glo; + glo = glo + 4; + } next(); - } - skip(';'); - } else { - /* patch forward references (XXX: do not work for function - pointers) */ - gsym(*(int *)(tok + 4)); - /* put function address */ - *(int *)tok = ind; - next(); - skip('('); - a = 8; - while (tok != ')') { - /* read param name and compute offset */ - *(int *)tok = a; - a = a + 4; + if (tok == ',') + next(); + } + skip(';'); + } else { + /* patch forward references (XXX: do not work for function + pointers) */ + gsym(*(int *) (tok + 4)); + /* put function address */ + *(int *) tok = codeBuf.getPC(); next(); - if (tok == ',') + skip('('); + a = 8; + while (tok != ')') { + /* read param name and compute offset */ + *(int *) tok = a; + a = a + 4; next(); + if (tok == ',') + next(); + } + next(); /* skip ')' */ + rsym = loc = 0; + o(0xe58955); /* push %ebp, mov %esp, %ebp */ + a = oad(0xec81, 0); /* sub $xxx, %esp */ + block(0); + gsym(rsym); + o(0xc3c9); /* leave, ret */ + *(int *) a = loc; /* save local variables */ } - next(); /* skip ')' */ - rsym = loc = 0; - o(0xe58955); /* push %ebp, mov %esp, %ebp */ - a = oad(0xec81, 0); /* sub $xxx, %esp */ - block(0); - gsym(rsym); - o(0xc3c9); /* leave, ret */ - *(int *)a = loc; /* save local variables */ } } -} + + void cleanup() { + if (sym_stk != 0) { + free((void*) sym_stk); + sym_stk = 0; + } + if (pGlobalBase != 0) { + free((void*) pGlobalBase); + pGlobalBase = 0; + } + if (pVarsBase != 0) { + free(pVarsBase); + pVarsBase = 0; + } + if (pGen) { + delete pGen; + pGen = 0; + } + } + + void clear() { + tok = 0; + tokc = 0; + tokl = 0; + ch = 0; + vars = 0; + rsym = 0; + loc = 0; + glo = 0; + sym_stk = 0; + dstk = 0; + dptr = 0; + dch = 0; + last_id = 0; + file = 0; + pGlobalBase = 0; + pVarsBase = 0; + pGen = 0; + } public: -compiler() : - tok(0), tokc(0), tokl(0), ch(0), - vars(0), rsym(0), prog(0), ind(0), loc(0), glo(0), sym_stk(0), - dstk(0), dptr(0), dch(0), last_id(0), 
file(0) -{ -} + compiler() { + clear(); + } -int compile(FILE* in) { - - file = in; - sym_stk = (int) calloc(1, ALLOC_SIZE); - dstk = (int) strcpy((char*) sym_stk, - " int if else while break return for define main ") + TOK_STR_SIZE; - glo = (int) calloc(1, ALLOC_SIZE); - ind = prog = (int) calloc(1, ALLOC_SIZE); - vars = (int) calloc(1, ALLOC_SIZE); - inp(); - next(); - decl(0); - return 0; -} + ~compiler() { + cleanup(); + } -int run(int argc, char** argv) -{ - typedef int (*mainPtr)(int argc, char** argv); - mainPtr aMain = (mainPtr) * (int*) (vars + TOK_MAIN); - if (! aMain) { - fprintf(stderr, "Could not find main"); - return -1; + int compile(FILE* in) { + cleanup(); + clear(); + codeBuf.init(ALLOC_SIZE); + pGen = new X86CodeGenerator(); + pGen->init(&codeBuf); + file = in; + sym_stk = (int) calloc(1, ALLOC_SIZE); + dstk = (int) strcpy((char*) sym_stk, + " int if else while break return for define main ") + + TOK_STR_SIZE; + pGlobalBase = calloc(1, ALLOC_SIZE); + glo = (int) pGlobalBase; + pVarsBase = calloc(1, ALLOC_SIZE); + vars = (int) pVarsBase; + inp(); + next(); + decl(0); + return 0; } - return aMain(argc, argv); -} -int dump(FILE* out) { - fwrite((void *)prog, 1, ind - prog, out); - return 0; -} + int run(int argc, char** argv) { + typedef int (*mainPtr)(int argc, char** argv); + mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN); + if (!aMain) { + fprintf(stderr, "Could not find function \"main\".\n"); + return -1; + } + return aMain(argc, argv); + } -}; + int dump(FILE* out) { + fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out); + return 0; + } +}; } // namespace acc @@ -654,7 +906,7 @@ int main(int argc, char** argv) { const char* inFile = NULL; const char* outFile = NULL; int i; - for(i = 1; i < argc; i++) { + for (i = 1; i < argc; i++) { char* arg = argv[i]; if (arg[0] == '-') { switch (arg[1]) { @@ -681,7 +933,7 @@ int main(int argc, char** argv) { FILE* in = stdin; if (inFile) { in = fopen(inFile, "r"); - if (! 
in) { + if (!in) { fprintf(stderr, "Could not open input file %s\n", inFile); return 1; } @@ -697,15 +949,15 @@ int main(int argc, char** argv) { } if (doTest) { FILE* save = fopen(outFile, "w"); - if (! save) { + if (!save) { fprintf(stderr, "Could not open output file %s\n", outFile); return 5; } compiler.dump(save); fclose(save); } else { - int codeArgc = argc-i+1; - char** codeArgv=argv + i - 1; + int codeArgc = argc - i + 1; + char** codeArgv = argv + i - 1; codeArgv[0] = (char*) (inFile ? inFile : "stdin"); return compiler.run(codeArgc, codeArgv); } diff --git a/libacc/tests/bellard.otccex.c b/libacc/tests/bellard.otccex.c new file mode 100644 index 00000000..e8f09891 --- /dev/null +++ b/libacc/tests/bellard.otccex.c @@ -0,0 +1,126 @@ +/* #!/usr/local/bin/otcc */ +/* + * Sample OTCC C example. You can uncomment the first line and install + * otcc in /usr/local/bin to make otcc scripts ! + */ + +/* Any preprocessor directive except #define are ignored. We put this + include so that a standard C compiler can compile this code too. */ +#include <stdio.h> + +/* defines are handled, but macro arguments cannot be given. No + recursive defines are tolerated */ +#define DEFAULT_BASE 10 + +/* + * Only old style K&R prototypes are parsed. Only int arguments are + * allowed (implicit types). + * + * By benchmarking the execution time of this function (for example + * for fib(35)), you'll notice that OTCC is quite fast because it + * generates native i386 machine code. + */ +fib(n) +{ + if (n <= 2) + return 1; + else + return fib(n-1) + fib(n-2); +} + +/* Identifiers are parsed the same way as C: begins with letter or + '_', and then letters, '_' or digits */ +fact(n) +{ + /* local variables can be declared. 
Only 'int' type is supported */ + int i, r; + r = 1; + /* 'while' and 'for' loops are supported */ + for(i=2;i<=n;i++) + r = r * i; + return r; +} + +/* Well, we could use printf, but it would be too easy */ +print_num(n, b) +{ + int tab, p, c; + /* Numbers can be entered in decimal, hexadecimal ('0x' prefix) and + octal ('0' prefix) */ + /* more complex programs use malloc */ + tab = malloc(0x100); + p = tab; + while (1) { + c = n % b; + /* Character constants can be used */ + if (c >= 10) + c = c + 'a' - 10; + else + c = c + '0'; + *(char *)p = c; + p++; + n = n / b; + /* 'break' is supported */ + if (n == 0) + break; + } + while (p != tab) { + p--; + printf("%c", *(char *)p); + } + free(tab); +} + +/* 'main' takes standard 'argc' and 'argv' parameters */ +main(argc, argv) +{ + /* no local name space is supported, but local variables ARE + supported. As long as you do not use a globally defined + variable name as local variable (which is a bad habbit), you + won't have any problem */ + int s, n, f, base; + + /* && and || operator have the same semantics as C (left to right + evaluation and early exit) */ + if (argc != 2 && argc != 3) { + /* '*' operator is supported with explicit casting to 'int *', + 'char *' or 'int (*)()' (function pointer). Of course, 'int' + are supposed to be used as pointers too. */ + s = *(int *)argv; + help(s); + return 1; + } + /* Any libc function can be used because OTCC uses dynamic linking */ + n = atoi(*(int *)(argv + 4)); + base = DEFAULT_BASE; + if (argc >= 3) { + base = atoi(*(int *)(argv + 8)); + if (base < 2 || base > 36) { + /* external variables can be used too (here: 'stderr') */ + fprintf(stderr, "Invalid base\n"); + return 1; + } + } + printf("fib(%d) = ", n); + print_num(fib(n), base); + printf("\n"); + + printf("fact(%d) = ", n); + if (n > 12) { + printf("Overflow"); + } else { + /* why not using a function pointer ? 
*/ + f = &fact; + print_num((*(int (*)())f)(n), base); + } + printf("\n"); + return 0; +} + +/* functions can be used before being defined */ +help(name) +{ + printf("usage: %s n [base]\n", name); + printf("Compute fib(n) and fact(n) and output the result in base 'base'\n"); +} + diff --git a/libacc/tests/missing-main.c b/libacc/tests/missing-main.c new file mode 100644 index 00000000..e73eec46 --- /dev/null +++ b/libacc/tests/missing-main.c @@ -0,0 +1,4 @@ +/* No main. */ + +a() { +}
\ No newline at end of file diff --git a/libacc/tests/otcc.out-orig b/libacc/tests/otcc.out-orig Binary files differ index 2cb08ff5..3bf7e1f1 100644 --- a/libacc/tests/otcc.out-orig +++ b/libacc/tests/otcc.out-orig |
