diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cebaeda --- /dev/null +++ b/Makefile @@ -0,0 +1,6 @@ + +test-all: + python ifcc-test.py testfiles + +test-last: + python ifcc-test.py -v -v testfiles/$(shell ls -A testfiles | tail -n 1) diff --git a/compiler/.clangd b/compiler/.clangd new file mode 100644 index 0000000..c7af73a --- /dev/null +++ b/compiler/.clangd @@ -0,0 +1,6 @@ +CompileFlags: + Add: + - "-I/opt/homebrew/opt/antlr4-cpp-runtime/include/antlr4-runtime" + - "-I./generated" + - "-std=c++17" + diff --git a/compiler/CodeGenVisitor.cpp b/compiler/CodeGenVisitor.cpp index 0398d07..7d8a712 100644 --- a/compiler/CodeGenVisitor.cpp +++ b/compiler/CodeGenVisitor.cpp @@ -1,32 +1,43 @@ #include "CodeGenVisitor.h" - #include "generated/ifccParser.h" -antlrcpp::Any CodeGenVisitor::visitStmt(ifccParser::StmtContext *ctx) { +std::any CodeGenVisitor::visitProg(ifccParser::ProgContext *ctx) { + scopeStack.push_back("main"); + int size = symbolTable->stackSize(currentScope()); +#ifdef __APPLE__ + std::cout << ".globl _main\n_main:\n"; + std::cout << " sub sp, sp, #" << size << "\n"; +#else + std::cout << ".globl main\nmain:\n"; + std::cout << " subq $" << size << ", %rsp\n"; +#endif + + this->visitChildren(ctx); + +#ifdef __APPLE__ + std::cout << " add sp, sp, #" << size << "\n"; +#else + std::cout << " addq $" << size << ", %rsp\n"; +#endif + std::cout << " ret\n"; + scopeStack.pop_back(); + return 0; +} + +std::any CodeGenVisitor::visitStmt(ifccParser::StmtContext *ctx) { std::cout << " ;" << ctx->getText() << "\n"; return this->visitChildren(ctx); } -// Declare a new variable: reserve a slot on the stack, mark as uninitialized. 
-antlrcpp::Any CodeGenVisitor::visitDecl_stmt(ifccParser::Decl_stmtContext *ctx) { - std::string name = ctx->VAR_NAME()->getText(); - if (symbolTable.find(name) != symbolTable.end()) { - std::cerr << "error: variable '" << name << "' already declared\n"; - return 1; - } - symbolTable[name] = {nextOffset, false}; - nextOffset += 4; +// Declaration pass already filled the symbol table: nothing to do here. +std::any CodeGenVisitor::visitDecl_stmt(ifccParser::Decl_stmtContext *ctx) { return 0; } // Assign a value to a variable: evaluate val, store on stack, mark as initialized. -antlrcpp::Any CodeGenVisitor::visitSet_stmt(ifccParser::Set_stmtContext *ctx) { +std::any CodeGenVisitor::visitSet_stmt(ifccParser::Set_stmtContext *ctx) { std::string name = ctx->VAR_NAME()->getText(); - if (symbolTable.find(name) == symbolTable.end()) { - std::cerr << "error: variable '" << name << "' undeclared\n"; - return 1; - } - int offset = symbolTable[name].offset; + int offset = symbolTable->getOffset(currentScope(), name); this->visit(ctx->val()); @@ -36,12 +47,12 @@ antlrcpp::Any CodeGenVisitor::visitSet_stmt(ifccParser::Set_stmtContext *ctx) { std::cout << " movl %eax, " << offset << "(%rsp)\n"; #endif - symbolTable[name].initialized = true; + symbolTable->markInitialized(currentScope(), name); return 0; } -// Load a constant or variable into the accumulator register -antlrcpp::Any CodeGenVisitor::visitVal(ifccParser::ValContext *ctx) { +// Load a constant or variable into the accumulator register. 
+std::any CodeGenVisitor::visitVal(ifccParser::ValContext *ctx) { if (ctx->CONST()) { int val = stoi(ctx->CONST()->getText()); #ifdef __APPLE__ @@ -51,50 +62,21 @@ antlrcpp::Any CodeGenVisitor::visitVal(ifccParser::ValContext *ctx) { #endif return val; } + std::string name = ctx->VAR_NAME()->getText(); - if (symbolTable.find(name) == symbolTable.end()) { - std::cerr << "error: variable '" << name << "' undeclared\n"; - return 1; - } - if (!symbolTable[name].initialized) { - std::cerr << "warning: variable '" << name << "' used before initialization\n"; - } - int offset = symbolTable[name].offset; + int offset = symbolTable->getOffset(currentScope(), name); + symbolTable->isInitialized(currentScope(), name); // emits warning if needed + #ifdef __APPLE__ std::cout << " ldr w0, [sp, #" << offset << "]\n"; #else std::cout << " movl " << offset << "(%rsp), %eax\n"; #endif + return offset; } -antlrcpp::Any CodeGenVisitor::visitProg(ifccParser::ProgContext *ctx) { -#ifdef __APPLE__ - std::cout << ".globl _main\n"; - std::cout << "_main:\n"; - // Prologue: reserve space for local variables - std::cout << " sub sp, sp, #16\n"; -#else - std::cout << ".globl main\n"; - std::cout << "main:\n"; - // Prologue: reserve space for local variables - std::cout << " subq $16, %rsp\n"; -#endif - - this->visitChildren(ctx); - - // Epilogue -#ifdef __APPLE__ - std::cout << " add sp, sp, #16\n"; -#else - std::cout << " addq $16, %rsp\n"; -#endif - std::cout << " ret\n"; - return 0; -} - -antlrcpp::Any CodeGenVisitor::visitReturn_stmt(ifccParser::Return_stmtContext *ctx) { - // Evaluate the return value into the accumulator register +std::any CodeGenVisitor::visitReturn_stmt(ifccParser::Return_stmtContext *ctx) { this->visit(ctx->val()); return 0; } diff --git a/compiler/CodeGenVisitor.h b/compiler/CodeGenVisitor.h index 47ff99d..ccefcc5 100644 --- a/compiler/CodeGenVisitor.h +++ b/compiler/CodeGenVisitor.h @@ -1,31 +1,31 @@ #pragma once -#include <map> +#include <vector> #include <string> #include
"antlr4-runtime.h" +#include "SymbolTable.h" #include "generated/ifccBaseVisitor.h" -class CodeGenVisitor : public ifccBaseVisitor { +class CodeGenVisitor : public ifccBaseVisitor { + SymbolTable *symbolTable; // shared, not owned + std::vector<std::string> scopeStack; // navigation state, owned by this visitor - struct VarInfo { - int offset; // stack offset relative to sp - bool initialized; // true after a set_stmt assigns a value - }; + std::string currentScope() const { return scopeStack.back(); } - // Symbol table: variable name -> VarInfo - std::map<std::string, VarInfo> symbolTable; - int nextOffset = 0; // first variable at [sp, #0], next at [sp, #4], etc. +public: + explicit CodeGenVisitor(SymbolTable *st) : symbolTable(st) { + } - public: - std::any visitProg(ifccParser::ProgContext *ctx) override ; - std::any visitStmt(ifccParser::StmtContext *ctx) override ; - std::any visitReturn_stmt(ifccParser::Return_stmtContext *ctx) override ; - std::any visitDecl_stmt(ifccParser::Decl_stmtContext *ctx) override ; - std::any visitSet_stmt(ifccParser::Set_stmtContext *ctx) override ; - std::any visitVal(ifccParser::ValContext *ctx) override ; + std::any visitProg(ifccParser::ProgContext *ctx) override; + std::any visitStmt(ifccParser::StmtContext *ctx) override; + + std::any visitReturn_stmt(ifccParser::Return_stmtContext *ctx) override; + + std::any visitDecl_stmt(ifccParser::Decl_stmtContext *ctx) override; + + std::any visitSet_stmt(ifccParser::Set_stmtContext *ctx) override; + + std::any visitVal(ifccParser::ValContext *ctx) override; }; - - - diff --git a/compiler/DeclarationVisitor.cpp b/compiler/DeclarationVisitor.cpp new file mode 100644 index 0000000..24866d7 --- /dev/null +++ b/compiler/DeclarationVisitor.cpp @@ -0,0 +1,36 @@ +#include "DeclarationVisitor.h" + +#include "generated/ifccParser.h" + +std::any DeclarationVisitor::visitProg(ifccParser::ProgContext *ctx) { + symbolTable->addScope("main"); + scopeStack.push_back("main"); + this->visitChildren(ctx); + scopeStack.pop_back(); +
return 0; +} + +std::any DeclarationVisitor::visitStmt(ifccParser::StmtContext *ctx) { + return this->visitChildren(ctx); +} + +std::any DeclarationVisitor::visitDecl_stmt(ifccParser::Decl_stmtContext *ctx) { + std::vector<antlr4::tree::TerminalNode *> vars = ctx->VAR_NAME(); + for (auto var : vars) { + std::string name = var->getText(); + symbolTable->declare(currentScope(), name); + } + return 0; +} + +std::any DeclarationVisitor::visitSet_stmt(ifccParser::Set_stmtContext *ctx) { + return 0; +} + +std::any DeclarationVisitor::visitVal(ifccParser::ValContext *ctx) { + return 0; +} + +std::any DeclarationVisitor::visitReturn_stmt(ifccParser::Return_stmtContext *ctx) { + return 0; +} diff --git a/compiler/DeclarationVisitor.h b/compiler/DeclarationVisitor.h new file mode 100644 index 0000000..017893a --- /dev/null +++ b/compiler/DeclarationVisitor.h @@ -0,0 +1,39 @@ +#pragma once + +#include <string> +#include <vector> +#include "antlr4-runtime.h" +#include "SymbolTable.h" +#include "generated/ifccBaseVisitor.h" + + +class DeclarationVisitor : public ifccBaseVisitor { + SymbolTable *symbolTable = new SymbolTable(); + + std::vector<std::string> scopeStack; + + const std::string &currentScope() const { return scopeStack.back(); } + +public: + ~DeclarationVisitor() override { + delete symbolTable; + } + + [[nodiscard]] SymbolTable *getSymbolTable() const { + return symbolTable; + } + + std::any visitProg(ifccParser::ProgContext *ctx) override; + + std::any visitStmt(ifccParser::StmtContext *ctx) override; + + std::any visitReturn_stmt(ifccParser::Return_stmtContext *ctx) override; + + std::any visitDecl_stmt(ifccParser::Decl_stmtContext *ctx) override; + + std::any visitSet_stmt(ifccParser::Set_stmtContext *ctx) override; + + std::any visitVal(ifccParser::ValContext *ctx) override; +}; + + diff --git a/compiler/Makefile b/compiler/Makefile index 09950ee..b94a203 100644 --- a/compiler/Makefile +++ b/compiler/Makefile @@ -18,7 +18,9 @@ OBJECTS=build/ifccBaseVisitor.o \ build/ifccVisitor.o \ build/ifccParser.o \ build/main.o \ -
build/CodeGenVisitor.o + build/CodeGenVisitor.o \ + build/DeclarationVisitor.o \ + build/SymbolTable.o ifcc: $(OBJECTS) @mkdir -p build diff --git a/compiler/SymbolTable.cpp b/compiler/SymbolTable.cpp new file mode 100644 index 0000000..aec4e51 --- /dev/null +++ b/compiler/SymbolTable.cpp @@ -0,0 +1 @@ +#include "SymbolTable.h" diff --git a/compiler/SymbolTable.h b/compiler/SymbolTable.h new file mode 100644 index 0000000..1226a42 --- /dev/null +++ b/compiler/SymbolTable.h @@ -0,0 +1,97 @@ +#pragma once +#include <iostream> +#include <map> +#include <string> + +class SymbolTable { + struct VarInfo { + int offset; + bool initialized; + }; + + struct Scope { + std::string functionName; + std::map<std::string, VarInfo> vars; + int nextOffset = 0; + }; + + std::map<std::string, Scope> scopes; // name -> Scope + +public: + // Creates a new scope (call in DeclarationVisitor). + void addScope(const std::string &functionName) { + if (scopes.count(functionName)) { + std::cerr << "error: scope '" << functionName << "' already exists\n"; + exit(1); + } + scopes[functionName] = {functionName, {}, 0}; + } + + // Declare a variable in a named scope; returns its stack offset.
+ int declare(const std::string &scopeName, const std::string &name) { + auto &scope = getScope(scopeName); + if (scope.vars.count(name)) { + std::cerr << "error: variable '" << name << "' already declared in '" << scopeName << "'\n"; + exit(1); + } + int offset = scope.nextOffset; + scope.vars[name] = {offset, false}; + scope.nextOffset += 4; + return offset; + } + + int getOffset(const std::string &scopeName, const std::string &name) { + auto &vars = getScope(scopeName).vars; + if (!vars.count(name)) { + std::cerr << "error: variable '" << name << "' undeclared in '" << scopeName << "'\n"; + exit(1); + } + return vars[name].offset; + } + + bool isDeclared(const std::string &scopeName, const std::string &name) const { + if (!scopes.count(scopeName)) return false; + return scopes.at(scopeName).vars.count(name) > 0; + } + + void markInitialized(const std::string &scopeName, const std::string &name) { + auto &vars = getScope(scopeName).vars; + if (!vars.count(name)) { + std::cerr << "error: variable '" << name << "' undeclared in '" << scopeName << "'\n"; + exit(1); + } + vars[name].initialized = true; + } + + bool isInitialized(const std::string &scopeName, const std::string &name) { + if (!isDeclared(scopeName, name)) return false; + if (!scopes.at(scopeName).vars[name].initialized) { + std::cerr << "warning: variable '" << name << "' used before initialization\n"; + return false; + } + return true; + } + + // Stack size for a given scope, aligned to 16 bytes. 
+ int stackSize(const std::string &scopeName) const { + int n = getScope(scopeName).nextOffset; + return (n + 15) & ~15; + } + +private: + Scope &getScope(const std::string &name) { + if (!scopes.count(name)) { + std::cerr << "error: scope '" << name << "' not found\n"; + exit(1); + } + return scopes.at(name); + } + + const Scope &getScope(const std::string &name) const { + if (!scopes.count(name)) { + std::cerr << "error: scope '" << name << "' not found\n"; + exit(1); + } + return scopes.at(name); + } +}; diff --git a/compiler/ifcc.g4 b/compiler/ifcc.g4 index 45d6770..3dd64af 100644 --- a/compiler/ifcc.g4 +++ b/compiler/ifcc.g4 @@ -8,7 +8,7 @@ stmt: decl_stmt | set_stmt | return_stmt ; return_stmt: RETURN val ';' ; -decl_stmt: 'int' VAR_NAME ';' ; +decl_stmt: 'int' (VAR_NAME ',')* VAR_NAME ';' ; set_stmt: VAR_NAME '=' val ';' ; val: CONST | VAR_NAME ; diff --git a/compiler/main.cpp b/compiler/main.cpp index e42a551..e3f4099 100644 --- a/compiler/main.cpp +++ b/compiler/main.cpp @@ -9,48 +9,45 @@ #include "generated/ifccBaseVisitor.h" #include "CodeGenVisitor.h" +#include "DeclarationVisitor.h" using namespace antlr4; using namespace std; -int main(int argn, const char **argv) -{ - stringstream in; - if (argn==2) - { - ifstream lecture(argv[1]); - if( !lecture.good() ) - { - cerr<<"error: cannot read file: " << argv[1] << endl ; - exit(1); - } - in << lecture.rdbuf(); - } - else - { - cerr << "usage: ifcc path/to/file.c" << endl ; - exit(1); - } - - ANTLRInputStream input(in.str()); +int main(int argn, const char **argv) { + stringstream in; + if (argn == 2) { + ifstream lecture(argv[1]); + if (!lecture.good()) { + cerr << "error: cannot read file: " << argv[1] << endl; + exit(1); + } + in << lecture.rdbuf(); + } else { + cerr << "usage: ifcc path/to/file.c" << endl; + exit(1); + } - ifccLexer lexer(&input); - CommonTokenStream tokens(&lexer); + ANTLRInputStream input(in.str()); - tokens.fill(); + ifccLexer lexer(&input); + CommonTokenStream tokens(&lexer); - 
ifccParser parser(&tokens); - tree::ParseTree* tree = parser.axiom(); + tokens.fill(); - if(parser.getNumberOfSyntaxErrors() != 0) - { - cerr << "error: syntax error during parsing" << endl; - exit(1); - } + ifccParser parser(&tokens); + tree::ParseTree *tree = parser.axiom(); - - CodeGenVisitor v; - v.visit(tree); + if (parser.getNumberOfSyntaxErrors() != 0) { + cerr << "error: syntax error during parsing" << endl; + exit(1); + } - return 0; + DeclarationVisitor dv; + dv.visit(tree); + + CodeGenVisitor cgv = CodeGenVisitor(dv.getSymbolTable()); + cgv.visit(tree); + + return 0; } diff --git a/ifcc-test.py b/ifcc-test.py index 1de1ab7..f7d3c40 100755 --- a/ifcc-test.py +++ b/ifcc-test.py @@ -17,17 +17,17 @@ import subprocess def run_command(string, logfile=None, toscreen=False): """ execute `string` as a shell command. Maybe write stdout+stderr to `logfile` and/or to the toscreen. - return the exit status""" + return the exit status""" if args.debug: print("ifcc-test.py: "+string) - + process=subprocess.Popen(string,shell=True, stderr=subprocess.STDOUT,stdout=subprocess.PIPE, text=True,bufsize=0) if logfile: logfile=open(logfile,'w') - + while True: output = process.stdout.readline() if len(output) == 0: # only happens when 'process' has terminated @@ -45,7 +45,7 @@ def dumpfile(name,quiet=False): if not quiet: print(data,end='') return data - + ###################################################################################### ## ARGPARSE step: make sense of our command-line arguments @@ -56,7 +56,7 @@ import textwrap import shutil width = shutil.get_terminal_size().columns-2 twf=lambda text: textwrap.fill(text,width,initial_indent=' '*4,subsequent_indent=' '*6) - + argparser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description = "Testing script for the ifcc compiler. 
operates in one of two modes:\n\n" @@ -67,7 +67,7 @@ epilog="examples:\n\n" +twf("python3 ifcc-test.py path/to/some/dir/*.c path/to/some/other/dir")+'\n' +'\n' +twf("python3 ifcc-test.py -o ./myprog path/to/some/source.c")+'\n' - +twf("python3 ifcc-test.py -S -o truc.s truc.c")+'\n' + +twf("python3 ifcc-test.py -S -o truc.asm truc.c")+'\n' , ) @@ -121,7 +121,7 @@ if args.S or args.c or args.output: print("error: this mode only supports a single input file") exit(1) inputfilename=args.input[0] - + if inputfilename[-2:] != ".c": print("error: incorrect filename suffix (should be '.c'): "+inputfilename) exit(1) @@ -135,10 +135,10 @@ if args.S or args.c or args.output: if (args.S or args.c) and not args.output: print("error: option '-o filename' is required in this mode") exit(1) - + if args.S: # produce assembly - if args.output[-2:] != ".s": - print("error: output file name must end with '.s'") + if args.output[-2:] != ".asm": + print("error: output file name must end with '.asm'") exit(1) ifccstatus=run_command(f'{pld_base_dir}/compiler/ifcc {inputfilename} > {args.output}') if ifccstatus: # let's show error messages on screen @@ -150,17 +150,17 @@ if args.S or args.c or args.output: if args.output[-2:] != ".o": print("error: output file name must end with '.o'") exit(1) - asmname=args.output[:-2]+".s" + asmname=args.output[:-2]+".asm" ifccstatus=run_command(f'{pld_base_dir}/compiler/ifcc {inputfilename} > {asmname}') if ifccstatus: # let's show error messages on screen exit(run_command(f'{pld_base_dir}/compiler/ifcc {inputfilename}',toscreen=True)) exit(run_command(f'gcc -c -o {args.output} {asmname}',toscreen=True)) - + else: # produce an executable - if args.output[-2:] in [".o",".c",".s"]: + if args.output[-2:] in [".o",".c",".asm"]: print("error: incorrect name for an executable: "+args.output) exit(1) - asmname=args.output+".s" + asmname=args.output+".asm" ifccstatus=run_command(f'{pld_base_dir}/compiler/ifcc {inputfilename} > {asmname}') if ifccstatus: 
exit(run_command(f'{pld_base_dir}/compiler/ifcc {inputfilename}', toscreen=True)) @@ -237,7 +237,7 @@ for inputfilename in inputfilenames: subdirname=subdirname.replace('/','-') # flatten path to single subdir if args.debug>=2: print("debug: subdir="+subdirname) - + os.mkdir(pld_base_dir+'/ifcc-test-output/'+subdirname) shutil.copyfile(inputfilename,pld_base_dir+'/ifcc-test-output/'+subdirname+'/input.c') jobs.append(subdirname) @@ -269,20 +269,20 @@ for jobname in jobs: print('TEST-CASE: '+jobname) os.chdir(jobname) - + ## Reference compiler = GCC - gccstatus=run_command("gcc -S -o asm-gcc.s input.c", "gcc-compile.txt") + gccstatus=run_command("gcc -S -o asm-gcc.asm input.c", "gcc-compile.txt") if gccstatus == 0: # test-case is a valid program. we should run it - gccstatus=run_command("gcc -o exe-gcc asm-gcc.s", "gcc-link.txt") + gccstatus=run_command("gcc -o exe-gcc asm-gcc.asm", "gcc-link.txt") if gccstatus == 0: # then both compile and link stage went well exegccstatus=run_command("./exe-gcc", "gcc-execute.txt") if args.verbose >=2: dumpfile("gcc-execute.txt") - + ## IFCC compiler - ifccstatus=run_command(f'{pld_base_dir}/compiler/ifcc input.c > asm-ifcc.s', 'ifcc-compile.txt') - + ifccstatus=run_command(f'{pld_base_dir}/compiler/ifcc input.c > asm-ifcc.asm', 'ifcc-compile.txt') + if gccstatus != 0 and ifccstatus != 0: ## ifcc correctly rejects invalid program -> test-case ok print("TEST OK") @@ -297,23 +297,23 @@ for jobname in jobs: print("TEST FAIL (your compiler rejects a valid program)") all_ok=False if args.verbose: - dumpfile("asm-ifcc.s") # stdout of ifcc + dumpfile("asm-ifcc.asm") # stdout of ifcc dumpfile("ifcc-compile.txt") # stderr of ifcc continue else: ## ifcc accepts to compile valid program -> let's link it - ldstatus=run_command("gcc -o exe-ifcc asm-ifcc.s", "ifcc-link.txt") + ldstatus=run_command("gcc -o exe-ifcc asm-ifcc.asm", "ifcc-link.txt") if ldstatus: print("TEST FAIL (your compiler produces incorrect assembly)") all_ok=False if 
args.verbose: - dumpfile("asm-ifcc.s") + dumpfile("asm-ifcc.asm") dumpfile("ifcc-link.txt") continue ## both compilers did produce an executable, so now we run both ## these executables and compare the results. - + run_command("./exe-ifcc", "ifcc-execute.txt") if open("gcc-execute.txt").read() != open("ifcc-execute.txt").read() : print("TEST FAIL (different results at execution)") diff --git a/prog/Makefile b/prog/Makefile deleted file mode 100644 index 2cd5efb..0000000 --- a/prog/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -gcc: gcca main-gcc.asm - gcc -o main-gcc.o main-gcc.asm -gcca: main.c - gcc -S -o main-gcc.asm main.c - -ifcc: ifcca main-ifcc.asm - gcc -o main-ifcc.o main-ifcc.asm -ifcca: main.c comp-ifcc - ../compiler/ifcc main.c > main-ifcc.asm -comp-ifcc: - cd ../compiler && make diff --git a/prog/main.c b/prog/main.c deleted file mode 100644 index 12be2b8..0000000 --- a/prog/main.c +++ /dev/null @@ -1,10 +0,0 @@ -int main() { - int z; - int x; - x = 56; - int y; - x = y; - y = 10; - z = x; - return z; -} diff --git a/testfiles/4_multiple_var.c b/testfiles/4_multiple_var.c index 21a4434..779429b 100644 --- a/testfiles/4_multiple_var.c +++ b/testfiles/4_multiple_var.c @@ -1,8 +1,8 @@ int main() { - int z; - int x; + int z, x; x = 56; int y; + int n, o; y = 10; x = y; z = x;