Siehe auch [[jail:lexer]], [[jail:interpreter]] ====== Parser ====== The parser gives the syntax a grammar. The ability to compose larger expressions and statements out of smaller parts. A parser takes the flat list of tokens and builds a tree structure that mirrors the nested nature of the grammar. https://www.youtube.com/watch?v=dDtZLm7HIJs https://www.youtube.com/watch?v=unh6aK8WMwM class Parser { private: Lexer lexer; Token current_token; public: Parser() {}; Parser(Lexer _lexer) { lexer = _lexer; current_token = lexer.getNextToken(); } ~Parser() {}; void error(std::string str) { std::cout << "parser: " << str << "\n"; std::exit(0); } ASTNode parse() { ASTNode node = statement_list(); return node; } int consumed = 0; // Todo void consume(std::string token_type); ASTNode statement_list(); ASTNode statement(); ASTNode declare_statement(); ASTNode assignment_statement(); ASTNode variable(); ASTNode factor(); ASTNode term(); ASTNode expression(); ASTNode condition(); }; void Parser::consume(std::string token_type) { if (current_token._type() == token_type) current_token = lexer.getNextToken(); else error("[parser::consume] unexpected '" + current_token._value() + "', expected '" + token_type + "'\n"); consumed++; } ASTNode Parser::statement_list() { ASTNode node(Token(CODE_BLOCK, "CODE_BLOCK")); while (current_token._type() != "EOF" and current_token._type() != RBRACKET) { node.make_child(statement()); } return node; } ASTNode Parser::statement() { ASTNode node; if (current_token._type() == VARIABLE) { node = assignment_statement(); } else error("[parser::statement] unknown Token '" + current_token._type() + "'"); return node; } ASTNode Parser::assignment_statement() { ASTNode left = variable(); consume(ASSIGN); ASTNode right; right = expression(); ASTNode node(Token(ASSIGN, "=")); node.make_child(left); node.make_child(right); if (current_token._type() == SEMI) consume(SEMI); return node; } ASTNode Parser::variable() { ASTNode node(current_token); consume(VARIABLE); return node; } ASTNode Parser::factor() { if (current_token._type() == PLUS) { ASTNode node(current_token); consume(PLUS); node.make_child(factor()); return node; } else if (current_token._type() == MINUS) { ASTNode node(current_token); consume(MINUS); node.make_child(factor()); return node; } else if (current_token._type() == INC) { ASTNode node(current_token); consume(INC); node.make_child(factor()); return node; } else if (current_token._type() == DEC) { ASTNode node(current_token); consume(DEC); node.make_child(factor()); return node; } else if (current_token._type() == LPAREN) { consume(LPAREN); ASTNode node = expression(); consume(RPAREN); return node; } else if (current_token._type() == INTEGER) { ASTNode node(current_token); consume(INTEGER); return node; } else if (current_token._type() == VARIABLE) { ASTNode node(variable()); return node; } else error("[parser::factor] unknown Token '" + current_token._type() + "'\n"); } ASTNode Parser::term() { ASTNode node = factor(); while (true) { ASTNode temp; if (current_token._type() == MUL) { consume(MUL); temp = ASTNode(Token(MUL, "*")); } else if (current_token._type() == DIV) { consume(DIV); temp = ASTNode(Token(DIV, "/")); } else if (current_token._type() == MOD) { consume(MOD); temp = ASTNode(Token(MOD, "%")); } else if (current_token._type() == POW) { consume(POW); temp = ASTNode(Token(POW, "^")); } else break; temp.make_child(node); temp.make_child(factor()); node = temp; } return node; } ASTNode Parser::expression() { ASTNode node = term(); while (true) { ASTNode temp; if (current_token._type() == PLUS) { consume(PLUS); temp = ASTNode(Token(PLUS, "+")); } else if (current_token._type() == MINUS) { consume(MINUS); temp = ASTNode(Token(MINUS, "-")); } else break; temp.make_child(node); temp.make_child(term()); node = temp; } return node; } ASTNode Parser::condition() { ASTNode node; ASTNode left = expression(); if (current_token._type() == EQ) { node = Token(EQ, EQ); consume(EQ); } else if (current_token._type() == NEQ) { node = Token(NEQ, NEQ); consume(NEQ); } else if (current_token._type() == LT) { node = Token(LT, LT); consume(LT); } else if (current_token._type() == GT) { node = Token(GT, GT); consume(GT); } else if (current_token._type() == LEQ) { node = Token(LEQ, LEQ); consume(LEQ); } else if (current_token._type() == GEQ) { node = Token(GEQ, GEQ); consume(GEQ); } else if (current_token._type() == AND) { node = Token(AND, AND); consume(AND); } else if (current_token._type() == OR) { node = Token(OR, OR); consume(OR); } else error("[parser::condition] unknown Token '" + current_token._type() + "'"); node.make_child(left); node.make_child(expression()); return node; } The parser has now produced a rooted tree to iteratoe over. Token('CODE_BLOCK', 'CODE_BLOCK') Token('ASSIGN', '=') Token('VARIABLE', 'a') Token('INTEGER', '1') Token('ASSIGN', '=') Token('VARIABLE', 'b') Token('INTEGER', '2')