Flex/Bison based compiler and interpreter written in C++ (using Boost)
File detail
Source code
/*
* Copyright (C) 2008 Kamil Dudka <xdudka00@stud.fit.vutbr.cz>
*
* This file is part of vyp08 (compiler and interpreter of VYP08 language).
*
* vyp08 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* vyp08 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with vyp08. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "scanner.h"
#include "vypIO.h"
#ifndef BUILDING_DOX
# include <boost/regex.hpp>
# include <FlexLexer.h>
# include <map>
# include <sstream>
#endif
using namespace StreamDecorator;
using std::string;
/**
* FlexLexer wrapper
* which catches lexical errors
* @note NVI
*/
class PrivateFlexLexer: public yyFlexLexer {
public:
PrivateFlexLexer(std::istream &input, string fileName):
yyFlexLexer(&input, &std::cerr),
fileName_(fileName),
hasError_(false)
{
}
/// Return true if already read source is not lexically valid.
bool hasError() const {
return hasError_;
}
/// NVI equivalent of virtual method yylex
EToken readNext() {
return static_cast<EToken>
(this->yylex());
}
/// NVI equivalent of virtual method LexerError
void postError(const char *msg) {
this->LexerError(msg);
}
protected:
/// override default output behavior
virtual void LexerOutput(const char *buf, int size) {
string msg(buf, size);
this->LexerError(msg.c_str());
}
/// override default error behavior
virtual void LexerError(const char *msg) {
this->hasError_ = true;
std::ostream &str = *(this->yyout);
str << Error(E_ERROR, fileName_, msg, lineno(), "lexical error")
<< std::endl;
}
private:
string fileName_; ///< name (or alias) of input file
bool hasError_; ///< true if any error has been detected
};
/**
* @note RAII object
*/
class FlexScanner: public IScanner {
public:
FlexScanner(std::istream &input, string fileName) {
flex_ = new PrivateFlexLexer(input, fileName);
}
virtual ~FlexScanner() {
delete flex_;
}
virtual bool readNext(Token &token);
virtual bool hasError() const {
return flex_->hasError();
}
private:
PrivateFlexLexer *flex_; ///< superior (flex based) scanner
};
/**
 * Scanner decorator which turns identifiers matching a keyword
 * into the corresponding keyword token.
 * @note DP decorator. The decorated scanner is deleted in the destructor.
 * Copying is disabled, because a copy would share the raw pointer and the
 * decorated scanner would be deleted twice.
 */
class KwScanner: public IScanner {
    public:
        /**
         * @param scannerToUseAndDelete Scanner to read tokens from. It is
         * deleted when this object is destroyed.
         */
        explicit KwScanner(IScanner *scannerToUseAndDelete):
            scan_(scannerToUseAndDelete)
        {
            initMap();
        }

        virtual ~KwScanner() {
            delete scan_;
        }

        virtual bool readNext(Token &token);

        /// @return true if the decorated scanner reports an error.
        virtual bool hasError() const {
            return scan_->hasError();
        }

    private:
        // copying is forbidden (declared, intentionally not defined)
        KwScanner(const KwScanner &);
        KwScanner& operator=(const KwScanner &);

        typedef STD_MAP(string, EToken) TMap;
        TMap map_;          ///< map keyword->token
        IScanner *scan_;    ///< superior IScanner object (design pattern @b decorator)

        /// Fill map_ with the keyword->token pairs.
        void initMap();
};
#ifndef BUILDING_DOX
namespace {
#endif
/**
 * Parse number from string using formatted stream extraction.
 * @param s String to parse from.
 * @param number Target to store output number.
 * @return Return true if the extraction did not fail.
 */
template <typename T> bool readNumber(const std::string &s, T &number) {
    std::istringstream str(s);
    str >> number;

    // The stream is not implicitly convertible to bool in a return statement
    // since C++11, so ask for the state explicitly. This is the same result
    // the implicit conversion gives in C++03.
    return !str.fail();
}
/**
 * Parse VYP08 string from raw string.
 *
 * The raw string has to be enclosed in double quotes. Between the quotes,
 * characters with a value lower than 32 are rejected and the only accepted
 * escape sequences are \\n, \\\\ and \\".
 *
 * @note If the enclosing quotes are missing, @c out is left untouched.
 * On any later error @c out holds the part decoded so far.
 * @param in Raw string to parse from.
 * @param out Target to store output string.
 * @return Return true on success.
 */
bool readString(const std::string &in, std::string &out) {
    const std::string::size_type len = in.size();
    if (len < 2 || in[0] != '"' || in[len - 1] != '"')
        // invalid string (not enclosed in quotes)
        return false;

    // work on a copy of the inner part, so that in and out may be the same object
    const std::string body(in, 1, len - 2);
    out.clear();

    // iterate by index (not up to the first NUL), so that an embedded NUL
    // is rejected as a control character instead of truncating the string
    const std::string::size_type end = body.size();
    for (std::string::size_type i = 0; i < end; ++i) {
        const char c = body[i];
        if (static_cast<unsigned char>(c) < 32)
            // char with ASCII value not greater than 31
            return false;

        if (c != '\\') {
            out.push_back(c);
            continue;
        }

        if (++i == end)
            // back slash with no successor
            return false;

        switch (body[i]) {
            case 'n':   out.push_back('\n');    break;
            case '\\':  out.push_back('\\');    break;
            case '"':   out.push_back('"');     break;
            default:
                // invalid escape sequence
                return false;
        }
    }
    return true;
}
#ifndef BUILDING_DOX
} // namespace
#endif
std::ostream& operator<<(std::ostream &str, EToken type) {
str << Color(C_YELLOW);
switch (type) {
case ETOKEN_NULL: str << "T_NULL"; break;
case ETOKEN_ID: str << "T_ID"; break;
case ETOKEN_NUMBER_INT: str << "T_INT"; break;
case ETOKEN_NUMBER_DOUBLE: str << "T_DOUBLE"; break;
case ETOKEN_STRING: str << "T_STRING"; break;
case ETOKEN_OP_LCBR:
case ETOKEN_OP_RCBR:
case ETOKEN_OP_LPAR:
case ETOKEN_OP_RPAR:
case ETOKEN_OP_STAR:
case ETOKEN_OP_SLASH:
case ETOKEN_OP_PLUS:
case ETOKEN_OP_MINUS:
case ETOKEN_OP_LESS:
case ETOKEN_OP_LESS_EQ:
case ETOKEN_OP_GREATER:
case ETOKEN_OP_GREATER_EQ:
case ETOKEN_OP_ASSIGN:
case ETOKEN_OP_COMMA:
case ETOKEN_OP_SEMICOLON:
case ETOKEN_KW_AND:
case ETOKEN_KW_DIV:
case ETOKEN_KW_EQ:
case ETOKEN_KW_OR:
case ETOKEN_KW_NEQ:
case ETOKEN_KW_NOT: str << "T_OP"; break;
case ETOKEN_KW_DOUBLE:
case ETOKEN_KW_ELSE:
case ETOKEN_KW_IF:
case ETOKEN_KW_INT:
case ETOKEN_KW_STRING:
case ETOKEN_KW_VAR:
case ETOKEN_KW_VOID:
case ETOKEN_KW_WHILE: str << "T_KEYWORD"; break;
}
str << Color(C_NO_COLOR);
switch (type) {
case ETOKEN_NULL:
case ETOKEN_ID:
case ETOKEN_NUMBER_INT:
case ETOKEN_NUMBER_DOUBLE:
case ETOKEN_STRING:
break;
default:
str << "[" << Color(C_LIGHT_BLUE);
switch (type) {
case ETOKEN_NULL:
case ETOKEN_ID:
case ETOKEN_NUMBER_INT:
case ETOKEN_NUMBER_DOUBLE:
case ETOKEN_STRING:
break;
case ETOKEN_OP_LCBR: str << "{"; break;
case ETOKEN_OP_RCBR: str << "}"; break;
case ETOKEN_OP_LPAR: str << "("; break;
case ETOKEN_OP_RPAR: str << ")"; break;
case ETOKEN_OP_STAR: str << "*"; break;
case ETOKEN_OP_SLASH: str << "/"; break;
case ETOKEN_OP_PLUS: str << "+"; break;
case ETOKEN_OP_MINUS: str << "-"; break;
case ETOKEN_OP_LESS: str << "<"; break;
case ETOKEN_OP_LESS_EQ: str << "<="; break;
case ETOKEN_OP_GREATER: str << ">"; break;
case ETOKEN_OP_GREATER_EQ: str << ">="; break;
case ETOKEN_OP_ASSIGN: str << ":="; break;
case ETOKEN_OP_COMMA: str << ","; break;
case ETOKEN_OP_SEMICOLON: str << ";"; break;
case ETOKEN_KW_AND: str << "and"; break;
case ETOKEN_KW_DIV: str << "div"; break;
case ETOKEN_KW_EQ: str << "eq"; break;
case ETOKEN_KW_OR: str << "or"; break;
case ETOKEN_KW_NEQ: str << "neq"; break;
case ETOKEN_KW_NOT: str << "not"; break;
case ETOKEN_KW_DOUBLE: str << "double"; break;
case ETOKEN_KW_ELSE: str << "else"; break;
case ETOKEN_KW_IF: str << "if"; break;
case ETOKEN_KW_INT: str << "int"; break;
case ETOKEN_KW_STRING: str << "string"; break;
case ETOKEN_KW_VAR: str << "var"; break;
case ETOKEN_KW_VOID: str << "void"; break;
case ETOKEN_KW_WHILE: str << "while"; break;
}
str << Color(C_NO_COLOR) << "]";
}
return str;
}
/**
 * Print a token as "lineno:type", followed by the token's value in square
 * brackets for identifiers, strings and numeric literals.
 */
std::ostream& operator<<(std::ostream &str, const Token &token) {
    // common prefix lineno:token_type
    str << Color(C_LIGHT_GREEN) << token.lineno << Color(C_NO_COLOR);
    str << ":" << token.type;

    // which payload (if any) goes to the [...] suffix
    const bool hasText =
        (ETOKEN_ID == token.type) || (ETOKEN_STRING == token.type);
    const bool hasInt = (ETOKEN_NUMBER_INT == token.type);
    const bool hasDouble = (ETOKEN_NUMBER_DOUBLE == token.type);

    if (hasText || hasInt || hasDouble) {
        str << "[" << Color(C_LIGHT_RED);
        if (hasText)
            str << token.text;
        else if (hasInt)
            str << token.numberInt;
        else
            str << token.numberDouble;
        str << Color(C_NO_COLOR) << "]";
    }

    str << Color(C_NO_COLOR);
    return str;
}
/**
 * Build the scanner chain: a FlexScanner reading from @c input, wrapped in
 * a KwScanner.
 * @return Newly allocated head of the chain.
 */
IScanner* ScannerFactory::createScanner(std::istream &input, std::string fileName) {
    // innermost element of the decorator chain
    IScanner *flexScanner = new FlexScanner(input, fileName);

    // ATTENTION: KwScanner object is responsible to destroy FlexScanner object
    IScanner *chain = new KwScanner(flexScanner);
    return chain;
}
/**
 * Read the next valid token from the lexer.
 * A literal which fails to parse is reported through postError() and
 * skipped; reading then continues with the following token.
 * @param token Target to store the token.
 * @return Return false when the lexer yields a zero token type.
 */
bool FlexScanner::readNext(Token &token) {
    for (;;) {
        const EToken kind = flex_->readNext();
        if (!kind)
            return false;

        // common part for all tokens
        token.type = kind;
        token.lineno = flex_->lineno();

        // token-specific part; every successful path returns from the switch
        const char *complaint = 0;
        switch (kind) {
            case ETOKEN_NUMBER_INT:
                if (readNumber(flex_->YYText(), token.numberInt))
                    return true;
                complaint = "invalid integral literal";
                break;

            case ETOKEN_NUMBER_DOUBLE:
                if (readNumber(flex_->YYText(), token.numberDouble))
                    return true;
                complaint = "invalid decimal literal";
                break;

            case ETOKEN_STRING:
                if (readString(flex_->YYText(), token.text))
                    return true;
                complaint = "invalid string literal";
                break;

            case ETOKEN_ID:
                token.text = flex_->YYText();
                return true;

            default:
                return true;
        }

        // only an invalid literal gets here
        flex_->postError(complaint);
    }
}
/**
 * Read a token from the decorated scanner. If it is an identifier whose
 * text is a keyword, its type is replaced by the keyword's token type.
 * @return Return false if the decorated scanner delivered no token.
 */
bool KwScanner::readNext(Token &token) {
    const bool gotToken = scan_->readNext(token);

    // only identifiers can be keywords
    if (gotToken && ETOKEN_ID == token.type) {
        const TMap::const_iterator hit = map_.find(token.text);
        if (map_.end() != hit)
            token.type = hit->second;
    }
    return gotToken;
}
/// if you are looking where to add a new keyword, this is the best place ;-)
void KwScanner::initMap() {
// initialize keyword->token mapping
map_["and"] = ETOKEN_KW_AND;
map_["div"] = ETOKEN_KW_DIV;
map_["double"] = ETOKEN_KW_DOUBLE;
map_["else"] = ETOKEN_KW_ELSE;
map_["eq"] = ETOKEN_KW_EQ;
map_["if"] = ETOKEN_KW_IF;
map_["int"] = ETOKEN_KW_INT;
map_["neq"] = ETOKEN_KW_NEQ;
map_["not"] = ETOKEN_KW_NOT;
map_["or"] = ETOKEN_KW_OR;
map_["string"] = ETOKEN_KW_STRING;
map_["var"] = ETOKEN_KW_VAR;
map_["void"] = ETOKEN_KW_VOID;
map_["while"] = ETOKEN_KW_WHILE;
}