Česky
Kamil Dudka

Flex/Bison based compiler and interpreter written in C++ (using Boost)

File detail

Name: scanner.cc [Download]
Location: vyp08 > vyp08-1.0pre1 > src
Size:12.7 KB
Last modification:2022-09-09 13:06

Source code

/*
 * Copyright (C) 2008 Kamil Dudka <xdudka00@stud.fit.vutbr.cz>
 *
 * This file is part of vyp08 (compiler and interpreter of VYP08 language).
 *
 * vyp08 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * vyp08 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with vyp08.  If not, see <http://www.gnu.org/licenses/>.
 */
 
#include "config.h"
#include "scanner.h"
 
#include "vypIO.h"
 
#ifndef BUILDING_DOX
#   include <boost/regex.hpp>
#   include <FlexLexer.h>
#   include <map>
#   include <sstream>
#endif
 
using namespace StreamDecorator;
using std::string;
 
/**
 * FlexLexer wrapper
 * which catches lexical errors
 * @note NVI
 */
class PrivateFlexLexer: public yyFlexLexer {
    public:
        PrivateFlexLexer(std::istream &input, string fileName):
            yyFlexLexer(&input, &std::cerr),
            fileName_(fileName),
            hasError_(false)
        {
        }
        /// Return true if already read source is not lexically valid.
        bool hasError() const {
            return hasError_;
        }
        /// NVI equivalent of virtual method yylex
        EToken readNext() {
            return static_cast<EToken>
                (this->yylex());
        }
        /// NVI equivalent of virtual method LexerError
        void postError(const char *msg) {
            this->LexerError(msg);
        }
    protected:
        /// override default output behavior
        virtual void LexerOutput(const char *buf, int size) {
            string msg(buf, size);
            this->LexerError(msg.c_str());
        }
        /// override default error behavior
        virtual void LexerError(const char *msg) {
            this->hasError_ = true;
            std::ostream &str = *(this->yyout);
            str << Error(E_ERROR, fileName_, msg, lineno(), "lexical error")
                << std::endl;
        }
    private:
        string  fileName_;                              ///< name (or alias) of input file
        bool    hasError_;                              ///< true if any error has been detected
};
 
/**
 * @note RAII object
 */
class FlexScanner: public IScanner {
    public:
        FlexScanner(std::istream &input, string fileName) {
            flex_ = new PrivateFlexLexer(input, fileName);
        }
        virtual ~FlexScanner() {
            delete flex_;
        }
        virtual bool readNext(Token &token);
        virtual bool hasError() const {
            return flex_->hasError();
        }
    private:
        PrivateFlexLexer *flex_;                        ///< superior (flex based) scanner
};
 
/**
 * Map keyword->token
 * @note DP decorator
 */
class KwScanner: public IScanner {
    public:
        KwScanner(IScanner *scannerToUseAndDelete):
            scan_(scannerToUseAndDelete)
        {
            initMap();
        }
        virtual ~KwScanner() {
            delete scan_;
        }
        virtual bool readNext(Token &token);
        virtual bool hasError() const {
            return scan_->hasError();
        }
    private:
        typedef STD_MAP(string, EToken) TMap;
        TMap map_;                                      ///< map keyword->token
 
        IScanner *scan_;                                ///< superior IScanner object (design pattern @b decorator)
        void initMap();
};
 
#ifndef BUILDING_DOX
namespace {
#endif
    /**
     * Parse number from string.
     *
     * The number is read by formatted stream extraction, so leading
     * white-space is skipped and characters following the number are not
     * checked.
     * @param s String to parse from.
     * @param number Target to store output number.
     * @return Return true on success (the extraction did not fail).
     */
    template <typename T> bool readNumber(const std::string &s, T &number) {
        std::istringstream str(s);
        str >> number;

        // explicit state check - implicit stream-to-bool conversion in
        // a return statement is not available since C++11
        return !str.fail();
    }
 
    /**
     * Parse VYP08 string from raw string.
     *
     * The raw string has to be enclosed in double quotes. Inside the quotes
     * any character with value 32 or greater is taken literally, except the
     * back slash which starts one of the escape sequences \\n, \\\\ and \\".
     * @note On error, out holds the part decoded before the error was found.
     * If the enclosing quotes are missing, out is left untouched.
     * @param in Raw string to parse from.
     * @param out Target to store output string.
     * @return Return true on success.
     */
    bool readString(const std::string &in, std::string &out) {
        const std::string::size_type len = in.size();
        if (len < 2 || in[0] != '"' || in[len - 1] != '"')
            // invalid string
            return false;

        // private copy of the text between quotes, in and out may be the same object
        const std::string body(in, 1, len - 2);
        out.clear();

        // iterate by length (not up to the first zero byte), so that an
        // embedded zero byte is rejected like any other control character
        const std::string::size_type end = body.size();
        for (std::string::size_type pos = 0; pos < end; ++pos) {
            const char c = body[pos];
            if (static_cast<unsigned char>(c) < 32)
                // char with ASCII value not greater than 31
                return false;

            if (c != '\\') {
                out.push_back(c);
                continue;
            }
            if (++pos == end)
                // back slash with no successor
                return false;

            switch (body[pos]) {
                case 'n':   out.push_back('\n');    break;
                case '\\':  out.push_back('\\');    break;
                case '"':   out.push_back('"');     break;
                default:
                            // invalid escape sequence
                            return false;
            }
        }
        return true;
    }
#ifndef BUILDING_DOX
} // namespace
#endif
 
/**
 * Print colorized name of token type.
 *
 * Output consists of token class (T_NULL, T_ID, T_INT, T_DOUBLE, T_STRING,
 * T_OP or T_KEYWORD). For all types except the first five, the class is
 * followed by the operator/keyword itself enclosed in [].
 */
std::ostream& operator<<(std::ostream &str, EToken type) {
    static const char *const clsOp = "T_OP";
    static const char *const clsKw = "T_KEYWORD";

    const char *cls     = 0;        // token class, stays zero for unlisted values
    const char *lexeme  = 0;        // text printed inside of [], zero if none
    bool        detail  = true;     // false for types printed without [...]

    switch (type) {
        case ETOKEN_NULL:           cls = "T_NULL";     detail = false;     break;
        case ETOKEN_ID:             cls = "T_ID";       detail = false;     break;
        case ETOKEN_NUMBER_INT:     cls = "T_INT";      detail = false;     break;
        case ETOKEN_NUMBER_DOUBLE:  cls = "T_DOUBLE";   detail = false;     break;
        case ETOKEN_STRING:         cls = "T_STRING";   detail = false;     break;

        case ETOKEN_OP_LCBR:        cls = clsOp;        lexeme = "{";       break;
        case ETOKEN_OP_RCBR:        cls = clsOp;        lexeme = "}";       break;
        case ETOKEN_OP_LPAR:        cls = clsOp;        lexeme = "(";       break;
        case ETOKEN_OP_RPAR:        cls = clsOp;        lexeme = ")";       break;
        case ETOKEN_OP_STAR:        cls = clsOp;        lexeme = "*";       break;
        case ETOKEN_OP_SLASH:       cls = clsOp;        lexeme = "/";       break;
        case ETOKEN_OP_PLUS:        cls = clsOp;        lexeme = "+";       break;
        case ETOKEN_OP_MINUS:       cls = clsOp;        lexeme = "-";       break;
        case ETOKEN_OP_LESS:        cls = clsOp;        lexeme = "<";       break;
        case ETOKEN_OP_LESS_EQ:     cls = clsOp;        lexeme = "<=";      break;
        case ETOKEN_OP_GREATER:     cls = clsOp;        lexeme = ">";       break;
        case ETOKEN_OP_GREATER_EQ:  cls = clsOp;        lexeme = ">=";      break;
        case ETOKEN_OP_ASSIGN:      cls = clsOp;        lexeme = ":=";      break;
        case ETOKEN_OP_COMMA:       cls = clsOp;        lexeme = ",";       break;
        case ETOKEN_OP_SEMICOLON:   cls = clsOp;        lexeme = ";";       break;
        case ETOKEN_KW_AND:         cls = clsOp;        lexeme = "and";     break;
        case ETOKEN_KW_DIV:         cls = clsOp;        lexeme = "div";     break;
        case ETOKEN_KW_EQ:          cls = clsOp;        lexeme = "eq";      break;
        case ETOKEN_KW_OR:          cls = clsOp;        lexeme = "or";      break;
        case ETOKEN_KW_NEQ:         cls = clsOp;        lexeme = "neq";     break;
        case ETOKEN_KW_NOT:         cls = clsOp;        lexeme = "not";     break;

        case ETOKEN_KW_DOUBLE:      cls = clsKw;        lexeme = "double";  break;
        case ETOKEN_KW_ELSE:        cls = clsKw;        lexeme = "else";    break;
        case ETOKEN_KW_IF:          cls = clsKw;        lexeme = "if";      break;
        case ETOKEN_KW_INT:         cls = clsKw;        lexeme = "int";     break;
        case ETOKEN_KW_STRING:      cls = clsKw;        lexeme = "string";  break;
        case ETOKEN_KW_VAR:         cls = clsKw;        lexeme = "var";     break;
        case ETOKEN_KW_VOID:        cls = clsKw;        lexeme = "void";    break;
        case ETOKEN_KW_WHILE:       cls = clsKw;        lexeme = "while";   break;
    }

    // token class
    str << Color(C_YELLOW);
    if (cls)
        str << cls;
    str << Color(C_NO_COLOR);

    if (!detail)
        return str;

    // [operator] or [keyword]
    str << "[" << Color(C_LIGHT_BLUE);
    if (lexeme)
        str << lexeme;
    str << Color(C_NO_COLOR) << "]";
    return str;
}
 
std::ostream& operator<<(std::ostream &str, const Token &token) {
    // start with lineno:token_typ:
    str << Color(C_LIGHT_GREEN) << token.lineno << Color(C_NO_COLOR)
        << ":" << token.type;
 
    // append (...) in some cases
    switch (token.type) {
        case ETOKEN_ID:
        case ETOKEN_STRING:
            str << "[" << Color(C_LIGHT_RED) << token.text << Color(C_NO_COLOR) << "]";
            break;
        case ETOKEN_NUMBER_INT:
            str << "[" << Color(C_LIGHT_RED) << token.numberInt << Color(C_NO_COLOR) << "]";
            break;
        case ETOKEN_NUMBER_DOUBLE:
            str << "[" << Color(C_LIGHT_RED) << token.numberDouble << Color(C_NO_COLOR) << "]";
            break;
        default:
            break;
    }
    str << Color(C_NO_COLOR);
    return str;
}
 
/**
 * Create scanner reading from given stream.
 *
 * The result is a decorator chain: KwScanner wrapping FlexScanner.
 * @param input Stream to read the source from.
 * @param fileName Name (or alias) of the input, handed over to FlexScanner.
 * @return Return scanner allocated by operator new.
 */
IScanner* ScannerFactory::createScanner(std::istream &input, std::string fileName) {
    // construct decorator chain
    // ATTENTION: KwScanner object is responsible for destroying FlexScanner object
    IScanner *flexScanner = new FlexScanner(input, fileName);
    try {
        return new KwScanner(flexScanner);
    }
    catch (...) {
        // KwScanner has not been constructed, so nobody else is going to
        // destroy the FlexScanner object
        delete flexScanner;
        throw;
    }
}
 
/**
 * Read next valid token from the lexer.
 *
 * Tokens with invalid literal (integer, decimal, string) are reported as
 * error and skipped.
 * @param token Target to store the token.
 * @return Return false if the lexer returned a token type evaluating to false.
 */
bool FlexScanner::readNext(Token &token) {
    for (;;) {
        const EToken kind = flex_->readNext();
        if (!kind)
            // nothing more to read
            return false;

        // common part for all tokens
        token.type      = kind;
        token.lineno    = flex_->lineno();

        // token-specific actions, failure is set if literal is not valid
        const char *failure = 0;
        switch (kind) {
            case ETOKEN_ID:
                token.text = flex_->YYText();
                return true;

            case ETOKEN_NUMBER_INT:
                if (readNumber(flex_->YYText(), token.numberInt))
                    return true;
                failure = "invalid integral literal";
                break;

            case ETOKEN_NUMBER_DOUBLE:
                if (readNumber(flex_->YYText(), token.numberDouble))
                    return true;
                failure = "invalid decimal literal";
                break;

            case ETOKEN_STRING:
                if (readString(flex_->YYText(), token.text))
                    return true;
                failure = "invalid string literal";
                break;

            default:
                return true;
        }

        // report error and go on with the next token
        flex_->postError(failure);
    }
}
 
/**
 * Read next token from the wrapped scanner and change its type
 * if it is an identifier listed in the keyword map.
 * @param token Target to store the token.
 * @return Return value of the wrapped scanner's readNext().
 */
bool KwScanner::readNext(Token &token) {
    const bool hasToken = scan_->readNext(token);

    // only identifiers are candidates for keywords
    if (hasToken && ETOKEN_ID == token.type) {
        const TMap::const_iterator match = map_.find(token.text);
        if (map_.end() != match)
            // keyword match
            token.type = match->second;
    }

    return hasToken;
}
 
/// if you are looking where to add a new keyword, this is the best place ;-)
void KwScanner::initMap() {
    // initialize keyword->token mapping
    map_["and"]     = ETOKEN_KW_AND;
    map_["div"]     = ETOKEN_KW_DIV;
    map_["double"]  = ETOKEN_KW_DOUBLE;
    map_["else"]    = ETOKEN_KW_ELSE;
    map_["eq"]      = ETOKEN_KW_EQ;
    map_["if"]      = ETOKEN_KW_IF;
    map_["int"]     = ETOKEN_KW_INT;
    map_["neq"]     = ETOKEN_KW_NEQ;
    map_["not"]     = ETOKEN_KW_NOT;
    map_["or"]      = ETOKEN_KW_OR;
    map_["string"]  = ETOKEN_KW_STRING;
    map_["var"]     = ETOKEN_KW_VAR;
    map_["void"]    = ETOKEN_KW_VOID;
    map_["while"]   = ETOKEN_KW_WHILE;
}