00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "config.h"
00021 #include "scanner.h"
00022
00023 #include "vypIO.h"
00024
00025 #ifndef BUILDING_DOX
00026 # include <boost/regex.hpp>
00027 # include <FlexLexer.h>
00028 # include <map>
00029 # include <sstream>
00030 #endif
00031
00032 using namespace StreamDecorator;
00033 using std::string;
00034
00040 class PrivateFlexLexer: public yyFlexLexer {
00041 public:
00042 PrivateFlexLexer(std::istream &input, string fileName):
00043 yyFlexLexer(&input, &std::cerr),
00044 fileName_(fileName),
00045 hasError_(false)
00046 {
00047 }
00049 bool hasError() const {
00050 return hasError_;
00051 }
00053 EToken readNext() {
00054 return static_cast<EToken>
00055 (this->yylex());
00056 }
00058 void postError(const char *msg) {
00059 this->LexerError(msg);
00060 }
00061 protected:
00063 virtual void LexerOutput(const char *buf, int size) {
00064 string msg(buf, size);
00065 this->LexerError(msg.c_str());
00066 }
00068 virtual void LexerError(const char *msg) {
00069 this->hasError_ = true;
00070 std::ostream &str = *(this->yyout);
00071 str << Error(E_ERROR, fileName_, msg, lineno(), "lexical error")
00072 << std::endl;
00073 }
00074 private:
00075 string fileName_;
00076 bool hasError_;
00077 };
00078
00082 class FlexScanner: public IScanner {
00083 public:
00084 FlexScanner(std::istream &input, string fileName) {
00085 flex_ = new PrivateFlexLexer(input, fileName);
00086 }
00087 virtual ~FlexScanner() {
00088 delete flex_;
00089 }
00090 virtual bool readNext(Token &token);
00091 virtual bool hasError() const {
00092 return flex_->hasError();
00093 }
00094 private:
00095 PrivateFlexLexer *flex_;
00096 };
00097
00102 class KwScanner: public IScanner {
00103 public:
00104 KwScanner(IScanner *scannerToUseAndDelete):
00105 scan_(scannerToUseAndDelete)
00106 {
00107 initMap();
00108 }
00109 virtual ~KwScanner() {
00110 delete scan_;
00111 }
00112 virtual bool readNext(Token &token);
00113 virtual bool hasError() const {
00114 return scan_->hasError();
00115 }
00116 private:
00117 typedef STD_MAP(string, EToken) TMap;
00118 TMap map_;
00119
00120 IScanner *scan_;
00121 void initMap();
00122 };
00123
00124 #ifndef BUILDING_DOX
00125 namespace {
00126 #endif
00127
/**
 * Parse a number from its textual form using stream extraction.
 *
 * Only the extraction result is checked; characters left in the string
 * after the number are not treated as an error.
 *
 * @param s Text to parse.
 * @param number Target of the extraction (operator>> on std::istringstream).
 * @return true if the extraction succeeded, false otherwise.
 */
template <typename T> bool readNumber(const std::string &s, T &number) {
    std::istringstream str(s);
    str >> number;

    // Explicit state check: returning the stream itself relies on an implicit
    // conversion to bool, which is not available since C++11.
    return !str.fail();
}
00138
/**
 * Decode a double-quoted string literal.
 *
 * The input has to start and end with a double quote (two distinct
 * characters). Inside the quotes, the escape sequences \n, \\ and \" are
 * translated; any other escape sequence, a backslash with nothing after it,
 * or a character with code lower than 32 (including an embedded NUL) makes
 * the literal invalid.
 *
 * @param in Literal including the surrounding quotes.
 * @param out Receives the decoded text on success. It is left untouched when
 * the literal is invalid.
 * @return true if the literal is valid, false otherwise.
 */
bool readString(const std::string &in, std::string &out) {
    const std::string::size_type len = in.size();

    // plain check of the surrounding quotes; no regex has to be compiled
    if (len < 2 || in[0] != '"' || in[len - 1] != '"')
        return false;

    // decode into a temporary so that 'out' is not clobbered on failure
    std::string decoded;
    decoded.reserve(len - 2);

    // walk through the body of the literal, which is in[1] .. in[len - 2]
    for (std::string::size_type i = 1; i + 1 < len; ++i) {
        const char c = in[i];
        if (static_cast<unsigned char>(c) < 32)
            // control characters are not allowed
            return false;

        if (c != '\\') {
            decoded.push_back(c);
            continue;
        }

        // move to the escaped character; it must still be inside the body
        if (++i + 1 >= len)
            return false;

        switch (in[i]) {
            case 'n':   decoded.push_back('\n');    break;
            case '\\':  decoded.push_back('\\');    break;
            case '"':   decoded.push_back('"');     break;
            default:
                // unknown escape sequence
                return false;
        }
    }

    out.swap(decoded);
    return true;
}
00178 #ifndef BUILDING_DOX
00179 }
00180 #endif
00181
00182 std::ostream& operator<<(std::ostream &str, EToken type) {
00183 str << Color(C_YELLOW);
00184 switch (type) {
00185 case ETOKEN_NULL: str << "T_NULL"; break;
00186 case ETOKEN_ID: str << "T_ID"; break;
00187 case ETOKEN_NUMBER_INT: str << "T_INT"; break;
00188 case ETOKEN_NUMBER_DOUBLE: str << "T_DOUBLE"; break;
00189 case ETOKEN_STRING: str << "T_STRING"; break;
00190 case ETOKEN_OP_LCBR:
00191 case ETOKEN_OP_RCBR:
00192 case ETOKEN_OP_LPAR:
00193 case ETOKEN_OP_RPAR:
00194 case ETOKEN_OP_STAR:
00195 case ETOKEN_OP_SLASH:
00196 case ETOKEN_OP_PLUS:
00197 case ETOKEN_OP_MINUS:
00198 case ETOKEN_OP_LESS:
00199 case ETOKEN_OP_LESS_EQ:
00200 case ETOKEN_OP_GREATER:
00201 case ETOKEN_OP_GREATER_EQ:
00202 case ETOKEN_OP_ASSIGN:
00203 case ETOKEN_OP_COMMA:
00204 case ETOKEN_OP_SEMICOLON:
00205 case ETOKEN_KW_AND:
00206 case ETOKEN_KW_DIV:
00207 case ETOKEN_KW_EQ:
00208 case ETOKEN_KW_OR:
00209 case ETOKEN_KW_NEQ:
00210 case ETOKEN_KW_NOT: str << "T_OP"; break;
00211 case ETOKEN_KW_DOUBLE:
00212 case ETOKEN_KW_ELSE:
00213 case ETOKEN_KW_IF:
00214 case ETOKEN_KW_INT:
00215 case ETOKEN_KW_STRING:
00216 case ETOKEN_KW_VAR:
00217 case ETOKEN_KW_VOID:
00218 case ETOKEN_KW_WHILE: str << "T_KEYWORD"; break;
00219 }
00220 str << Color(C_NO_COLOR);
00221 switch (type) {
00222 case ETOKEN_NULL:
00223 case ETOKEN_ID:
00224 case ETOKEN_NUMBER_INT:
00225 case ETOKEN_NUMBER_DOUBLE:
00226 case ETOKEN_STRING:
00227 break;
00228 default:
00229 str << "[" << Color(C_LIGHT_BLUE);
00230 switch (type) {
00231 case ETOKEN_NULL:
00232 case ETOKEN_ID:
00233 case ETOKEN_NUMBER_INT:
00234 case ETOKEN_NUMBER_DOUBLE:
00235 case ETOKEN_STRING:
00236 break;
00237 case ETOKEN_OP_LCBR: str << "{"; break;
00238 case ETOKEN_OP_RCBR: str << "}"; break;
00239 case ETOKEN_OP_LPAR: str << "("; break;
00240 case ETOKEN_OP_RPAR: str << ")"; break;
00241 case ETOKEN_OP_STAR: str << "*"; break;
00242 case ETOKEN_OP_SLASH: str << "/"; break;
00243 case ETOKEN_OP_PLUS: str << "+"; break;
00244 case ETOKEN_OP_MINUS: str << "-"; break;
00245 case ETOKEN_OP_LESS: str << "<"; break;
00246 case ETOKEN_OP_LESS_EQ: str << "<="; break;
00247 case ETOKEN_OP_GREATER: str << ">"; break;
00248 case ETOKEN_OP_GREATER_EQ: str << ">="; break;
00249 case ETOKEN_OP_ASSIGN: str << ":="; break;
00250 case ETOKEN_OP_COMMA: str << ","; break;
00251 case ETOKEN_OP_SEMICOLON: str << ";"; break;
00252 case ETOKEN_KW_AND: str << "and"; break;
00253 case ETOKEN_KW_DIV: str << "div"; break;
00254 case ETOKEN_KW_EQ: str << "eq"; break;
00255 case ETOKEN_KW_OR: str << "or"; break;
00256 case ETOKEN_KW_NEQ: str << "neq"; break;
00257 case ETOKEN_KW_NOT: str << "not"; break;
00258 case ETOKEN_KW_DOUBLE: str << "double"; break;
00259 case ETOKEN_KW_ELSE: str << "else"; break;
00260 case ETOKEN_KW_IF: str << "if"; break;
00261 case ETOKEN_KW_INT: str << "int"; break;
00262 case ETOKEN_KW_STRING: str << "string"; break;
00263 case ETOKEN_KW_VAR: str << "var"; break;
00264 case ETOKEN_KW_VOID: str << "void"; break;
00265 case ETOKEN_KW_WHILE: str << "while"; break;
00266 }
00267 str << Color(C_NO_COLOR) << "]";
00268 }
00269 return str;
00270 }
00271
00272 std::ostream& operator<<(std::ostream &str, const Token &token) {
00273
00274 str << Color(C_LIGHT_GREEN) << token.lineno << Color(C_NO_COLOR)
00275 << ":" << token.type;
00276
00277
00278 switch (token.type) {
00279 case ETOKEN_ID:
00280 case ETOKEN_STRING:
00281 str << "[" << Color(C_LIGHT_RED) << token.text << Color(C_NO_COLOR) << "]";
00282 break;
00283 case ETOKEN_NUMBER_INT:
00284 str << "[" << Color(C_LIGHT_RED) << token.numberInt << Color(C_NO_COLOR) << "]";
00285 break;
00286 case ETOKEN_NUMBER_DOUBLE:
00287 str << "[" << Color(C_LIGHT_RED) << token.numberDouble << Color(C_NO_COLOR) << "]";
00288 break;
00289 default:
00290 break;
00291 }
00292 str << Color(C_NO_COLOR);
00293 return str;
00294 }
00295
00296 IScanner* ScannerFactory::createScanner(std::istream &input, std::string fileName) {
00297
00298
00299 return new KwScanner(
00300 new FlexScanner(input, fileName));
00301 }
00302
00303 bool FlexScanner::readNext(Token &token) {
00304 EToken type;
00305 while ((type = flex_->readNext())) {
00306
00307 token.type = type;
00308 token.lineno = flex_->lineno();
00309
00310
00311 switch (type) {
00312 case ETOKEN_ID:
00313 token.text = flex_->YYText();
00314 return true;
00315 case ETOKEN_NUMBER_INT:
00316 if (readNumber(flex_->YYText(), token.numberInt))
00317 return true;
00318 else
00319 flex_->postError("invalid integral literal");
00320 break;
00321 case ETOKEN_NUMBER_DOUBLE:
00322 if (readNumber(flex_->YYText(), token.numberDouble))
00323 return true;
00324 else
00325 flex_->postError("invalid decimal literal");
00326 break;
00327 case ETOKEN_STRING:
00328 if (readString(flex_->YYText(), token.text))
00329 return true;
00330 else
00331 flex_->postError("invalid string literal");
00332 break;
00333 default:
00334 return true;
00335 }
00336 }
00337 return false;
00338 }
00339
00340 bool KwScanner::readNext(Token &token) {
00341 if (!scan_->readNext(token))
00342
00343 return false;
00344
00345 if (token.type != ETOKEN_ID)
00346
00347 return true;
00348
00349 TMap::const_iterator i = map_.find(token.text);
00350 if (i != map_.end())
00351
00352 token.type = i->second;
00353
00354 return true;
00355 }
00356
00358 void KwScanner::initMap() {
00359
00360 map_["and"] = ETOKEN_KW_AND;
00361 map_["div"] = ETOKEN_KW_DIV;
00362 map_["double"] = ETOKEN_KW_DOUBLE;
00363 map_["else"] = ETOKEN_KW_ELSE;
00364 map_["eq"] = ETOKEN_KW_EQ;
00365 map_["if"] = ETOKEN_KW_IF;
00366 map_["int"] = ETOKEN_KW_INT;
00367 map_["neq"] = ETOKEN_KW_NEQ;
00368 map_["not"] = ETOKEN_KW_NOT;
00369 map_["or"] = ETOKEN_KW_OR;
00370 map_["string"] = ETOKEN_KW_STRING;
00371 map_["var"] = ETOKEN_KW_VAR;
00372 map_["void"] = ETOKEN_KW_VOID;
00373 map_["while"] = ETOKEN_KW_WHILE;
00374 }