PA2: COOL lexer rules, `comparetest` Makefile target, extra lexer test input

Makefile
  * Adds an empty TESTCASE variable and -std=c++11 to CFLAGS.
  * Adds `comparetest`: runs ${CLASSDIR}/bin/lexer and ./lexer on ${TESTCASE}
    and diffs the two outputs.  The temporary outputs are removed even when
    diff reports a mismatch, and diff's exit status is what make sees.

cool.flex
  * Replaces the (?i:...) keyword definitions with explicit character classes
    and adds rules for comments, operators, keywords, strings, integers,
    identifiers, whitespace and invalid characters.
  * Rules that run inside the exclusive start conditions declared by
    `%x NCOMMENT SCOMMENT STRING` carry their <STATE> prefix; end-of-file
    rules are spelled <<EOF>>.  Unprefixed rules are active in INITIAL only.
  * STRING state:
      - the ordinary-character class excludes NUL, otherwise it ties with the
        later `\0` rule and wins, so the NUL error could never be raised;
      - an escaped newline bumps curr_lineno even while the rest of a bad
        string is being skipped;
      - an over-long string reports "String constant too long".

test.cl
  * Appends input exercising comments, operators, integers, booleans and
    string edge cases.

NOTE(review): blank context lines and leading whitespace are not recoverable
from the flattened patch and were re-inserted to satisfy the hunk line counts;
run `git apply --check` against the tree before applying.
NOTE(review): the three `#include <...>` context lines are assumed to be the
stock course-skeleton headers -- confirm against the checked-in cool.flex.
NOTE(review): the `index` hashes are those of the original patch.

diff --git a/assignments/PA2/Makefile b/assignments/PA2/Makefile
index fa068dc..e8c00d9 100644
--- a/assignments/PA2/Makefile
+++ b/assignments/PA2/Makefile
@@ -15,6 +15,7 @@ CFIL= ${CSRC} ${CGEN}
 LSRC= Makefile
 OBJS= ${CFIL:.cc=.o}
 OUTPUT= test.output
+TESTCASE=
 
 CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
 
@@ -22,7 +23,7 @@ CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
 FFLAGS= -d -ocool-lex.cc
 
 CC=g++
-CFLAGS= -g -Wall -Wno-unused -Wno-write-strings ${CPPINCLUDE}
+CFLAGS= -g -Wall -Wno-unused -Wno-write-strings -std=c++11 ${CPPINCLUDE}
 FLEX=flex ${FFLAGS}
 DEPEND = ${CC} -MM ${CPPINCLUDE}
 
@@ -68,3 +69,8 @@ clean-compile:
 
 -include ${CFIL:.cc=.d}
 
+.PHONY: comparetest
+comparetest:
+	${CLASSDIR}/bin/lexer ${TESTCASE} > std.out
+	./lexer ${TESTCASE} > my.out
+	diff my.out std.out; status=$$?; rm -f my.out std.out; exit $$status
\ No newline at end of file
diff --git a/assignments/PA2/cool.flex b/assignments/PA2/cool.flex
index 700d0ab..4a34985 100644
--- a/assignments/PA2/cool.flex
+++ b/assignments/PA2/cool.flex
@@ -1,4 +1,3 @@
-%{
 /*
  *  The scanner definition for COOL.
  */
@@ -8,6 +7,7 @@
  *  output, so headers and global definitions are placed here to be visible
  * to the code in the file.  Don't remove anything that was here initially
  */
+%{
 #include <cool-parse.h>
 #include <stringtab.h>
 #include <utilities.h>
@@ -43,6 +43,8 @@ extern YYSTYPE cool_yylval;
  *  Add Your own definitions here
  */
 
+int comment_nest_level;
+int string_recover_error;
 %}
 
 /*
@@ -50,51 +52,140 @@ extern YYSTYPE cool_yylval;
  */
 
 DARROW          =>
-DIGIT           [0-9]
-DIGITS          {DIGIT}+
-/* */
+LE              <=
+ASSIGN          <-
+
 /* Keywords Definition*/
 /* Except for the constants true and false, keywords are case insensitive */
-K_CLASS         (?i:class)
-K_ELSE          (?i:else)
-K_FI            (?i:fi)
-K_IF            (?i:if)
-K_IN            (?i:in)
-K_INHERITS      (?i:inherits)
-K_ISVOID        (?i:isvoid)
-K_LET           (?i:let)
-K_LOOP          (?i:loop)
-K_POOL          (?i:pool)
-K_THEN          (?i:then)
-K_WHILE         (?i:while)
-K_CASE          (?i:case)
-K_ESAC          (?i:esac)
-K_NEW           (?i:new)
-K_OF            (?i:of)
-K_NOT           (?i:not)
+/*
+ * In the flex-old package you cannot use the case-insensitive form (?i:keyword);
+ * it is only available after 2.5.34, while our version is 2.5.4
+ */
+CLASS           [Cc][Ll][Aa][Ss][Ss]
+ELSE            [Ee][Ll][Ss][Ee]
+FI              [Ff][Ii]
+IF              [Ii][Ff]
+IN              [Ii][Nn]
+INHERITS        [Ii][Nn][Hh][Ee][Rr][Ii][Tt][Ss]
+ISVOID          [Ii][Ss][Vv][Oo][Ii][Dd]
+LET             [Ll][Ee][Tt]
+LOOP            [Ll][Oo][Oo][Pp]
+POOL            [Pp][Oo][Oo][Ll]
+THEN            [Tt][Hh][Ee][Nn]
+WHILE           [Ww][Hh][Ii][Ll][Ee]
+CASE            [Cc][Aa][Ss][Ee]
+ESAC            [Ee][Ss][Aa][Cc]
+NEW             [Nn][Ee][Ww]
+OF              [Oo][Ff]
+NOT             [Nn][Oo][Tt]
 
 /* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */
-K_TRUE          t(?i:rue)
-K_FALSE         f(?i:alse)
+TRUE            t[Rr][Uu][Ee]
+FALSE           f[Aa][Ll][Ss][Ee]
+%x NCOMMENT SCOMMENT STRING
 
 %%
 
  /*
   *  Nested comments
+  * Really strange, not the same as C style comment, nested comments are accepted.
+  * For example, `(* haha (* nested comment*) fufu*)` is valid, but `(* ho )* *)` or `(*(* hey*)` are invalid
+  * <<EOF>> rules may not be used with other patterns
+  */
+"(*" {
+    BEGIN(NCOMMENT);
+    comment_nest_level = 0; /* entering from outside, initialize nest level */
+}
+<NCOMMENT>"(*" { comment_nest_level += 1; /* new nested comment, increase level */ }
+<NCOMMENT>\n { curr_lineno += 1; }
+<NCOMMENT><<EOF>> {
+    cool_yylval.error_msg = "EOF in comment";
+    BEGIN(INITIAL); /* first return to outside, or there will be an EOF loop*/
+    return (ERROR);
+    /*
+     * If a comment remains open when EOF is encountered, report this error with the message ‘‘EOF
+     * in comment’’. Do not tokenize the comment’s contents simply because the terminator is missing.
+     */
+}
+<NCOMMENT>. { /* comment text is discarded */ }
+<NCOMMENT>"*)" {
+    if (comment_nest_level > 0) comment_nest_level -= 1; /* decrease nest level unless the outmost*/
+    else BEGIN(INITIAL);
+}
+ /*
+  * If you see “*)” outside a comment, report this error as ‘‘Unmatched *)’’,
+  * rather than tokenizing it as * and ).
+  */
+"*)" {
+    cool_yylval.error_msg = "Unmatched *)";
+    return (ERROR);
+}
+
+ /*
+  * Single line comment
+  * Any characters between two dashes “--” and the next newline (or EOF, if there is no next newline) are treated as comments.
   */
+"--" { BEGIN(SCOMMENT); }
+<SCOMMENT>. { /* comment text is discarded */ }
+<SCOMMENT>\n {
+    curr_lineno += 1;
+    BEGIN(INITIAL);
+    /* there's no special case for single line comment, EOF will auto end */
+}
 
 
  /*
   *  The multiple-character operators.
   */
 {DARROW}		{ return (DARROW); }
+{LE}			{ return (LE); }
+{ASSIGN}		{ return (ASSIGN); }
+
+ /*
+  * The tokens for single character symbols (e.g., “;” and “,”)
+  * are represented just by the integer (ASCII) value of the character itself.
+  */
+"+" { return '+'; }
+"/" { return '/'; }
+"-" { return '-'; }
+"*" { return '*'; }
+"=" { return '='; }
+"<" { return '<'; }
+"." { return '.'; }
+"~" { return '~'; }
+"," { return ','; }
+";" { return ';'; }
+":" { return ':'; }
+"(" { return '('; }
+")" { return ')'; }
+"@" { return '@'; }
+"{" { return '{'; }
+"}" { return '}'; }
 
  /*
   * Keywords are case-insensitive except for the values true and false,
   * which must begin with a lower-case letter.
   */
-
+{CLASS}		{ return (CLASS); }
+{ELSE}		{ return (ELSE); }
+{FI}		{ return (FI); }
+{IF}		{ return (IF); }
+{IN}		{ return (IN); }
+{INHERITS}	{ return (INHERITS); }
+{ISVOID}	{ return (ISVOID); }
+{LET}		{ return (LET); }
+{LOOP}		{ return (LOOP); }
+{POOL}		{ return (POOL); }
+{THEN}		{ return (THEN); }
+{WHILE}		{ return (WHILE); }
+{CASE}		{ return (CASE); }
+{ESAC}		{ return (ESAC); }
+{NEW}		{ return (NEW); }
+{OF}		{ return (OF); }
+{NOT}		{ return (NOT); }
+{TRUE}		{ cool_yylval.boolean = 1; return(BOOL_CONST); }
+{FALSE}		{ cool_yylval.boolean = 0; return(BOOL_CONST); }
 
 
  /*
   *  String constants (C syntax)
@@ -102,6 +193,141 @@ K_FALSE f(?i:alse)
   *  \n \t \b \f, the result is c.
   *
   */
+\" {
+    BEGIN(STRING);
+    string_buf_ptr = string_buf; /* reset string buf ptr*/
+    string_recover_error = 0; /* reset error flag*/
+}
+<STRING>[^\"\\\n\0] {
+    if (!string_recover_error){
+        *string_buf_ptr = yytext[0];
+        string_buf_ptr ++;
+        if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
+            string_recover_error = 1; /* string too long */
+        }
+    }
+}
+<STRING>\\(.|\n) {
+    if (yytext[1] == '\n') curr_lineno += 1; /* count the line even while skipping a bad string */
+    if (!string_recover_error){
+        /*
+         * Within a string, a sequence ‘\c’ denotes the
+         * character ‘c’, except \b \t \n \f
+         */
+        switch(yytext[1]) {
+            case 'b': *string_buf_ptr = '\b'; break;
+            case 't': *string_buf_ptr = '\t'; break;
+            case 'n': *string_buf_ptr = '\n'; break;
+            case 'f': *string_buf_ptr = '\f'; break;
+            default: *string_buf_ptr = yytext[1]; break; /* includes an escaped newline */
+        }
+        string_buf_ptr ++;
+        if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
+            string_recover_error = 1; /* string too long */
+        }
+    }
+}
+ /*
+  * A string may not contain EOF.
+  * A string may not contain the null (character \0).
+  * Any other character may be included in a string.
+  */
+<STRING><<EOF>> {
+    cool_yylval.error_msg = "EOF in string constant";
+    BEGIN(INITIAL);
+    return (ERROR);
+    /*
+     * for strings, if an EOF is encountered before the close-quote,
+     * report this error as ‘‘EOF in string constant’’.
+     */
+}
+<STRING>\0 {
+    string_recover_error = 2; /* null character */
+}
+ /*
+  * In either case(null char | too long), lexing should resume after the end of the string.
+  * The end of the string is defined as either:
+  * the beginning of the next line if an unescaped newline occurs after these errors are encountered
+  * after the closing ” otherwise
+  */
+<STRING>\"|\n {
+    BEGIN(INITIAL);
+    if (yytext[0] == '\n') {
+        curr_lineno += 1;
+    }
+    if (!string_recover_error) {
+        if (yytext[0] == '\"') {
+            cool_yylval.symbol = stringtable.add_string(string_buf, string_buf_ptr - string_buf);
+            return (STR_CONST);
+        }
+        else if (yytext[0] == '\n') {
+            /* the escaped case should have been captured in the escape rule */
+            cool_yylval.error_msg = "Unterminated string constant";
+            return (ERROR);
+            /*
+             * If a string contains an unescaped newline,
+             * report that error as ‘‘Unterminated string constant’’
+             * and resume lexing at the beginning of the next line
+             */
+        }
+    }
+    else if (string_recover_error == 1) {
+        cool_yylval.error_msg = "String constant too long";
+        return (ERROR);
+        /*
+         * When a string is too long,
+         * report the error as ‘‘String constant too long’’
+         * in the error string in the ERROR token.
+         */
+    }
+    else if (string_recover_error == 2){
+        cool_yylval.error_msg = "String contains null character";
+        return (ERROR);
+        /*
+         * If the string contains invalid characters (i.e., the null character),
+         * report this as ‘‘String contains null character’’.
+         */
+    }
+}
+ /* Integer constants
+  * Integers are non-empty strings of digits 0-9
+  */
+[0-9]+ {
+    cool_yylval.symbol = inttable.add_string(yytext);
+    return (INT_CONST);
+}
+ /*
+  * Identifiers
+  * Identifiers are strings (other than keywords) consisting of letters, digits, and the underscore character.
+  * type identifiers begin with a capital letter
+  * object identifiers begin with a lower case letter
+  */
+[A-Z][A-Za-z0-9_]* {
+    cool_yylval.symbol = idtable.add_string(yytext);
+    return (TYPEID);
+}
+[a-z][A-Za-z0-9_]* {
+    cool_yylval.symbol = idtable.add_string(yytext);
+    return (OBJECTID);
+}
+
+ /*
+  * Whitespace consists of any sequence of the characters: blank (ascii 32), \n (newline, ascii 10), \f (form
+  * feed, ascii 12), \r (carriage return, ascii 13), \t (tab, ascii 9), \v (vertical tab, ascii 11)
+  */
+[\x20\f\r\t\v]+ { /* whitespace is discarded */ }
+\n { curr_lineno += 1; }
+
+ /*
+  * Invalid Character
+  * When an invalid character (one that can’t begin any token) is encountered, a string containing just
+  * that character should be returned as the error string.
+  * Resume lexing at the following character.
+  */
+. {
+    cool_yylval.error_msg = strdup(yytext);
+    return (ERROR);
+}
 
 
 %%
diff --git a/assignments/PA2/test.cl b/assignments/PA2/test.cl
index 04c8101..87f84da 100644
--- a/assignments/PA2/test.cl
+++ b/assignments/PA2/test.cl
@@ -95,3 +95,19 @@ class Main {
       }
    };
 };
+    -- foo
+    >junk > ? *)
+    (* (* (*text
+    more
+    words*)sdsd*)*)
+    sds
+    inttest: Int <- 0011234; inttest2: Int <- 0x011234;
+--
+    trUE <= True = true < fALse FaLSE;
+    err_string "This is
+    not ok"
+    err_string "This is \
+    also not ok"
+    ok_string "This is \
+    ok"
+    escape_string "\"\n\\n\b\f\t\K\*"
\ No newline at end of file