PA2 seems ok
This commit is contained in:
parent
90aa2cbdf9
commit
88610ace11
@ -15,6 +15,7 @@ CFIL= ${CSRC} ${CGEN}
|
||||
LSRC= Makefile
|
||||
OBJS= ${CFIL:.cc=.o}
|
||||
OUTPUT= test.output
|
||||
TESTCASE=
|
||||
|
||||
CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
|
||||
|
||||
@ -22,7 +23,7 @@ CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
|
||||
FFLAGS= -d -ocool-lex.cc
|
||||
|
||||
CC=g++
|
||||
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings ${CPPINCLUDE}
|
||||
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings -std=c++11 ${CPPINCLUDE}
|
||||
FLEX=flex ${FFLAGS}
|
||||
DEPEND = ${CC} -MM ${CPPINCLUDE}
|
||||
|
||||
@ -68,3 +69,8 @@ clean-compile:
|
||||
-include ${CFIL:.cc=.d}
|
||||
|
||||
|
||||
comparetest:
|
||||
${CLASSDIR}/bin/lexer ${TESTCASE} > std.out
|
||||
./lexer ${TESTCASE} > my.out
|
||||
diff my.out std.out
|
||||
rm my.out std.out
|
||||
@ -1,4 +1,3 @@
|
||||
%{
|
||||
/*
|
||||
* The scanner definition for COOL.
|
||||
*/
|
||||
@ -8,6 +7,7 @@
|
||||
* output, so headers and global definitions are placed here to be visible
|
||||
* to the code in the file. Don't remove anything that was here initially
|
||||
*/
|
||||
%{
|
||||
#include <cool-parse.h>
|
||||
#include <stringtab.h>
|
||||
#include <utilities.h>
|
||||
@ -43,6 +43,8 @@ extern YYSTYPE cool_yylval;
|
||||
* Add Your own definitions here
|
||||
*/
|
||||
|
||||
int comment_nest_level;
|
||||
int string_recover_error;
|
||||
%}
|
||||
|
||||
/*
|
||||
@ -50,51 +52,140 @@ extern YYSTYPE cool_yylval;
|
||||
*/
|
||||
|
||||
DARROW =>
|
||||
DIGIT [0-9]
|
||||
DIGITS {DIGIT}+
|
||||
/* */
|
||||
LE <=
|
||||
ASSIGN <-
|
||||
|
||||
/* Keywords Definition*/
|
||||
/* Except for the constants true and false, keywords are case insensitive */
|
||||
K_CLASS (?i:class)
|
||||
K_ELSE (?i:else)
|
||||
K_FI (?i:fi)
|
||||
K_IF (?i:if)
|
||||
K_IN (?i:in)
|
||||
K_INHERITS (?i:inherits)
|
||||
K_ISVOID (?i:isvoid)
|
||||
K_LET (?i:let)
|
||||
K_LOOP (?i:loop)
|
||||
K_POOL (?i:pool)
|
||||
K_THEN (?i:then)
|
||||
K_WHILE (?i:while)
|
||||
K_CASE (?i:case)
|
||||
K_ESAC (?i:esac)
|
||||
K_NEW (?i:new)
|
||||
K_OF (?i:of)
|
||||
K_NOT (?i:not)
|
||||
/*
|
||||
* The flex-old package does not support the case-insensitive group syntax (?i:keyword);
|
||||
* this is only available after 2.5.34, while our version is 2.5.4
|
||||
*/
|
||||
CLASS [Cc][Ll][Aa][Ss][Ss]
|
||||
ELSE [Ee][Ll][Ss][Ee]
|
||||
FI [Ff][Ii]
|
||||
IF [Ii][Ff]
|
||||
IN [Ii][Nn]
|
||||
INHERITS [Ii][Nn][Hh][Ee][Rr][Ii][Tt][Ss]
|
||||
ISVOID [Ii][Ss][Vv][Oo][Ii][Dd]
|
||||
LET [Ll][Ee][Tt]
|
||||
LOOP [Ll][Oo][Oo][Pp]
|
||||
POOL [Pp][Oo][Oo][Ll]
|
||||
THEN [Tt][Hh][Ee][Nn]
|
||||
WHILE [Ww][Hh][Ii][Ll][Ee]
|
||||
CASE [Cc][Aa][Ss][Ee]
|
||||
ESAC [Ee][Ss][Aa][Cc]
|
||||
NEW [Nn][Ee][Ww]
|
||||
OF [Oo][Ff]
|
||||
NOT [Nn][Oo][Tt]
|
||||
/* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */
|
||||
K_TRUE t(?i:rue)
|
||||
K_FALSE f(?i:alse)
|
||||
TRUE t[Rr][Uu][Ee]
|
||||
FALSE f[Aa][Ll][Ss][Ee]
|
||||
|
||||
|
||||
%x NCOMMENT SCOMMENT STRING
|
||||
|
||||
%%
|
||||
|
||||
/*
|
||||
* Nested comments
|
||||
* Unlike C-style comments, COOL block comments may be nested.
|
||||
* For example, `(* haha (* nested comment*) fufu*)` is valid; `(* ho )* *)` is also accepted because `)*` is plain comment text; `(*(* hey*)` is invalid because the outer comment is never closed.
|
||||
* <<EOF>> rules may not be used with other patterns
|
||||
*/
|
||||
"(*" {
|
||||
BEGIN(NCOMMENT);
|
||||
comment_nest_level = 0; /* entering from outside, initialize nest level */
|
||||
}
|
||||
<NCOMMENT>"(*" { comment_nest_level += 1; /* new nested comment, increase level */ }
|
||||
<NCOMMENT>\n { curr_lineno += 1; }
|
||||
<NCOMMENT><<EOF>> {
|
||||
cool_yylval.error_msg = "EOF in comment";
|
||||
BEGIN(INITIAL); /* first return to outside, or there will be an EOF loop*/
|
||||
return (ERROR);
|
||||
/*
|
||||
* If a comment remains open when EOF is encountered, report this error with the message ‘‘EOF
|
||||
* in comment’’. Do not tokenize the comment’s contents simply because the terminator is missing.
|
||||
*/
|
||||
}
|
||||
<NCOMMENT>.
|
||||
<NCOMMENT>"*)" {
|
||||
if (comment_nest_level > 0) comment_nest_level -= 1; /* decrease nest level unless the outmost*/
|
||||
else BEGIN(INITIAL);
|
||||
}
|
||||
/*
|
||||
* If you see “*)” outside a comment, report this error as ‘‘Unmatched *)’’,
|
||||
* rather than tokenizing it as * and ).
|
||||
*/
|
||||
"*)" {
|
||||
cool_yylval.error_msg = "Unmatched *)";
|
||||
return (ERROR);
|
||||
}
|
||||
|
||||
/*
|
||||
* Single line comment
|
||||
* Any characters between two dashes “--” and the next newline (or EOF, if there is no next newline) are treated as comments.
|
||||
*/
|
||||
|
||||
"--" { BEGIN(SCOMMENT); }
|
||||
<SCOMMENT>.
|
||||
<SCOMMENT>\n {
|
||||
curr_lineno += 1;
|
||||
BEGIN(INITIAL);
|
||||
/* there's no special case for single line comment, EOF will auto end */
|
||||
}
|
||||
|
||||
/*
|
||||
* The multiple-character operators.
|
||||
*/
|
||||
{DARROW} { return (DARROW); }
|
||||
{LE} { return (LE); }
|
||||
{ASSIGN} { return (ASSIGN); }
|
||||
|
||||
/*
|
||||
* The tokens for single character symbols (e.g., “;” and “,”)
|
||||
* are represented just by the integer (ASCII) value of the character itself.
|
||||
*/
|
||||
"+" { return '+'; }
|
||||
"/" { return '/'; }
|
||||
"-" { return '-'; }
|
||||
"*" { return '*'; }
|
||||
"=" { return '='; }
|
||||
"<" { return '<'; }
|
||||
"." { return '.'; }
|
||||
"~" { return '~'; }
|
||||
"," { return ','; }
|
||||
";" { return ';'; }
|
||||
":" { return ':'; }
|
||||
"(" { return '('; }
|
||||
")" { return ')'; }
|
||||
"@" { return '@'; }
|
||||
"{" { return '{'; }
|
||||
"}" { return '}'; }
|
||||
|
||||
/*
|
||||
* Keywords are case-insensitive except for the values true and false,
|
||||
* which must begin with a lower-case letter.
|
||||
*/
|
||||
|
||||
{CLASS} { return (CLASS); }
|
||||
{ELSE} { return (ELSE); }
|
||||
{FI} { return (FI); }
|
||||
{IF} { return (IF); }
|
||||
{IN} { return (IN); }
|
||||
{INHERITS} { return (INHERITS); }
|
||||
{ISVOID} { return (ISVOID); }
|
||||
{LET} { return (LET); }
|
||||
{LOOP} { return (LOOP); }
|
||||
{POOL} { return (POOL); }
|
||||
{THEN} { return (THEN); }
|
||||
{WHILE} { return (WHILE); }
|
||||
{CASE} { return (CASE); }
|
||||
{ESAC} { return (ESAC); }
|
||||
{NEW} { return (NEW); }
|
||||
{OF} { return (OF); }
|
||||
{NOT} { return (NOT); }
|
||||
{TRUE} { cool_yylval.boolean = 1; return(BOOL_CONST); }
|
||||
{FALSE} { cool_yylval.boolean = 0; return(BOOL_CONST); }
|
||||
|
||||
/*
|
||||
* String constants (C syntax)
|
||||
@ -102,6 +193,141 @@ K_FALSE f(?i:alse)
|
||||
* \n \t \b \f, the result is c.
|
||||
*
|
||||
*/
|
||||
\" {
|
||||
BEGIN(STRING);
|
||||
string_buf_ptr = string_buf; /* reset string buf ptr*/
|
||||
string_recover_error = 0; /* reset error flag*/
|
||||
}
|
||||
<STRING>[^\"\\\n\0] {
    /*
     * Ordinary string character: anything except the closing quote, a
     * backslash (start of an escape), a newline (unterminated string) or
     * NUL.
     *
     * NUL must be excluded from this class. A negated class would otherwise
     * match it, and because flex breaks equal-length ties in favour of the
     * rule listed first, the dedicated <STRING>\0 rule would never fire and
     * the null character would be stored in the string instead of reported.
     *
     * Once an error has been recorded the rest of the string is skipped, so
     * nothing more is written to the buffer.
     */
    if (!string_recover_error) {
        *string_buf_ptr++ = yytext[0];
        if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
            string_recover_error = 1; /* string too long */
        }
    }
}
|
||||
<STRING>\\(.|\n) {
    /*
     * Escape sequence. Within a string, `\c` denotes the character `c`,
     * except for \b \t \n \f, which denote backspace, tab, newline and
     * form feed. A backslash followed by a real newline contributes a
     * newline to the string.
     */

    /*
     * Count the physical line even while the remainder of an erroneous
     * string is being skipped; otherwise every later token would be
     * reported one line too early.
     */
    if (yytext[1] == '\n') {
        curr_lineno += 1;
    }

    if (!string_recover_error) {
        char decoded;

        switch (yytext[1]) {
            case 'b': decoded = '\b'; break;
            case 't': decoded = '\t'; break;
            case 'n': decoded = '\n'; break;
            case 'f': decoded = '\f'; break;
            default:  decoded = yytext[1]; break; /* includes escaped newline */
        }

        if (decoded == '\0') {
            /*
             * `.` also matches NUL, so a backslash followed by NUL lands
             * here. A string may not contain the null character, so record
             * the same error as for an unescaped NUL instead of storing it.
             */
            string_recover_error = 2; /* null character */
        } else {
            *string_buf_ptr++ = decoded;
            if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
                string_recover_error = 1; /* string too long */
            }
        }
    }
}
|
||||
/*
|
||||
* A string may not contain EOF.
|
||||
* A string may not contain the null (character \0).
|
||||
* Any other character may be included in a string.
|
||||
*/
|
||||
<STRING><<EOF>> {
|
||||
cool_yylval.error_msg = "EOF in string constant";
|
||||
BEGIN(INITIAL);
|
||||
return (ERROR);
|
||||
/*
|
||||
* for strings, if an EOF is encountered before the close-quote,
|
||||
* report this error as ‘‘EOF in string constant’’.
|
||||
*/
|
||||
}
|
||||
<STRING>\0 {
|
||||
string_recover_error = 2; /* null character */
|
||||
}
|
||||
/*
|
||||
* In either case(null char | too long), lexing should resume after the end of the string.
|
||||
* The end of the string is defined as either:
|
||||
* the beginning of the next line if an unescaped newline occurs after these errors are encountered
|
||||
* after the closing ” otherwise
|
||||
*/
|
||||
<STRING>\"|\n {
    /*
     * End of a string constant: either the closing quote or an unescaped
     * newline (an escaped newline is consumed by the escape rule and never
     * reaches this one). Lexing always resumes in the INITIAL state.
     *
     * string_recover_error selects the result:
     *   0 - no error so far: a quote yields STR_CONST, a newline yields
     *       "Unterminated string constant"
     *   1 - the string overflowed the buffer
     *   2 - the string contained a null character
     */
    BEGIN(INITIAL);
    if (yytext[0] == '\n') {
        curr_lineno += 1;
    }
    if (!string_recover_error) {
        if (yytext[0] == '\"') {
            /*
             * With no error recorded, string_buf_ptr is still inside the
             * buffer, so terminating it here is in bounds.
             * NOTE(review): add_string is given an explicit length; the
             * terminator is a safeguard in case it also inspects the buffer
             * as a C string - confirm against the string table code.
             */
            *string_buf_ptr = '\0';
            cool_yylval.symbol = stringtable.add_string(string_buf, string_buf_ptr - string_buf);
            return (STR_CONST);
        }
        else if (yytext[0] == '\n') {
            /*
             * If a string contains an unescaped newline,
             * report that error as "Unterminated string constant"
             * and resume lexing at the beginning of the next line.
             * The escaped case should have been captured in the escape rule.
             */
            cool_yylval.error_msg = "Unterminated string constant";
            return (ERROR);
        }
    }
    else if (string_recover_error == 1) {
        /*
         * When a string is too long,
         * report the error as "String constant too long"
         * in the error string in the ERROR token.
         */
        cool_yylval.error_msg = "String constant too long";
        return (ERROR);
    }
    else if (string_recover_error == 2) {
        /*
         * If the string contains invalid characters (i.e., the null character),
         * report this as "String contains null character".
         */
        cool_yylval.error_msg = "String contains null character";
        return (ERROR);
    }
}
|
||||
|
||||
/* Integer constants
|
||||
* Integers are non-empty strings of digits 0-9
|
||||
*/
|
||||
[0-9]+ {
|
||||
cool_yylval.symbol = inttable.add_string(yytext);
|
||||
return (INT_CONST);
|
||||
}
|
||||
|
||||
/*
|
||||
* Identifiers
|
||||
* Identifiers are strings (other than keywords) consisting of letters, digits, and the underscore character.
|
||||
* type identifiers begin with a capital letter
|
||||
* object identifiers begin with a lower case letter
|
||||
*/
|
||||
[A-Z][A-Za-z0-9_]* {
|
||||
cool_yylval.symbol = idtable.add_string(yytext);
|
||||
return (TYPEID);
|
||||
}
|
||||
[a-z][A-Za-z0-9_]* {
|
||||
cool_yylval.symbol = idtable.add_string(yytext);
|
||||
return (OBJECTID);
|
||||
}
|
||||
|
||||
/*
|
||||
* Whitespace consists of any sequence of the characters: blank (ascii 32), \n (newline, ascii 10), \f (form
|
||||
* feed, ascii 12), \r (carriage return, ascii 13), \t (tab, ascii 9), \v (vertical tab, ascii 11)
|
||||
*/
|
||||
[\x20\f\r\t\v]+
|
||||
\n { curr_lineno += 1; }
|
||||
|
||||
/*
|
||||
* Invalid Character
|
||||
* When an invalid character (one that can’t begin any token) is encountered, a string containing just
|
||||
* that character should be returned as the error string.
|
||||
* Resume lexing at the following character.
|
||||
*/
|
||||
. {
|
||||
cool_yylval.error_msg = strdup(yytext);
|
||||
return (ERROR);
|
||||
}
|
||||
%%
|
||||
|
||||
@ -95,3 +95,19 @@ class Main {
|
||||
}
|
||||
};
|
||||
};
|
||||
-- fuc
|
||||
>fuck > ? *)
|
||||
(* (* (*shit
|
||||
fuck
|
||||
scheisse*)sdsd*)*)
|
||||
sds
|
||||
inttest: Int <- 0011234; inttest2: Int <- 0x011234;
|
||||
--
|
||||
trUE <= True = true < fALse FaLSE;
|
||||
err_string "This is
|
||||
not ok"
|
||||
err_string "This is \
|
||||
also not ok"
|
||||
ok_string "This is \
|
||||
ok"
|
||||
escape_string "\"\n\\n\b\f\t\K\*"
|
||||
Loading…
Reference in New Issue
Block a user