PA2 seems ok

This commit is contained in:
ridethepig 2023-03-19 16:56:41 +00:00
parent 90aa2cbdf9
commit 88610ace11
3 changed files with 273 additions and 25 deletions

View File

@ -15,6 +15,7 @@ CFIL= ${CSRC} ${CGEN}
LSRC= Makefile LSRC= Makefile
OBJS= ${CFIL:.cc=.o} OBJS= ${CFIL:.cc=.o}
OUTPUT= test.output OUTPUT= test.output
TESTCASE=
CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN} CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
@ -22,7 +23,7 @@ CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
FFLAGS= -d -ocool-lex.cc FFLAGS= -d -ocool-lex.cc
CC=g++ CC=g++
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings ${CPPINCLUDE} CFLAGS= -g -Wall -Wno-unused -Wno-write-strings -std=c++11 ${CPPINCLUDE}
FLEX=flex ${FFLAGS} FLEX=flex ${FFLAGS}
DEPEND = ${CC} -MM ${CPPINCLUDE} DEPEND = ${CC} -MM ${CPPINCLUDE}
@ -68,3 +69,8 @@ clean-compile:
-include ${CFIL:.cc=.d} -include ${CFIL:.cc=.d}
# comparetest: run the reference lexer and the locally built ./lexer on
# ${TESTCASE}, then diff the two outputs. TESTCASE is empty by default in this
# Makefile; presumably it is supplied on the make command line -- TODO confirm.
# NOTE(review): with make's default error handling, a non-zero exit status from
# diff (outputs differ) stops the recipe before the rm line, so my.out and
# std.out are left behind in that case -- confirm this is intended.
comparetest:
${CLASSDIR}/bin/lexer ${TESTCASE} > std.out
./lexer ${TESTCASE} > my.out
diff my.out std.out
rm my.out std.out

View File

@ -1,4 +1,3 @@
%{
/* /*
* The scanner definition for COOL. * The scanner definition for COOL.
*/ */
@ -8,6 +7,7 @@
* output, so headers and global definitions are placed here to be visible * output, so headers and global definitions are placed here to be visible
* to the code in the file. Don't remove anything that was here initially * to the code in the file. Don't remove anything that was here initially
*/ */
%{
#include <cool-parse.h> #include <cool-parse.h>
#include <stringtab.h> #include <stringtab.h>
#include <utilities.h> #include <utilities.h>
@ -43,6 +43,8 @@ extern YYSTYPE cool_yylval;
* Add Your own definitions here * Add Your own definitions here
*/ */
/* Nesting depth of the current block comment beyond the outermost level:
 * reset to 0 when a comment is entered from outside, incremented on each
 * nested opener and decremented on each nested closer. */
int comment_nest_level;
/* Error state of the string constant being scanned:
 * 0 = no error, 1 = string too long, 2 = null character seen. */
int string_recover_error;
%} %}
/* /*
@ -50,51 +52,140 @@ extern YYSTYPE cool_yylval;
*/ */
DARROW => DARROW =>
DIGIT [0-9] LE <=
DIGITS {DIGIT}+ ASSIGN <-
/* */
/* Keywords Definition*/ /* Keywords Definition*/
/* Except for the constants true and false, keywords are case insensitive */ /* Except for the constants true and false, keywords are case insensitive */
K_CLASS (?i:class) /*
K_ELSE (?i:else) * In flex-old package, you cannot use the case-insensitive function (?i:keyword) ,
K_FI (?i:fi) * this is only available after 2.5.34, while our version is 2.5.4
K_IF (?i:if) */
K_IN (?i:in) CLASS [Cc][Ll][Aa][Ss][Ss]
K_INHERITS (?i:inherits) ELSE [Ee][Ll][Ss][Ee]
K_ISVOID (?i:isvoid) FI [Ff][Ii]
K_LET (?i:let) IF [Ii][Ff]
K_LOOP (?i:loop) IN [Ii][Nn]
K_POOL (?i:pool) INHERITS [Ii][Nn][Hh][Ee][Rr][Ii][Tt][Ss]
K_THEN (?i:then) ISVOID [Ii][Ss][Vv][Oo][Ii][Dd]
K_WHILE (?i:while) LET [Ll][Ee][Tt]
K_CASE (?i:case) LOOP [Ll][Oo][Oo][Pp]
K_ESAC (?i:esac) POOL [Pp][Oo][Oo][Ll]
K_NEW (?i:new) THEN [Tt][Hh][Ee][Nn]
K_OF (?i:of) WHILE [Ww][Hh][Ii][Ll][Ee]
K_NOT (?i:not) CASE [Cc][Aa][Ss][Ee]
ESAC [Ee][Ss][Aa][Cc]
NEW [Nn][Ee][Ww]
OF [Oo][Ff]
NOT [Nn][Oo][Tt]
/* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */ /* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */
K_TRUE t(?i:rue) TRUE t[Rr][Uu][Ee]
K_FALSE f(?i:alse) FALSE f[Aa][Ll][Ss][Ee]
%x NCOMMENT SCOMMENT STRING
%% %%
/* /*
* Nested comments * Nested comments
* Unlike C-style comments, COOL block comments may be nested.
* For example, `(* haha (* nested comment*) fufu*)` is valid, but `(* ho )* *)` or `(*(* hey*)` are invalid
* <<EOF>> rules may not be used with other patterns
*/
"(*" {
    /* Opener seen outside any comment: start at depth zero and switch to NCOMMENT. */
    comment_nest_level = 0;
    BEGIN(NCOMMENT);
}
<NCOMMENT>"(*" {
    /* An opener inside a comment goes one level deeper. */
    ++comment_nest_level;
}
<NCOMMENT>\n {
    /* Newlines inside comments still count towards line numbers. */
    ++curr_lineno;
}
<NCOMMENT><<EOF>> {
    /*
     * The comment is still open at end of input: report "EOF in comment"
     * and do not tokenize the comment contents.
     * The start condition is reset to INITIAL before returning; the
     * original author notes that otherwise there would be an EOF loop.
     */
    BEGIN(INITIAL);
    cool_yylval.error_msg = "EOF in comment";
    return ERROR;
}
<NCOMMENT>. {
    /* Any other character inside a comment is discarded. */
}
<NCOMMENT>"*)" {
    /* A closer either ends the outermost comment or pops one nesting level. */
    if (comment_nest_level <= 0) {
        BEGIN(INITIAL);
    } else {
        --comment_nest_level;
    }
}
/*
* If you see “*)” outside a comment, report this error as Unmatched *),
* rather than tokenizing it as * and ).
*/
"*)" {
    /*
     * A comment closer outside any comment is reported as a single error
     * token rather than being lexed as '*' followed by ')'.
     */
    cool_yylval.error_msg = "Unmatched *)";
    return ERROR;
}
/*
* Single line comment
* Any characters between two dashes “--” and the next newline (or EOF, if there is no next newline) are treated as comments.
*/ */
"--" {
    /* Two dashes start a comment that runs to the end of the line. */
    BEGIN(SCOMMENT);
}
<SCOMMENT>[^\n]+ {
    /* Discard the comment text. */
}
<SCOMMENT>\n {
    /*
     * The newline ends the comment. There is no explicit EOF rule for this
     * start condition: a comment cut short by end of input simply ends there.
     */
    BEGIN(INITIAL);
    ++curr_lineno;
}
/* /*
* The multiple-character operators. * The multiple-character operators.
*/ */
{DARROW} { return (DARROW); } {DARROW} { return (DARROW); }
{LE}        { return LE; }
{ASSIGN}    { return ASSIGN; }
    /*
     * Single-character symbols (e.g. ';' and ',') are represented by the
     * integer (ASCII) value of the character itself, so one character class
     * covers all of them: + / - * = < . ~ , ; : ( ) @ and both curly braces.
     */
[-+/*=<.~,;:()@\{\}]    { return yytext[0]; }
/* /*
* Keywords are case-insensitive except for the values true and false, * Keywords are case-insensitive except for the values true and false,
* which must begin with a lower-case letter. * which must begin with a lower-case letter.
*/ */
{CLASS}     { return CLASS; }
{ELSE}      { return ELSE; }
{FI}        { return FI; }
{IF}        { return IF; }
{IN}        { return IN; }
{INHERITS}  { return INHERITS; }
{ISVOID}    { return ISVOID; }
{LET}       { return LET; }
{LOOP}      { return LOOP; }
{POOL}      { return POOL; }
{THEN}      { return THEN; }
{WHILE}     { return WHILE; }
{CASE}      { return CASE; }
{ESAC}      { return ESAC; }
{NEW}       { return NEW; }
{OF}        { return OF; }
{NOT}       { return NOT; }
{TRUE}      {
    /* Boolean literals carry their value in the semantic record. */
    cool_yylval.boolean = 1;
    return BOOL_CONST;
}
{FALSE}     {
    cool_yylval.boolean = 0;
    return BOOL_CONST;
}
/* /*
* String constants (C syntax) * String constants (C syntax)
@ -102,6 +193,141 @@ K_FALSE f(?i:alse)
* \n \t \b \f, the result is c. * \n \t \b \f, the result is c.
* *
*/ */
\" {
    /*
     * Opening quote: clear the error flag, rewind the write pointer to the
     * start of the string buffer, then scan the body in the STRING condition.
     */
    string_recover_error = 0;
    string_buf_ptr = string_buf;
    BEGIN(STRING);
}
<STRING>[^\"\\\n\0] {
    /*
     * Ordinary string character: anything except a quote, a backslash, a
     * newline or a NUL. NUL is excluded from this class on purpose. A
     * negated class would otherwise match it, and because this rule is
     * listed before the dedicated <STRING>\0 rule (same match length, so
     * the earlier rule wins), the NUL rule could never fire.
     *
     * Once an error has been recorded the character is consumed but not
     * stored, so scanning can resume after the end of the string.
     */
    if (!string_recover_error) {
        *string_buf_ptr++ = yytext[0];
        if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
            string_recover_error = 1; /* string too long */
        }
    }
}
<STRING>\\(.|\n) {
    /*
     * Escape sequence: a backslash followed by any one character.
     * Within a string, \c denotes the character c, except for \b \t \n \f,
     * which denote backspace, tab, newline and form feed. A backslash
     * followed by a real newline continues the string on the next line and
     * contributes a newline character.
     */
    char escaped = yytext[1];
    char stored;

    /*
     * The escaped newline was consumed from the input, so it must be
     * counted even while skipping the rest of an erroneous string;
     * otherwise later line numbers would be off.
     */
    if (escaped == '\n') {
        curr_lineno += 1;
    }

    if (!string_recover_error) {
        if (escaped == '\0') {
            /*
             * A string may not contain the null character, escaped or not.
             * NOTE(review): this uses the same error code as an unescaped
             * NUL; confirm the expected message against the reference lexer.
             */
            string_recover_error = 2;
        } else {
            switch (escaped) {
            case 'b':  stored = '\b'; break;
            case 't':  stored = '\t'; break;
            case 'n':  stored = '\n'; break;
            case 'f':  stored = '\f'; break;
            default:   stored = escaped; break; /* includes the escaped newline */
            }
            *string_buf_ptr++ = stored;
            if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
                string_recover_error = 1; /* string too long */
            }
        }
    }
}
/*
* A string may not contain EOF.
* A string may not contain the null (character \0).
* Any other character may be included in a string.
*/
<STRING><<EOF>> {
    /*
     * End of input before the closing quote: report
     * "EOF in string constant". The start condition is switched back to
     * INITIAL before the error token is returned.
     */
    BEGIN(INITIAL);
    cool_yylval.error_msg = "EOF in string constant";
    return ERROR;
}
<STRING>\0 {
    /*
     * A null character is not allowed inside a string. Only the error code
     * is recorded here; the error token itself is produced by the rule that
     * matches the end of the string.
     */
    string_recover_error = 2;
}
/*
* In either case(null char | too long), lexing should resume after the end of the string.
* The end of the string is defined as either:
* the beginning of the next line if an unescaped newline occurs after these errors are encountered
* after the closing ” otherwise
*/
<STRING>\"|\n {
    /*
     * End of a string constant: either the closing quote or an unescaped
     * newline (an escaped newline is consumed by the escape rule and never
     * reaches this one). Scanning always resumes in INITIAL.
     *
     * Outcomes, in priority order:
     *   - string too long        -> ERROR "String constant too long"
     *   - null character seen    -> ERROR "String contains null character"
     *   - ended by a newline     -> ERROR "Unterminated string constant"
     *   - ended by closing quote -> STR_CONST with the collected text
     * For the first two errors, lexing resumes after the end of the string,
     * which is whichever of the quote or the unescaped newline comes first.
     */
    int ended_by_newline = (yytext[0] == '\n');

    BEGIN(INITIAL);
    if (ended_by_newline) {
        curr_lineno += 1;
    }

    if (string_recover_error == 1) {
        /* Previously this case reported "EOF in string constant" by mistake. */
        cool_yylval.error_msg = "String constant too long";
        return ERROR;
    }
    if (string_recover_error == 2) {
        cool_yylval.error_msg = "String contains null character";
        return ERROR;
    }
    if (ended_by_newline) {
        cool_yylval.error_msg = "Unterminated string constant";
        return ERROR;
    }

    /* The length is passed explicitly; the buffer is not NUL-terminated here. */
    cool_yylval.symbol = stringtable.add_string(string_buf, string_buf_ptr - string_buf);
    return STR_CONST;
}
/* Integer constants
* Integers are non-empty strings of digits 0-9
*/
[0-9]+ {
    /* A non-empty run of digits; the lexeme is added to the integer table as text. */
    cool_yylval.symbol = inttable.add_string(yytext);
    return INT_CONST;
}
/*
* Identifiers
* Identifiers are strings (other than keywords) consisting of letters, digits, and the underscore character.
* type identifiers begin with a capital letter
* object identifiers begin with a lower case letter
*/
[A-Z][A-Za-z0-9_]* {
    /* An identifier starting with a capital letter is a type identifier. */
    cool_yylval.symbol = idtable.add_string(yytext);
    return TYPEID;
}
[a-z][A-Za-z0-9_]* {
    /* An identifier starting with a lower-case letter is an object identifier. */
    cool_yylval.symbol = idtable.add_string(yytext);
    return OBJECTID;
}
/*
* Whitespace consists of any sequence of the characters: blank (ascii 32), \n (newline, ascii 10), \f (form
* feed, ascii 12), \r (carriage return, ascii 13), \t (tab, ascii 9), \v (vertical tab, ascii 11)
*/
[ \f\r\t\v]+ {
    /* Blanks, form feeds, carriage returns, tabs and vertical tabs are skipped. */
}
\n {
    /* A newline is whitespace too, but it also advances the line counter. */
    ++curr_lineno;
}
/*
* Invalid Character
* When an invalid character (one that cant begin any token) is encountered, a string containing just
* that character should be returned as the error string.
* Resume lexing at the following character.
*/
. {
    /*
     * Catch-all for a character that cannot begin any token: the error
     * string is a copy of just that character, and lexing resumes at the
     * following character.
     */
    char *bad_char = strdup(yytext);
    cool_yylval.error_msg = bad_char;
    return ERROR;
}
%% %%

View File

@ -95,3 +95,19 @@ class Main {
} }
}; };
}; };
-- fuc
>fuck > ? *)
(* (* (*shit
fuck
scheisse*)sdsd*)*)
sds
inttest: Int <- 0011234; inttest2: Int <- 0x011234;
--
trUE <= True = true < fALse FaLSE;
err_string "This is
not ok"
err_string "This is \
also not ok"
ok_string "This is \
ok"
escape_string "\"\n\\n\b\f\t\K\*"