PA2 seems ok

This commit is contained in:
ridethepig 2023-03-19 16:56:41 +00:00
parent 90aa2cbdf9
commit 88610ace11
3 changed files with 273 additions and 25 deletions

View File

@ -15,6 +15,7 @@ CFIL= ${CSRC} ${CGEN}
LSRC= Makefile
OBJS= ${CFIL:.cc=.o}
OUTPUT= test.output
TESTCASE=
CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
@ -22,7 +23,7 @@ CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
FFLAGS= -d -ocool-lex.cc
CC=g++
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings ${CPPINCLUDE}
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings -std=c++11 ${CPPINCLUDE}
FLEX=flex ${FFLAGS}
DEPEND = ${CC} -MM ${CPPINCLUDE}
@ -68,3 +69,8 @@ clean-compile:
-include ${CFIL:.cc=.d}
# comparetest: run ${CLASSDIR}/bin/lexer (presumably the reference lexer) and
# the locally built ./lexer on ${TESTCASE}, diff the two outputs, then delete
# the temporary output files.
# Note: make stops at the first failing command, so when diff reports a
# difference the rm step is skipped and my.out / std.out are left behind.
comparetest:
${CLASSDIR}/bin/lexer ${TESTCASE} > std.out
./lexer ${TESTCASE} > my.out
diff my.out std.out
rm my.out std.out

View File

@ -1,4 +1,3 @@
%{
/*
* The scanner definition for COOL.
*/
@ -8,6 +7,7 @@
* output, so headers and global definitions are placed here to be visible
* to the code in the file. Don't remove anything that was here initially
*/
%{
#include <cool-parse.h>
#include <stringtab.h>
#include <utilities.h>
@ -43,6 +43,8 @@ extern YYSTYPE cool_yylval;
* Add Your own definitions here
*/
int comment_nest_level;
int string_recover_error;
%}
/*
@ -50,51 +52,140 @@ extern YYSTYPE cool_yylval;
*/
DARROW =>
DIGIT [0-9]
DIGITS {DIGIT}+
/* */
LE <=
ASSIGN <-
/* Keywords Definition*/
/* Except for the constants true and false, keywords are case insensitive */
K_CLASS (?i:class)
K_ELSE (?i:else)
K_FI (?i:fi)
K_IF (?i:if)
K_IN (?i:in)
K_INHERITS (?i:inherits)
K_ISVOID (?i:isvoid)
K_LET (?i:let)
K_LOOP (?i:loop)
K_POOL (?i:pool)
K_THEN (?i:then)
K_WHILE (?i:while)
K_CASE (?i:case)
K_ESAC (?i:esac)
K_NEW (?i:new)
K_OF (?i:of)
K_NOT (?i:not)
/*
* The flex-old package does not support the case-insensitive syntax (?i:keyword);
* that syntax is only available after flex 2.5.34, while the version used here is 2.5.4.
*/
CLASS [Cc][Ll][Aa][Ss][Ss]
ELSE [Ee][Ll][Ss][Ee]
FI [Ff][Ii]
IF [Ii][Ff]
IN [Ii][Nn]
INHERITS [Ii][Nn][Hh][Ee][Rr][Ii][Tt][Ss]
ISVOID [Ii][Ss][Vv][Oo][Ii][Dd]
LET [Ll][Ee][Tt]
LOOP [Ll][Oo][Oo][Pp]
POOL [Pp][Oo][Oo][Ll]
THEN [Tt][Hh][Ee][Nn]
WHILE [Ww][Hh][Ii][Ll][Ee]
CASE [Cc][Aa][Ss][Ee]
ESAC [Ee][Ss][Aa][Cc]
NEW [Nn][Ee][Ww]
OF [Oo][Ff]
NOT [Nn][Oo][Tt]
/* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */
K_TRUE t(?i:rue)
K_FALSE f(?i:alse)
TRUE t[Rr][Uu][Ee]
FALSE f[Aa][Ll][Ss][Ee]
%x NCOMMENT SCOMMENT STRING
%%
/*
 * Nested comments
 * Unlike C-style comments, COOL block comments nest: each "(*" has to be closed by its own "*)".
 * For example, `(* haha (* nested comment*) fufu*)` is valid, while `(*(* hey*)` is left open.
 * <<EOF>> rules may not be combined with other patterns, so EOF gets a rule of its own.
 */
"(*" {
    /* Opener seen from normal code: start counting inner openers from zero. */
    comment_nest_level = 0;
    BEGIN(NCOMMENT);
}
<NCOMMENT>"(*" {
    /* An opener inside a comment adds one more level that has to be closed. */
    ++comment_nest_level;
}
<NCOMMENT>\n {
    /* Newlines inside a comment still advance the line counter. */
    ++curr_lineno;
}
<NCOMMENT><<EOF>> {
    /*
     * The comment is still open at end of input: report "EOF in comment"
     * and do not tokenize the comment's contents.  The start condition is
     * reset before returning; otherwise there would be an EOF loop.
     */
    BEGIN(INITIAL);
    cool_yylval.error_msg = "EOF in comment";
    return ERROR;
}
<NCOMMENT>. {
    /* Any other character inside a comment is discarded. */
}
<NCOMMENT>"*)" {
    /* A closer ends the whole comment only when no inner comment is open. */
    if (comment_nest_level <= 0) {
        BEGIN(INITIAL);
    } else {
        --comment_nest_level;
    }
}
/*
 * A "*)" that shows up outside of any comment is reported as the error
 * Unmatched *), rather than being tokenized as * and ).
 */
"*)" {
    cool_yylval.error_msg = "Unmatched *)";
    return ERROR;
}
/*
 * Single line comment
 * Everything between two dashes "--" and the next newline (or EOF, if there
 * is no next newline) is treated as a comment.
 */
"--" {
    BEGIN(SCOMMENT);
}
<SCOMMENT>. {
    /* Comment text is discarded. */
}
<SCOMMENT>\n {
    /*
     * The newline ends the comment.  There is no special rule for EOF:
     * reaching the end of the input ends the comment as well.
     */
    BEGIN(INITIAL);
    ++curr_lineno;
}
/*
 * The multiple-character operators: each one is returned as its own token.
 */
{DARROW} {
    return DARROW;
}
{LE} {
    return LE;
}
{ASSIGN} {
    return ASSIGN;
}
/*
 * The tokens for single character symbols (e.g. ";" and ",") are represented
 * just by the integer (ASCII) value of the character itself, so one
 * character-class rule can hand back the matched character directly.
 * Covered symbols: + / - * = < . ~ , ; : ( ) @ { }
 */
[-+/=*<.~,;:()@\{\}] {
    return yytext[0];
}
/*
 * Keywords are case-insensitive; the only exceptions are the values true
 * and false, which must begin with a lower-case letter.
 */
{CLASS}     { return CLASS; }
{ELSE}      { return ELSE; }
{FI}        { return FI; }
{IF}        { return IF; }
{IN}        { return IN; }
{INHERITS}  { return INHERITS; }
{ISVOID}    { return ISVOID; }
{LET}       { return LET; }
{LOOP}      { return LOOP; }
{POOL}      { return POOL; }
{THEN}      { return THEN; }
{WHILE}     { return WHILE; }
{CASE}      { return CASE; }
{ESAC}      { return ESAC; }
{NEW}       { return NEW; }
{OF}        { return OF; }
{NOT}       { return NOT; }
{TRUE} {
    /* Boolean constants carry their value in cool_yylval.boolean. */
    cool_yylval.boolean = 1;
    return BOOL_CONST;
}
{FALSE} {
    cool_yylval.boolean = 0;
    return BOOL_CONST;
}
/*
* String constants (C syntax)
@ -102,6 +193,141 @@ K_FALSE f(?i:alse)
* \n \t \b \f, the result is c.
*
*/
\" {
    /* Opening quote: start collecting a new string constant. */
    BEGIN(STRING);
    string_buf_ptr = string_buf;    /* next write goes to the start of the buffer */
    string_recover_error = 0;       /* 0 = no error, 1 = too long, 2 = null character */
}
<STRING>[^\"\\\n\0] {
    /*
     * Ordinary string character.  NUL is excluded from the class: for
     * matches of equal length flex picks the rule listed first, so without
     * the exclusion this rule would always win over the <STRING>\0 rule and
     * a null character would never be reported.
     */
    if (!string_recover_error) {
        *string_buf_ptr++ = yytext[0];
        if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
            string_recover_error = 1;   /* string too long */
        }
    }
}
<STRING>\\(.|\n) {
    /*
     * Escape sequence: a backslash followed by any single character.
     * \b, \t, \n and \f denote the matching control characters, a backslash
     * in front of a real newline keeps that newline in the string, and any
     * other \c denotes the character c itself.
     */
    if (yytext[1] == '\n') {
        /*
         * Count the line even while the rest of a broken string is being
         * skipped; otherwise every later line number would be off.
         */
        curr_lineno += 1;
    }
    if (!string_recover_error) {
        if (yytext[1] == '\0') {
            /*
             * A string may not contain the null character, escaped or not.
             * Flag it instead of storing it in the buffer.
             */
            string_recover_error = 2;
        } else {
            switch (yytext[1]) {
            case 'b': *string_buf_ptr = '\b'; break;
            case 't': *string_buf_ptr = '\t'; break;
            case 'n': *string_buf_ptr = '\n'; break;
            case 'f': *string_buf_ptr = '\f'; break;
            default:  *string_buf_ptr = yytext[1]; break;   /* also the escaped newline */
            }
            string_buf_ptr++;
            if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
                string_recover_error = 1;   /* string too long */
            }
        }
    }
}
/*
 * A string may contain any character except EOF and the null
 * character (\0).
 */
<STRING><<EOF>> {
    /*
     * EOF was encountered before the close-quote: report this error as
     * "EOF in string constant".  The start condition is reset first.
     */
    BEGIN(INITIAL);
    cool_yylval.error_msg = "EOF in string constant";
    return ERROR;
}
<STRING>\0 {
    /*
     * Only remember the null character here (error code 2); the error
     * token itself is produced by the rule that ends the string.
     */
    string_recover_error = 2;
}
/*
 * End of a string constant: either the closing quote or an unescaped newline.
 * After a null-character or too-long error, lexing resumes after the end of
 * the string, which is defined as either:
 *   - the beginning of the next line, if an unescaped newline occurs after
 *     the error was encountered, or
 *   - the position after the closing quote otherwise.
 */
<STRING>\"|\n {
    BEGIN(INITIAL);
    if (yytext[0] == '\n') {
        curr_lineno += 1;
    }
    if (string_recover_error == 1) {
        /*
         * The string was too long: report "String constant too long".
         * (This branch used to report "EOF in string constant" by mistake.)
         */
        cool_yylval.error_msg = "String constant too long";
        return (ERROR);
    }
    else if (string_recover_error == 2) {
        /*
         * The string contained an invalid character (the null character):
         * report this as "String contains null character".
         */
        cool_yylval.error_msg = "String contains null character";
        return (ERROR);
    }
    else if (!string_recover_error) {
        if (yytext[0] == '\"') {
            /* Well-formed string: store the collected characters in the string table. */
            cool_yylval.symbol = stringtable.add_string(string_buf, string_buf_ptr - string_buf);
            return (STR_CONST);
        }
        else if (yytext[0] == '\n') {
            /*
             * An unescaped newline inside a string (the escaped case should
             * have been captured by the escape rule): report
             * "Unterminated string constant" and resume lexing at the
             * beginning of the next line.
             */
            cool_yylval.error_msg = "Unterminated string constant";
            return (ERROR);
        }
    }
}
/*
 * Integer constants
 * Integers are non-empty strings of digits 0-9.
 */
[0-9]+ {
    /* The digit string is added to the integer table as written. */
    cool_yylval.symbol = inttable.add_string(yytext);
    return INT_CONST;
}
/*
 * Identifiers
 * Identifiers are strings (other than keywords) made of letters, digits and
 * the underscore character.  The first character decides the kind:
 *   capital letter    -> type identifier
 *   lower case letter -> object identifier
 */
[A-Z][A-Za-z0-9_]* {
    cool_yylval.symbol = idtable.add_string(yytext);
    return TYPEID;
}
[a-z][A-Za-z0-9_]* {
    cool_yylval.symbol = idtable.add_string(yytext);
    return OBJECTID;
}
/*
 * Whitespace is any sequence of the characters: blank (ascii 32),
 * \n (newline, ascii 10), \f (form feed, ascii 12), \r (carriage return,
 * ascii 13), \t (tab, ascii 9), \v (vertical tab, ascii 11).
 */
[\x20\f\r\t\v]+ {
    /* Whitespace other than newline is skipped without producing a token. */
}
\n {
    /* Newlines are skipped too, but they advance the line counter. */
    ++curr_lineno;
}
/*
 * Invalid character
 * When an invalid character (one that can't begin any token) is encountered,
 * a string containing just that character is returned as the error string.
 * Lexing resumes at the following character.
 */
. {
    /* The error message is a heap copy of the matched text (made with strdup). */
    cool_yylval.error_msg = strdup(yytext);
    return ERROR;
}
%%

View File

@ -95,3 +95,19 @@ class Main {
}
};
};
-- fuc
>fuck > ? *)
(* (* (*shit
fuck
scheisse*)sdsd*)*)
sds
inttest: Int <- 0011234; inttest2: Int <- 0x011234;
--
trUE <= True = true < fALse FaLSE;
err_string "This is
not ok"
err_string "This is \
also not ok"
ok_string "This is \
ok"
escape_string "\"\n\\n\b\f\t\K\*"