PA2 seems ok
This commit is contained in:
parent
90aa2cbdf9
commit
88610ace11
@ -15,6 +15,7 @@ CFIL= ${CSRC} ${CGEN}
|
|||||||
LSRC= Makefile
|
LSRC= Makefile
|
||||||
OBJS= ${CFIL:.cc=.o}
|
OBJS= ${CFIL:.cc=.o}
|
||||||
OUTPUT= test.output
|
OUTPUT= test.output
|
||||||
|
TESTCASE=
|
||||||
|
|
||||||
CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
|
CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
|
||||||
|
|
||||||
@ -22,7 +23,7 @@ CPPINCLUDE= -I. -I${CLASSDIR}/include/PA${ASSN} -I${CLASSDIR}/src/PA${ASSN}
|
|||||||
FFLAGS= -d -ocool-lex.cc
|
FFLAGS= -d -ocool-lex.cc
|
||||||
|
|
||||||
CC=g++
|
CC=g++
|
||||||
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings ${CPPINCLUDE}
|
CFLAGS= -g -Wall -Wno-unused -Wno-write-strings -std=c++11 ${CPPINCLUDE}
|
||||||
FLEX=flex ${FFLAGS}
|
FLEX=flex ${FFLAGS}
|
||||||
DEPEND = ${CC} -MM ${CPPINCLUDE}
|
DEPEND = ${CC} -MM ${CPPINCLUDE}
|
||||||
|
|
||||||
@ -68,3 +69,8 @@ clean-compile:
|
|||||||
-include ${CFIL:.cc=.d}
|
-include ${CFIL:.cc=.d}
|
||||||
|
|
||||||
|
|
||||||
|
# Compare this lexer's output with the reference lexer's output for ${TESTCASE}.
# Usage: make comparetest TESTCASE=<file.cl>
# The exit status of diff is preserved, so make still fails when the outputs
# differ, but the temporary files are removed in either case.
.PHONY: comparetest
comparetest:
	${CLASSDIR}/bin/lexer ${TESTCASE} > std.out
	./lexer ${TESTCASE} > my.out
	diff my.out std.out; status=$$?; rm -f my.out std.out; exit $$status
|
||||||
@ -1,4 +1,3 @@
|
|||||||
%{
|
|
||||||
/*
|
/*
|
||||||
* The scanner definition for COOL.
|
* The scanner definition for COOL.
|
||||||
*/
|
*/
|
||||||
@ -8,6 +7,7 @@
|
|||||||
* output, so headers and global definitions are placed here to be visible
|
* output, so headers and global definitions are placed here to be visible
|
||||||
* to the code in the file. Don't remove anything that was here initially
|
* to the code in the file. Don't remove anything that was here initially
|
||||||
*/
|
*/
|
||||||
|
%{
|
||||||
#include <cool-parse.h>
|
#include <cool-parse.h>
|
||||||
#include <stringtab.h>
|
#include <stringtab.h>
|
||||||
#include <utilities.h>
|
#include <utilities.h>
|
||||||
@ -43,6 +43,8 @@ extern YYSTYPE cool_yylval;
|
|||||||
* Add Your own definitions here
|
* Add Your own definitions here
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
int comment_nest_level; /* depth of nested block comments inside the outermost one; reset to 0 when a block comment is entered */
|
||||||
|
int string_recover_error; /* error state of the string being scanned: 0 = none, 1 = string too long, 2 = null character */
|
||||||
%}
|
%}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -50,51 +52,140 @@ extern YYSTYPE cool_yylval;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
DARROW =>
|
DARROW =>
|
||||||
DIGIT [0-9]
|
LE <=
|
||||||
DIGITS {DIGIT}+
|
ASSIGN <-
|
||||||
/* */
|
|
||||||
/* Keywords Definition*/
|
/* Keywords Definition*/
|
||||||
/* Except for the constants true and false, keywords are case insensitive */
|
/* Except for the constants true and false, keywords are case insensitive */
|
||||||
K_CLASS (?i:class)
|
/*
|
||||||
K_ELSE (?i:else)
|
* The flex-old package does not support the case-insensitive group syntax (?i:keyword);
|
||||||
K_FI (?i:fi)
|
* that syntax is only available after flex 2.5.34, while our version is 2.5.4.
|
||||||
K_IF (?i:if)
|
*/
|
||||||
K_IN (?i:in)
|
CLASS [Cc][Ll][Aa][Ss][Ss]
|
||||||
K_INHERITS (?i:inherits)
|
ELSE [Ee][Ll][Ss][Ee]
|
||||||
K_ISVOID (?i:isvoid)
|
FI [Ff][Ii]
|
||||||
K_LET (?i:let)
|
IF [Ii][Ff]
|
||||||
K_LOOP (?i:loop)
|
IN [Ii][Nn]
|
||||||
K_POOL (?i:pool)
|
INHERITS [Ii][Nn][Hh][Ee][Rr][Ii][Tt][Ss]
|
||||||
K_THEN (?i:then)
|
ISVOID [Ii][Ss][Vv][Oo][Ii][Dd]
|
||||||
K_WHILE (?i:while)
|
LET [Ll][Ee][Tt]
|
||||||
K_CASE (?i:case)
|
LOOP [Ll][Oo][Oo][Pp]
|
||||||
K_ESAC (?i:esac)
|
POOL [Pp][Oo][Oo][Ll]
|
||||||
K_NEW (?i:new)
|
THEN [Tt][Hh][Ee][Nn]
|
||||||
K_OF (?i:of)
|
WHILE [Ww][Hh][Ii][Ll][Ee]
|
||||||
K_NOT (?i:not)
|
CASE [Cc][Aa][Ss][Ee]
|
||||||
|
ESAC [Ee][Ss][Aa][Cc]
|
||||||
|
NEW [Nn][Ee][Ww]
|
||||||
|
OF [Oo][Ff]
|
||||||
|
NOT [Nn][Oo][Tt]
|
||||||
/* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */
|
/* the first letter of true/false must be lowercase; the trailing may be upper or lower case. */
|
||||||
K_TRUE t(?i:rue)
|
TRUE t[Rr][Uu][Ee]
|
||||||
K_FALSE f(?i:alse)
|
FALSE f[Aa][Ll][Ss][Ee]
|
||||||
|
|
||||||
|
|
||||||
|
%x NCOMMENT SCOMMENT STRING
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Nested comments
|
* Nested comments
|
||||||
|
* Unlike C-style comments, COOL block comments may be nested.
|
||||||
|
* For example, `(* haha (* nested comment*) fufu*)` is valid, but `(* ho )* *)` or `(*(* hey*)` are invalid
|
||||||
|
* <<EOF>> rules may not be used with other patterns
|
||||||
|
*/
|
||||||
"(*" {
    /* Start of an outermost block comment: the nesting depth starts at zero. */
    comment_nest_level = 0;
    BEGIN(NCOMMENT);
}
<NCOMMENT>"(*" {
    /* A comment opened inside a comment goes one level deeper. */
    comment_nest_level++;
}
<NCOMMENT>"*)" {
    /* Closing the outermost comment resumes normal scanning; otherwise pop one level. */
    if (comment_nest_level <= 0) {
        BEGIN(INITIAL);
    } else {
        comment_nest_level--;
    }
}
<NCOMMENT>\n {
    /* Keep the line counter accurate across multi-line comments. */
    curr_lineno++;
}
<NCOMMENT>. {
    /* Comment text is discarded. */
}
<NCOMMENT><<EOF>> {
    /*
     * The comment is still open at end of input. Leave the comment state
     * before reporting, otherwise this rule would fire again on every
     * following call. The contents of the comment are not tokenized.
     */
    BEGIN(INITIAL);
    cool_yylval.error_msg = "EOF in comment";
    return ERROR;
}
"*)" {
    /* A close marker outside any comment is a single error, not the tokens * and ). */
    cool_yylval.error_msg = "Unmatched *)";
    return ERROR;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Single line comment
|
||||||
|
* Any characters between two dashes “--” and the next newline (or EOF, if there is no next newline) are treated as comments.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
"--" {
    /* Two dashes start a comment that runs to the end of the line. */
    BEGIN(SCOMMENT);
}
<SCOMMENT>\n {
    /* The newline ends the comment; this state has no dedicated EOF rule. */
    BEGIN(INITIAL);
    curr_lineno++;
}
<SCOMMENT>. {
    /* Comment text is discarded. */
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The multiple-character operators.
|
* The multiple-character operators.
|
||||||
*/
|
*/
|
||||||
{DARROW} { return (DARROW); }
|
{DARROW} { return (DARROW); }
|
||||||
|
{LE}     { return LE; }
{ASSIGN} { return ASSIGN; }
[+/\-*=<.~,;:()@{}] {
    /*
     * A single-character symbol is represented by the integer (ASCII)
     * value of the character itself, so one character-class rule serves
     * all of them. The longer operators and comment markers that begin
     * with one of these characters still win, because the scanner always
     * takes the longest match.
     */
    return yytext[0];
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Keywords are case-insensitive except for the values true and false,
|
* Keywords are case-insensitive except for the values true and false,
|
||||||
* which must begin with a lower-case letter.
|
* which must begin with a lower-case letter.
|
||||||
*/
|
*/
|
||||||
|
{CLASS}    { return CLASS; }
{ELSE}     { return ELSE; }
{FI}       { return FI; }
{IF}       { return IF; }
{IN}       { return IN; }
{INHERITS} { return INHERITS; }
{ISVOID}   { return ISVOID; }
{LET}      { return LET; }
{LOOP}     { return LOOP; }
{POOL}     { return POOL; }
{THEN}     { return THEN; }
{WHILE}    { return WHILE; }
{CASE}     { return CASE; }
{ESAC}     { return ESAC; }
{NEW}      { return NEW; }
{OF}       { return OF; }
{NOT}      { return NOT; }
{TRUE} {
    /* Boolean constants carry their value in the semantic record. */
    cool_yylval.boolean = 1;
    return BOOL_CONST;
}
{FALSE} {
    cool_yylval.boolean = 0;
    return BOOL_CONST;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* String constants (C syntax)
|
* String constants (C syntax)
|
||||||
@ -102,6 +193,141 @@ K_FALSE f(?i:alse)
|
|||||||
* \n \t \b \f, the result is c.
|
* \n \t \b \f, the result is c.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
\" {
    /* Opening quote: start with an empty buffer and no pending error. */
    string_recover_error = 0;
    string_buf_ptr = string_buf;
    BEGIN(STRING);
}
|
||||||
|
<STRING>[^\"\\\n\0] {
    /*
     * An ordinary string character: anything except a quote, a backslash,
     * a newline or NUL. NUL is excluded from the class so that it reaches
     * the dedicated <STRING>\0 rule; otherwise this rule, being listed
     * first, wins the one-character tie and the NUL is stored silently.
     * Once an error is pending, characters are consumed but not stored.
     */
    if (!string_recover_error) {
        *string_buf_ptr++ = yytext[0];
        if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
            string_recover_error = 1; /* string too long */
        }
    }
}
|
||||||
|
<STRING>\\(.|\n) {
    /*
     * Escape sequence: a backslash followed by a character c denotes c
     * itself, except for \b \t \n \f, which denote the usual control
     * characters. A backslash followed by a real newline stores a newline
     * and lets the string continue on the next line.
     */
    char escaped = yytext[1];

    /*
     * Count the line even while an erroneous string is being skipped, so
     * that tokens after the string keep correct line numbers.
     */
    if (escaped == '\n') {
        curr_lineno += 1;
    }

    if (!string_recover_error) {
        if (escaped == '\0') {
            /* A string may not contain the null character, escaped or not. */
            string_recover_error = 2;
        } else {
            switch (escaped) {
            case 'b': *string_buf_ptr = '\b'; break;
            case 't': *string_buf_ptr = '\t'; break;
            case 'n': *string_buf_ptr = '\n'; break;
            case 'f': *string_buf_ptr = '\f'; break;
            default:  *string_buf_ptr = escaped; break;
            }
            string_buf_ptr++;
            if (string_buf_ptr >= string_buf + MAX_STR_CONST) {
                string_recover_error = 1; /* string too long */
            }
        }
    }
}
|
||||||
|
/*
|
||||||
|
* A string may not contain EOF.
|
||||||
|
* A string may not contain the null (character \0).
|
||||||
|
* Any other character may be included in a string.
|
||||||
|
*/
|
||||||
|
<STRING><<EOF>> {
    /*
     * The input ended before the closing quote. The string state is left
     * before the error token is returned.
     */
    BEGIN(INITIAL);
    cool_yylval.error_msg = "EOF in string constant";
    return ERROR;
}
<STRING>\0 {
    /* Remember the null character; the error is reported when the string ends. */
    string_recover_error = 2;
}
|
||||||
|
/*
|
||||||
|
* In either case(null char | too long), lexing should resume after the end of the string.
|
||||||
|
* The end of the string is defined as either:
|
||||||
|
* the beginning of the next line if an unescaped newline occurs after these errors are encountered
|
||||||
|
* after the closing ” otherwise
|
||||||
|
*/
|
||||||
|
<STRING>\"|\n {
    /*
     * End of the string: either the closing quote or an unescaped newline
     * (an escaped newline is consumed by the escape rule and never gets
     * here). In every case scanning resumes in the initial state, which is
     * how lexing recovers after a string that was too long or contained a
     * null character.
     */
    const int at_newline = (yytext[0] == '\n');

    BEGIN(INITIAL);
    if (at_newline) {
        curr_lineno += 1;
    }

    switch (string_recover_error) {
    case 0:
        if (at_newline) {
            /* An unescaped newline inside a string; lexing resumes on the next line. */
            cool_yylval.error_msg = "Unterminated string constant";
            return ERROR;
        }
        cool_yylval.symbol = stringtable.add_string(string_buf, string_buf_ptr - string_buf);
        return STR_CONST;
    case 1:
        /* The string did not fit into the string buffer. */
        cool_yylval.error_msg = "String constant too long";
        return ERROR;
    case 2:
        /* The string contained the null character. */
        cool_yylval.error_msg = "String contains null character";
        return ERROR;
    default:
        /* No other error state is assigned by the string rules. */
        break;
    }
}
|
||||||
|
|
||||||
|
/* Integer constants
|
||||||
|
* Integers are non-empty strings of digits 0-9
|
||||||
|
*/
|
||||||
|
[0-9]+ {
    /* Integer constant: a non-empty run of the digits 0-9, recorded in inttable. */
    cool_yylval.symbol = inttable.add_string(yytext);
    return INT_CONST;
}
[A-Z][A-Za-z0-9_]* {
    /*
     * Type identifier: letters, digits and underscores, beginning with a
     * capital letter. Keywords are matched by the keyword rules instead.
     */
    cool_yylval.symbol = idtable.add_string(yytext);
    return TYPEID;
}
[a-z][A-Za-z0-9_]* {
    /* Object identifier: same alphabet, beginning with a lower case letter. */
    cool_yylval.symbol = idtable.add_string(yytext);
    return OBJECTID;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Whitespace consists of any sequence of the characters: blank (ascii 32), \n (newline, ascii 10), \f (form
|
||||||
|
* feed, ascii 12), \r (carriage return, ascii 13), \t (tab, ascii 9), \v (vertical tab, ascii 11)
|
||||||
|
*/
|
||||||
|
[\x20\f\r\t\v]+ {
    /* Blanks, form feeds, carriage returns, tabs and vertical tabs are skipped. */
}
\n {
    /* A newline is whitespace as well, but it also advances the line counter. */
    curr_lineno++;
}
. {
    /*
     * Any character no other rule accepts is invalid. The error string
     * holds just that character, and lexing resumes at the next one.
     */
    cool_yylval.error_msg = strdup(yytext);
    return ERROR;
}
|
||||||
%%
|
%%
|
||||||
|
|||||||
@ -95,3 +95,19 @@ class Main {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
-- fuc
|
||||||
|
>fuck > ? *)
|
||||||
|
(* (* (*shit
|
||||||
|
fuck
|
||||||
|
scheisse*)sdsd*)*)
|
||||||
|
sds
|
||||||
|
inttest: Int <- 0011234; inttest2: Int <- 0x011234;
|
||||||
|
--
|
||||||
|
trUE <= True = true < fALse FaLSE;
|
||||||
|
err_string "This is
|
||||||
|
not ok"
|
||||||
|
err_string "This is \
|
||||||
|
also not ok"
|
||||||
|
ok_string "This is \
|
||||||
|
ok"
|
||||||
|
escape_string "\"\n\\n\b\f\t\K\*"
|
||||||
Loading…
Reference in New Issue
Block a user