From 194806b17d3309202ddaf7a981ec02581984f033 Mon Sep 17 00:00:00 2001 From: Alex Merry Date: Sat, 23 Mar 2013 15:49:16 +0000 Subject: Bring back parser/lexer error reporting Even better than before! --- tikzit/src/common/TikzGraphAssembler+Parser.h | 6 +- tikzit/src/common/TikzGraphAssembler.h | 4 +- tikzit/src/common/TikzGraphAssembler.m | 233 ++++++++++++++------------ tikzit/src/common/tikzlexer.lm | 81 +++++---- tikzit/src/common/tikzparser.ym | 34 +++- tikzit/src/common/util.h | 2 + tikzit/src/common/util.m | 15 ++ tikzit/src/osx/TikzSourceController.m | 4 +- 8 files changed, 222 insertions(+), 157 deletions(-) diff --git a/tikzit/src/common/TikzGraphAssembler+Parser.h b/tikzit/src/common/TikzGraphAssembler+Parser.h index efceae9..55fa901 100644 --- a/tikzit/src/common/TikzGraphAssembler+Parser.h +++ b/tikzit/src/common/TikzGraphAssembler+Parser.h @@ -30,14 +30,10 @@ - (void) addNodeToMap:(Node*)n; /** Get a previously-stored node by name */ - (Node*) nodeWithName:(NSString*)name; -- (void) newLineStarted:(char *)text; -- (void) incrementPosBy:(size_t)amount; -- (void) invalidateWithError:(const char *)message; +- (void) reportError:(const char *)message atLocation:(YYLTYPE*)yylloc; - (void*) scanner; @end #define YY_EXTRA_TYPE TikzGraphAssembler * -#define YYLEX_PARAM [assembler scanner] -void yyerror(TikzGraphAssembler *assembler, const char *str); // vi:ft=objc:noet:ts=4:sts=4:sw=4 diff --git a/tikzit/src/common/TikzGraphAssembler.h b/tikzit/src/common/TikzGraphAssembler.h index 16fdf7f..37279e5 100644 --- a/tikzit/src/common/TikzGraphAssembler.h +++ b/tikzit/src/common/TikzGraphAssembler.h @@ -25,13 +25,11 @@ #import "Graph.h" @interface TikzGraphAssembler : NSObject { + const char *tikzStr; Graph *graph; void *scanner; NSMutableDictionary *nodeMap; NSError *lastError; - char linebuff[500]; - int lineno; - size_t tokenpos; } + (BOOL) parseTikz:(NSString*)tikz forGraph:(Graph*)gr error:(NSError**)e; diff --git a/tikzit/src/common/TikzGraphAssembler.m b/tikzit/src/common/TikzGraphAssembler.m index 5a01036..a6f0e3d 100644 --- a/tikzit/src/common/TikzGraphAssembler.m +++ b/tikzit/src/common/TikzGraphAssembler.m @@ -22,15 +22,11 @@ // #import "TikzGraphAssembler.h" -#import "TikzGraphAssembler+Parser.h" #import "tikzparser.h" +#import "TikzGraphAssembler+Parser.h" #import "tikzlexer.h" #import "NSError+Tikzit.h" -void yyerror(TikzGraphAssembler *assembler, const char *str) { - [assembler invalidateWithError:str]; -} - @implementation TikzGraphAssembler - (id)init { @@ -57,6 +53,37 @@ void yyerror(TikzGraphAssembler *assembler, const char *str) { [super dealloc]; } +- (BOOL) parseTikz:(NSString*)t error:(NSError**)error { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + + tikzStr = [t UTF8String]; + yy_scan_string(tikzStr, scanner); + int result = yyparse(self); + tikzStr = NULL; + + [pool drain]; + + if (result == 0) { + return YES; + } else { + if (error) { + if (lastError) { + *error = [[lastError retain] autorelease]; + } else if (result == 1) { + *error = [NSError errorWithMessage:@"Syntax error" + code:TZ_ERR_PARSE]; + } else if (result == 2) { + *error = [NSError errorWithMessage:@"Insufficient memory" + code:TZ_ERR_PARSE]; + } else { + *error = [NSError errorWithMessage:@"Unknown error" + code:TZ_ERR_PARSE]; + } + } + return NO; + } +} + + (BOOL) parseTikz:(NSString*)tikz forGraph:(Graph*)gr { return [self parseTikz:tikz forGraph:gr error:NULL]; } @@ -74,60 +101,15 @@ void yyerror(TikzGraphAssembler *assembler, const char *str) { } + (BOOL) parseTikz:(NSString*)tikz forGraph:(Graph*)gr error:(NSError**)error { - if([tikz length] == 0) { // empty string -> empty graph return YES; } - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; TikzGraphAssembler *assembler = [[self alloc] initWithGraph:gr]; - [assembler autorelease]; - - /* - lineno = 1; - tokenpos = 0; - NSRange range = [tikz rangeOfString:@"\n"]; - NSString *firstLine; - if (range.length == 0) { - firstLine = tikz; - } else { - firstLine = [tikz substringToIndex:range.location]; - } - if (![firstLine getCString:linebuff - maxLength:500 - encoding:NSUTF8StringEncoding]) { - // first line too long; just terminate it at the end of the buffer - linebuff[499] = 0; - } - */ - - yy_scan_string([tikz UTF8String], [assembler scanner]); - int result = yyparse(assembler); - - [pool drain]; - if (result == 0) { - return YES; - } else { - if (error) { - /* - if (lastError) { - *error = [[lastError retain] autorelease]; - } else - */ - if (result == 1) { - *error = [NSError errorWithMessage:@"Syntax error" - code:TZ_ERR_PARSE]; - } else if (result == 2) { - *error = [NSError errorWithMessage:@"Insufficient memory" - code:TZ_ERR_PARSE]; - } else { - *error = [NSError errorWithMessage:@"Unknown error" - code:TZ_ERR_PARSE]; - } - } - return NO; - } + BOOL success = [assembler parseTikz:tikz error:error]; + [assembler release]; + return success; } + (BOOL)validateTikzPropertyNameOrValue:(NSString*)tikz { @@ -140,7 +122,8 @@ void yyerror(TikzGraphAssembler *assembler, const char *str) { yyset_extra(nil, scanner); yy_scan_string([testTikz UTF8String], scanner); YYSTYPE lval; - yylex(&lval, scanner); + YYLTYPE lloc; + yylex(&lval, &lloc, scanner); r = !(yyget_leng(scanner) < [testTikz length]); yylex_destroy(scanner); [testTikz autorelease]; @@ -148,70 +131,112 @@ void yyerror(TikzGraphAssembler *assembler, const char *str) { return r; } -- (void)invalidate { - [graph release]; - graph = nil; - lastError = nil; -} - @end @implementation TikzGraphAssembler (Parser) - (Graph*)graph { return graph; } + - (void)addNodeToMap:(Node*)n { [nodeMap setObject:n forKey:[n name]]; } + - (Node*)nodeWithName:(NSString*)name { return [nodeMap objectForKey:name]; } -- (void) newLineStarted:(char *)text { - /* - strncpy(linebuff, yytext+1, 500); - linebuff[499] = 0; // ensure null-terminated - lineno++; - tokenpos = 0; - */ -} -- (void) incrementPosBy:(size_t)amount { - //tokenpos += amount; + +- (void) setLastError:(NSError*)error { + [error retain]; + [lastError release]; + lastError = error; } -- (void) invalidateWithError:(const char *)message { - /* - // if the error is on the first line, treat specially - if([assembler lineNumber] == 1){ - //strcpy(linebuff, yytext+1); - NSLog(@"Problem ahoy!"); - } +- (void) reportError:(const char *)message atLocation:(YYLTYPE*)yylloc { + NSString *nsmsg = [NSString stringWithUTF8String:message]; - NSString *pointerStrPad = [@"" stringByPaddingToLength:(tokenpos-yyleng) - withString:@" " - startingAtIndex:0]; - NSString *pointerStr = [@"" stringByPaddingToLength:yyleng - withString:@"^" - startingAtIndex:0]; - NSLog(@"Parse error on line %i: %s\n%s\n%@\n", lineno, str, linebuff, - [pointerStrPad stringByAppendingString:pointerStr]); - NSDictionary *userInfo = - [NSDictionary dictionaryWithObjectsAndKeys: - [NSString stringWithUTF8String:str], - NSLocalizedDescriptionKey, - [NSNumber numberWithInt:lineno], - @"lineNumber", - [NSString stringWithUTF8String:linebuff], - @"syntaxString", - [NSNumber numberWithInt:tokenpos], - @"tokenStart", - [NSNumber numberWithInt:yyleng], - @"tokenLength"]; - NSError *error = - [NSError errorWithDomain:@"net.sourceforge.tikzit" - code:TZ_ERR_PARSE - userInfo:userInfo]; + const char *first_line_start = find_start_of_nth_line ( + tikzStr, yylloc->first_line - 1); + const char *last_line_start = find_start_of_nth_line ( + first_line_start, yylloc->last_line - yylloc->first_line); + const char *last_line_end = last_line_start; + while (*last_line_end && *last_line_end != '\n') { + // points to just after end of last line + ++last_line_end; + } + const char *error_start = first_line_start + (yylloc->first_column - 1); + const char *error_end = last_line_start + (yylloc->last_column - 1); - lastError = [error retain]; - */ - [self invalidate]; + if (error_start > error_end || error_end > last_line_end) { + // error position state is corrupted + NSLog(@"Got bad error state for error \"%s\": start(%i,%i), end(%i,%i)", + message, + yylloc->first_line, + yylloc->first_column, + yylloc->last_line, + yylloc->last_column); + [self setLastError:[NSError errorWithMessage:nsmsg + code:TZ_ERR_PARSE]]; + } else { + // +1 for null terminator + size_t error_text_len = last_line_end - first_line_start; + char *error_text = malloc (error_text_len + 1); + strncpy (error_text, first_line_start, error_text_len); + *(error_text + error_text_len) = '\0'; + + int error_start_pos = error_start - first_line_start; + int error_end_pos = error_end - first_line_start; + + NSDictionary *userInfo = + [NSDictionary dictionaryWithObjectsAndKeys: + nsmsg, + NSLocalizedDescriptionKey, + [NSNumber numberWithInt:yylloc->first_line], + @"startLine", + [NSNumber numberWithInt:yylloc->first_column], + @"startColumn", + [NSNumber numberWithInt:yylloc->last_line], + @"endLine", + [NSNumber numberWithInt:yylloc->last_column], + @"endColumn", + [NSString stringWithUTF8String:error_text], + @"syntaxString", + [NSNumber numberWithInt:error_start_pos], + @"tokenStart", + [NSNumber numberWithInt:error_end_pos], + @"tokenLength", + nil]; + [self setLastError: + [NSError errorWithDomain:TZErrorDomain + code:TZ_ERR_PARSE + userInfo:userInfo]]; + + // we can now freely edit error_text + // we only bother printing out the first line + if (yylloc->last_line > yylloc->first_line) { + char *nlp = strchr(error_text, '\n'); + if (nlp) { + *nlp = '\0'; + error_text_len = nlp - error_text; + if (error_end_pos > error_text_len) + error_end_pos = error_text_len; + } + } + NSString *pointerLinePadding = + [@"" stringByPaddingToLength:error_start_pos + withString:@" " + startingAtIndex:0]; + NSString *pointerLineCarets = + [@"" stringByPaddingToLength:(error_end_pos - error_start_pos) + withString:@"^" + startingAtIndex:0]; + NSLog(@"Parse error on line %i, starting at %i: %s\n%s\n%@%@", + yylloc->first_line, + yylloc->first_column, + message, + error_text, + pointerLinePadding, + pointerLineCarets); + free (error_text); + } } - (void*) scanner { return scanner; } @end diff --git a/tikzit/src/common/tikzlexer.lm b/tikzit/src/common/tikzlexer.lm index 3e2e0ed..7966028 100644 --- a/tikzit/src/common/tikzlexer.lm +++ b/tikzit/src/common/tikzlexer.lm @@ -22,9 +22,15 @@ #import #import "tikzparser.h" +#define YY_USER_ACTION \ + yylloc->first_line = yylloc->last_line; \ + if (yylloc->last_line != 1 || yylloc->last_column != 1) \ + yylloc->first_column = yylloc->last_column + 1; \ + yylloc->last_column = yylloc->first_column + yyleng; + %} -%option reentrant bison-bridge 8bit +%option reentrant bison-bridge bison-locations 8bit %option nounput %option yylineno %option noyywrap @@ -39,81 +45,76 @@ FLOAT \-?[0-9]*(\.[0-9]+)? %% -\n.* { - [yyextra newLineStarted:yytext+1]; - yyless(1); +\n { + yylloc->first_line += 1; + yylloc->last_line = yylloc->first_line; + yylloc->first_column = yylloc->last_column = 0; +} +[ ]+ { } /* ignore whitespace */; +[\t]+ { + // tab = 8 columns + // note that we have already adjusted by yyleng at this point + yylloc->last_column = yylloc->first_column + 7*yyleng; } -[ ]+ { [yyextra incrementPosBy:yyleng]; } /* ignore whitespace */; -[\t]+ { [yyextra incrementPosBy:8*yyleng]; } /* ignore whitespace */; -\\begin\{tikzpicture\} { [yyextra incrementPosBy:yyleng]; return BEGIN_TIKZPICTURE_CMD; } -\\end\{tikzpicture\} { [yyextra incrementPosBy:yyleng]; return END_TIKZPICTURE_CMD; } -\\begin\{pgfonlayer\} { [yyextra incrementPosBy:yyleng]; return BEGIN_PGFONLAYER_CMD; } -\\end\{pgfonlayer\} { [yyextra incrementPosBy:yyleng]; return END_PGFONLAYER_CMD; } -\\draw { [yyextra incrementPosBy:yyleng]; return DRAW_CMD; } -\\node { [yyextra incrementPosBy:yyleng]; return NODE_CMD; } -\\path { [yyextra incrementPosBy:yyleng]; return PATH_CMD; } -rectangle { [yyextra incrementPosBy:yyleng]; return RECTANGLE; } -node { [yyextra incrementPosBy:yyleng]; return NODE; } -at { [yyextra incrementPosBy:yyleng]; return AT; } -to { [yyextra incrementPosBy:yyleng]; return TO; } -; { [yyextra incrementPosBy:yyleng]; return SEMICOLON; } +\\begin\{tikzpicture\} { return BEGIN_TIKZPICTURE_CMD; } +\\end\{tikzpicture\} { return END_TIKZPICTURE_CMD; } +\\begin\{pgfonlayer\} { return BEGIN_PGFONLAYER_CMD; } +\\end\{pgfonlayer\} { return END_PGFONLAYER_CMD; } +\\draw { return DRAW_CMD; } +\\node { return NODE_CMD; } +\\path { return PATH_CMD; } +rectangle { return RECTANGLE; } +node { return NODE; } +at { return AT; } +to { return TO; } +; { return SEMICOLON; } \([ ]*{FLOAT}[ ]*,[ ]*{FLOAT}[ ]*\) { - [yyextra incrementPosBy:1]; + yylloc->last_column = yylloc->first_column + 1; yyless(1); BEGIN(xcoord); } {FLOAT} { - [yyextra incrementPosBy:yyleng]; yylval->pt.x=(float)strtod(yytext,NULL); BEGIN(ycoord); } -, { [yyextra incrementPosBy:yyleng]; } +, { } {FLOAT} { - [yyextra incrementPosBy:yyleng]; yylval->pt.y=(float)strtod(yytext,NULL); } \) { - [yyextra incrementPosBy:yyleng]; BEGIN(INITIAL); return COORD; } /* when we see "[", change parsing mode */ \[ /*syntaxhlfix]*/ { - [yyextra incrementPosBy:yyleng]; BEGIN(props); return LEFTBRACKET; } -= { [yyextra incrementPosBy:yyleng]; return EQUALS; } -, { [yyextra incrementPosBy:yyleng]; return COMMA; } += { return EQUALS; } +, { return COMMA; } [^=,\{\] \t]([^=,\{\]]*[^=,\{\] \t])? { - [yyextra incrementPosBy:yyleng]; yylval->nsstr=[NSString stringWithUTF8String:yytext]; return PROPSTRING; } \] { - [yyextra incrementPosBy:yyleng]; BEGIN(INITIAL); return RIGHTBRACKET; } \( { - [yyextra incrementPosBy:yyleng]; BEGIN(noderef); return LEFTPARENTHESIS; } \. { - [yyextra incrementPosBy:yyleng]; return FULLSTOP; } [^\.\{\)]+ { - [yyextra incrementPosBy:yyleng]; yylval->nsstr=[NSString stringWithUTF8String:yytext]; return REFSTRING; } \) { - [yyextra incrementPosBy:yyleng]; BEGIN(INITIAL); return RIGHTPARENTHESIS; } @@ -125,8 +126,12 @@ to { [yyextra incrementPosBy:yyleng]; return TO; } while (1) { char c = input(yyscanner); // eof reached before closing brace - if (c == '\0' || c == EOF) yyterminate(); + if (c == '\0' || c == EOF) { + return UNCLOSED_DELIM_STR; + } + yylloc->last_column += 1; + yyleng += 1; if (escape) { escape = 0; } else if (c == '\\') { @@ -136,17 +141,23 @@ to { [yyextra incrementPosBy:yyleng]; return TO; } } else if (c == '}') { brace_depth--; if (brace_depth == 0) break; + } else if (c == '\n') { + yylloc->last_line += 1; + yylloc->last_column = 0; } [buf appendFormat:@"%c", c]; } NSString *s = [buf copy]; - yyleng += 1 + [buf length]; [s autorelease]; yylval->nsstr = s; - [yyextra incrementPosBy:yyleng]; return DELIMITEDSTRING; } +\\begin { return UNKNOWN_BEGIN_CMD; } +\\end { return UNKNOWN_END_CMD; } +\\[a-zA-Z]+[a-zA-Z0-9]* { return UNKNOWN_CMD; } +. { return UNKNOWN_STR; } + /* vi:ft=lex:noet:ts=4:sts=4:sw=4: */ diff --git a/tikzit/src/common/tikzparser.ym b/tikzit/src/common/tikzparser.ym index 1183f12..cf9205c 100644 --- a/tikzit/src/common/tikzparser.ym +++ b/tikzit/src/common/tikzparser.ym @@ -24,10 +24,11 @@ %} %code requires { -#import "TikzGraphAssembler+Parser.h" -#import "GraphElementData.h" -#import "GraphElementProperty.h" -#import "Node.h" +#import +@class TikzGraphAssembler; +@class GraphElementData; +@class GraphElementProperty; +@class Node; struct noderef { Node *node; NSString *anchor; @@ -36,10 +37,10 @@ struct noderef { %defines "common/tikzparser.h" %pure-parser +%locations %parse-param {TikzGraphAssembler *assembler} %error-verbose - %union { NSPoint pt; NSString *nsstr; @@ -50,7 +51,15 @@ struct noderef { }; %{ +#import "TikzGraphAssembler+Parser.h" +#import "GraphElementData.h" +#import "GraphElementProperty.h" +#import "Node.h" #import "tikzlexer.h" +#define YYLEX_PARAM [assembler scanner] +void yyerror(YYLTYPE *yylloc, TikzGraphAssembler *assembler, const char *str) { + [assembler reportError:str atLocation:yylloc]; +} %} @@ -79,6 +88,12 @@ struct noderef { %token REFSTRING "string" %token DELIMITEDSTRING "{-delimited string" +%token UNKNOWN_BEGIN_CMD "unknown \\begin command" +%token UNKNOWN_END_CMD "unknown \\end command" +%token UNKNOWN_CMD "unknown latex command" +%token UNKNOWN_STR "unknown string" +%token UNCLOSED_DELIM_STR "unclosed {-delimited string" + %type nodename %type optanchor %type val @@ -125,7 +140,8 @@ nodename: "(" REFSTRING ")" { $$ = $2; }; node: "\\node" optproperties nodename "at" COORD DELIMITEDSTRING ";" { Node *node = [Node node]; - [node setData:$2]; + if ($2) + [node setData:$2]; [node setName:$3]; [node setPoint:$5]; [node setLabel:$6]; @@ -147,13 +163,15 @@ optedgenode: | "node" optproperties DELIMITEDSTRING { $$ = [Node node]; - [$$ setData:$2]; + if ($2) + [$$ setData:$2]; [$$ setLabel:$3]; } edge: "\\draw" optproperties noderef "to" optedgenode optnoderef ";" { Edge *edge = [Edge edge]; - [edge setData:$2]; + if ($2) + [edge setData:$2]; [edge setSource:$3.node]; [edge setSourceAnchor:$3.anchor]; [edge setEdgeNode:$5]; diff --git a/tikzit/src/common/util.h b/tikzit/src/common/util.h index 1ee4ef5..f527820 100644 --- a/tikzit/src/common/util.h +++ b/tikzit/src/common/util.h @@ -189,4 +189,6 @@ float normaliseAngleRad (float rads); */ NSString *alphaHex(unsigned short sh); +const char *find_start_of_nth_line (const char * string, int line); + // vi:ft=objc:noet:ts=4:sts=4:sw=4 diff --git a/tikzit/src/common/util.m b/tikzit/src/common/util.m index 2d40cbd..aa21a67 100644 --- a/tikzit/src/common/util.m +++ b/tikzit/src/common/util.m @@ -361,5 +361,20 @@ NSString *alphaHex(unsigned short sh) { return [NSString stringWithFormat:@"%c%c", ahex[sh/16], ahex[sh%16]]; } +const char *find_start_of_nth_line (const char * string, int line) { + int l = 0; + const char *lineStart = string; + while (*lineStart && l < line) { + while (*lineStart && *lineStart != '\n') { + ++lineStart; + } + if (*lineStart) { + ++l; + ++lineStart; + } + } + return lineStart; +} + // vi:ft=objc:noet:ts=4:sts=4:sw=4 diff --git a/tikzit/src/osx/TikzSourceController.m b/tikzit/src/osx/TikzSourceController.m index d01589b..0cb3703 100644 --- a/tikzit/src/osx/TikzSourceController.m +++ b/tikzit/src/osx/TikzSourceController.m @@ -183,8 +183,8 @@ NSDictionary *d = [lastError userInfo]; - NSString *ts = [NSString stringWithFormat: @"Parse error on line %@: %@\n", [d valueForKey:@"lineNumber"], [d valueForKey:NSLocalizedDescriptionKey]]; - NSMutableAttributedString *as = [[NSMutableAttributedString alloc] initWithString:[NSString stringWithFormat: @"Parse error on line %@: %@\n%@\n", [d valueForKey:@"lineNumber"], [d valueForKey:NSLocalizedDescriptionKey], [[d valueForKey:@"syntaxString"] stringByReplacingOccurrencesOfString:@"\t" withString:@""]]]; + NSString *ts = [NSString stringWithFormat: @"Parse error on line %@: %@\n", [d valueForKey:@"startLine"], [d valueForKey:NSLocalizedDescriptionKey]]; + NSMutableAttributedString *as = [[NSMutableAttributedString alloc] initWithString:[NSString stringWithFormat: @"Parse error on line %@: %@\n%@\n", [d valueForKey:@"startLine"], [d valueForKey:NSLocalizedDescriptionKey], [[d valueForKey:@"syntaxString"] stringByReplacingOccurrencesOfString:@"\t" withString:@""]]]; NSInteger tokenLength = [[d valueForKey:@"tokenLength"] integerValue]; // Bit of a mess, offset around to find correct position and correct for 4 characters for every one character of \t -- cgit v1.2.3