pqarrays

Paddy 2016-02-26 Parent:ce9c92fc81ab

2:9a415db0346a tip Browse Files

Fix whitespace lexing/parsing. Our consumeWhitespace method had a bug where it wouldn't emit the whitespace token, meaning we weren't actually skipping the whitespace. Oops. While in there, I removed an outdated TODO (we already supplied a default state). I updated the lexing of unquoted strings to take into account the rules about spaces and unquoted strings; basically, spaces before or after are ignored as whitespace, spaces in the middle count, and empty strings are not allowed. I removed an extra case when detecting what to do when lexing an unquoted string; we already consumed all the whitespace, so the next character shouldn't be whitespace, so no need to test for it. We need to consume whitespace before we start lexing the separator character. I updated the token debugging to be a bit more useful, by defining a String() method on the token type itself, so it'll expose both the type and the value. This makes unexpected errors easier to deal with, and is used in all the errors raised by the parser now. I added a bunch of whitespace tests for lexing and parsing.

lexer.go lexer_test.go parser.go parser_test.go

     1.1 --- a/lexer.go	Thu Feb 25 23:52:05 2016 -0800
     1.2 +++ b/lexer.go	Fri Feb 26 01:23:50 2016 -0800
     1.3 @@ -68,6 +68,10 @@
     1.4  	val string
     1.5  }
     1.6  
     1.7 +func (t token) String() string {
     1.8 +	return fmt.Sprintf("%s: %s", t.typ.String(), t.val)
     1.9 +}
    1.10 +
    1.11  func lex(input string) *lexer {
    1.12  	l := &lexer{
    1.13  		input:  input,
    1.14 @@ -82,7 +86,7 @@
    1.15  }
    1.16  
    1.17  func (l *lexer) run() {
    1.18 -	for l.state = lexStart; l.state != nil; { // TODO(paddy): default state
    1.19 +	for l.state = lexStart; l.state != nil; {
    1.20  		l.state = l.state(l)
    1.21  	}
    1.22  }
    1.23 @@ -158,7 +162,7 @@
    1.24  	for unicode.IsSpace(l.peek()) {
    1.25  		l.next()
    1.26  	}
    1.27 -	if l.start > l.pos {
    1.28 +	if l.pos > l.start {
    1.29  		l.emit(tokenWhitespace)
    1.30  	}
    1.31  }
    1.32 @@ -203,7 +207,6 @@
    1.33  	case r == separator:
    1.34  		return l.errorf("empty item in array")
    1.35  	case unicode.IsSpace(r):
    1.36 -		l.consumeWhitespace()
    1.37  		return lexItem
    1.38  	case r == '"':
    1.39  		return lexQuotedString
    1.40 @@ -246,11 +249,25 @@
    1.41  			if l.pos <= l.start {
    1.42  				return l.errorf(rightDelim + " in unquoted string")
    1.43  			}
    1.44 +			lastNonSpace := -1
    1.45 +			s := l.input[l.start:l.pos]
    1.46 +			for pos, r := range s {
    1.47 +				if !unicode.IsSpace(r) {
    1.48 +					lastNonSpace = l.start + pos + 1
    1.49 +				}
    1.50 +			}
    1.51 +			if lastNonSpace < 0 {
    1.52 +				return l.errorf("unquoted empty string")
    1.53 +			}
    1.54 +			for lastNonSpace < l.pos {
    1.55 +				l.backup()
    1.56 +			}
    1.57  			if string(l.input[l.start:l.pos]) == "NULL" {
    1.58  				l.emit(tokenNull)
    1.59  			} else {
    1.60  				l.emit(tokenString)
    1.61  			}
    1.62 +			l.consumeWhitespace()
    1.63  			return lexRightDelim
    1.64  		}
    1.65  		switch r := l.next(); {
    1.66 @@ -258,8 +275,6 @@
    1.67  			return l.errorf("eof while parsing string")
    1.68  		case r == '"':
    1.69  			return l.errorf("\" in unquoted string")
    1.70 -		case unicode.IsSpace(r):
    1.71 -			return l.errorf("unquoted empty string")
    1.72  		case r == '\\':
    1.73  			return l.errorf("\\ in unquoted string")
    1.74  		case r == separator:
    1.75 @@ -278,6 +293,7 @@
    1.76  }
    1.77  
    1.78  func lexSeparator(l *lexer) stateFunc {
    1.79 +	l.consumeWhitespace()
    1.80  	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
    1.81  		return lexRightDelim
    1.82  	}
     2.1 --- a/lexer_test.go	Thu Feb 25 23:52:05 2016 -0800
     2.2 +++ b/lexer_test.go	Fri Feb 26 01:23:50 2016 -0800
     2.3 @@ -7,6 +7,7 @@
     2.4  var testInputs = map[string][]token{
     2.5  	``:                                       []token{{typ: tokenError, val: "expected array to start before "}},
     2.6  	`{}`:                                     []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenArrayEnd, val: "}"}},
     2.7 +	`{    }`:                                 []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenWhitespace, val: "    "}, {typ: tokenArrayEnd, val: "}"}},
     2.8  	`{lions}`:                                []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenArrayEnd, val: "}"}},
     2.9  	`{lions,tigers}`:                         []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenArrayEnd, val: "}"}},
    2.10  	`{lions,tigers,bears}`:                   []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "bears"}, {typ: tokenArrayEnd, val: "}"}},
    2.11 @@ -28,7 +29,7 @@
    2.12  				break
    2.13  			}
    2.14  		}
    2.15 -		t.Logf("%#+v\n", tokens)
    2.16 +		t.Logf("`%s`: %#+v\n", input, tokens)
    2.17  		if len(tokens) != len(expectedTokens) {
    2.18  			t.Fatalf("Expected %d tokens, got %d\n", len(expectedTokens), len(tokens))
    2.19  		}
     3.1 --- a/parser.go	Thu Feb 25 23:52:05 2016 -0800
     3.2 +++ b/parser.go	Fri Feb 26 01:23:50 2016 -0800
     3.3 @@ -46,7 +46,7 @@
     3.4  		return parseEOF, nil
     3.5  	}
     3.6  	if tok.typ != tokenEOF {
     3.7 -		return nil, errors.New("expected EOF, got " + tok.typ.String())
     3.8 +		return nil, errors.New("expected EOF, got " + tok.String())
     3.9  	}
    3.10  	return nil, nil
    3.11  }
    3.12 @@ -62,7 +62,7 @@
    3.13  		parsed <- nil
    3.14  		return parseSeparatorOrDelim, nil
    3.15  	}
    3.16 -	return nil, errors.New("expected string, got " + tok.typ.String())
    3.17 +	return nil, errors.New("expected string, got " + tok.String())
    3.18  }
    3.19  
    3.20  func parseStringOrNullOrEnd(l *lexer, parsed chan *string) (parseFunc, error) {
    3.21 @@ -78,7 +78,7 @@
    3.22  	} else if tok.typ == tokenArrayEnd {
    3.23  		return parseEOF, nil
    3.24  	}
    3.25 -	return nil, errors.New("Expected string or end, got " + tok.typ.String())
    3.26 +	return nil, errors.New("Expected string or end, got " + tok.String())
    3.27  }
    3.28  
    3.29  func parseSeparatorOrDelim(l *lexer, parsed chan *string) (parseFunc, error) {
    3.30 @@ -90,7 +90,7 @@
    3.31  	} else if tok.typ == tokenArrayEnd {
    3.32  		return parseEOF, nil
    3.33  	}
    3.34 -	return nil, errors.New("expected separator or delim, got " + tok.typ.String())
    3.35 +	return nil, errors.New("expected separator or delim, got " + tok.String())
    3.36  }
    3.37  
    3.38  func parseStart(l *lexer, parsed chan *string) (parseFunc, error) {
    3.39 @@ -100,5 +100,5 @@
    3.40  	} else if tok.typ == tokenArrayStart {
    3.41  		return parseStringOrNullOrEnd, nil
    3.42  	}
    3.43 -	return nil, errors.New("expected separator or delim, got " + tok.typ.String())
    3.44 +	return nil, errors.New("expected separator or delim, got " + tok.String())
    3.45  }
     4.1 --- a/parser_test.go	Thu Feb 25 23:52:05 2016 -0800
     4.2 +++ b/parser_test.go	Fri Feb 26 01:23:50 2016 -0800
     4.3 @@ -9,22 +9,27 @@
     4.4  }
     4.5  
     4.6  var parseTestInputs = map[string][]*string{
     4.7 -	`{}`:                            []*string{},
     4.8 -	`{lions}`:                       []*string{strPtr("lions")},
     4.9 -	`{lions,tigers}`:                []*string{strPtr("lions"), strPtr("tigers")},
    4.10 -	`{lions,tigers,NULL}`:           []*string{strPtr("lions"), strPtr("tigers"), nil},
    4.11 -	`{lions,tigers,bears}`:          []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears")},
    4.12 -	`{lions,tigers,bears,"oh my!"}`: []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears"), strPtr("oh my!")},
    4.13 +	`{      }`:                             []*string{},
    4.14 +	`{}`:                                   []*string{},
    4.15 +	`{lions}`:                              []*string{strPtr("lions")},
    4.16 +	`{ lions}`:                             []*string{strPtr("lions")},
    4.17 +	`{lions,tigers}`:                       []*string{strPtr("lions"), strPtr("tigers")},
    4.18 +	`{lions, tigers  }`:                    []*string{strPtr("lions"), strPtr("tigers")},
    4.19 +	`{lions,tigers,NULL}`:                  []*string{strPtr("lions"), strPtr("tigers"), nil},
    4.20 +	`{lions, tigers ,  NULL }`:             []*string{strPtr("lions"), strPtr("tigers"), nil},
    4.21 +	`{lions,tigers,bears}`:                 []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears")},
    4.22 +	`{ lions ,tigers  ,bears, "oh my!"  }`: []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears"), strPtr("oh my!")},
    4.23  }
    4.24  
    4.25  func TestParseInputsTable(t *testing.T) {
    4.26  	for input, expected := range parseTestInputs {
    4.27  		l := lex(input)
    4.28 +		t.Logf("`%s`\n", input)
    4.29  		output, err := parse(l)
    4.30  		if err != nil {
    4.31  			t.Fatalf(err.Error())
    4.32  		}
    4.33 -		t.Logf("`%s`: %#+v\n", input, output)
    4.34 +		t.Logf("%#+v\n", output)
    4.35  		if len(output) != len(expected) {
    4.36  			t.Fatalf("Expected %d items in array, got %d\n", len(expected), len(output))
    4.37  		}
    4.38 @@ -32,7 +37,7 @@
    4.39  			if item == nil && expected[pos] != nil {
    4.40  				t.Errorf("Expected %d to be %s, got nil instead.", pos, *expected[pos])
    4.41  			} else if item != nil && expected[pos] == nil {
    4.42 -				t.Errorf("Expected %d to be nil, got %s instead.", pos, *item)
    4.43 +				t.Errorf("Expected %d to be nil, got '%s' instead.", pos, *item)
    4.44  			} else if item != nil && expected[pos] != nil {
    4.45  				continue
    4.46  			} else if item == nil && expected[pos] == nil {