pqarrays
pqarrays/parser.go
Fix whitespace lexing/parsing. Our consumeWhitespace method had a bug where it wouldn't emit the whitespace token, meaning we weren't actually skipping the whitespace. Oops. While in there, I removed an outdated TODO (we already supplied a default state). I updated the lexing of unquoted strings to take into account the rules about spaces and unquoted strings; basically, spaces before or after are ignored as whitespace, spaces in the middle count, and empty strings are not allowed. I removed an extra case when detecting what to do when lexing an unquoted string; we already consumed all the whitespace, so the next character shouldn't be whitespace, so no need to test for it. We need to consume whitespace before we start lexing the separator character. I updated the token debugging to be a bit more useful, by defining a String() method on the token type itself, so it'll expose both the type and the value. This makes unexpected errors easier to deal with, and is used in all the errors raised by the parser now. I added a bunch of whitespace tests for lexing and parsing.
| paddy@0 | 1 package pqarrays |
| paddy@0 | 2 |
| paddy@0 | 3 import ( |
| paddy@0 | 4 "errors" |
| paddy@0 | 5 ) |
| paddy@0 | 6 |
| paddy@0 | 7 func parse(l *lexer) ([]*string, error) { |
| paddy@0 | 8 var parsed []*string |
| paddy@0 | 9 pchan := make(chan *string) |
| paddy@0 | 10 errchan := make(chan error) |
| paddy@0 | 11 done := make(chan struct{}) |
| paddy@0 | 12 go runParse(l, pchan, errchan, done) |
| paddy@0 | 13 for { |
| paddy@0 | 14 select { |
| paddy@0 | 15 case err := <-errchan: |
| paddy@0 | 16 return parsed, err |
| paddy@0 | 17 case item := <-pchan: |
| paddy@0 | 18 parsed = append(parsed, item) |
| paddy@0 | 19 case <-done: |
| paddy@0 | 20 return parsed, nil |
| paddy@0 | 21 } |
| paddy@0 | 22 } |
| paddy@0 | 23 } |
| paddy@0 | 24 |
| paddy@0 | 25 func runParse(l *lexer, parsed chan *string, err chan error, done chan struct{}) { |
| paddy@0 | 26 var state parseFunc = parseStart |
| paddy@0 | 27 for { |
| paddy@0 | 28 var e error |
| paddy@0 | 29 state, e = state(l, parsed) |
| paddy@0 | 30 if e != nil { |
| paddy@0 | 31 err <- e |
| paddy@0 | 32 break |
| paddy@0 | 33 } |
| paddy@0 | 34 if state == nil { |
| paddy@0 | 35 break |
| paddy@0 | 36 } |
| paddy@0 | 37 } |
| paddy@0 | 38 close(done) |
| paddy@0 | 39 } |
| paddy@0 | 40 |
| paddy@0 | 41 type parseFunc func(*lexer, chan *string) (parseFunc, error) |
| paddy@0 | 42 |
| paddy@0 | 43 func parseEOF(l *lexer, parsed chan *string) (parseFunc, error) { |
| paddy@0 | 44 tok := l.nextToken() |
| paddy@0 | 45 if tok.typ == tokenWhitespace { |
| paddy@1 | 46 return parseEOF, nil |
| paddy@0 | 47 } |
| paddy@0 | 48 if tok.typ != tokenEOF { |
| paddy@2 | 49 return nil, errors.New("expected EOF, got " + tok.String()) |
| paddy@0 | 50 } |
| paddy@0 | 51 return nil, nil |
| paddy@0 | 52 } |
| paddy@0 | 53 |
| paddy@0 | 54 func parseStringOrNull(l *lexer, parsed chan *string) (parseFunc, error) { |
| paddy@0 | 55 tok := l.nextToken() |
| paddy@0 | 56 if tok.typ == tokenWhitespace { |
| paddy@1 | 57 return parseStringOrNull, nil |
| paddy@0 | 58 } else if tok.typ == tokenString { |
| paddy@0 | 59 parsed <- &tok.val |
| paddy@0 | 60 return parseSeparatorOrDelim, nil |
| paddy@0 | 61 } else if tok.typ == tokenNull { |
| paddy@0 | 62 parsed <- nil |
| paddy@0 | 63 return parseSeparatorOrDelim, nil |
| paddy@0 | 64 } |
| paddy@2 | 65 return nil, errors.New("expected string, got " + tok.String()) |
| paddy@0 | 66 } |
| paddy@0 | 67 |
| paddy@1 | 68 func parseStringOrNullOrEnd(l *lexer, parsed chan *string) (parseFunc, error) { |
| paddy@1 | 69 tok := l.nextToken() |
| paddy@1 | 70 if tok.typ == tokenWhitespace { |
| paddy@1 | 71 return parseStringOrNullOrEnd, nil |
| paddy@1 | 72 } else if tok.typ == tokenString { |
| paddy@1 | 73 parsed <- &tok.val |
| paddy@1 | 74 return parseSeparatorOrDelim, nil |
| paddy@1 | 75 } else if tok.typ == tokenNull { |
| paddy@1 | 76 parsed <- nil |
| paddy@1 | 77 return parseSeparatorOrDelim, nil |
| paddy@1 | 78 } else if tok.typ == tokenArrayEnd { |
| paddy@1 | 79 return parseEOF, nil |
| paddy@1 | 80 } |
| paddy@2 | 81 return nil, errors.New("Expected string or end, got " + tok.String()) |
| paddy@1 | 82 } |
| paddy@1 | 83 |
| paddy@0 | 84 func parseSeparatorOrDelim(l *lexer, parsed chan *string) (parseFunc, error) { |
| paddy@0 | 85 tok := l.nextToken() |
| paddy@0 | 86 if tok.typ == tokenWhitespace { |
| paddy@0 | 87 return parseSeparatorOrDelim, nil |
| paddy@0 | 88 } else if tok.typ == tokenSeparator { |
| paddy@0 | 89 return parseStringOrNull, nil |
| paddy@0 | 90 } else if tok.typ == tokenArrayEnd { |
| paddy@0 | 91 return parseEOF, nil |
| paddy@0 | 92 } |
| paddy@2 | 93 return nil, errors.New("expected separator or delim, got " + tok.String()) |
| paddy@0 | 94 } |
| paddy@0 | 95 |
| paddy@0 | 96 func parseStart(l *lexer, parsed chan *string) (parseFunc, error) { |
| paddy@0 | 97 tok := l.nextToken() |
| paddy@0 | 98 if tok.typ == tokenWhitespace { |
| paddy@0 | 99 return parseStart, nil |
| paddy@0 | 100 } else if tok.typ == tokenArrayStart { |
| paddy@1 | 101 return parseStringOrNullOrEnd, nil |
| paddy@0 | 102 } |
| paddy@2 | 103 return nil, errors.New("expected separator or delim, got " + tok.String()) |
| paddy@0 | 104 } |