pqarrays
2015-04-19
Child:ce9c92fc81ab
0:bfe2a4af6bdf Browse Files
First pass implementation. Use a lexer to generate tokens out of the Array type responses that PostgreSQL will send. Write a parser for string[] array types. Create a StringArray type that fulfills the driver.Valuer and sql.Scanner interfaces using the parser and lexer.
.hgignore array.go lexer.go lexer_test.go parser.go parser_test.go
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/.hgignore Sun Apr 19 23:47:36 2015 -0400 1.3 @@ -0,0 +1,1 @@ 1.4 +cover.out
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/array.go Sun Apr 19 23:47:36 2015 -0400 2.3 @@ -0,0 +1,48 @@ 2.4 +package pqarrays 2.5 + 2.6 +import ( 2.7 + "database/sql/driver" 2.8 + "errors" 2.9 + "strconv" 2.10 + "strings" 2.11 +) 2.12 + 2.13 +var ( 2.14 + ErrUnexpectedValueType = errors.New("expected value to be a string or []byte") 2.15 +) 2.16 + 2.17 +type StringArray []string 2.18 + 2.19 +func (s StringArray) Value() (driver.Value, error) { 2.20 + output := make([]string, 0, len(s)) 2.21 + for _, item := range s { 2.22 + item = strconv.Quote(item) 2.23 + item = strings.Replace(item, "'", "\\'", -1) 2.24 + output = append(output, item) 2.25 + } 2.26 + return []byte(`{` + strings.Join(output, ",") + `}`), nil 2.27 +} 2.28 + 2.29 +func (s *StringArray) Scan(value interface{}) error { 2.30 + *s = (*s)[:0] 2.31 + var input string 2.32 + if _, ok := value.(string); ok { 2.33 + input = value.(string) 2.34 + } else if _, ok := value.([]byte); ok { 2.35 + input = string(value.([]byte)) 2.36 + } else { 2.37 + return ErrUnexpectedValueType 2.38 + } 2.39 + l := lex(input) 2.40 + parsed, err := parse(l) 2.41 + if err != nil { 2.42 + return err 2.43 + } 2.44 + for _, item := range parsed { 2.45 + if item == nil { 2.46 + continue 2.47 + } 2.48 + *s = append(*s, *item) 2.49 + } 2.50 + return nil 2.51 +}
const (
	eof        = -1
	leftDelim  = "{"
	rightDelim = "}"
	separator  = ','
)

// tokenType identifies the kind of lexical token emitted by the lexer.
type tokenType int

const (
	tokenError tokenType = iota
	tokenWhitespace
	tokenArrayStart
	tokenString
	tokenNull
	tokenSeparator
	tokenArrayEnd
	tokenEOF
)

// String returns a human-readable name for the token type.
func (t tokenType) String() string {
	switch t {
	case tokenError:
		return "error"
	case tokenWhitespace:
		return "whitespace"
	case tokenArrayStart:
		return "array start"
	case tokenString:
		return "string"
	case tokenNull:
		return "null"
	case tokenSeparator:
		return "separator"
	case tokenArrayEnd:
		return "array end"
	case tokenEOF:
		return "eof"
	default:
		return "unknown token"
	}
}

// stateFunc is one state of the lexer; it returns the next state, or
// nil when lexing is finished.
type stateFunc func(*lexer) stateFunc

// lexer scans a PostgreSQL array literal, emitting tokens on a channel
// from a dedicated goroutine.
type lexer struct {
	tokens     chan token
	input      string
	start      int   // start of the token currently being scanned
	pos        int   // current scan position in input
	omitted    []int // positions of escape backslashes to drop from the emitted value
	width      int   // width of the last rune read, so backup can undo one next
	state      stateFunc
	arrayDepth int // nesting depth of { } pairs seen so far
}

// token is a single lexeme: its type and its (already unescaped) value.
type token struct {
	typ tokenType
	val string
}

// lex starts lexing input in a new goroutine; consume the results with
// nextToken until tokenEOF or tokenError is seen.
func lex(input string) *lexer {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
	}
	go l.run()
	return l
}

// nextToken blocks until the lexing goroutine produces the next token.
func (l *lexer) nextToken() token {
	return <-l.tokens
}

// run drives the state machine until a terminal state returns nil.
func (l *lexer) run() {
	for l.state = lexStart; l.state != nil; { // TODO(paddy): default state
		l.state = l.state(l)
	}
}

// emit sends input[start:pos] as a token of type t, dropping any bytes
// recorded by omit (escape backslashes inside quoted strings).
func (l *lexer) emit(t tokenType) {
	var val string
	if len(l.omitted) < 1 {
		val = l.input[l.start:l.pos]
	} else {
		start := l.start
		for _, pos := range l.omitted {
			val += l.input[start:pos]
			start = pos + 1
		}
		if l.pos > start {
			val += l.input[start:l.pos]
		}
	}
	l.tokens <- token{typ: t, val: val}
	l.start = l.pos
	l.omitted = l.omitted[0:0]
}

// next consumes and returns the next rune, or eof at end of input.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// omit marks the byte just consumed so emit drops it from the value.
func (l *lexer) omit() {
	l.omitted = append(l.omitted, l.pos-1)
}

// ignore discards everything scanned since the last emit/ignore.
func (l *lexer) ignore() {
	l.start = l.pos
}

// backup steps back one rune; may only be called once per next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// accept consumes the next rune if it is in valid.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from valid.
func (l *lexer) acceptRun(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

// errorf emits a tokenError carrying the formatted message and ends the
// state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
	l.tokens <- token{tokenError, fmt.Sprintf(format, args...)}
	return nil
}

// consumeWhitespace scans past any whitespace and, if some was found,
// emits it as a tokenWhitespace.
func (l *lexer) consumeWhitespace() {
	for unicode.IsSpace(l.peek()) {
		l.next()
	}
	// BUG FIX: the comparison was inverted (l.start > l.pos, which is
	// never true here), so the whitespace token was never emitted and the
	// consumed whitespace leaked into the front of the next token's value.
	if l.pos > l.start {
		l.emit(tokenWhitespace)
	}
}

// lexStart skips leading whitespace, then expects the array to open.
func lexStart(l *lexer) stateFunc {
	l.consumeWhitespace()
	return lexArrayStart
}

// lexArrayStart requires a { at the current position.
func lexArrayStart(l *lexer) stateFunc {
	if strings.HasPrefix(l.input[l.pos:], leftDelim) {
		return lexLeftDelim
	}
	return l.errorf("expected array to start before %s", l.input[l.pos:])
}

// lexLeftDelim emits the { and descends one nesting level.
func lexLeftDelim(l *lexer) stateFunc {
	l.pos += len(leftDelim)
	l.emit(tokenArrayStart)
	l.arrayDepth++
	return lexItem
}

// lexRightDelim emits the } and ascends one nesting level.
func lexRightDelim(l *lexer) stateFunc {
	l.pos += len(rightDelim)
	l.emit(tokenArrayEnd)
	l.arrayDepth--
	return lexSeparator
}

// lexItem dispatches on the start of the next array element: a nested
// array, the closing delimiter, a quoted string, or a bare string.
func lexItem(l *lexer) stateFunc {
	l.consumeWhitespace()
	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
		return lexRightDelim
	}
	if strings.HasPrefix(l.input[l.pos:], leftDelim) {
		return lexLeftDelim
	}
	switch r := l.peek(); {
	case r == eof:
		return l.errorf("unclosed array")
	case r == separator:
		return l.errorf("empty item in array")
	case unicode.IsSpace(r):
		l.consumeWhitespace()
		return lexItem
	case r == '"':
		return lexQuotedString
	default:
		return lexString
	}
}

// lexQuotedString scans a double-quoted element, dropping the quotes
// and any escape backslashes from the emitted value.
func lexQuotedString(l *lexer) stateFunc {
	l.next()
	l.ignore() // ignore the open quote
	for {
		switch r := l.next(); {
		case r == eof:
			return l.errorf("unclosed quoted string")
		case r == '"':
			l.backup()
			l.emit(tokenString)
			l.next()
			l.ignore()
			return lexSeparator
		case r == '\\':
			// omit the \ itself
			l.omit()
			// BUG FIX: the original discarded the result of next() and
			// re-tested r (still '\\'), so the eof check was dead code.
			// Always skip the character following a \, and fail if the
			// input ends mid-escape.
			if l.next() == eof {
				return l.errorf("unclosed quoted string")
			}
		}
	}
}

// lexString scans an unquoted element, which ends at a separator or the
// closing delimiter; the literal NULL becomes a tokenNull.
func lexString(l *lexer) stateFunc {
	for {
		if strings.HasPrefix(l.input[l.pos:], leftDelim) {
			return l.errorf(leftDelim + " in unquoted string")
		}
		if strings.HasPrefix(l.input[l.pos:], rightDelim) {
			if l.pos <= l.start {
				// defensive: lexItem routes a leading } to lexRightDelim,
				// so this branch should be unreachable
				return l.errorf(rightDelim + " in unquoted string")
			}
			if string(l.input[l.start:l.pos]) == "NULL" {
				l.emit(tokenNull)
			} else {
				l.emit(tokenString)
			}
			return lexRightDelim
		}
		switch r := l.next(); {
		case r == eof:
			return l.errorf("eof while parsing string")
		case r == '"':
			return l.errorf("\" in unquoted string")
		case unicode.IsSpace(r):
			return l.errorf("unquoted empty string")
		case r == '\\':
			return l.errorf("\\ in unquoted string")
		case r == separator:
			l.backup()
			if l.pos <= l.start {
				return l.errorf("unquoted empty string")
			}
			if string(l.input[l.start:l.pos]) == "NULL" {
				l.emit(tokenNull)
			} else {
				l.emit(tokenString)
			}
			return lexSeparator
		}
	}
}

// lexSeparator expects either a , before the next element, a closing },
// or (at depth zero) the end of input.
func lexSeparator(l *lexer) stateFunc {
	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
		return lexRightDelim
	}
	r := l.next()
	if r == separator {
		l.emit(tokenSeparator)
		return lexItem
	} else if r == eof {
		if l.arrayDepth > 0 {
			return l.errorf("unclosed array")
		}
		l.emit(tokenEOF)
		return nil
	} else {
		l.backup()
		// BUG FIX: separator is a rune, so %s printed %!s(int32=44);
		// use %c (and drop the stray trailing newline).
		return l.errorf("expected %c, none found before %s", separator, l.input[l.pos:])
	}
}
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/lexer_test.go Sun Apr 19 23:47:36 2015 -0400 4.3 @@ -0,0 +1,44 @@ 4.4 +package pqarrays 4.5 + 4.6 +import ( 4.7 + "testing" 4.8 +) 4.9 + 4.10 +var testInputs = map[string][]token{ 4.11 + ``: []token{{typ: tokenError, val: "expected array to start before "}}, 4.12 + `{}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenArrayEnd, val: "}"}}, 4.13 + `{lions}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenArrayEnd, val: "}"}}, 4.14 + `{lions,tigers}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenArrayEnd, val: "}"}}, 4.15 + `{lions,tigers,bears}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "bears"}, {typ: tokenArrayEnd, val: "}"}}, 4.16 + `{lions,tigers,bears,"oh my!"}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "bears"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "oh my!"}, {typ: tokenArrayEnd, val: "}"}}, 4.17 + `{{two,dimensional},{array,"of items"}}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "two"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "dimensional"}, {typ: tokenArrayEnd, val: "}"}, {typ: tokenSeparator, val: ","}, {typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "array"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "of items"}, {typ: tokenArrayEnd, val: "}"}, {typ: tokenArrayEnd, val: "}"}}, 4.18 +} 4.19 + 4.20 +func TestInputsTable(t *testing.T) { 4.21 + for input, expectedTokens := range testInputs { 4.22 + l := lex(input) 
4.23 + var tokens []token 4.24 + for { 4.25 + tok := l.nextToken() 4.26 + if tok.typ == tokenEOF { 4.27 + break 4.28 + } 4.29 + tokens = append(tokens, tok) 4.30 + if tok.typ == tokenError { 4.31 + break 4.32 + } 4.33 + } 4.34 + t.Logf("%#+v\n", tokens) 4.35 + if len(tokens) != len(expectedTokens) { 4.36 + t.Fatalf("Expected %d tokens, got %d\n", len(expectedTokens), len(tokens)) 4.37 + } 4.38 + for pos, tok := range tokens { 4.39 + if expectedTokens[pos].typ != tok.typ { 4.40 + t.Errorf("Expected token in pos %d to have type of %s, got %s instead.", pos, expectedTokens[pos].typ, tok.typ) 4.41 + } 4.42 + if expectedTokens[pos].val != tok.val { 4.43 + t.Errorf("Expected token in pos %d to have value of `%s`, got `%s` instead.", pos, expectedTokens[pos].val, tok.val) 4.44 + } 4.45 + } 4.46 + } 4.47 +}
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/parser.go Sun Apr 19 23:47:36 2015 -0400 5.3 @@ -0,0 +1,88 @@ 5.4 +package pqarrays 5.5 + 5.6 +import ( 5.7 + "errors" 5.8 +) 5.9 + 5.10 +func parse(l *lexer) ([]*string, error) { 5.11 + var parsed []*string 5.12 + pchan := make(chan *string) 5.13 + errchan := make(chan error) 5.14 + done := make(chan struct{}) 5.15 + go runParse(l, pchan, errchan, done) 5.16 + for { 5.17 + select { 5.18 + case err := <-errchan: 5.19 + return parsed, err 5.20 + case item := <-pchan: 5.21 + parsed = append(parsed, item) 5.22 + case <-done: 5.23 + return parsed, nil 5.24 + } 5.25 + } 5.26 +} 5.27 + 5.28 +func runParse(l *lexer, parsed chan *string, err chan error, done chan struct{}) { 5.29 + var state parseFunc = parseStart 5.30 + for { 5.31 + var e error 5.32 + state, e = state(l, parsed) 5.33 + if e != nil { 5.34 + err <- e 5.35 + break 5.36 + } 5.37 + if state == nil { 5.38 + break 5.39 + } 5.40 + } 5.41 + close(done) 5.42 +} 5.43 + 5.44 +type parseFunc func(*lexer, chan *string) (parseFunc, error) 5.45 + 5.46 +func parseEOF(l *lexer, parsed chan *string) (parseFunc, error) { 5.47 + tok := l.nextToken() 5.48 + if tok.typ == tokenWhitespace { 5.49 + tok = l.nextToken() 5.50 + } 5.51 + if tok.typ != tokenEOF { 5.52 + return nil, errors.New("expected EOF, got " + tok.typ.String()) 5.53 + } 5.54 + return nil, nil 5.55 +} 5.56 + 5.57 +func parseStringOrNull(l *lexer, parsed chan *string) (parseFunc, error) { 5.58 + tok := l.nextToken() 5.59 + if tok.typ == tokenWhitespace { 5.60 + tok = l.nextToken() 5.61 + } else if tok.typ == tokenString { 5.62 + parsed <- &tok.val 5.63 + return parseSeparatorOrDelim, nil 5.64 + } else if tok.typ == tokenNull { 5.65 + parsed <- nil 5.66 + return parseSeparatorOrDelim, nil 5.67 + } 5.68 + return nil, errors.New("expected string, got " + tok.typ.String()) 5.69 +} 5.70 + 5.71 +func parseSeparatorOrDelim(l *lexer, parsed chan *string) (parseFunc, error) { 5.72 + tok := l.nextToken() 5.73 + if 
tok.typ == tokenWhitespace { 5.74 + return parseSeparatorOrDelim, nil 5.75 + } else if tok.typ == tokenSeparator { 5.76 + return parseStringOrNull, nil 5.77 + } else if tok.typ == tokenArrayEnd { 5.78 + return parseEOF, nil 5.79 + } 5.80 + return nil, errors.New("expected separator or delim, got " + tok.typ.String()) 5.81 +} 5.82 + 5.83 +func parseStart(l *lexer, parsed chan *string) (parseFunc, error) { 5.84 + tok := l.nextToken() 5.85 + if tok.typ == tokenWhitespace { 5.86 + return parseStart, nil 5.87 + } else if tok.typ == tokenArrayStart { 5.88 + return parseStringOrNull, nil 5.89 + } 5.90 + return nil, errors.New("expected separator or delim, got " + tok.typ.String()) 5.91 +}
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/parser_test.go Sun Apr 19 23:47:36 2015 -0400 6.3 @@ -0,0 +1,44 @@ 6.4 +package pqarrays 6.5 + 6.6 +import ( 6.7 + "testing" 6.8 +) 6.9 + 6.10 +func strPtr(in string) *string { 6.11 + return &in 6.12 +} 6.13 + 6.14 +var parseTestInputs = map[string][]*string{ 6.15 + `{lions}`: []*string{strPtr("lions")}, 6.16 + `{lions,tigers}`: []*string{strPtr("lions"), strPtr("tigers")}, 6.17 + `{lions,tigers,NULL}`: []*string{strPtr("lions"), strPtr("tigers"), nil}, 6.18 + `{lions,tigers,bears}`: []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears")}, 6.19 + `{lions,tigers,bears,"oh my!"}`: []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears"), strPtr("oh my!")}, 6.20 +} 6.21 + 6.22 +func TestParseInputsTable(t *testing.T) { 6.23 + for input, expected := range parseTestInputs { 6.24 + l := lex(input) 6.25 + output, err := parse(l) 6.26 + if err != nil { 6.27 + t.Fatalf(err.Error()) 6.28 + } 6.29 + t.Logf("`%s`: %#+v\n", input, output) 6.30 + if len(output) != len(expected) { 6.31 + t.Fatalf("Expected %d items in array, got %d\n", len(expected), len(output)) 6.32 + } 6.33 + for pos, item := range output { 6.34 + if item == nil && expected[pos] != nil { 6.35 + t.Errorf("Expected %d to be %s, got nil instead.", pos, *expected[pos]) 6.36 + } else if item != nil && expected[pos] == nil { 6.37 + t.Errorf("Expected %d to be nil, got %s instead.", pos, *item) 6.38 + } else if item != nil && expected[pos] != nil { 6.39 + continue 6.40 + } else if item == nil && expected[pos] == nil { 6.41 + continue 6.42 + } else if *item != *expected[pos] { 6.43 + t.Errorf("Expected %d to be %s, got %s instead.", pos, *expected[pos], *item) 6.44 + } 6.45 + } 6.46 + } 6.47 +}