pqarrays

Paddy 2015-04-19 Child:ce9c92fc81ab

0:bfe2a4af6bdf Browse Files

First pass implementation. Use a lexer to generate tokens out of the Array type responses that PostgreSQL will send. Write a parser for string[] array types. Create a StringArray type that fulfills the driver.Valuer and sql.Scanner interfaces using the parser and lexer.

.hgignore array.go lexer.go lexer_test.go parser.go parser_test.go

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgignore	Sun Apr 19 23:47:36 2015 -0400
     1.3 @@ -0,0 +1,1 @@
     1.4 +cover.out
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/array.go	Sun Apr 19 23:47:36 2015 -0400
     2.3 @@ -0,0 +1,48 @@
     2.4 +package pqarrays
     2.5 +
     2.6 +import (
     2.7 +	"database/sql/driver"
     2.8 +	"errors"
     2.9 +	"strconv"
    2.10 +	"strings"
    2.11 +)
    2.12 +
var (
	// ErrUnexpectedValueType is returned by StringArray.Scan when the
	// driver hands it a value that is neither a string nor a []byte.
	ErrUnexpectedValueType = errors.New("expected value to be a string or []byte")
)
    2.16 +
    2.17 +type StringArray []string
    2.18 +
    2.19 +func (s StringArray) Value() (driver.Value, error) {
    2.20 +	output := make([]string, 0, len(s))
    2.21 +	for _, item := range s {
    2.22 +		item = strconv.Quote(item)
    2.23 +		item = strings.Replace(item, "'", "\\'", -1)
    2.24 +		output = append(output, item)
    2.25 +	}
    2.26 +	return []byte(`{` + strings.Join(output, ",") + `}`), nil
    2.27 +}
    2.28 +
    2.29 +func (s *StringArray) Scan(value interface{}) error {
    2.30 +	*s = (*s)[:0]
    2.31 +	var input string
    2.32 +	if _, ok := value.(string); ok {
    2.33 +		input = value.(string)
    2.34 +	} else if _, ok := value.([]byte); ok {
    2.35 +		input = string(value.([]byte))
    2.36 +	} else {
    2.37 +		return ErrUnexpectedValueType
    2.38 +	}
    2.39 +	l := lex(input)
    2.40 +	parsed, err := parse(l)
    2.41 +	if err != nil {
    2.42 +		return err
    2.43 +	}
    2.44 +	for _, item := range parsed {
    2.45 +		if item == nil {
    2.46 +			continue
    2.47 +		}
    2.48 +		*s = append(*s, *item)
    2.49 +	}
    2.50 +	return nil
    2.51 +}
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/lexer.go	Sun Apr 19 23:47:36 2015 -0400
     3.3 @@ -0,0 +1,298 @@
     3.4 +package pqarrays
     3.5 +
     3.6 +import (
     3.7 +	"fmt"
     3.8 +	"strings"
     3.9 +	"unicode"
    3.10 +	"unicode/utf8"
    3.11 +)
    3.12 +
const (
	// eof is the sentinel rune returned by lexer.next at end of input.
	eof = -1
	// leftDelim and rightDelim open and close a (possibly nested) array.
	leftDelim  = "{"
	rightDelim = "}"
	// separator sits between array elements.
	separator = ','
)
    3.19 +
    3.20 +type tokenType int
    3.21 +
    3.22 +const (
    3.23 +	tokenError tokenType = iota
    3.24 +	tokenWhitespace
    3.25 +	tokenArrayStart
    3.26 +	tokenString
    3.27 +	tokenNull
    3.28 +	tokenSeparator
    3.29 +	tokenArrayEnd
    3.30 +	tokenEOF
    3.31 +)
    3.32 +
    3.33 +func (t tokenType) String() string {
    3.34 +	switch t {
    3.35 +	case tokenError:
    3.36 +		return "error"
    3.37 +	case tokenWhitespace:
    3.38 +		return "whitespace"
    3.39 +	case tokenArrayStart:
    3.40 +		return "array start"
    3.41 +	case tokenString:
    3.42 +		return "string"
    3.43 +	case tokenNull:
    3.44 +		return "null"
    3.45 +	case tokenSeparator:
    3.46 +		return "separator"
    3.47 +	case tokenArrayEnd:
    3.48 +		return "array end"
    3.49 +	case tokenEOF:
    3.50 +		return "eof"
    3.51 +	default:
    3.52 +		return "unknown token"
    3.53 +	}
    3.54 +}
    3.55 +
// stateFunc is one step of the lexer's state machine; it returns the
// next state, or nil when lexing is finished.
type stateFunc func(*lexer) stateFunc

// lexer scans a PostgreSQL array literal, sending tokens on a channel
// as they are recognized.
type lexer struct {
	tokens     chan token // lexed tokens, consumed via nextToken
	input      string     // the full array literal being scanned
	start      int        // byte offset where the current token begins
	pos        int        // byte offset of the next rune to read
	omitted    []int      // byte offsets inside the current token to drop (escaping backslashes)
	width      int        // byte width of the rune most recently read, so backup can undo it
	state      stateFunc  // current state of the state machine
	arrayDepth int        // nesting depth of { } seen so far
}

// token is a single lexed unit: its kind plus the literal text, with
// escape backslashes already removed.
type token struct {
	typ tokenType
	val string
}
    3.73 +
// lex starts lexing input in a new goroutine and returns the lexer;
// callers pull tokens off with nextToken.
func lex(input string) *lexer {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
	}
	go l.run()
	return l
}

// nextToken blocks until the lexing goroutine produces the next token.
func (l *lexer) nextToken() token {
	return <-l.tokens
}

// run drives the state machine until a state returns nil, which happens
// only after an error token or EOF token has been emitted.
func (l *lexer) run() {
	for l.state = lexStart; l.state != nil; { // TODO(paddy): default state
		l.state = l.state(l)
	}
}
    3.92 +
// emit sends the text accumulated since l.start as a token of type t,
// then begins the next token at the current position.
//
// Any byte offsets recorded in l.omitted (the backslashes of escape
// sequences, each a single byte) are skipped when the token's value is
// assembled.
func (l *lexer) emit(t tokenType) {
	var val string
	if len(l.omitted) < 1 {
		// common case: no escapes, token is a single slice of input
		val = l.input[l.start:l.pos]
	} else {
		start := l.start
		for _, pos := range l.omitted {
			val += l.input[start:pos]
			start = pos + 1
		}
		// tail after the last omitted byte, if any
		if l.pos > start {
			val += l.input[start:l.pos]
		}
	}
	l.tokens <- token{typ: t, val: val}
	l.start = l.pos
	l.omitted = l.omitted[0:0]
}
   3.111 +
   3.112 +func (l *lexer) next() rune {
   3.113 +	if l.pos >= len(l.input) {
   3.114 +		l.width = 0
   3.115 +		return eof
   3.116 +	}
   3.117 +	var r rune
   3.118 +	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   3.119 +	l.pos += l.width
   3.120 +	return r
   3.121 +}
   3.122 +
// omit marks the byte just consumed (an escaping backslash) to be
// dropped from the current token's value when it is emitted. It must be
// called immediately after next() consumed a single-byte rune.
func (l *lexer) omit() {
	l.omitted = append(l.omitted, l.pos-1)
}

// ignore discards everything consumed since the last emit.
func (l *lexer) ignore() {
	l.start = l.pos
}

// backup steps back one rune; it may only undo the most recent next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
   3.140 +
   3.141 +func (l *lexer) accept(valid string) bool {
   3.142 +	if strings.IndexRune(valid, l.next()) >= 0 {
   3.143 +		return true
   3.144 +	}
   3.145 +	l.backup()
   3.146 +	return false
   3.147 +}
   3.148 +
   3.149 +func (l *lexer) acceptRun(valid string) {
   3.150 +	for strings.IndexRune(valid, l.next()) >= 0 {
   3.151 +	}
   3.152 +	l.backup()
   3.153 +}
   3.154 +
// errorf emits an error token carrying the formatted message and
// returns nil, terminating the state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
	l.tokens <- token{tokenError, fmt.Sprintf(format, args...)}
	return nil
}
   3.159 +
   3.160 +func (l *lexer) consumeWhitespace() {
   3.161 +	for unicode.IsSpace(l.peek()) {
   3.162 +		l.next()
   3.163 +	}
   3.164 +	if l.start > l.pos {
   3.165 +		l.emit(tokenWhitespace)
   3.166 +	}
   3.167 +}
   3.168 +
// lexStart is the initial state: skip leading whitespace, then expect
// the opening brace of the array.
func lexStart(l *lexer) stateFunc {
	l.consumeWhitespace()
	return lexArrayStart
}
   3.173 +
   3.174 +func lexArrayStart(l *lexer) stateFunc {
   3.175 +	if strings.HasPrefix(l.input[l.pos:], leftDelim) {
   3.176 +		return lexLeftDelim
   3.177 +	}
   3.178 +	return l.errorf("expected array to start before %s", l.input[l.pos:])
   3.179 +}
   3.180 +
   3.181 +func lexLeftDelim(l *lexer) stateFunc {
   3.182 +	l.pos += len(leftDelim)
   3.183 +	l.emit(tokenArrayStart)
   3.184 +	l.arrayDepth += 1
   3.185 +	return lexItem
   3.186 +}
   3.187 +
   3.188 +func lexRightDelim(l *lexer) stateFunc {
   3.189 +	l.pos += len(rightDelim)
   3.190 +	l.emit(tokenArrayEnd)
   3.191 +	l.arrayDepth -= 1
   3.192 +	return lexSeparator
   3.193 +}
   3.194 +
// lexItem dispatches on the start of an array element: the end of the
// current array, a nested array, a quoted string, or a bare string.
func lexItem(l *lexer) stateFunc {
	l.consumeWhitespace()
	// a closing brace straight away: empty array (or dangling separator)
	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
		return lexRightDelim
	}
	if strings.HasPrefix(l.input[l.pos:], leftDelim) {
		return lexLeftDelim
	}
	switch r := l.peek(); {
	case r == eof:
		return l.errorf("unclosed array")
	case r == separator:
		return l.errorf("empty item in array")
	case unicode.IsSpace(r):
		// NOTE(review): consumeWhitespace above already ate any run of
		// whitespace, so this branch looks unreachable — confirm.
		l.consumeWhitespace()
		return lexItem
	case r == '"':
		return lexQuotedString
	default:
		return lexString
	}
}
   3.217 +
   3.218 +func lexQuotedString(l *lexer) stateFunc {
   3.219 +	l.next()
   3.220 +	l.ignore() // ignore the open quote
   3.221 +	for {
   3.222 +		switch r := l.next(); {
   3.223 +		case r == eof:
   3.224 +			return l.errorf("unclosed quoted string")
   3.225 +		case r == '"':
   3.226 +			l.backup()
   3.227 +			l.emit(tokenString)
   3.228 +			l.next()
   3.229 +			l.ignore()
   3.230 +			return lexSeparator
   3.231 +		case r == '\\':
   3.232 +			// omit the \ itself
   3.233 +			l.omit()
   3.234 +			// always skip over the character following a \
   3.235 +			l.next()
   3.236 +			if r == eof {
   3.237 +				return l.errorf("unclosed quoted string")
   3.238 +			}
   3.239 +		}
   3.240 +	}
   3.241 +}
   3.242 +
   3.243 +func lexString(l *lexer) stateFunc {
   3.244 +	for {
   3.245 +		if strings.HasPrefix(l.input[l.pos:], leftDelim) {
   3.246 +			return l.errorf(leftDelim + " in unquoted string")
   3.247 +		}
   3.248 +		if strings.HasPrefix(l.input[l.pos:], rightDelim) {
   3.249 +			if l.pos <= l.start {
   3.250 +				return l.errorf(rightDelim + " in unquoted string")
   3.251 +			}
   3.252 +			if string(l.input[l.start:l.pos]) == "NULL" {
   3.253 +				l.emit(tokenNull)
   3.254 +			} else {
   3.255 +				l.emit(tokenString)
   3.256 +			}
   3.257 +			return lexRightDelim
   3.258 +		}
   3.259 +		switch r := l.next(); {
   3.260 +		case r == eof:
   3.261 +			return l.errorf("eof while parsing string")
   3.262 +		case r == '"':
   3.263 +			return l.errorf("\" in unquoted string")
   3.264 +		case unicode.IsSpace(r):
   3.265 +			return l.errorf("unquoted empty string")
   3.266 +		case r == '\\':
   3.267 +			return l.errorf("\\ in unquoted string")
   3.268 +		case r == separator:
   3.269 +			l.backup()
   3.270 +			if l.pos <= l.start {
   3.271 +				return l.errorf("unquoted empty string")
   3.272 +			}
   3.273 +			if string(l.input[l.start:l.pos]) == "NULL" {
   3.274 +				l.emit(tokenNull)
   3.275 +			} else {
   3.276 +				l.emit(tokenString)
   3.277 +			}
   3.278 +			return lexSeparator
   3.279 +		}
   3.280 +	}
   3.281 +}
   3.282 +
   3.283 +func lexSeparator(l *lexer) stateFunc {
   3.284 +	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
   3.285 +		return lexRightDelim
   3.286 +	}
   3.287 +	r := l.next()
   3.288 +	if r == separator {
   3.289 +		l.emit(tokenSeparator)
   3.290 +		return lexItem
   3.291 +	} else if r == eof {
   3.292 +		if l.arrayDepth > 0 {
   3.293 +			return l.errorf("unclosed array")
   3.294 +		}
   3.295 +		l.emit(tokenEOF)
   3.296 +		return nil
   3.297 +	} else {
   3.298 +		l.backup()
   3.299 +		return l.errorf("expected %s, none found before %s\n", separator, l.input[l.pos:])
   3.300 +	}
   3.301 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/lexer_test.go	Sun Apr 19 23:47:36 2015 -0400
     4.3 @@ -0,0 +1,44 @@
     4.4 +package pqarrays
     4.5 +
     4.6 +import (
     4.7 +	"testing"
     4.8 +)
     4.9 +
// testInputs maps raw array literals to the exact token stream the
// lexer is expected to produce for each, including empty, quoted, and
// nested-array cases.
var testInputs = map[string][]token{
	``:                                       []token{{typ: tokenError, val: "expected array to start before "}},
	`{}`:                                     []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenArrayEnd, val: "}"}},
	`{lions}`:                                []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenArrayEnd, val: "}"}},
	`{lions,tigers}`:                         []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenArrayEnd, val: "}"}},
	`{lions,tigers,bears}`:                   []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "bears"}, {typ: tokenArrayEnd, val: "}"}},
	`{lions,tigers,bears,"oh my!"}`:          []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "lions"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "tigers"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "bears"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "oh my!"}, {typ: tokenArrayEnd, val: "}"}},
	`{{two,dimensional},{array,"of items"}}`: []token{{typ: tokenArrayStart, val: "{"}, {typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "two"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "dimensional"}, {typ: tokenArrayEnd, val: "}"}, {typ: tokenSeparator, val: ","}, {typ: tokenArrayStart, val: "{"}, {typ: tokenString, val: "array"}, {typ: tokenSeparator, val: ","}, {typ: tokenString, val: "of items"}, {typ: tokenArrayEnd, val: "}"}, {typ: tokenArrayEnd, val: "}"}},
}
    4.19 +
    4.20 +func TestInputsTable(t *testing.T) {
    4.21 +	for input, expectedTokens := range testInputs {
    4.22 +		l := lex(input)
    4.23 +		var tokens []token
    4.24 +		for {
    4.25 +			tok := l.nextToken()
    4.26 +			if tok.typ == tokenEOF {
    4.27 +				break
    4.28 +			}
    4.29 +			tokens = append(tokens, tok)
    4.30 +			if tok.typ == tokenError {
    4.31 +				break
    4.32 +			}
    4.33 +		}
    4.34 +		t.Logf("%#+v\n", tokens)
    4.35 +		if len(tokens) != len(expectedTokens) {
    4.36 +			t.Fatalf("Expected %d tokens, got %d\n", len(expectedTokens), len(tokens))
    4.37 +		}
    4.38 +		for pos, tok := range tokens {
    4.39 +			if expectedTokens[pos].typ != tok.typ {
    4.40 +				t.Errorf("Expected token in pos %d to have type of %s, got %s instead.", pos, expectedTokens[pos].typ, tok.typ)
    4.41 +			}
    4.42 +			if expectedTokens[pos].val != tok.val {
    4.43 +				t.Errorf("Expected token in pos %d to have value of `%s`, got `%s` instead.", pos, expectedTokens[pos].val, tok.val)
    4.44 +			}
    4.45 +		}
    4.46 +	}
    4.47 +}
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/parser.go	Sun Apr 19 23:47:36 2015 -0400
     5.3 @@ -0,0 +1,88 @@
     5.4 +package pqarrays
     5.5 +
     5.6 +import (
     5.7 +	"errors"
     5.8 +)
     5.9 +
// parse drains the parser run in a separate goroutine, collecting each
// element as it is produced. Elements are *string so a SQL NULL can be
// represented by a nil entry.
//
// On error it returns whatever elements were parsed before the error.
func parse(l *lexer) ([]*string, error) {
	var parsed []*string
	pchan := make(chan *string)
	errchan := make(chan error)
	done := make(chan struct{})
	go runParse(l, pchan, errchan, done)
	for {
		select {
		case err := <-errchan:
			// NOTE(review): on error the lexing goroutine may still be
			// blocked sending a token nobody will ever receive — confirm
			// whether that goroutine leak matters to callers.
			return parsed, err
		case item := <-pchan:
			parsed = append(parsed, item)
		case <-done:
			return parsed, nil
		}
	}
}
    5.27 +
    5.28 +func runParse(l *lexer, parsed chan *string, err chan error, done chan struct{}) {
    5.29 +	var state parseFunc = parseStart
    5.30 +	for {
    5.31 +		var e error
    5.32 +		state, e = state(l, parsed)
    5.33 +		if e != nil {
    5.34 +			err <- e
    5.35 +			break
    5.36 +		}
    5.37 +		if state == nil {
    5.38 +			break
    5.39 +		}
    5.40 +	}
    5.41 +	close(done)
    5.42 +}
    5.43 +
// parseFunc is one step of the parser's state machine: it consumes
// tokens from the lexer, may send elements on the channel, and returns
// the next state (nil when parsing is complete) or an error.
type parseFunc func(*lexer, chan *string) (parseFunc, error)

// parseEOF expects the input to be exhausted, tolerating a single
// trailing whitespace token.
func parseEOF(l *lexer, parsed chan *string) (parseFunc, error) {
	tok := l.nextToken()
	if tok.typ == tokenWhitespace {
		tok = l.nextToken()
	}
	if tok.typ != tokenEOF {
		return nil, errors.New("expected EOF, got " + tok.typ.String())
	}
	return nil, nil
}
    5.56 +
    5.57 +func parseStringOrNull(l *lexer, parsed chan *string) (parseFunc, error) {
    5.58 +	tok := l.nextToken()
    5.59 +	if tok.typ == tokenWhitespace {
    5.60 +		tok = l.nextToken()
    5.61 +	} else if tok.typ == tokenString {
    5.62 +		parsed <- &tok.val
    5.63 +		return parseSeparatorOrDelim, nil
    5.64 +	} else if tok.typ == tokenNull {
    5.65 +		parsed <- nil
    5.66 +		return parseSeparatorOrDelim, nil
    5.67 +	}
    5.68 +	return nil, errors.New("expected string, got " + tok.typ.String())
    5.69 +}
    5.70 +
    5.71 +func parseSeparatorOrDelim(l *lexer, parsed chan *string) (parseFunc, error) {
    5.72 +	tok := l.nextToken()
    5.73 +	if tok.typ == tokenWhitespace {
    5.74 +		return parseSeparatorOrDelim, nil
    5.75 +	} else if tok.typ == tokenSeparator {
    5.76 +		return parseStringOrNull, nil
    5.77 +	} else if tok.typ == tokenArrayEnd {
    5.78 +		return parseEOF, nil
    5.79 +	}
    5.80 +	return nil, errors.New("expected separator or delim, got " + tok.typ.String())
    5.81 +}
    5.82 +
    5.83 +func parseStart(l *lexer, parsed chan *string) (parseFunc, error) {
    5.84 +	tok := l.nextToken()
    5.85 +	if tok.typ == tokenWhitespace {
    5.86 +		return parseStart, nil
    5.87 +	} else if tok.typ == tokenArrayStart {
    5.88 +		return parseStringOrNull, nil
    5.89 +	}
    5.90 +	return nil, errors.New("expected separator or delim, got " + tok.typ.String())
    5.91 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/parser_test.go	Sun Apr 19 23:47:36 2015 -0400
     6.3 @@ -0,0 +1,44 @@
     6.4 +package pqarrays
     6.5 +
     6.6 +import (
     6.7 +	"testing"
     6.8 +)
     6.9 +
// strPtr returns a pointer to in, for building expected []*string
// fixtures (a *string literal cannot be written inline).
func strPtr(in string) *string {
	return &in
}
    6.13 +
// parseTestInputs maps raw array literals to the parsed elements the
// parser should produce; nil entries stand for SQL NULL.
var parseTestInputs = map[string][]*string{
	`{lions}`:                       []*string{strPtr("lions")},
	`{lions,tigers}`:                []*string{strPtr("lions"), strPtr("tigers")},
	`{lions,tigers,NULL}`:           []*string{strPtr("lions"), strPtr("tigers"), nil},
	`{lions,tigers,bears}`:          []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears")},
	`{lions,tigers,bears,"oh my!"}`: []*string{strPtr("lions"), strPtr("tigers"), strPtr("bears"), strPtr("oh my!")},
}
    6.21 +
    6.22 +func TestParseInputsTable(t *testing.T) {
    6.23 +	for input, expected := range parseTestInputs {
    6.24 +		l := lex(input)
    6.25 +		output, err := parse(l)
    6.26 +		if err != nil {
    6.27 +			t.Fatalf(err.Error())
    6.28 +		}
    6.29 +		t.Logf("`%s`: %#+v\n", input, output)
    6.30 +		if len(output) != len(expected) {
    6.31 +			t.Fatalf("Expected %d items in array, got %d\n", len(expected), len(output))
    6.32 +		}
    6.33 +		for pos, item := range output {
    6.34 +			if item == nil && expected[pos] != nil {
    6.35 +				t.Errorf("Expected %d to be %s, got nil instead.", pos, *expected[pos])
    6.36 +			} else if item != nil && expected[pos] == nil {
    6.37 +				t.Errorf("Expected %d to be nil, got %s instead.", pos, *item)
    6.38 +			} else if item != nil && expected[pos] != nil {
    6.39 +				continue
    6.40 +			} else if item == nil && expected[pos] == nil {
    6.41 +				continue
    6.42 +			} else if *item != *expected[pos] {
    6.43 +				t.Errorf("Expected %d to be %s, got %s instead.", pos, *expected[pos], *item)
    6.44 +			}
    6.45 +		}
    6.46 +	}
    6.47 +}