pqarrays
2015-04-19
Child:ce9c92fc81ab
pqarrays/lexer.go
First pass implementation. Use a lexer to generate tokens out of the Array type responses that PostgreSQL will send. Write a parser for string[] array types. Create a StringArray type that fulfills the driver.Valuer and sql.Scanner interfaces using the parser and lexer.
// Lexer for PostgreSQL array literal responses such as `{a,"b c",NULL}`.
// Tokens are produced on a channel by a goroutine-driven state machine
// (the text/template-style stateFunc pattern).

const (
	eof        = -1
	leftDelim  = "{"
	rightDelim = "}"
	separator  = ','
)

// tokenType identifies the kind of lexeme produced by the lexer.
type tokenType int

const (
	tokenError tokenType = iota
	tokenWhitespace
	tokenArrayStart
	tokenString
	tokenNull
	tokenSeparator
	tokenArrayEnd
	tokenEOF
)

// String returns a human-readable name for the token type.
func (t tokenType) String() string {
	switch t {
	case tokenError:
		return "error"
	case tokenWhitespace:
		return "whitespace"
	case tokenArrayStart:
		return "array start"
	case tokenString:
		return "string"
	case tokenNull:
		return "null"
	case tokenSeparator:
		return "separator"
	case tokenArrayEnd:
		return "array end"
	case tokenEOF:
		return "eof"
	default:
		return "unknown token"
	}
}

// stateFunc is one state of the lexer; it returns the next state, or nil
// when lexing is finished.
type stateFunc func(*lexer) stateFunc

// lexer holds the scanning state for one input string. Tokens are sent on
// the tokens channel by the goroutine started in lex.
type lexer struct {
	tokens     chan token // emitted tokens; read via nextToken
	input      string     // the string being scanned
	start      int        // start byte offset of the token in progress
	pos        int        // current scan position in input
	omitted    []int      // byte offsets (escape backslashes) to exclude from the next token
	width      int        // byte width of the last rune read, for backup
	state      stateFunc  // current state of the machine
	arrayDepth int        // nesting depth of { } seen so far
}

// token is a single lexeme: its type and its (unescaped) text.
type token struct {
	typ tokenType
	val string
}

// lex starts lexing input in a new goroutine and returns the lexer; tokens
// are retrieved with nextToken.
func lex(input string) *lexer {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
	}
	go l.run()
	return l
}

// nextToken blocks until the next token is available.
func (l *lexer) nextToken() token {
	return <-l.tokens
}

// run drives the state machine until a state returns nil.
func (l *lexer) run() {
	for l.state = lexStart; l.state != nil; { // TODO(paddy): default state
		l.state = l.state(l)
	}
}

// emit sends input[start:pos] as a token of type t, splicing out any byte
// offsets recorded via omit (the backslashes of escape sequences).
func (l *lexer) emit(t tokenType) {
	var val string
	if len(l.omitted) < 1 {
		val = l.input[l.start:l.pos]
	} else {
		start := l.start
		for _, pos := range l.omitted {
			val += l.input[start:pos]
			start = pos + 1
		}
		if l.pos > start {
			val += l.input[start:l.pos]
		}
	}
	l.tokens <- token{typ: t, val: val}
	l.start = l.pos
	l.omitted = l.omitted[:0]
}

// next returns the next rune in the input, advancing pos, or eof.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// omit records the byte just consumed (assumed single-byte, i.e. '\\') so
// emit will splice it out of the token value.
func (l *lexer) omit() {
	l.omitted = append(l.omitted, l.pos-1)
}

// ignore discards the input scanned so far without emitting it.
func (l *lexer) ignore() {
	l.start = l.pos
}

// backup steps back one rune; valid only once per call to next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns but does not consume the next rune.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// accept consumes the next rune if it is in valid, reporting whether it did.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from valid.
func (l *lexer) acceptRun(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

// errorf emits an error token with a formatted message and terminates the
// state machine by returning nil.
func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
	l.tokens <- token{tokenError, fmt.Sprintf(format, args...)}
	return nil
}

// consumeWhitespace advances past any run of whitespace and, if any was
// found, emits it as a tokenWhitespace.
func (l *lexer) consumeWhitespace() {
	for unicode.IsSpace(l.peek()) {
		l.next()
	}
	// BUG FIX: the comparison was inverted (l.start > l.pos), which can
	// never hold after scanning forward, so tokenWhitespace was consumed
	// but never emitted.
	if l.pos > l.start {
		l.emit(tokenWhitespace)
	}
}

// lexStart is the initial state: skip leading whitespace, then expect an
// array opener.
func lexStart(l *lexer) stateFunc {
	l.consumeWhitespace()
	return lexArrayStart
}

// lexArrayStart requires the input to continue with the left delimiter.
func lexArrayStart(l *lexer) stateFunc {
	if strings.HasPrefix(l.input[l.pos:], leftDelim) {
		return lexLeftDelim
	}
	return l.errorf("expected array to start before %s", l.input[l.pos:])
}

// lexLeftDelim emits the opening brace and descends one array level.
func lexLeftDelim(l *lexer) stateFunc {
	l.pos += len(leftDelim)
	l.emit(tokenArrayStart)
	l.arrayDepth++
	return lexItem
}

// lexRightDelim emits the closing brace and ascends one array level.
func lexRightDelim(l *lexer) stateFunc {
	l.pos += len(rightDelim)
	l.emit(tokenArrayEnd)
	l.arrayDepth--
	return lexSeparator
}

// lexItem dispatches on the next element: a nested array, the end of the
// current array, a quoted string, or a bare (unquoted) string.
func lexItem(l *lexer) stateFunc {
	l.consumeWhitespace()
	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
		return lexRightDelim
	}
	if strings.HasPrefix(l.input[l.pos:], leftDelim) {
		return lexLeftDelim
	}
	switch r := l.peek(); {
	case r == eof:
		return l.errorf("unclosed array")
	case r == separator:
		return l.errorf("empty item in array")
	case unicode.IsSpace(r):
		l.consumeWhitespace()
		return lexItem
	case r == '"':
		return lexQuotedString
	default:
		return lexString
	}
}

// lexQuotedString scans a double-quoted string, dropping the surrounding
// quotes and splicing backslash escapes out of the emitted value.
func lexQuotedString(l *lexer) stateFunc {
	l.next()
	l.ignore() // ignore the open quote
	for {
		switch r := l.next(); {
		case r == eof:
			return l.errorf("unclosed quoted string")
		case r == '"':
			l.backup()
			l.emit(tokenString)
			l.next()
			l.ignore()
			return lexSeparator
		case r == '\\':
			// omit the \ itself from the emitted value
			l.omit()
			// always skip over the character following a \
			// BUG FIX: the original compared the stale r (always '\\')
			// against eof here, so a trailing backslash was only caught
			// on the next loop iteration; check the newly read rune.
			if l.next() == eof {
				return l.errorf("unclosed quoted string")
			}
		}
	}
}

// lexString scans an unquoted element, which may not contain quotes,
// braces, backslashes, or whitespace; the literal NULL becomes tokenNull.
func lexString(l *lexer) stateFunc {
	for {
		if strings.HasPrefix(l.input[l.pos:], leftDelim) {
			return l.errorf(leftDelim + " in unquoted string")
		}
		if strings.HasPrefix(l.input[l.pos:], rightDelim) {
			if l.pos <= l.start {
				return l.errorf(rightDelim + " in unquoted string")
			}
			if string(l.input[l.start:l.pos]) == "NULL" {
				l.emit(tokenNull)
			} else {
				l.emit(tokenString)
			}
			return lexRightDelim
		}
		switch r := l.next(); {
		case r == eof:
			return l.errorf("eof while parsing string")
		case r == '"':
			return l.errorf("\" in unquoted string")
		case unicode.IsSpace(r):
			return l.errorf("unquoted empty string")
		case r == '\\':
			return l.errorf("\\ in unquoted string")
		case r == separator:
			l.backup()
			if l.pos <= l.start {
				return l.errorf("unquoted empty string")
			}
			if string(l.input[l.start:l.pos]) == "NULL" {
				l.emit(tokenNull)
			} else {
				l.emit(tokenString)
			}
			return lexSeparator
		}
	}
}

// lexSeparator expects a comma between elements, the end of an array, or —
// at depth zero — end of input.
func lexSeparator(l *lexer) stateFunc {
	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
		return lexRightDelim
	}
	r := l.next()
	if r == separator {
		l.emit(tokenSeparator)
		return lexItem
	} else if r == eof {
		if l.arrayDepth > 0 {
			return l.errorf("unclosed array")
		}
		l.emit(tokenEOF)
		return nil
	} else {
		l.backup()
		// BUG FIX: separator is a rune, so the original %s verb printed
		// "%!s(int32=44)"; %q renders it as ','. The stray trailing \n
		// in the error string is also dropped.
		return l.errorf("expected %q, none found before %q", separator, l.input[l.pos:])
	}
}