pqarrays

Paddy 2016-02-25 Parent:bfe2a4af6bdf Child:9a415db0346a

1:ce9c92fc81ab Go to Latest

pqarrays/lexer.go

Fix bug parsing empty arrays, make golint and go vet happy. Add comments to make golint happy. Also, because comments are a good thing to have. Turn += 1 and -= 1 into ++ and --, respectively, so golint will be happy. Fix an improperly formatted errorf call, where a rune was being treated as a string. Thanks, go vet! Fix whitespace parsing, returning the parse functions again instead of just skipping the one character. Now if we have more than one whitespace character in a row, they'll all be skipped. Add a parseStringOrNullOrEnd parse function that will be called after the tokenArrayStart character, to fix a bug where empty arrays were expecting a string or null and getting the array end character. This is only valid after tokenArrayStart, however; in other places where parseSeparatorOrDelim is used, it wouldn't be appropriate. Add a parser test for an empty array.

Download raw file

View source Diff to previous Annotate

1 package pqarrays

3 import (

4 "fmt"

5 "strings"

6 "unicode"

7 "unicode/utf8"

// eof is the sentinel returned by (*lexer).next once the input is exhausted.
// It is untyped so it can be compared against and returned as a rune.
const eof = -1

// Delimiters that open and close an array.
const (
	leftDelim  = "{"
	rightDelim = "}"
)

// separator is the rune that separates the items of an array.
const separator = ','

// tokenType identifies the kind of a token.
type tokenType int

// The token kinds, numbered consecutively from zero.
const (
	tokenError tokenType = iota
	tokenWhitespace
	tokenArrayStart
	tokenString
	tokenNull
	tokenSeparator
	tokenArrayEnd
	tokenEOF
)

// tokenNames maps every known tokenType to its human-readable name.
var tokenNames = [...]string{
	tokenError:      "error",
	tokenWhitespace: "whitespace",
	tokenArrayStart: "array start",
	tokenString:     "string",
	tokenNull:       "null",
	tokenSeparator:  "separator",
	tokenArrayEnd:   "array end",
	tokenEOF:        "eof",
}

// String returns a human-readable name for t, or "unknown token" when t is
// not one of the declared token kinds.
func (t tokenType) String() string {
	if t < 0 || int(t) >= len(tokenNames) {
		return "unknown token"
	}
	return tokenNames[t]
}

53 type stateFunc func(*lexer) stateFunc

55 type lexer struct {

56 tokens chan token

57 input string

58 start int

59 pos int

60 omitted []int

61 width int

62 state stateFunc

63 arrayDepth int

66 type token struct {

67 typ tokenType

68 val string

71 func lex(input string) *lexer {

72 l := &lexer{

73 input: input,

74 tokens: make(chan token),

76 go l.run()

77 return l

80 func (l *lexer) nextToken() token {

81 return <-l.tokens

84 func (l *lexer) run() {

85 for l.state = lexStart; l.state != nil; { // TODO(paddy): default state

86 l.state = l.state(l)

90 func (l *lexer) emit(t tokenType) {

91 var val string

92 if len(l.omitted) < 1 {

93 val = l.input[l.start:l.pos]

94 } else {

95 start := l.start

96 for _, pos := range l.omitted {

97 val += l.input[start:pos]

98 start = pos + 1

100 if l.pos > start {

101 val += l.input[start:l.pos]

104 l.tokens <- token{typ: t, val: val}

105 l.start = l.pos

106 l.omitted = l.omitted[0:0]

109 func (l *lexer) next() rune {

110 if l.pos >= len(l.input) {

111 l.width = 0

112 return eof

114 var r rune

115 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])

116 l.pos += l.width

117 return r

120 func (l *lexer) omit() {

121 l.omitted = append(l.omitted, l.pos-1)

124 func (l *lexer) ignore() {

125 l.start = l.pos

128 func (l *lexer) backup() {

129 l.pos -= l.width

132 func (l *lexer) peek() rune {

133 r := l.next()

134 l.backup()

135 return r

138 func (l *lexer) accept(valid string) bool {

139 if strings.IndexRune(valid, l.next()) >= 0 {

140 return true

142 l.backup()

143 return false

146 func (l *lexer) acceptRun(valid string) {

147 for strings.IndexRune(valid, l.next()) >= 0 {

149 l.backup()

152 func (l *lexer) errorf(format string, args ...interface{}) stateFunc {

153 l.tokens <- token{tokenError, fmt.Sprintf(format, args...)}

154 return nil

157 func (l *lexer) consumeWhitespace() {

158 for unicode.IsSpace(l.peek()) {

159 l.next()

161 if l.start > l.pos {

162 l.emit(tokenWhitespace)

166 func lexStart(l *lexer) stateFunc {

167 l.consumeWhitespace()

168 return lexArrayStart

171 func lexArrayStart(l *lexer) stateFunc {

172 if strings.HasPrefix(l.input[l.pos:], leftDelim) {

173 return lexLeftDelim

175 return l.errorf("expected array to start before %s", l.input[l.pos:])

178 func lexLeftDelim(l *lexer) stateFunc {

179 l.pos += len(leftDelim)

180 l.emit(tokenArrayStart)

181 l.arrayDepth++

182 return lexItem

185 func lexRightDelim(l *lexer) stateFunc {

186 l.pos += len(rightDelim)

187 l.emit(tokenArrayEnd)

188 l.arrayDepth--

189 return lexSeparator

192 func lexItem(l *lexer) stateFunc {

193 l.consumeWhitespace()

194 if strings.HasPrefix(l.input[l.pos:], rightDelim) {

195 return lexRightDelim

197 if strings.HasPrefix(l.input[l.pos:], leftDelim) {

198 return lexLeftDelim

200 switch r := l.peek(); {

201 case r == eof:

202 return l.errorf("unclosed array")

203 case r == separator:

204 return l.errorf("empty item in array")

205 case unicode.IsSpace(r):

206 l.consumeWhitespace()

207 return lexItem

208 case r == '"':

209 return lexQuotedString

210 default:

211 return lexString

215 func lexQuotedString(l *lexer) stateFunc {

216 l.next()

217 l.ignore() // ignore the open quote

218 for {

219 switch r := l.next(); {

220 case r == eof:

221 return l.errorf("unclosed quoted string")

222 case r == '"':

223 l.backup()

224 l.emit(tokenString)

225 l.next()

226 l.ignore()

227 return lexSeparator

228 case r == '\\':

229 // omit the \ itself

230 l.omit()

231 // always skip over the character following a \

232 l.next()

233 if r == eof {

234 return l.errorf("unclosed quoted string")

235 }

240 func lexString(l *lexer) stateFunc {

241 for {

242 if strings.HasPrefix(l.input[l.pos:], leftDelim) {

243 return l.errorf(leftDelim + " in unquoted string")

245 if strings.HasPrefix(l.input[l.pos:], rightDelim) {

246 if l.pos <= l.start {

247 return l.errorf(rightDelim + " in unquoted string")

248 }

249 if string(l.input[l.start:l.pos]) == "NULL" {

250 l.emit(tokenNull)

251 } else {

252 l.emit(tokenString)

253 }

254 return lexRightDelim

256 switch r := l.next(); {

257 case r == eof:

258 return l.errorf("eof while parsing string")

259 case r == '"':

260 return l.errorf("\" in unquoted string")

261 case unicode.IsSpace(r):

262 return l.errorf("unquoted empty string")

263 case r == '\\':

264 return l.errorf("\\ in unquoted string")

265 case r == separator:

266 l.backup()

267 if l.pos <= l.start {

268 return l.errorf("unquoted empty string")

269 }

270 if string(l.input[l.start:l.pos]) == "NULL" {

271 l.emit(tokenNull)

272 } else {

273 l.emit(tokenString)

274 }

275 return lexSeparator

280 func lexSeparator(l *lexer) stateFunc {

281 if strings.HasPrefix(l.input[l.pos:], rightDelim) {

282 return lexRightDelim

284 r := l.next()

285 if r == separator {

286 l.emit(tokenSeparator)

287 return lexItem

288 } else if r == eof {

289 if l.arrayDepth > 0 {

290 return l.errorf("unclosed array")

292 l.emit(tokenEOF)

293 return nil

294 } else {

295 l.backup()

296 return l.errorf("expected %s, none found before %s\n", string(separator), l.input[l.pos:])