ducky/devices
2016-01-02
Parent:a700ede02f91
ducky/devices/vendor/code.secondbit.org/pqarrays.hg/lexer.go
Update trout to fix routing bug. Update to tip on trout to fix the routing bug that was causing us such issues. See the commit message of trout at 3df515f0cec5 for more details.
| paddy@16 | 1 package pqarrays |
| paddy@16 | 2 |
| paddy@16 | 3 import ( |
| paddy@16 | 4 "fmt" |
| paddy@16 | 5 "strings" |
| paddy@16 | 6 "unicode" |
| paddy@16 | 7 "unicode/utf8" |
| paddy@16 | 8 ) |
| paddy@16 | 9 |
// eof is the sentinel returned by (*lexer).next once the input is exhausted.
// It is deliberately outside the range of valid runes.
const eof = -1

// Syntax elements of the array literal being lexed.
const (
	// leftDelim opens an array.
	leftDelim = "{"
	// rightDelim closes an array.
	rightDelim = "}"
	// separator sits between two items of an array.
	separator = ','
)
| paddy@16 | 16 |
// tokenType identifies the kind of lexical element a token carries.
type tokenType int

// The kinds of token the lexer can produce. The numeric values follow
// declaration order, starting at zero with tokenError.
const (
	tokenError      tokenType = iota // lexing failed; the token value is the error message
	tokenWhitespace                  // a run of whitespace
	tokenArrayStart                  // the left array delimiter
	tokenString                      // a quoted or unquoted string item
	tokenNull                        // the unquoted literal NULL
	tokenSeparator                   // the item separator
	tokenArrayEnd                    // the right array delimiter
	tokenEOF                         // end of input
)

// String returns a human-readable name for t. Values that do not match
// any declared token type yield "unknown token".
func (t tokenType) String() string {
	names := [...]string{
		tokenError:      "error",
		tokenWhitespace: "whitespace",
		tokenArrayStart: "array start",
		tokenString:     "string",
		tokenNull:       "null",
		tokenSeparator:  "separator",
		tokenArrayEnd:   "array end",
		tokenEOF:        "eof",
	}
	if t < 0 || int(t) >= len(names) {
		return "unknown token"
	}
	return names[t]
}
| paddy@16 | 52 |
| paddy@16 | 53 type stateFunc func(*lexer) stateFunc |
| paddy@16 | 54 |
| paddy@16 | 55 type lexer struct { |
| paddy@16 | 56 tokens chan token |
| paddy@16 | 57 input string |
| paddy@16 | 58 start int |
| paddy@16 | 59 pos int |
| paddy@16 | 60 omitted []int |
| paddy@16 | 61 width int |
| paddy@16 | 62 state stateFunc |
| paddy@16 | 63 arrayDepth int |
| paddy@16 | 64 } |
| paddy@16 | 65 |
| paddy@16 | 66 type token struct { |
| paddy@16 | 67 typ tokenType |
| paddy@16 | 68 val string |
| paddy@16 | 69 } |
| paddy@16 | 70 |
| paddy@16 | 71 func lex(input string) *lexer { |
| paddy@16 | 72 l := &lexer{ |
| paddy@16 | 73 input: input, |
| paddy@16 | 74 tokens: make(chan token), |
| paddy@16 | 75 } |
| paddy@16 | 76 go l.run() |
| paddy@16 | 77 return l |
| paddy@16 | 78 } |
| paddy@16 | 79 |
| paddy@16 | 80 func (l *lexer) nextToken() token { |
| paddy@16 | 81 return <-l.tokens |
| paddy@16 | 82 } |
| paddy@16 | 83 |
| paddy@16 | 84 func (l *lexer) run() { |
| paddy@16 | 85 for l.state = lexStart; l.state != nil; { // TODO(paddy): default state |
| paddy@16 | 86 l.state = l.state(l) |
| paddy@16 | 87 } |
| paddy@16 | 88 } |
| paddy@16 | 89 |
| paddy@16 | 90 func (l *lexer) emit(t tokenType) { |
| paddy@16 | 91 var val string |
| paddy@16 | 92 if len(l.omitted) < 1 { |
| paddy@16 | 93 val = l.input[l.start:l.pos] |
| paddy@16 | 94 } else { |
| paddy@16 | 95 start := l.start |
| paddy@16 | 96 for _, pos := range l.omitted { |
| paddy@16 | 97 val += l.input[start:pos] |
| paddy@16 | 98 start = pos + 1 |
| paddy@16 | 99 } |
| paddy@16 | 100 if l.pos > start { |
| paddy@16 | 101 val += l.input[start:l.pos] |
| paddy@16 | 102 } |
| paddy@16 | 103 } |
| paddy@16 | 104 l.tokens <- token{typ: t, val: val} |
| paddy@16 | 105 l.start = l.pos |
| paddy@16 | 106 l.omitted = l.omitted[0:0] |
| paddy@16 | 107 } |
| paddy@16 | 108 |
| paddy@16 | 109 func (l *lexer) next() rune { |
| paddy@16 | 110 if l.pos >= len(l.input) { |
| paddy@16 | 111 l.width = 0 |
| paddy@16 | 112 return eof |
| paddy@16 | 113 } |
| paddy@16 | 114 var r rune |
| paddy@16 | 115 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) |
| paddy@16 | 116 l.pos += l.width |
| paddy@16 | 117 return r |
| paddy@16 | 118 } |
| paddy@16 | 119 |
| paddy@16 | 120 func (l *lexer) omit() { |
| paddy@16 | 121 l.omitted = append(l.omitted, l.pos-1) |
| paddy@16 | 122 } |
| paddy@16 | 123 |
| paddy@16 | 124 func (l *lexer) ignore() { |
| paddy@16 | 125 l.start = l.pos |
| paddy@16 | 126 } |
| paddy@16 | 127 |
| paddy@16 | 128 func (l *lexer) backup() { |
| paddy@16 | 129 l.pos -= l.width |
| paddy@16 | 130 } |
| paddy@16 | 131 |
| paddy@16 | 132 func (l *lexer) peek() rune { |
| paddy@16 | 133 r := l.next() |
| paddy@16 | 134 l.backup() |
| paddy@16 | 135 return r |
| paddy@16 | 136 } |
| paddy@16 | 137 |
| paddy@16 | 138 func (l *lexer) accept(valid string) bool { |
| paddy@16 | 139 if strings.IndexRune(valid, l.next()) >= 0 { |
| paddy@16 | 140 return true |
| paddy@16 | 141 } |
| paddy@16 | 142 l.backup() |
| paddy@16 | 143 return false |
| paddy@16 | 144 } |
| paddy@16 | 145 |
| paddy@16 | 146 func (l *lexer) acceptRun(valid string) { |
| paddy@16 | 147 for strings.IndexRune(valid, l.next()) >= 0 { |
| paddy@16 | 148 } |
| paddy@16 | 149 l.backup() |
| paddy@16 | 150 } |
| paddy@16 | 151 |
| paddy@16 | 152 func (l *lexer) errorf(format string, args ...interface{}) stateFunc { |
| paddy@16 | 153 l.tokens <- token{tokenError, fmt.Sprintf(format, args...)} |
| paddy@16 | 154 return nil |
| paddy@16 | 155 } |
| paddy@16 | 156 |
| paddy@16 | 157 func (l *lexer) consumeWhitespace() { |
| paddy@16 | 158 for unicode.IsSpace(l.peek()) { |
| paddy@16 | 159 l.next() |
| paddy@16 | 160 } |
| paddy@16 | 161 if l.start > l.pos { |
| paddy@16 | 162 l.emit(tokenWhitespace) |
| paddy@16 | 163 } |
| paddy@16 | 164 } |
| paddy@16 | 165 |
| paddy@16 | 166 func lexStart(l *lexer) stateFunc { |
| paddy@16 | 167 l.consumeWhitespace() |
| paddy@16 | 168 return lexArrayStart |
| paddy@16 | 169 } |
| paddy@16 | 170 |
| paddy@16 | 171 func lexArrayStart(l *lexer) stateFunc { |
| paddy@16 | 172 if strings.HasPrefix(l.input[l.pos:], leftDelim) { |
| paddy@16 | 173 return lexLeftDelim |
| paddy@16 | 174 } |
| paddy@16 | 175 return l.errorf("expected array to start before %s", l.input[l.pos:]) |
| paddy@16 | 176 } |
| paddy@16 | 177 |
| paddy@16 | 178 func lexLeftDelim(l *lexer) stateFunc { |
| paddy@16 | 179 l.pos += len(leftDelim) |
| paddy@16 | 180 l.emit(tokenArrayStart) |
| paddy@16 | 181 l.arrayDepth += 1 |
| paddy@16 | 182 return lexItem |
| paddy@16 | 183 } |
| paddy@16 | 184 |
| paddy@16 | 185 func lexRightDelim(l *lexer) stateFunc { |
| paddy@16 | 186 l.pos += len(rightDelim) |
| paddy@16 | 187 l.emit(tokenArrayEnd) |
| paddy@16 | 188 l.arrayDepth -= 1 |
| paddy@16 | 189 return lexSeparator |
| paddy@16 | 190 } |
| paddy@16 | 191 |
| paddy@16 | 192 func lexItem(l *lexer) stateFunc { |
| paddy@16 | 193 l.consumeWhitespace() |
| paddy@16 | 194 if strings.HasPrefix(l.input[l.pos:], rightDelim) { |
| paddy@16 | 195 return lexRightDelim |
| paddy@16 | 196 } |
| paddy@16 | 197 if strings.HasPrefix(l.input[l.pos:], leftDelim) { |
| paddy@16 | 198 return lexLeftDelim |
| paddy@16 | 199 } |
| paddy@16 | 200 switch r := l.peek(); { |
| paddy@16 | 201 case r == eof: |
| paddy@16 | 202 return l.errorf("unclosed array") |
| paddy@16 | 203 case r == separator: |
| paddy@16 | 204 return l.errorf("empty item in array") |
| paddy@16 | 205 case unicode.IsSpace(r): |
| paddy@16 | 206 l.consumeWhitespace() |
| paddy@16 | 207 return lexItem |
| paddy@16 | 208 case r == '"': |
| paddy@16 | 209 return lexQuotedString |
| paddy@16 | 210 default: |
| paddy@16 | 211 return lexString |
| paddy@16 | 212 } |
| paddy@16 | 213 } |
| paddy@16 | 214 |
| paddy@16 | 215 func lexQuotedString(l *lexer) stateFunc { |
| paddy@16 | 216 l.next() |
| paddy@16 | 217 l.ignore() // ignore the open quote |
| paddy@16 | 218 for { |
| paddy@16 | 219 switch r := l.next(); { |
| paddy@16 | 220 case r == eof: |
| paddy@16 | 221 return l.errorf("unclosed quoted string") |
| paddy@16 | 222 case r == '"': |
| paddy@16 | 223 l.backup() |
| paddy@16 | 224 l.emit(tokenString) |
| paddy@16 | 225 l.next() |
| paddy@16 | 226 l.ignore() |
| paddy@16 | 227 return lexSeparator |
| paddy@16 | 228 case r == '\\': |
| paddy@16 | 229 // omit the \ itself |
| paddy@16 | 230 l.omit() |
| paddy@16 | 231 // always skip over the character following a \ |
| paddy@16 | 232 l.next() |
| paddy@16 | 233 if r == eof { |
| paddy@16 | 234 return l.errorf("unclosed quoted string") |
| paddy@16 | 235 } |
| paddy@16 | 236 } |
| paddy@16 | 237 } |
| paddy@16 | 238 } |
| paddy@16 | 239 |
| paddy@16 | 240 func lexString(l *lexer) stateFunc { |
| paddy@16 | 241 for { |
| paddy@16 | 242 if strings.HasPrefix(l.input[l.pos:], leftDelim) { |
| paddy@16 | 243 return l.errorf(leftDelim + " in unquoted string") |
| paddy@16 | 244 } |
| paddy@16 | 245 if strings.HasPrefix(l.input[l.pos:], rightDelim) { |
| paddy@16 | 246 if l.pos <= l.start { |
| paddy@16 | 247 return l.errorf(rightDelim + " in unquoted string") |
| paddy@16 | 248 } |
| paddy@16 | 249 if string(l.input[l.start:l.pos]) == "NULL" { |
| paddy@16 | 250 l.emit(tokenNull) |
| paddy@16 | 251 } else { |
| paddy@16 | 252 l.emit(tokenString) |
| paddy@16 | 253 } |
| paddy@16 | 254 return lexRightDelim |
| paddy@16 | 255 } |
| paddy@16 | 256 switch r := l.next(); { |
| paddy@16 | 257 case r == eof: |
| paddy@16 | 258 return l.errorf("eof while parsing string") |
| paddy@16 | 259 case r == '"': |
| paddy@16 | 260 return l.errorf("\" in unquoted string") |
| paddy@16 | 261 case unicode.IsSpace(r): |
| paddy@16 | 262 return l.errorf("unquoted empty string") |
| paddy@16 | 263 case r == '\\': |
| paddy@16 | 264 return l.errorf("\\ in unquoted string") |
| paddy@16 | 265 case r == separator: |
| paddy@16 | 266 l.backup() |
| paddy@16 | 267 if l.pos <= l.start { |
| paddy@16 | 268 return l.errorf("unquoted empty string") |
| paddy@16 | 269 } |
| paddy@16 | 270 if string(l.input[l.start:l.pos]) == "NULL" { |
| paddy@16 | 271 l.emit(tokenNull) |
| paddy@16 | 272 } else { |
| paddy@16 | 273 l.emit(tokenString) |
| paddy@16 | 274 } |
| paddy@16 | 275 return lexSeparator |
| paddy@16 | 276 } |
| paddy@16 | 277 } |
| paddy@16 | 278 } |
| paddy@16 | 279 |
| paddy@16 | 280 func lexSeparator(l *lexer) stateFunc { |
| paddy@16 | 281 if strings.HasPrefix(l.input[l.pos:], rightDelim) { |
| paddy@16 | 282 return lexRightDelim |
| paddy@16 | 283 } |
| paddy@16 | 284 r := l.next() |
| paddy@16 | 285 if r == separator { |
| paddy@16 | 286 l.emit(tokenSeparator) |
| paddy@16 | 287 return lexItem |
| paddy@16 | 288 } else if r == eof { |
| paddy@16 | 289 if l.arrayDepth > 0 { |
| paddy@16 | 290 return l.errorf("unclosed array") |
| paddy@16 | 291 } |
| paddy@16 | 292 l.emit(tokenEOF) |
| paddy@16 | 293 return nil |
| paddy@16 | 294 } else { |
| paddy@16 | 295 l.backup() |
| paddy@16 | 296 return l.errorf("expected %s, none found before %s\n", separator, l.input[l.pos:]) |
| paddy@16 | 297 } |
| paddy@16 | 298 } |