// Copyright 2018 The CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package scanner
16
17import (
18 "fmt"
19 "io/ioutil"
20 "os"
21 "path/filepath"
22 "reflect"
23 "runtime"
24 "strings"
25 "testing"
26
27 "cuelang.org/go/cue/errors"
28 "cuelang.org/go/cue/token"
29 "github.com/google/go-cmp/cmp"
30)
31
// fset is the shared file set used throughout these tests to resolve
// token.Pos values into human-readable positions.
var fset = token.NewFileSet()

// Token classes used by the test tables to group expected tokens.
const /* class */ (
	special = iota
	literal
	operator
	keyword
)
40
41func tokenclass(tok token.Token) int {
42 switch {
43 case tok.IsLiteral():
44 return literal
45 case tok.IsOperator():
46 return operator
47 case tok.IsKeyword():
48 return keyword
49 }
50 return special
51}
52
// elt describes one expected scan result: the token kind, the source
// literal that produces it, and its token class.
type elt struct {
	tok   token.Token
	lit   string
	class int
}
58
// testTokens lists every token literal the scanner must recognize,
// paired with the expected token kind and class. TestScan concatenates
// these (separated by whitespace) into a single source and scans it.
var testTokens = [...]elt{
	// Special tokens
	{token.COMMENT, "/* a comment */", special},
	{token.COMMENT, "// a comment \n", special},
	{token.COMMENT, "/*\r*/", special},
	{token.COMMENT, "//\r\n", special},

	// Identifiers and basic type literals
	{token.BOTTOM, "_|_", literal},
	{token.BOTTOM, "_|_", literal},

	{token.IDENT, "foobar", literal},
	{token.IDENT, "a۰۱۸", literal},
	{token.IDENT, "foo६४", literal},
	{token.IDENT, "bar9876", literal},
	{token.IDENT, "ŝ", literal},
	{token.IDENT, "ŝfoo", literal},
	{token.INT, "0", literal},
	{token.INT, "1", literal},
	{token.INT, "123456789012345678890", literal},
	{token.INT, "12345_67890_12345_6788_90", literal},
	{token.INT, "1234567M", literal},
	{token.INT, "1234567Mi", literal},
	{token.INT, "01234567", literal},
	{token.INT, ".3Mi", literal},
	{token.INT, "3.3Mi", literal},
	{token.INT, "0xcafebabe", literal},
	{token.INT, "0b1100_1001", literal},
	{token.FLOAT, "0.", literal},
	{token.FLOAT, ".0", literal},
	{token.FLOAT, "3.14159265", literal},
	{token.FLOAT, "1e0", literal},
	{token.FLOAT, "1e+100", literal},
	{token.FLOAT, "1e-100", literal},
	{token.FLOAT, "2.71828e-1000", literal},
	{token.STRING, "`aa\n\n`", literal},
	{token.STRING, "'a'", literal},
	{token.STRING, "'\\000'", literal},
	{token.STRING, "'\\xFF'", literal},
	{token.STRING, "'\\uff16'", literal},
	{token.STRING, "'\\U0000ff16'", literal},
	{token.STRING, "'foobar'", literal},
	{token.STRING, "`" + `foo
	bar` +
		"`",
		literal,
	},
	{token.STRING, "`foobar`", literal},
	{token.STRING, "`\r`", literal},
	{token.STRING, "`foo\r\nbar`", literal},
	{token.STRING, "'" + `\r` + "'", literal},
	{token.STRING, "'foo" + `\r\n` + "bar'", literal},
	{token.STRING, `"foobar"`, literal},
	{token.STRING, `"""\n foobar\n """`, literal},

	// Operators and delimiters
	{token.ADD, "+", operator},
	{token.SUB, "-", operator},
	{token.MUL, "*", operator},
	{token.QUO, "/", operator},
	{token.REM, "%", operator},

	{token.UNIFY, "&", operator},
	{token.DISJUNCTION, "|", operator},

	{token.LAND, "&&", operator},
	{token.LOR, "||", operator},
	{token.LAMBDA, "->", operator},

	{token.EQL, "==", operator},
	{token.LSS, "<", operator},
	{token.GTR, ">", operator},
	{token.BIND, "=", operator},
	{token.NOT, "!", operator},

	{token.NEQ, "!=", operator},
	{token.LEQ, "<=", operator},
	{token.GEQ, ">=", operator},
	{token.RANGE, "..", operator},
	{token.ELLIPSIS, "...", operator},

	{token.LPAREN, "(", operator},
	{token.LBRACK, "[", operator},
	{token.LBRACE, "{", operator},
	{token.COMMA, ",", operator},
	{token.PERIOD, ".", operator},

	{token.RPAREN, ")", operator},
	{token.RBRACK, "]", operator},
	{token.RBRACE, "}", operator},
	{token.COLON, ":", operator},

	// Keywords
	{token.TRUE, "true", keyword},
	{token.FALSE, "false", keyword},
	{token.NULL, "null", keyword},

	{token.FOR, "for", keyword},
	{token.IF, "if", keyword},
	{token.IN, "in", keyword},
}
160
161const whitespace = " \t \n\n\n" // to separate tokens
162
163var source = func() []byte {
164 var src []byte
165 for _, t := range testTokens {
166 src = append(src, t.lit...)
167 src = append(src, whitespace...)
168 }
169 return src
170}()
171
// newlineCount returns the number of '\n' bytes in s.
func newlineCount(s string) int {
	return strings.Count(s, "\n")
}
181
182func checkPosScan(t *testing.T, lit string, p token.Pos, expected token.Position) {
183 pos := fset.Position(p)
184 if pos.Filename != expected.Filename {
185 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
186 }
187 if pos.Offset != expected.Offset {
188 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
189 }
190 if pos.Line != expected.Line {
191 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
192 }
193 if pos.Column != expected.Column {
194 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
195 }
196}
197
// Verify that calling Scan() provides the correct results.
func TestScan(t *testing.T) {
	whitespace_linecount := newlineCount(whitespace)

	// error handler; any scan error fails the test
	eh := func(_ token.Position, msg string) {
		t.Errorf("error handler called (msg = %s)", msg)
	}

	// verify scan
	var s Scanner
	s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertCommas)

	// set up expected position; updated incrementally after each token
	epos := token.Position{
		Filename: "",
		Offset:   0,
		Line:     1,
		Column:   1,
	}

	index := 0
	for {
		pos, tok, lit := s.Scan()

		// check position
		if tok == token.EOF {
			// correction for EOF: it sits after the trailing whitespace
			epos.Line = newlineCount(string(source))
			epos.Column = 2
		}
		checkPosScan(t, lit, pos, epos)

		// check token; past the table end, only EOF is expected
		e := elt{token.EOF, "", special}
		if index < len(testTokens) {
			e = testTokens[index]
			index++
		}
		if tok != e.tok {
			t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
		}

		// check token class
		if tokenclass(tok) != e.class {
			t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
		}

		// check literal: derive the expected literal from the source
		// literal, mirroring the scanner's normalizations
		elit := ""
		switch e.tok {
		case token.COMMENT:
			// no CRs in comments
			elit = string(stripCR([]byte(e.lit)))
			//-style comment literal doesn't contain newline
			if elit[1] == '/' {
				elit = elit[0 : len(elit)-1]
			}
		case token.IDENT:
			elit = e.lit
		case token.COMMA:
			elit = ","
		default:
			if e.tok.IsLiteral() {
				// no CRs in raw string literals
				elit = e.lit
				if elit[0] == '`' {
					elit = string(stripCR([]byte(elit)))
				}
			} else if e.tok.IsKeyword() {
				elit = e.lit
			}
		}
		if lit != elit {
			t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
		}

		if tok == token.EOF {
			break
		}

		// update position for the next token: advance past the literal
		// and the separating whitespace
		epos.Offset += len(e.lit) + len(whitespace)
		epos.Line += newlineCount(e.lit) + whitespace_linecount

	}

	if s.ErrorCount != 0 {
		t.Errorf("found %d errors", s.ErrorCount)
	}
}
289
// checkComma scans line and verifies comma insertion: every ILLEGAL
// marker token must be immediately followed by a COMMA whose literal
// is "," for a comma present in the source ('#') or "\n" for an
// automatically inserted comma ('?'); no other COMMA may appear.
func checkComma(t *testing.T, line string, mode Mode) {
	var S Scanner
	file := fset.AddFile("TestCommas", fset.Base(), len(line))
	S.Init(file, []byte(line), nil, mode)
	pos, tok, lit := S.Scan()
	for tok != token.EOF {
		if tok == token.ILLEGAL {
			// the illegal token literal indicates what
			// kind of comma literal to expect
			commaLit := "\n"
			if lit[0] == '#' {
				commaLit = ","
			}
			// next token must be a comma, one position past the marker
			commaPos := file.Position(pos)
			commaPos.Offset++
			commaPos.Column++
			pos, tok, lit = S.Scan()
			if tok == token.COMMA {
				if lit != commaLit {
					t.Errorf(`bad literal for %q: got %q (%q), expected %q`, line, lit, tok, commaLit)
				}
				checkPosScan(t, line, pos, commaPos)
			} else {
				t.Errorf("bad token for %q: got %s, expected ','", line, tok)
			}
		} else if tok == token.COMMA {
			t.Errorf("bad token for %q: got ',', expected no ','", line)
		}
		pos, tok, lit = S.Scan()
	}
}
322
// lines are the comma-insertion test cases consumed by TestCommas via
// checkComma.
var lines = []string{
	// # indicates a comma present in the source
	// ? indicates an automatically inserted comma
	"",
	"\ufeff#,", // first BOM is ignored
	"#,",
	"foo?\n",
	"_foo?\n",
	"123?\n",
	"1.2?\n",
	"'x'?\n",
	"_|_?\n",
	"_|_?\n",
	`"x"` + "?\n",
	"`x`?\n",
	`"""
	foo
	"""` + "?\n",
	// `"""
	// foo \(bar)
	// """` + "?\n",
	`'''
	foo
	'''` + "?\n",

	"+\n",
	"-\n",
	"*\n",
	"/\n",
	"%\n",

	"&\n",
	// "&?\n",
	"|\n",

	"&&\n",
	"||\n",
	"<-\n",
	"->\n",

	"==\n",
	"<\n",
	">\n",
	"=\n",
	"!\n",

	"!=\n",
	"<=\n",
	">=\n",
	":=\n",
	"...\n",

	"(\n",
	"[\n",
	"[[\n",
	"{\n",
	"{{\n",
	"#,\n",
	".\n",

	")?\n",
	"]?\n",
	"]]?\n",
	"}?\n",
	"}}?\n",
	":\n",
	";?\n",

	"true?\n",
	"false?\n",
	"null?\n",

	"foo?//comment\n",
	"foo?//comment",
	"foo?/*comment*/\n",
	"foo?/*\n*/",
	"foo?/*comment*/ \n",
	"foo?/*\n*/ ",

	"foo ?// comment\n",
	"foo ?// comment",
	"foo ?/*comment*/\n",
	"foo ?/*\n*/",
	"foo ?/* */ /* \n */ bar?/**/\n",
	"foo ?/*0*/ /*1*/ /*2*/\n",

	"foo ?/*comment*/ \n",
	"foo ?/*0*/ /*1*/ /*2*/ \n",
	"foo ?/**/ /*-------------*/ /*----\n*/bar ?/* \n*/baa?\n",
	"foo ?/* an EOF terminates a line */",
	"foo ?/* an EOF terminates a line */ /*",
	"foo ?/* an EOF terminates a line */ //",

	// "package main?\n\nfunc main() {\n\tif {\n\t\treturn /* */ }?\n}?\n",
	// "package main?",
}
419
420func TestCommas(t *testing.T) {
421 for _, line := range lines {
422 checkComma(t, line, 0)
423 checkComma(t, line, ScanComments)
424
425 // if the input ended in newlines, the input must tokenize the
426 // same with or without those newlines
427 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
428 checkComma(t, line[0:i], 0)
429 checkComma(t, line[0:i], ScanComments)
430 }
431 }
432}
433
// TestRelative verifies the relative position (RelPos) the scanner
// records for each token: newline, blank, nospace, section, and the
// positions of elided commas.
func TestRelative(t *testing.T) {
	test := `
	package foo

	// comment
	a: /* a */1
	b : 5 /*
	 line one
	 line two
	*/
	c: "dfs"
	`
	// each entry is "RelPos Token Literal" in padded columns matching
	// the fmt.Sprintf format below
	want := []string{
		`newline IDENT    package`,
		`blank   IDENT    foo`,
		"elided  ,        \n",
		`section COMMENT  // comment`,
		`newline IDENT    a`,
		`nospace :        `,
		`blank   COMMENT  /* a */`,
		`nospace INT      1`,
		"elided  ,        \n",
		`newline IDENT    b`,
		`blank   :        `,
		`blank   INT      5`,
		"elided  ,        \n",
		"blank   COMMENT  /*\n\t line one\n\t line two\n\t*/",
		`newline IDENT    c`,
		`nospace :        `,
		`blank   STRING   "dfs"`,
		"elided  ,        \n",
	}
	var S Scanner
	f := fset.AddFile("TestCommas", fset.Base(), len(test))
	S.Init(f, []byte(test), nil, ScanComments)
	pos, tok, lit := S.Scan()
	got := []string{}
	for tok != token.EOF {
		got = append(got, fmt.Sprintf("%-7s %-8s %s", pos.RelPos(), tok, lit))
		pos, tok, lit = S.Scan()
	}
	if !cmp.Equal(got, want) {
		t.Error(cmp.Diff(got, want))
	}
}
479
// segment is one piece of generated source for the //line-comment
// tests, with the filename and line expected for its single token.
type segment struct {
	srcline  string // a line of source text
	filename string // filename for current token
	line     int    // line number for current token
}
485
// segments is the cross-platform //line-comment test data; valid
// //line comments override filename and line, invalid ones are
// ignored.
var segments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{" line1", filepath.Join("dir", "TestLineComments"), 1},
	{"\nline2", filepath.Join("dir", "TestLineComments"), 2},
	{"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored
	{"\nline4", filepath.Join("dir", "TestLineComments"), 4},
	{"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100},
	{"\n//line \t :42\n line1", "", 42},
	{"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200},
	{"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44},           // bad line comment, ignored
	{"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46},            // bad line comment, ignored
	{"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored
	{"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100},
}
502
// unixsegments holds //line comments with absolute Unix paths.
var unixsegments = []segment{
	{"\n//line /bar:42\n line42", "/bar", 42},
}

// winsegments holds //line comments with absolute Windows paths.
var winsegments = []segment{
	{"\n//line c:\\bar:42\n line42", "c:\\bar", 42},
	{"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100},
}
511
// Verify that comments of the form "//line filename:line" are interpreted correctly.
func TestLineComments(t *testing.T) {
	segs := segments
	if runtime.GOOS == "windows" {
		segs = append(segs, winsegments...)
	} else {
		segs = append(segs, unixsegments...)
	}

	// make source by concatenating all segments
	var src string
	for _, e := range segs {
		src += e.srcline
	}

	// verify scan: one token is consumed per segment
	var S Scanner
	f := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src))
	S.Init(f, []byte(src), nil, dontInsertCommas)
	for _, s := range segs {
		p, _, lit := S.Scan()
		pos := f.Position(p)
		// only filename and line come from the segment; offset and
		// column are taken from the actual scan position
		checkPosScan(t, lit, p, token.Position{
			Filename: s.filename,
			Offset:   pos.Offset,
			Line:     s.line,
			Column:   pos.Column,
		})
	}

	if S.ErrorCount != 0 {
		t.Errorf("found %d errors", S.ErrorCount)
	}
}
546
// Verify that initializing the same scanner more than once works correctly.
func TestInit(t *testing.T) {
	var s Scanner

	// 1st init
	src1 := "false true { }"
	f1 := fset.AddFile("src1", fset.Base(), len(src1))
	s.Init(f1, []byte(src1), nil, dontInsertCommas)
	if f1.Size() != len(src1) {
		t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
	}
	s.Scan()              // false
	s.Scan()              // true
	_, tok, _ := s.Scan() // {
	if tok != token.LBRACE {
		t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
	}

	// 2nd init: re-initializing must reset all scanner state
	src2 := "null true { ]"
	f2 := fset.AddFile("src2", fset.Base(), len(src2))
	s.Init(f2, []byte(src2), nil, dontInsertCommas)
	if f2.Size() != len(src2) {
		t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
	}
	_, tok, _ = s.Scan() // null
	if tok != token.NULL {
		t.Errorf("bad token: got %s, expected %s", tok, token.NULL)
	}

	if s.ErrorCount != 0 {
		t.Errorf("found %d errors", s.ErrorCount)
	}
}
581
// TestScanTemplate verifies scanning of string interpolations: the
// scanner must emit INTERPOLATION parts and resume string scanning
// after the interpolated expression's closing parenthesis.
func TestScanTemplate(t *testing.T) {
	// error handler; any scan error fails the test
	eh := func(pos token.Position, msg string) {
		t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg)
	}
	trim := func(s string) string { return strings.Trim(s, `"\\()`) }

	// in each source, every interpolated identifier equals the trimmed
	// text of the fragment that precedes it
	sources := []string{
		`"first\(first)\\second\(second)"`,
		`"level\( ["foo", "level", level ][2] )end\( end )"`,
		`"level\( { "foo": 1, "bar": level } )end\(end)"`,
	}
	for i, src := range sources {
		name := fmt.Sprintf("tsrc%d", i)
		t.Run(name, func(t *testing.T) {
			f := fset.AddFile(name, fset.Base(), len(src))

			// verify scan
			var s Scanner
			s.Init(f, []byte(src), eh, ScanComments)

			count := 0 // paren nesting depth inside the interpolation
			var lit, str string
			for tok := token.ILLEGAL; tok != token.EOF; {
				switch tok {
				case token.LPAREN:
					count++
				case token.RPAREN:
					// closing paren of the interpolation: resume
					// scanning the enclosing string literal
					if count--; count == 0 {
						str = trim(s.ResumeInterpolation('"', 1))
					}
				case token.INTERPOLATION:
					str = trim(lit)
				case token.IDENT:
					if lit != str {
						t.Errorf("str: got %v; want %v", lit, str)
					}
				}
				_, tok, lit = s.Scan()
			}
		})
	}
}
625
// TestStdErrorHander verifies that scan errors are collected through an
// errors.List handler and that Sort and RemoveMultiples behave as
// expected. NOTE(review): "Hander" is a typo for "Handler"; renaming
// would change the test's -run name, so it is preserved here.
func TestStdErrorHander(t *testing.T) {
	const src = "#\n" + // illegal character, cause an error
		"# #\n" + // two errors on the same line
		"//line File2:20\n" +
		"#\n" + // different file, but same line
		"//line File2:1\n" +
		"# #\n" + // same file, decreasing line number
		"//line File1:1\n" +
		"# # #" // original file, line 1 again

	var list errors.List
	eh := func(pos token.Position, msg string) { list.AddNew(pos, msg) }

	var s Scanner
	s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertCommas)
	for {
		if _, tok, _ := s.Scan(); tok == token.EOF {
			break
		}
	}

	// every error must reach the handler exactly once
	if len(list) != s.ErrorCount {
		t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount)
	}

	if len(list) != 9 {
		t.Errorf("found %d raw errors, expected 9", len(list))
		errors.Print(os.Stderr, list)
	}

	// sorting must not add or drop entries
	list.Sort()
	if len(list) != 9 {
		t.Errorf("found %d sorted errors, expected 9", len(list))
		errors.Print(os.Stderr, list)
	}

	// de-duplication keeps one error per line
	list.RemoveMultiples()
	if len(list) != 4 {
		t.Errorf("found %d one-per-line errors, expected 4", len(list))
		errors.Print(os.Stderr, list)
	}
}
668
// errorCollector records errors reported through a scanner error
// handler so tests can inspect the count and the last error seen.
type errorCollector struct {
	cnt int            // number of errors encountered
	msg string         // last error message encountered
	pos token.Position // last error position encountered
}
674
675func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
676 t.Helper()
677 var s Scanner
678 var h errorCollector
679 eh := func(pos token.Position, msg string) {
680 h.cnt++
681 h.msg = msg
682 h.pos = pos
683 }
684 s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertCommas)
685 _, tok0, lit0 := s.Scan()
686 if tok0 != tok {
687 t.Errorf("%q: got %s, expected %s", src, tok0, tok)
688 }
689 if tok0 != token.ILLEGAL && lit0 != lit {
690 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
691 }
692 cnt := 0
693 if err != "" {
694 cnt = 1
695 }
696 if h.cnt != cnt {
697 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt)
698 }
699 if h.msg != err {
700 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
701 }
702 if h.pos.Offset != pos {
703 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos)
704 }
705}
706
// errorTests lists inputs with the first token the scanner should
// produce, the expected error offset and literal, and the expected
// error message (empty err means no error is expected).
var errorTests = []struct {
	src string
	tok token.Token
	pos int
	lit string
	err string
}{
	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
	{`?`, token.ILLEGAL, 0, "", "illegal character U+003F '?'"},
	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
	{`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"},
	// {`' '`, STRING, 0, `' '`, ""},
	// {"`\0`", STRING, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
	// {`'\07'`, STRING, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
	{`"\8"`, token.STRING, 2, `"\8"`, "unknown escape sequence"},
	{`"\08"`, token.STRING, 3, `"\08"`, "illegal character U+0038 '8' in escape sequence"},
	{`"\x"`, token.STRING, 3, `"\x"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\x0"`, token.STRING, 4, `"\x0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\x0g"`, token.STRING, 4, `"\x0g"`, "illegal character U+0067 'g' in escape sequence"},
	{`"\u"`, token.STRING, 3, `"\u"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u0"`, token.STRING, 4, `"\u0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u00"`, token.STRING, 5, `"\u00"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u000"`, token.STRING, 6, `"\u000"`, "illegal character U+0022 '\"' in escape sequence"},
	// {`"\u000`, token.STRING, 6, `"\u000`, "string literal not terminated"}, two errors
	{`"\u0000"`, token.STRING, 0, `"\u0000"`, ""},
	{`"\U"`, token.STRING, 3, `"\U"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0"`, token.STRING, 4, `"\U0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U00"`, token.STRING, 5, `"\U00"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U000"`, token.STRING, 6, `"\U000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0000"`, token.STRING, 7, `"\U0000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U00000"`, token.STRING, 8, `"\U00000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U000000"`, token.STRING, 9, `"\U000000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0000000"`, token.STRING, 10, `"\U0000000"`, "illegal character U+0022 '\"' in escape sequence"},
	// {`"\U0000000`, token.STRING, 10, `"\U0000000`, "string literal not terminated"}, // escape sequence not terminated"}, two errors
	{`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""},
	{`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"},
	{`'`, token.STRING, 0, `'`, "string literal not terminated"},
	// TODO
	// {`'\`, token.STRING, 0, `'\`, "raw string literal not terminated"}, // "escape sequence not terminated"},
	// {"`\n", token.STRING, 0, s"`\n", "raw string literal not terminated"},
	// {"'\n ", token.STRING, 0, "'", "raw string literal not terminated"},
	{`""`, token.STRING, 0, `""`, ""},
	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
	{`""abc`, token.STRING, 0, `""`, ""},
	{`"""abc`, token.STRING, 0, `"""abc`, "string literal not terminated"},
	{`'''abc`, token.STRING, 0, `'''abc`, "string literal not terminated"},
	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"``", token.STRING, 0, "``", ""},
	// {"$", IDENT, 0, "$", ""}, // TODO: for root of file?
	{"`", token.STRING, 0, "`", "raw string literal not terminated"},
	{"''", token.STRING, 0, "''", ""},
	{"'", token.STRING, 0, "'", "string literal not terminated"},
	{"/**/", token.COMMENT, 0, "/**/", ""},
	{"/*", token.COMMENT, 0, "/*", "comment not terminated"},
	{"077", token.INT, 0, "077", ""},
	{"078.", token.FLOAT, 0, "078.", ""},
	{"07801234567.", token.FLOAT, 0, "07801234567.", ""},
	{"078e0", token.FLOAT, 0, "078e0", ""},
	{"078", token.INT, 0, "078", "illegal octal number"},
	{"07800000009", token.INT, 0, "07800000009", "illegal octal number"},
	{"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
	{"0X", token.INT, 0, "0X", "illegal hexadecimal number"},
	{"0Xbeef_", token.INT, 6, "0Xbeef_", "illegal '_' in number"},
	{"0b", token.INT, 0, "0b", "illegal binary number"},
	{"0B", token.INT, 0, "0B", "illegal binary number"},
	// {"123456789012345678890_i", IMAG, 21, "123456789012345678890_i", "illegal '_' in number"},
	{"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
	{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},
	{"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored
	{"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"},         // only first BOM is ignored
	// {"`a\ufeff`", IDENT, 2, "`a\ufeff`", "illegal byte order mark"}, // only first BOM is ignored
	{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored
}
781
782func TestScanErrors(t *testing.T) {
783 for _, e := range errorTests {
784 t.Run(e.src, func(t *testing.T) {
785 checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
786 })
787 }
788}
789
// Verify that no comments show up as literal values when skipping comments.
func TestNoLiteralComments(t *testing.T) {
	var src = `
	a: {
		A: 1 // foo
	}

	b: {
		B: 2
		// foo
	}

	c: 3 // foo

	d: 4
	// foo

	b anycode(): {
		// foo
	}
	`
	var s Scanner
	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
	for {
		pos, tok, lit := s.Scan()
		class := tokenclass(tok)
		// only keywords, literals, and commas may carry a literal value
		if lit != "" && class != keyword && class != literal && tok != token.COMMA {
			t.Errorf("%s: tok = %s, lit = %q", fset.Position(pos), tok, lit)
		}
		if tok <= token.EOF {
			break
		}
	}
}
824
// BenchmarkScan measures raw scanning speed over the generated test
// source, including comment tokens.
func BenchmarkScan(b *testing.B) {
	b.StopTimer()
	// use a local file set so the package-level one is not grown
	fset := token.NewFileSet()
	file := fset.AddFile("", fset.Base(), len(source))
	var s Scanner
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		s.Init(file, source, nil, ScanComments)
		for {
			_, tok, _ := s.Scan()
			if tok == token.EOF {
				break
			}
		}
	}
}
841
842func BenchmarkScanFile(b *testing.B) {
843 b.StopTimer()
844 const filename = "go"
845 src, err := ioutil.ReadFile(filename)
846 if err != nil {
847 panic(err)
848 }
849 fset := token.NewFileSet()
850 file := fset.AddFile(filename, fset.Base(), len(src))
851 b.SetBytes(int64(len(src)))
852 var s Scanner
853 b.StartTimer()
854 for i := 0; i < b.N; i++ {
855 s.Init(file, src, nil, ScanComments)
856 for {
857 _, tok, _ := s.Scan()
858 if tok == token.EOF {
859 break
860 }
861 }
862 }
863}
864
865func TestScanner_next(t *testing.T) {
866 tests := []struct {
867 name string
868 s *Scanner
869 }{
870 // TODO: Add test cases.
871 }
872 for _, tt := range tests {
873 tt.s.next()
874 }
875}
876
877func TestScanner_Init(t *testing.T) {
878 type args struct {
879 file *token.File
880 src []byte
881 err errors.Handler
882 mode Mode
883 }
884 tests := []struct {
885 name string
886 s *Scanner
887 args args
888 }{
889 // TODO: Add test cases.
890 }
891 for _, tt := range tests {
892 tt.s.Init(tt.args.file, tt.args.src, tt.args.err, tt.args.mode)
893 }
894}
895
896func TestScanner_error(t *testing.T) {
897 type args struct {
898 offs int
899 msg string
900 }
901 tests := []struct {
902 name string
903 s *Scanner
904 args args
905 }{
906 // TODO: Add test cases.
907 }
908 for _, tt := range tests {
909 tt.s.error(tt.args.offs, tt.args.msg)
910 }
911}
912
913func TestScanner_interpretLineComment(t *testing.T) {
914 type args struct {
915 text []byte
916 }
917 tests := []struct {
918 name string
919 s *Scanner
920 args args
921 }{
922 // TODO: Add test cases.
923 }
924 for _, tt := range tests {
925 tt.s.interpretLineComment(tt.args.text)
926 }
927}
928
929func TestScanner_scanComment(t *testing.T) {
930 tests := []struct {
931 name string
932 s *Scanner
933 want string
934 }{
935 // TODO: Add test cases.
936 }
937 for _, tt := range tests {
938 if got := tt.s.scanComment(); got != tt.want {
939 t.Errorf("%q. Scanner.scanComment() = %v, want %v", tt.name, got, tt.want)
940 }
941 }
942}
943
944func TestScanner_findLineEnd(t *testing.T) {
945 tests := []struct {
946 name string
947 s *Scanner
948 want bool
949 }{
950 // TODO: Add test cases.
951 }
952 for _, tt := range tests {
953 if got := tt.s.findLineEnd(); got != tt.want {
954 t.Errorf("%q. Scanner.findLineEnd() = %v, want %v", tt.name, got, tt.want)
955 }
956 }
957}
958
959func Test_isLetter(t *testing.T) {
960 type args struct {
961 ch rune
962 }
963 tests := []struct {
964 name string
965 args args
966 want bool
967 }{
968 // TODO: Add test cases.
969 }
970 for _, tt := range tests {
971 if got := isLetter(tt.args.ch); got != tt.want {
972 t.Errorf("%q. isLetter() = %v, want %v", tt.name, got, tt.want)
973 }
974 }
975}
976
977func Test_isDigit(t *testing.T) {
978 type args struct {
979 ch rune
980 }
981 tests := []struct {
982 name string
983 args args
984 want bool
985 }{
986 // TODO: Add test cases.
987 }
988 for _, tt := range tests {
989 if got := isDigit(tt.args.ch); got != tt.want {
990 t.Errorf("%q. isDigit() = %v, want %v", tt.name, got, tt.want)
991 }
992 }
993}
994
995func TestScanner_scanIdentifier(t *testing.T) {
996 tests := []struct {
997 name string
998 s *Scanner
999 want string
1000 }{
1001 // TODO: Add test cases.
1002 }
1003 for _, tt := range tests {
1004 if got := tt.s.scanIdentifier(); got != tt.want {
1005 t.Errorf("%q. Scanner.scanIdentifier() = %v, want %v", tt.name, got, tt.want)
1006 }
1007 }
1008}
1009
1010func Test_digitVal(t *testing.T) {
1011 type args struct {
1012 ch rune
1013 }
1014 tests := []struct {
1015 name string
1016 args args
1017 want int
1018 }{
1019 // TODO: Add test cases.
1020 }
1021 for _, tt := range tests {
1022 if got := digitVal(tt.args.ch); got != tt.want {
1023 t.Errorf("%q. digitVal() = %v, want %v", tt.name, got, tt.want)
1024 }
1025 }
1026}
1027
1028func TestScanner_scanMantissa(t *testing.T) {
1029 type args struct {
1030 base int
1031 }
1032 tests := []struct {
1033 name string
1034 s *Scanner
1035 args args
1036 }{
1037 // TODO: Add test cases.
1038 }
1039 for _, tt := range tests {
1040 tt.s.scanMantissa(tt.args.base)
1041 }
1042}
1043
1044func TestScanner_scanNumber(t *testing.T) {
1045 type args struct {
1046 seenDecimalPoint bool
1047 }
1048 tests := []struct {
1049 name string
1050 s *Scanner
1051 args args
1052 want token.Token
1053 want1 string
1054 }{
1055 // TODO: Add test cases.
1056 }
1057 for _, tt := range tests {
1058 got, got1 := tt.s.scanNumber(tt.args.seenDecimalPoint)
1059 if !reflect.DeepEqual(got, tt.want) {
1060 t.Errorf("%q. Scanner.scanNumber() got = %v, want %v", tt.name, got, tt.want)
1061 }
1062 if got1 != tt.want1 {
1063 t.Errorf("%q. Scanner.scanNumber() got1 = %v, want %v", tt.name, got1, tt.want1)
1064 }
1065 }
1066}
1067
1068func TestScanner_scanEscape(t *testing.T) {
1069 type args struct {
1070 quote rune
1071 }
1072 tests := []struct {
1073 name string
1074 s *Scanner
1075 args args
1076 want bool
1077 }{
1078 // TODO: Add test cases.
1079 }
1080 for _, tt := range tests {
1081 if got, _ := tt.s.scanEscape(tt.args.quote); got != tt.want {
1082 t.Errorf("%q. Scanner.scanEscape() = %v, want %v", tt.name, got, tt.want)
1083 }
1084 }
1085}
1086
1087func TestScanner_scanString(t *testing.T) {
1088 tests := []struct {
1089 name string
1090 s *Scanner
1091 want string
1092 }{
1093 // TODO: Add test cases.
1094 }
1095 for _, tt := range tests {
1096 if _, got := tt.s.scanString(rune(tt.name[0]), 1, 1); got != tt.want {
1097 t.Errorf("%q. Scanner.scanString() = %v, want %v", tt.name, got, tt.want)
1098 }
1099 }
1100}
1101
1102func Test_stripCR(t *testing.T) {
1103 type args struct {
1104 b []byte
1105 }
1106 tests := []struct {
1107 name string
1108 args args
1109 want []byte
1110 }{
1111 // TODO: Add test cases.
1112 }
1113 for _, tt := range tests {
1114 if got := stripCR(tt.args.b); !reflect.DeepEqual(got, tt.want) {
1115 t.Errorf("%q. stripCR() = %v, want %v", tt.name, got, tt.want)
1116 }
1117 }
1118}
1119
1120func TestScanner_scanRawString(t *testing.T) {
1121 tests := []struct {
1122 name string
1123 s *Scanner
1124 want string
1125 }{
1126 // TODO: Add test cases.
1127 }
1128 for _, tt := range tests {
1129 if got := tt.s.scanRawString(); got != tt.want {
1130 t.Errorf("%q. Scanner.scanRawString() = %v, want %v", tt.name, got, tt.want)
1131 }
1132 }
1133}
1134
1135func TestScanner_skipWhitespace(t *testing.T) {
1136 tests := []struct {
1137 name string
1138 s *Scanner
1139 }{
1140 // TODO: Add test cases.
1141 }
1142 for _, tt := range tests {
1143 tt.s.skipWhitespace(1)
1144 }
1145}
1146
1147func TestScanner_switch2(t *testing.T) {
1148 type args struct {
1149 tok0 token.Token
1150 tok1 token.Token
1151 }
1152 tests := []struct {
1153 name string
1154 s *Scanner
1155 args args
1156 want token.Token
1157 }{
1158 // TODO: Add test cases.
1159 }
1160 for _, tt := range tests {
1161 if got := tt.s.switch2(tt.args.tok0, tt.args.tok1); !reflect.DeepEqual(got, tt.want) {
1162 t.Errorf("%q. Scanner.switch2() = %v, want %v", tt.name, got, tt.want)
1163 }
1164 }
1165}
1166
1167func TestScanner_Scan(t *testing.T) {
1168 tests := []struct {
1169 name string
1170 s *Scanner
1171 wantPos token.Pos
1172 wantTok token.Token
1173 wantLit string
1174 }{
1175 // TODO: Add test cases.
1176 }
1177 for _, tt := range tests {
1178 gotPos, gotTok, gotLit := tt.s.Scan()
1179 if !reflect.DeepEqual(gotPos, tt.wantPos) {
1180 t.Errorf("%q. Scanner.Scan() gotPos = %v, want %v", tt.name, gotPos, tt.wantPos)
1181 }
1182 if !reflect.DeepEqual(gotTok, tt.wantTok) {
1183 t.Errorf("%q. Scanner.Scan() gotTok = %v, want %v", tt.name, gotTok, tt.wantTok)
1184 }
1185 if gotLit != tt.wantLit {
1186 t.Errorf("%q. Scanner.Scan() gotLit = %v, want %v", tt.name, gotLit, tt.wantLit)
1187 }
1188 }
1189}