// Copyright 2018 The CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package scanner
16
17import (
18 "fmt"
19 "io/ioutil"
20 "os"
21 "path/filepath"
22 "reflect"
23 "runtime"
24 "strings"
25 "testing"
26
27 "cuelang.org/go/cue/errors"
28 "cuelang.org/go/cue/token"
29 "github.com/google/go-cmp/cmp"
30)
31
// fset is the shared file set for all tests in this file; every test
// registers its source via fset.AddFile and resolves token.Pos values
// through it (see checkPosScan).
var fset = token.NewFileSet()
33
// Token classes used by tokenclass and the testTokens table to group
// tokens into special, literal, operator, and keyword categories.
const /* class */ (
	special = iota
	literal
	operator
	keyword
)
40
41func tokenclass(tok token.Token) int {
42 switch {
43 case tok.IsLiteral():
44 return literal
45 case tok.IsOperator():
46 return operator
47 case tok.IsKeyword():
48 return keyword
49 }
50 return special
51}
52
// elt is one scanner test case: the token the scanner must produce,
// the source literal that produces it, and the expected token class.
type elt struct {
	tok   token.Token
	lit   string
	class int
}
58
// testTokens enumerates, for every token kind, a source literal that
// must scan to exactly that token. TestScan concatenates these
// literals (separated by whitespace) to build its input.
var testTokens = [...]elt{
	// Special tokens
	{token.COMMENT, "/* a comment */", special},
	{token.COMMENT, "// a comment \n", special},
	{token.COMMENT, "/*\r*/", special},
	{token.COMMENT, "//\r\n", special},

	// Identifiers and basic type literals
	{token.BOTTOM, "_|_", literal},
	{token.BOTTOM, "_|_", literal},

	{token.IDENT, "foobar", literal},
	{token.IDENT, "a۰۱۸", literal},
	{token.IDENT, "foo६४", literal},
	{token.IDENT, "bar9876", literal},
	{token.IDENT, "ŝ", literal},
	{token.IDENT, "ŝfoo", literal},
	{token.INT, "0", literal},
	{token.INT, "1", literal},
	{token.INT, "123456789012345678890", literal},
	{token.INT, "12345_67890_12345_6788_90", literal},
	{token.INT, "1234567M", literal},  // multiplier suffix
	{token.INT, "1234567Mi", literal}, // binary multiplier suffix
	{token.INT, "01234567", literal},
	{token.INT, ".3Mi", literal},
	{token.INT, "3.3Mi", literal},
	{token.INT, "0xcafebabe", literal},
	{token.INT, "0b1100_1001", literal},
	{token.FLOAT, "0.", literal},
	{token.FLOAT, ".0", literal},
	{token.FLOAT, "3.14159265", literal},
	{token.FLOAT, "1e0", literal},
	{token.FLOAT, "1e+100", literal},
	{token.FLOAT, "1e-100", literal},
	{token.FLOAT, "2.71828e-1000", literal},
	{token.STRING, "`aa\n\n`", literal},
	{token.STRING, "'a'", literal},
	{token.STRING, "'\\000'", literal},
	{token.STRING, "'\\xFF'", literal},
	{token.STRING, "'\\uff16'", literal},
	{token.STRING, "'\\U0000ff16'", literal},
	{token.STRING, "'foobar'", literal},
	{token.STRING, "`" + `foo
	bar` +
		"`",
		literal,
	},
	{token.STRING, "`foobar`", literal},
	{token.STRING, "`\r`", literal},
	{token.STRING, "`foo\r\nbar`", literal},
	{token.STRING, "'" + `\r` + "'", literal},
	{token.STRING, "'foo" + `\r\n` + "bar'", literal},
	{token.STRING, `"foobar"`, literal},
	{token.STRING, `"""\n foobar\n """`, literal},

	// Operators and delimiters
	{token.ADD, "+", operator},
	{token.SUB, "-", operator},
	{token.MUL, "*", operator},
	{token.QUO, "/", operator},
	{token.REM, "%", operator},

	{token.UNIFY, "&", operator},
	{token.DISJUNCTION, "|", operator},

	{token.LAND, "&&", operator},
	{token.LOR, "||", operator},

	{token.EQL, "==", operator},
	{token.LSS, "<", operator},
	{token.GTR, ">", operator},
	{token.BIND, "=", operator},
	{token.NOT, "!", operator},

	{token.NEQ, "!=", operator},
	{token.LEQ, "<=", operator},
	{token.GEQ, ">=", operator},
	{token.RANGE, "..", operator},
	{token.ELLIPSIS, "...", operator},

	{token.LPAREN, "(", operator},
	{token.LBRACK, "[", operator},
	{token.LBRACE, "{", operator},
	{token.COMMA, ",", operator},
	{token.PERIOD, ".", operator},

	{token.RPAREN, ")", operator},
	{token.RBRACK, "]", operator},
	{token.RBRACE, "}", operator},
	{token.COLON, ":", operator},

	// Keywords
	{token.TRUE, "true", keyword},
	{token.FALSE, "false", keyword},
	{token.NULL, "null", keyword},

	{token.FOR, "for", keyword},
	{token.IF, "if", keyword},
	{token.IN, "in", keyword},
}
159
// whitespace is inserted after each testTokens literal when building
// the scan input; it contains three newlines (see TestScan's
// whitespace_linecount).
const whitespace = " \t \n\n\n" // to separate tokens
161
162var source = func() []byte {
163 var src []byte
164 for _, t := range testTokens {
165 src = append(src, t.lit...)
166 src = append(src, whitespace...)
167 }
168 return src
169}()
170
// newlineCount returns the number of '\n' bytes in s.
func newlineCount(s string) int {
	return strings.Count(s, "\n")
}
180
// checkPosScan resolves p through the shared file set and reports a
// test error for each component (filename, offset, line, column) that
// differs from expected. lit only labels the failure messages.
func checkPosScan(t *testing.T, lit string, p token.Pos, expected token.Position) {
	pos := fset.Position(p)
	if pos.Filename != expected.Filename {
		t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
	}
	if pos.Offset != expected.Offset {
		t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
	}
	if pos.Line != expected.Line {
		t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
	}
	if pos.Column != expected.Column {
		t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
	}
}
196
197// Verify that calling Scan() provides the correct results.
// Verify that calling Scan() provides the correct results: for each
// entry of testTokens, the token kind, class, literal, and source
// position must all match the expected values.
func TestScan(t *testing.T) {
	whitespace_linecount := newlineCount(whitespace)

	// error handler: the input is valid, so any reported error fails the test
	eh := func(_ token.Position, msg string) {
		t.Errorf("error handler called (msg = %s)", msg)
	}

	// verify scan
	var s Scanner
	s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertCommas)

	// set up expected position; updated incrementally after each token
	epos := token.Position{
		Filename: "",
		Offset:   0,
		Line:     1,
		Column:   1,
	}

	index := 0
	for {
		pos, tok, lit := s.Scan()

		// check position
		if tok == token.EOF {
			// correction for EOF: it sits after the final whitespace run
			epos.Line = newlineCount(string(source))
			epos.Column = 2
		}
		checkPosScan(t, lit, pos, epos)

		// check token; once testTokens is exhausted, only EOF is expected
		e := elt{token.EOF, "", special}
		if index < len(testTokens) {
			e = testTokens[index]
			index++
		}
		if tok != e.tok {
			t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
		}

		// check token class
		if tokenclass(tok) != e.class {
			t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
		}

		// check literal: the scanner normalizes some literals, so derive
		// the expected literal from the source form
		elit := ""
		switch e.tok {
		case token.COMMENT:
			// no CRs in comments
			elit = string(stripCR([]byte(e.lit)))
			//-style comment literal doesn't contain newline
			if elit[1] == '/' {
				elit = elit[0 : len(elit)-1]
			}
		case token.IDENT:
			elit = e.lit
		case token.COMMA:
			elit = ","
		default:
			if e.tok.IsLiteral() {
				// no CRs in raw string literals
				elit = e.lit
				if elit[0] == '`' {
					elit = string(stripCR([]byte(elit)))
				}
			} else if e.tok.IsKeyword() {
				elit = e.lit
			}
		}
		if lit != elit {
			t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
		}

		if tok == token.EOF {
			break
		}

		// update position: each literal is followed by the whitespace separator
		epos.Offset += len(e.lit) + len(whitespace)
		epos.Line += newlineCount(e.lit) + whitespace_linecount

	}

	if s.ErrorCount != 0 {
		t.Errorf("found %d errors", s.ErrorCount)
	}
}
288
// checkComma scans line and verifies comma insertion: every ILLEGAL
// token ('#' or '?' marker, see the lines table) must be immediately
// followed by a COMMA token with the right literal and position, and
// no COMMA may appear anywhere else.
func checkComma(t *testing.T, line string, mode Mode) {
	var S Scanner
	file := fset.AddFile("TestCommas", fset.Base(), len(line))
	S.Init(file, []byte(line), nil, mode)
	pos, tok, lit := S.Scan()
	for tok != token.EOF {
		if tok == token.ILLEGAL {
			// the illegal token literal indicates what kind of
			// comma literal to expect: '#' marks an explicit comma,
			// anything else an automatically inserted one ("\n")
			commaLit := "\n"
			if lit[0] == '#' {
				commaLit = ","
			}
			// next token must be a comma
			commaPos := file.Position(pos)
			commaPos.Offset++
			commaPos.Column++
			pos, tok, lit = S.Scan()
			if tok == token.COMMA {
				if lit != commaLit {
					t.Errorf(`bad literal for %q: got %q (%q), expected %q`, line, lit, tok, commaLit)
				}
				checkPosScan(t, line, pos, commaPos)
			} else {
				t.Errorf("bad token for %q: got %s, expected ','", line, tok)
			}
		} else if tok == token.COMMA {
			t.Errorf("bad token for %q: got ',', expected no ','", line)
		}
		pos, tok, lit = S.Scan()
	}
}
321
// lines are the comma-insertion cases consumed by checkComma.
var lines = []string{
	// # indicates a comma present in the source
	// ? indicates an automatically inserted comma
	"",
	"\ufeff#,", // first BOM is ignored
	"#,",
	"foo?\n",
	"_foo?\n",
	"123?\n",
	"1.2?\n",
	"'x'?\n",
	"_|_?\n",
	"_|_?\n",
	`"x"` + "?\n",
	"`x`?\n",
	`"""
	foo
	"""` + "?\n",
	// `"""
	// foo \(bar)
	// """` + "?\n",
	`'''
	foo
	'''` + "?\n",

	"+\n",
	"-\n",
	"*\n",
	"/\n",
	"%\n",

	"&\n",
	// "&?\n",
	"|\n",

	"&&\n",
	"||\n",
	"<-\n",
	"->\n",

	"==\n",
	"<\n",
	">\n",
	"=\n",
	"!\n",

	"!=\n",
	"<=\n",
	">=\n",
	":=\n",
	"...\n",

	"(\n",
	"[\n",
	"[[\n",
	"{\n",
	"{{\n",
	"#,\n",
	".\n",

	")?\n",
	"]?\n",
	"]]?\n",
	"}?\n",
	"}}?\n",
	":\n",
	";?\n",

	"true?\n",
	"false?\n",
	"null?\n",

	"foo?//comment\n",
	"foo?//comment",
	"foo?/*comment*/\n",
	"foo?/*\n*/",
	"foo?/*comment*/ \n",
	"foo?/*\n*/ ",

	"foo ?// comment\n",
	"foo ?// comment",
	"foo ?/*comment*/\n",
	"foo ?/*\n*/",
	"foo ?/* */ /* \n */ bar?/**/\n",
	"foo ?/*0*/ /*1*/ /*2*/\n",

	"foo ?/*comment*/ \n",
	"foo ?/*0*/ /*1*/ /*2*/ \n",
	"foo ?/**/ /*-------------*/ /*----\n*/bar ?/* \n*/baa?\n",
	"foo ?/* an EOF terminates a line */",
	"foo ?/* an EOF terminates a line */ /*",
	"foo ?/* an EOF terminates a line */ //",

	// "package main?\n\nfunc main() {\n\tif {\n\t\treturn /* */ }?\n}?\n",
	// "package main?",
}
418
419func TestCommas(t *testing.T) {
420 for _, line := range lines {
421 checkComma(t, line, 0)
422 checkComma(t, line, ScanComments)
423
424 // if the input ended in newlines, the input must tokenize the
425 // same with or without those newlines
426 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
427 checkComma(t, line[0:i], 0)
428 checkComma(t, line[0:i], ScanComments)
429 }
430 }
431}
432
// TestRelative verifies the relative-position classification
// (newline, blank, nospace, section, elided) reported for each token
// of a small CUE snippet.
// NOTE(review): the want entries must match the "%-7s %-8s %s" column
// padding used below — confirm field widths against the scanner
// output if this test is edited.
func TestRelative(t *testing.T) {
	test := `
	package foo

	// comment
	a: /* a */1
	b :    5 /*
	line one
	line two
	*/
	c: "dfs"
	`
	want := []string{
		`newline IDENT package`,
		`blank IDENT foo`,
		"elided , \n",
		`section COMMENT // comment`,
		`newline IDENT a`,
		`nospace : `,
		`blank COMMENT /* a */`,
		`nospace INT 1`,
		"elided , \n",
		`newline IDENT b`,
		`blank : `,
		`blank INT 5`,
		"elided , \n",
		"blank COMMENT /*\n\t line one\n\t line two\n\t*/",
		`newline IDENT c`,
		`nospace : `,
		`blank STRING "dfs"`,
		"elided , \n",
	}
	var S Scanner
	f := fset.AddFile("TestCommas", fset.Base(), len(test))
	S.Init(f, []byte(test), nil, ScanComments)
	pos, tok, lit := S.Scan()
	got := []string{}
	for tok != token.EOF {
		got = append(got, fmt.Sprintf("%-7s %-8s %s", pos.RelPos(), tok, lit))
		pos, tok, lit = S.Scan()
	}
	if !cmp.Equal(got, want) {
		t.Error(cmp.Diff(got, want))
	}
}
478
// segment is one piece of the //line-comment test input: a chunk of
// source text plus the filename and line number the token scanned
// from it must report.
type segment struct {
	srcline  string // a line of source text
	filename string // filename for current token
	line     int    // line number for current token
}
484
// segments drives TestLineComments; well-formed //line comments
// rewrite the reported filename/line, malformed ones are ignored.
var segments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{" line1", filepath.Join("dir", "TestLineComments"), 1},
	{"\nline2", filepath.Join("dir", "TestLineComments"), 2},
	{"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored
	{"\nline4", filepath.Join("dir", "TestLineComments"), 4},
	{"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100},
	{"\n//line \t :42\n line1", "", 42},
	{"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200},
	{"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44},           // bad line comment, ignored
	{"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46},            // bad line comment, ignored
	{"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored
	{"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100},
}
501
// unixsegments holds //line cases with absolute Unix paths; appended
// to segments on non-Windows platforms.
var unixsegments = []segment{
	{"\n//line /bar:42\n line42", "/bar", 42},
}
505
// winsegments holds //line cases with Windows drive-letter paths;
// appended to segments only on Windows.
var winsegments = []segment{
	{"\n//line c:\\bar:42\n line42", "c:\\bar", 42},
	{"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100},
}
510
511// Verify that comments of the form "//line filename:line" are interpreted correctly.
// Verify that comments of the form "//line filename:line" are interpreted correctly.
func TestLineComments(t *testing.T) {
	segs := segments
	if runtime.GOOS == "windows" {
		segs = append(segs, winsegments...)
	} else {
		segs = append(segs, unixsegments...)
	}

	// make source by concatenating all segment texts
	var src string
	for _, e := range segs {
		src += e.srcline
	}

	// verify scan: one token per segment, each reporting the
	// filename/line dictated by the most recent //line comment
	var S Scanner
	f := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src))
	S.Init(f, []byte(src), nil, dontInsertCommas)
	for _, s := range segs {
		p, _, lit := S.Scan()
		pos := f.Position(p)
		checkPosScan(t, lit, p, token.Position{
			Filename: s.filename,
			Offset:   pos.Offset, // offset/column are not under test here
			Line:     s.line,
			Column:   pos.Column,
		})
	}

	if S.ErrorCount != 0 {
		t.Errorf("found %d errors", S.ErrorCount)
	}
}
545
546// Verify that initializing the same scanner more than once works correctly.
// Verify that initializing the same scanner more than once works correctly.
func TestInit(t *testing.T) {
	var s Scanner

	// 1st init
	src1 := "false true { }"
	f1 := fset.AddFile("src1", fset.Base(), len(src1))
	s.Init(f1, []byte(src1), nil, dontInsertCommas)
	if f1.Size() != len(src1) {
		t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
	}
	s.Scan()              // false
	s.Scan()              // true
	_, tok, _ := s.Scan() // {
	if tok != token.LBRACE {
		t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
	}

	// 2nd init: reusing s must fully reset its state
	src2 := "null true { ]"
	f2 := fset.AddFile("src2", fset.Base(), len(src2))
	s.Init(f2, []byte(src2), nil, dontInsertCommas)
	if f2.Size() != len(src2) {
		t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
	}
	_, tok, _ = s.Scan() // null
	if tok != token.NULL {
		t.Errorf("bad token: got %s, expected %s", tok, token.NULL)
	}

	if s.ErrorCount != 0 {
		t.Errorf("found %d errors", s.ErrorCount)
	}
}
580
// TestScanTemplate verifies scanning of string interpolation: each
// \( ... ) expression interrupts the string, and ResumeInterpolation
// continues it after the matching closing parenthesis. The test
// sources are built so that every identifier equals the trimmed text
// of the string fragment that precedes it.
func TestScanTemplate(t *testing.T) {
	// error handler: inputs are valid, so any error fails the test
	eh := func(pos token.Position, msg string) {
		t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg)
	}
	trim := func(s string) string { return strings.Trim(s, `"\\()`) }

	sources := []string{
		`"first\(first)\\second\(second)"`,
		`"level\( ["foo", "level", level ][2] )end\( end )"`,
		`"level\( { "foo": 1, "bar": level } )end\(end)"`,
	}
	for i, src := range sources {
		name := fmt.Sprintf("tsrc%d", i)
		t.Run(name, func(t *testing.T) {
			f := fset.AddFile(name, fset.Base(), len(src))

			// verify scan
			var s Scanner
			s.Init(f, []byte(src), eh, ScanComments)

			// count tracks parenthesis nesting inside an interpolation;
			// when it returns to zero the string literal resumes
			count := 0
			var lit, str string
			for tok := token.ILLEGAL; tok != token.EOF; {
				switch tok {
				case token.LPAREN:
					count++
				case token.RPAREN:
					if count--; count == 0 {
						str = trim(s.ResumeInterpolation('"', 1))
					}
				case token.INTERPOLATION:
					str = trim(lit)
				case token.IDENT:
					if lit != str {
						t.Errorf("str: got %v; want %v", lit, str)
					}
				}
				_, tok, lit = s.Scan()
			}
		})
	}
}
624
// TestStdErrorHander verifies that scan errors are collected, sorted,
// and deduplicated (one per line) by the errors.List helpers.
// TODO: the name misspells "Handler"; renaming would be cosmetic only.
func TestStdErrorHander(t *testing.T) {
	const src = "#\n" + // illegal character, cause an error
		"# #\n" + // two errors on the same line
		"//line File2:20\n" +
		"#\n" + // different file, but same line
		"//line File2:1\n" +
		"# #\n" + // same file, decreasing line number
		"//line File1:1\n" +
		"# # #" // original file, line 1 again

	// collect every reported error in list
	var list errors.List
	eh := func(pos token.Position, msg string) { list.AddNew(pos, msg) }

	var s Scanner
	s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertCommas)
	for {
		if _, tok, _ := s.Scan(); tok == token.EOF {
			break
		}
	}

	if len(list) != s.ErrorCount {
		t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount)
	}

	if len(list) != 9 {
		t.Errorf("found %d raw errors, expected 9", len(list))
		errors.Print(os.Stderr, list)
	}

	list.Sort()
	if len(list) != 9 {
		t.Errorf("found %d sorted errors, expected 9", len(list))
		errors.Print(os.Stderr, list)
	}

	list.RemoveMultiples()
	if len(list) != 4 {
		t.Errorf("found %d one-per-line errors, expected 4", len(list))
		errors.Print(os.Stderr, list)
	}
}
667
// errorCollector records the scan errors seen by checkError's handler.
type errorCollector struct {
	cnt int            // number of errors encountered
	msg string         // last error message encountered
	pos token.Position // last error position encountered
}
673
// checkError scans src as a single token and verifies the token kind,
// its literal (except for ILLEGAL tokens, whose literal is not
// compared), and — when err is non-empty — that exactly one error
// with message err was reported at byte offset pos.
func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
	t.Helper()
	var s Scanner
	var h errorCollector
	eh := func(pos token.Position, msg string) {
		h.cnt++
		h.msg = msg
		h.pos = pos
	}
	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertCommas)
	_, tok0, lit0 := s.Scan()
	if tok0 != tok {
		t.Errorf("%q: got %s, expected %s", src, tok0, tok)
	}
	if tok0 != token.ILLEGAL && lit0 != lit {
		t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
	}
	// expect exactly one reported error iff an error message is given
	cnt := 0
	if err != "" {
		cnt = 1
	}
	if h.cnt != cnt {
		t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt)
	}
	if h.msg != err {
		t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
	}
	if h.pos.Offset != pos {
		t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos)
	}
}
705
// errorTests lists malformed (and a few well-formed) inputs together
// with the expected token, error offset, literal, and error message
// (empty err means no error is expected). Consumed by TestScanErrors
// via checkError.
var errorTests = []struct {
	src string
	tok token.Token
	pos int
	lit string
	err string
}{
	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
	{`?`, token.ILLEGAL, 0, "", "illegal character U+003F '?'"},
	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
	{`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"},
	// {`' '`, STRING, 0, `' '`, ""},
	// {"`\0`", STRING, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
	// {`'\07'`, STRING, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
	{`"\8"`, token.STRING, 2, `"\8"`, "unknown escape sequence"},
	{`"\08"`, token.STRING, 3, `"\08"`, "illegal character U+0038 '8' in escape sequence"},
	{`"\x"`, token.STRING, 3, `"\x"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\x0"`, token.STRING, 4, `"\x0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\x0g"`, token.STRING, 4, `"\x0g"`, "illegal character U+0067 'g' in escape sequence"},
	{`"\u"`, token.STRING, 3, `"\u"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u0"`, token.STRING, 4, `"\u0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u00"`, token.STRING, 5, `"\u00"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u000"`, token.STRING, 6, `"\u000"`, "illegal character U+0022 '\"' in escape sequence"},
	// {`"\u000`, token.STRING, 6, `"\u000`, "string literal not terminated"}, two errors
	{`"\u0000"`, token.STRING, 0, `"\u0000"`, ""},
	{`"\U"`, token.STRING, 3, `"\U"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0"`, token.STRING, 4, `"\U0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U00"`, token.STRING, 5, `"\U00"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U000"`, token.STRING, 6, `"\U000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0000"`, token.STRING, 7, `"\U0000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U00000"`, token.STRING, 8, `"\U00000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U000000"`, token.STRING, 9, `"\U000000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0000000"`, token.STRING, 10, `"\U0000000"`, "illegal character U+0022 '\"' in escape sequence"},
	// {`"\U0000000`, token.STRING, 10, `"\U0000000`, "string literal not terminated"}, // escape sequence not terminated"}, two errors
	{`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""},
	{`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"},
	{`'`, token.STRING, 0, `'`, "string literal not terminated"},
	// TODO
	// {`'\`, token.STRING, 0, `'\`, "raw string literal not terminated"}, // "escape sequence not terminated"},
	// {"`\n", token.STRING, 0, s"`\n", "raw string literal not terminated"},
	// {"'\n ", token.STRING, 0, "'", "raw string literal not terminated"},
	{`""`, token.STRING, 0, `""`, ""},
	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
	{`""abc`, token.STRING, 0, `""`, ""},
	{`"""abc`, token.STRING, 0, `"""abc`, "string literal not terminated"},
	{`'''abc`, token.STRING, 0, `'''abc`, "string literal not terminated"},
	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"``", token.STRING, 0, "``", ""},
	// {"$", IDENT, 0, "$", ""}, // TODO: for root of file?
	{"`", token.STRING, 0, "`", "raw string literal not terminated"},
	{"''", token.STRING, 0, "''", ""},
	{"'", token.STRING, 0, "'", "string literal not terminated"},
	{"/**/", token.COMMENT, 0, "/**/", ""},
	{"/*", token.COMMENT, 0, "/*", "comment not terminated"},
	{"077", token.INT, 0, "077", ""},
	{"078.", token.FLOAT, 0, "078.", ""},
	{"07801234567.", token.FLOAT, 0, "07801234567.", ""},
	{"078e0", token.FLOAT, 0, "078e0", ""},
	{"078", token.INT, 0, "078", "illegal octal number"},
	{"07800000009", token.INT, 0, "07800000009", "illegal octal number"},
	{"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
	{"0X", token.INT, 0, "0X", "illegal hexadecimal number"},
	{"0Xbeef_", token.INT, 6, "0Xbeef_", "illegal '_' in number"},
	{"0b", token.INT, 0, "0b", "illegal binary number"},
	{"0B", token.INT, 0, "0B", "illegal binary number"},
	// {"123456789012345678890_i", IMAG, 21, "123456789012345678890_i", "illegal '_' in number"},
	{"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
	{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},
	{"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored
	{"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored
	// {"`a\ufeff`", IDENT, 2, "`a\ufeff`", "illegal byte order mark"}, // only first BOM is ignored
	{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored
}
780
781func TestScanErrors(t *testing.T) {
782 for _, e := range errorTests {
783 t.Run(e.src, func(t *testing.T) {
784 checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
785 })
786 }
787}
788
// Verify that no comments show up as literal values when skipping comments.
func TestNoLiteralComments(t *testing.T) {
	var src = `
	a: {
		A: 1 // foo
	}

	b: {
		B: 2
		// foo
	}

	c: 3 // foo

	d: 4
	// foo

	b anycode(): {
		// foo
	}
	`
	var s Scanner
	// mode 0: comments are skipped, commas inserted
	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
	for {
		pos, tok, lit := s.Scan()
		// only keywords, literals, and commas may carry a literal value
		class := tokenclass(tok)
		if lit != "" && class != keyword && class != literal && tok != token.COMMA {
			t.Errorf("%s: tok = %s, lit = %q", fset.Position(pos), tok, lit)
		}
		if tok <= token.EOF {
			break
		}
	}
}
823
// BenchmarkScan measures scanning the in-memory source built from
// testTokens; Init is included in each timed iteration.
func BenchmarkScan(b *testing.B) {
	b.StopTimer()
	fset := token.NewFileSet()
	file := fset.AddFile("", fset.Base(), len(source))
	var s Scanner
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		s.Init(file, source, nil, ScanComments)
		for {
			_, tok, _ := s.Scan()
			if tok == token.EOF {
				break
			}
		}
	}
}
840
// BenchmarkScanFile measures scanning a file named "go" read from the
// current directory.
// NOTE(review): the fixture file "go" is presumably supplied manually;
// the benchmark panics if it does not exist — confirm before running.
func BenchmarkScanFile(b *testing.B) {
	b.StopTimer()
	const filename = "go"
	src, err := ioutil.ReadFile(filename)
	if err != nil {
		panic(err)
	}
	fset := token.NewFileSet()
	file := fset.AddFile(filename, fset.Base(), len(src))
	b.SetBytes(int64(len(src))) // report throughput in bytes/op
	var s Scanner
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		s.Init(file, src, nil, ScanComments)
		for {
			_, tok, _ := s.Scan()
			if tok == token.EOF {
				break
			}
		}
	}
}
863
864func TestScanner_next(t *testing.T) {
865 tests := []struct {
866 name string
867 s *Scanner
868 }{
869 // TODO: Add test cases.
870 }
871 for _, tt := range tests {
872 tt.s.next()
873 }
874}
875
876func TestScanner_Init(t *testing.T) {
877 type args struct {
878 file *token.File
879 src []byte
880 err errors.Handler
881 mode Mode
882 }
883 tests := []struct {
884 name string
885 s *Scanner
886 args args
887 }{
888 // TODO: Add test cases.
889 }
890 for _, tt := range tests {
891 tt.s.Init(tt.args.file, tt.args.src, tt.args.err, tt.args.mode)
892 }
893}
894
895func TestScanner_error(t *testing.T) {
896 type args struct {
897 offs int
898 msg string
899 }
900 tests := []struct {
901 name string
902 s *Scanner
903 args args
904 }{
905 // TODO: Add test cases.
906 }
907 for _, tt := range tests {
908 tt.s.error(tt.args.offs, tt.args.msg)
909 }
910}
911
912func TestScanner_interpretLineComment(t *testing.T) {
913 type args struct {
914 text []byte
915 }
916 tests := []struct {
917 name string
918 s *Scanner
919 args args
920 }{
921 // TODO: Add test cases.
922 }
923 for _, tt := range tests {
924 tt.s.interpretLineComment(tt.args.text)
925 }
926}
927
928func TestScanner_scanComment(t *testing.T) {
929 tests := []struct {
930 name string
931 s *Scanner
932 want string
933 }{
934 // TODO: Add test cases.
935 }
936 for _, tt := range tests {
937 if got := tt.s.scanComment(); got != tt.want {
938 t.Errorf("%q. Scanner.scanComment() = %v, want %v", tt.name, got, tt.want)
939 }
940 }
941}
942
943func TestScanner_findLineEnd(t *testing.T) {
944 tests := []struct {
945 name string
946 s *Scanner
947 want bool
948 }{
949 // TODO: Add test cases.
950 }
951 for _, tt := range tests {
952 if got := tt.s.findLineEnd(); got != tt.want {
953 t.Errorf("%q. Scanner.findLineEnd() = %v, want %v", tt.name, got, tt.want)
954 }
955 }
956}
957
958func Test_isLetter(t *testing.T) {
959 type args struct {
960 ch rune
961 }
962 tests := []struct {
963 name string
964 args args
965 want bool
966 }{
967 // TODO: Add test cases.
968 }
969 for _, tt := range tests {
970 if got := isLetter(tt.args.ch); got != tt.want {
971 t.Errorf("%q. isLetter() = %v, want %v", tt.name, got, tt.want)
972 }
973 }
974}
975
976func Test_isDigit(t *testing.T) {
977 type args struct {
978 ch rune
979 }
980 tests := []struct {
981 name string
982 args args
983 want bool
984 }{
985 // TODO: Add test cases.
986 }
987 for _, tt := range tests {
988 if got := isDigit(tt.args.ch); got != tt.want {
989 t.Errorf("%q. isDigit() = %v, want %v", tt.name, got, tt.want)
990 }
991 }
992}
993
994func TestScanner_scanIdentifier(t *testing.T) {
995 tests := []struct {
996 name string
997 s *Scanner
998 want string
999 }{
1000 // TODO: Add test cases.
1001 }
1002 for _, tt := range tests {
1003 if got := tt.s.scanIdentifier(); got != tt.want {
1004 t.Errorf("%q. Scanner.scanIdentifier() = %v, want %v", tt.name, got, tt.want)
1005 }
1006 }
1007}
1008
1009func Test_digitVal(t *testing.T) {
1010 type args struct {
1011 ch rune
1012 }
1013 tests := []struct {
1014 name string
1015 args args
1016 want int
1017 }{
1018 // TODO: Add test cases.
1019 }
1020 for _, tt := range tests {
1021 if got := digitVal(tt.args.ch); got != tt.want {
1022 t.Errorf("%q. digitVal() = %v, want %v", tt.name, got, tt.want)
1023 }
1024 }
1025}
1026
1027func TestScanner_scanMantissa(t *testing.T) {
1028 type args struct {
1029 base int
1030 }
1031 tests := []struct {
1032 name string
1033 s *Scanner
1034 args args
1035 }{
1036 // TODO: Add test cases.
1037 }
1038 for _, tt := range tests {
1039 tt.s.scanMantissa(tt.args.base)
1040 }
1041}
1042
1043func TestScanner_scanNumber(t *testing.T) {
1044 type args struct {
1045 seenDecimalPoint bool
1046 }
1047 tests := []struct {
1048 name string
1049 s *Scanner
1050 args args
1051 want token.Token
1052 want1 string
1053 }{
1054 // TODO: Add test cases.
1055 }
1056 for _, tt := range tests {
1057 got, got1 := tt.s.scanNumber(tt.args.seenDecimalPoint)
1058 if !reflect.DeepEqual(got, tt.want) {
1059 t.Errorf("%q. Scanner.scanNumber() got = %v, want %v", tt.name, got, tt.want)
1060 }
1061 if got1 != tt.want1 {
1062 t.Errorf("%q. Scanner.scanNumber() got1 = %v, want %v", tt.name, got1, tt.want1)
1063 }
1064 }
1065}
1066
1067func TestScanner_scanEscape(t *testing.T) {
1068 type args struct {
1069 quote rune
1070 }
1071 tests := []struct {
1072 name string
1073 s *Scanner
1074 args args
1075 want bool
1076 }{
1077 // TODO: Add test cases.
1078 }
1079 for _, tt := range tests {
1080 if got, _ := tt.s.scanEscape(tt.args.quote); got != tt.want {
1081 t.Errorf("%q. Scanner.scanEscape() = %v, want %v", tt.name, got, tt.want)
1082 }
1083 }
1084}
1085
1086func TestScanner_scanString(t *testing.T) {
1087 tests := []struct {
1088 name string
1089 s *Scanner
1090 want string
1091 }{
1092 // TODO: Add test cases.
1093 }
1094 for _, tt := range tests {
1095 if _, got := tt.s.scanString(rune(tt.name[0]), 1, 1); got != tt.want {
1096 t.Errorf("%q. Scanner.scanString() = %v, want %v", tt.name, got, tt.want)
1097 }
1098 }
1099}
1100
1101func Test_stripCR(t *testing.T) {
1102 type args struct {
1103 b []byte
1104 }
1105 tests := []struct {
1106 name string
1107 args args
1108 want []byte
1109 }{
1110 // TODO: Add test cases.
1111 }
1112 for _, tt := range tests {
1113 if got := stripCR(tt.args.b); !reflect.DeepEqual(got, tt.want) {
1114 t.Errorf("%q. stripCR() = %v, want %v", tt.name, got, tt.want)
1115 }
1116 }
1117}
1118
1119func TestScanner_scanRawString(t *testing.T) {
1120 tests := []struct {
1121 name string
1122 s *Scanner
1123 want string
1124 }{
1125 // TODO: Add test cases.
1126 }
1127 for _, tt := range tests {
1128 if got := tt.s.scanRawString(); got != tt.want {
1129 t.Errorf("%q. Scanner.scanRawString() = %v, want %v", tt.name, got, tt.want)
1130 }
1131 }
1132}
1133
1134func TestScanner_skipWhitespace(t *testing.T) {
1135 tests := []struct {
1136 name string
1137 s *Scanner
1138 }{
1139 // TODO: Add test cases.
1140 }
1141 for _, tt := range tests {
1142 tt.s.skipWhitespace(1)
1143 }
1144}
1145
1146func TestScanner_switch2(t *testing.T) {
1147 type args struct {
1148 tok0 token.Token
1149 tok1 token.Token
1150 }
1151 tests := []struct {
1152 name string
1153 s *Scanner
1154 args args
1155 want token.Token
1156 }{
1157 // TODO: Add test cases.
1158 }
1159 for _, tt := range tests {
1160 if got := tt.s.switch2(tt.args.tok0, tt.args.tok1); !reflect.DeepEqual(got, tt.want) {
1161 t.Errorf("%q. Scanner.switch2() = %v, want %v", tt.name, got, tt.want)
1162 }
1163 }
1164}
1165
1166func TestScanner_Scan(t *testing.T) {
1167 tests := []struct {
1168 name string
1169 s *Scanner
1170 wantPos token.Pos
1171 wantTok token.Token
1172 wantLit string
1173 }{
1174 // TODO: Add test cases.
1175 }
1176 for _, tt := range tests {
1177 gotPos, gotTok, gotLit := tt.s.Scan()
1178 if !reflect.DeepEqual(gotPos, tt.wantPos) {
1179 t.Errorf("%q. Scanner.Scan() gotPos = %v, want %v", tt.name, gotPos, tt.wantPos)
1180 }
1181 if !reflect.DeepEqual(gotTok, tt.wantTok) {
1182 t.Errorf("%q. Scanner.Scan() gotTok = %v, want %v", tt.name, gotTok, tt.wantTok)
1183 }
1184 if gotLit != tt.wantLit {
1185 t.Errorf("%q. Scanner.Scan() gotLit = %v, want %v", tt.name, gotLit, tt.wantLit)
1186 }
1187 }
1188}