blob: 64ac5fbb5b4de9cce4bed679dc997568b00a40d5 [file] [log] [blame]
Marcel van Lohuizenda386112018-12-10 15:27:50 +01001// Copyright 2018 The CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package ast declares the types used to represent syntax trees for CUE
16// packages.
17package ast // import "cuelang.org/go/cue/ast"
18
19import (
20 "strconv"
21 "strings"
22
23 "cuelang.org/go/cue/token"
24)
25
26// ----------------------------------------------------------------------------
27// Interfaces
28//
// There are three main classes of nodes: expressions, clauses, and declaration
// nodes. The node names usually match the names of the corresponding CUE spec
// productions. The node fields correspond to the individual parts of the
// respective productions.
33//
34// All nodes contain position information marking the beginning of the
35// corresponding source text segment; it is accessible via the Pos accessor
36// method. Nodes may contain additional position info for language constructs
37// where comments may be found between parts of the construct (typically any
38// larger, parenthesized subpart). That position information is needed to
39// properly position comments when printing the construct.
40
41// A Node represents any node in the abstract syntax tree.
42type Node interface {
43 Pos() token.Pos // position of first character belonging to the node
44 End() token.Pos // position of first character immediately after the node
45
46 // TODO: SetPos(p token.RelPos)
47
48 Comments() []*CommentGroup
49 AddComment(*CommentGroup)
50}
51
52// An Expr is implemented by all expression nodes.
53type Expr interface {
54 Node
55 exprNode()
56}
57
58func (*BadExpr) exprNode() {}
59func (*Ident) exprNode() {}
60func (*Ellipsis) exprNode() {}
61func (*BasicLit) exprNode() {}
62func (*Interpolation) exprNode() {}
63func (*StructLit) exprNode() {}
64func (*ListLit) exprNode() {}
65func (*LambdaExpr) exprNode() {}
66
67// func (*StructComprehension) exprNode() {}
68func (*ListComprehension) exprNode() {}
69func (*ParenExpr) exprNode() {}
70func (*SelectorExpr) exprNode() {}
71func (*IndexExpr) exprNode() {}
72func (*SliceExpr) exprNode() {}
73func (*CallExpr) exprNode() {}
74func (*UnaryExpr) exprNode() {}
75func (*BinaryExpr) exprNode() {}
76func (*BottomLit) exprNode() {}
77
78// A Decl node is implemented by all declarations.
79type Decl interface {
80 Node
81 declNode()
82}
83
84func (*Field) declNode() {}
85func (*ComprehensionDecl) declNode() {}
86func (*ImportDecl) declNode() {}
87func (*BadDecl) declNode() {}
88func (*EmitDecl) declNode() {}
89func (*Alias) declNode() {}
90
91// A Label is any prduction that can be used as a LHS label.
92type Label interface {
93 Node
94 labelName() (name string, isIdent bool)
95}
96
97func (n *Ident) labelName() (string, bool) {
98 return n.Name, true
99}
100
101func (n *BasicLit) labelName() (string, bool) {
102 switch n.Kind {
103 case token.STRING:
104 // Use strconv to only allow double-quoted, single-line strings.
105 if str, err := strconv.Unquote(n.Value); err == nil {
106 return str, true
107 }
108 case token.NULL, token.TRUE, token.FALSE:
109 return n.Value, true
110
111 // TODO: allow numbers to be fields?
112 }
113 return "", false
114}
115
116func (n *TemplateLabel) labelName() (string, bool) {
117 return n.Ident.Name, false
118}
119
120func (n *Interpolation) labelName() (string, bool) {
121 return "", false
122}
Marcel van Lohuizenda386112018-12-10 15:27:50 +0100123
124// LabelName reports the name of a label, if known, and whether it is valid.
125func LabelName(x Label) (name string, ok bool) {
126 return x.labelName()
127}
128
Marcel van Lohuizenda386112018-12-10 15:27:50 +0100129// Clause nodes are part of comprehensions.
130type Clause interface {
131 Node
132 clauseNode()
133}
134
135func (x *ForClause) clauseNode() {}
136func (x *IfClause) clauseNode() {}
137func (x *Alias) clauseNode() {}
138
139// Comments
140
141type comments struct {
142 groups *[]*CommentGroup
143}
144
145func (c *comments) Comments() []*CommentGroup {
146 if c.groups == nil {
147 return []*CommentGroup{}
148 }
149 return *c.groups
150}
151
152// // AddComment adds the given comments to the fields.
153// // If line is true the comment is inserted at the preceding token.
154
155func (c *comments) AddComment(cg *CommentGroup) {
156 if cg == nil {
157 return
158 }
159 if c.groups == nil {
160 a := []*CommentGroup{cg}
161 c.groups = &a
162 return
163 }
164 *c.groups = append(*c.groups, cg)
165}
166
167// A Comment node represents a single //-style or /*-style comment.
168type Comment struct {
169 Slash token.Pos // position of "/" starting the comment
170 Text string // comment text (excluding '\n' for //-style comments)
171}
172
173func (g *Comment) Comments() []*CommentGroup { return nil }
174func (g *Comment) AddComment(*CommentGroup) {}
175
176func (c *Comment) Pos() token.Pos { return c.Slash }
177func (c *Comment) End() token.Pos { return c.Slash.Add(len(c.Text)) }
178
179// A CommentGroup represents a sequence of comments
180// with no other tokens and no empty lines between.
181type CommentGroup struct {
182 // TODO: remove and use the token position of the first commment.
183 Doc bool
184 Line bool // true if it is on the same line as the node's end pos.
185
186 // Position indicates where a comment should be attached if a node has
187 // multiple tokens. 0 means before the first token, 1 means before the
188 // second, etc. For instance, for a field, the positions are:
189 // <0> Label <1> ":" <2> Expr <3> "," <4>
190 Position int8
191 List []*Comment // len(List) > 0
192}
193
194func (g *CommentGroup) Pos() token.Pos { return g.List[0].Pos() }
195func (g *CommentGroup) End() token.Pos { return g.List[len(g.List)-1].End() }
196
197func (g *CommentGroup) Comments() []*CommentGroup { return nil }
198func (g *CommentGroup) AddComment(*CommentGroup) {}
199
// isWhitespace reports whether ch is a space, tab, newline or carriage return.
func isWhitespace(ch byte) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}

// stripTrailingWhitespace returns s without any trailing bytes for which
// isWhitespace reports true.
func stripTrailingWhitespace(s string) string {
	for end := len(s); end > 0; end-- {
		if !isWhitespace(s[end-1]) {
			return s[:end]
		}
	}
	return ""
}
209
210// Text returns the text of the comment.
211// Comment markers (//, /*, and */), the first space of a line comment, and
212// leading and trailing empty lines are removed. Multiple empty lines are
213// reduced to one, and trailing space on lines is trimmed. Unless the result
214// is empty, it is newline-terminated.
215func (g *CommentGroup) Text() string {
216 if g == nil {
217 return ""
218 }
219 comments := make([]string, len(g.List))
220 for i, c := range g.List {
221 comments[i] = c.Text
222 }
223
224 lines := make([]string, 0, 10) // most comments are less than 10 lines
225 for _, c := range comments {
226 // Remove comment markers.
227 // The parser has given us exactly the comment text.
228 switch c[1] {
229 case '/':
230 //-style comment (no newline at the end)
231 c = c[2:]
232 // strip first space - required for Example tests
233 if len(c) > 0 && c[0] == ' ' {
234 c = c[1:]
235 }
236 case '*':
237 /*-style comment */
238 c = c[2 : len(c)-2]
239 }
240
241 // Split on newlines.
242 cl := strings.Split(c, "\n")
243
244 // Walk lines, stripping trailing white space and adding to list.
245 for _, l := range cl {
246 lines = append(lines, stripTrailingWhitespace(l))
247 }
248 }
249
250 // Remove leading blank lines; convert runs of
251 // interior blank lines to a single blank line.
252 n := 0
253 for _, line := range lines {
254 if line != "" || n > 0 && lines[n-1] != "" {
255 lines[n] = line
256 n++
257 }
258 }
259 lines = lines[0:n]
260
261 // Add final "" entry to get trailing newline from Join.
262 if n > 0 && lines[n-1] != "" {
263 lines = append(lines, "")
264 }
265
266 return strings.Join(lines, "\n")
267}
268
269// A Field represents a field declaration in a struct.
270type Field struct {
271 comments
272 Label Label // must have at least one element.
273
274 // No colon: Value must be an StructLit with one field or a
275 // LambdaExpr.
276 Colon token.Pos
277 Value Expr // the value associated with this field.
278}
279
280func (d *Field) Pos() token.Pos { return d.Label.Pos() }
281func (d *Field) End() token.Pos { return d.Value.End() }
282
283// An Alias binds another field to the alias name in the current struct.
284type Alias struct {
285 comments
286 Ident *Ident // field name, always an Ident
287 Equal token.Pos // position of "="
288 Expr Expr // An Ident or SelectorExpr
289}
290
291func (a *Alias) Pos() token.Pos { return a.Ident.Pos() }
292func (a *Alias) End() token.Pos { return a.Expr.End() }
293
294// A ComprehensionDecl node represents a field comprehension.
295type ComprehensionDecl struct {
296 comments
297 Field *Field
298 Select token.Pos
299 Clauses []Clause
300}
301
302func (x *ComprehensionDecl) Pos() token.Pos { return x.Field.Pos() }
303func (x *ComprehensionDecl) End() token.Pos {
304 if len(x.Clauses) > 0 {
305 return x.Clauses[len(x.Clauses)-1].End()
306 }
307 return x.Select
308}
309
310// ----------------------------------------------------------------------------
311// Expressions and types
312//
313// An expression is represented by a tree consisting of one
314// or more of the following concrete expression nodes.
315
316// A LambdaExpr defines a function expression.
317//
318// Lambdas are only used internally under controlled conditions. Although
319// the implementation of lambdas is fully functional, enabling them will
320// cause the language to be Turing-complete (if not otherwise limited).
321// Also, lambdas would provide yet another way to create structure, and one
322// that is known to not work well for declarative configuration languages.
323type LambdaExpr struct {
324 comments
325 Lparen token.Pos // position of "("
326 Params []*Field // parameters with possible initializers
327 Rparen token.Pos // position of ")"
328 Expr Expr
329}
330
331func (t *LambdaExpr) Pos() token.Pos { return t.Lparen }
332func (t *LambdaExpr) End() token.Pos { return t.Rparen }
333
334// A BadExpr node is a placeholder for expressions containing
335// syntax errors for which no correct expression nodes can be
336// created. This is different from an ErrorExpr which represents
337// an explicitly marked error in the source.
338type BadExpr struct {
339 comments
340 From, To token.Pos // position range of bad expression
341}
342
343// A BottomLit indicates an error.
344type BottomLit struct {
345 comments
346 Bottom token.Pos
347}
348
349// An Ident node represents an left-hand side identifier.
350type Ident struct {
351 comments
352 NamePos token.Pos // identifier position
353
354 // This LHS path element may be an identifier. Possible forms:
355 // foo: a normal identifier
356 // "foo": JSON compatible
357 // <foo>: a template shorthand
358 Name string
359
360 Scope Node // scope in which node was found or nil if referring directly
361 Node Node
362}
363
364// A TemplateLabel represents a field template declaration in a struct.
365type TemplateLabel struct {
366 comments
367 Langle token.Pos
368 Ident *Ident
369 Rangle token.Pos
370}
371
Marcel van Lohuizenda386112018-12-10 15:27:50 +0100372// An Ellipsis node stands for the "..." type in a
373// parameter list or the "..." length in an array type.
374type Ellipsis struct {
375 comments
376 Ellipsis token.Pos // position of "..."
377 Elt Expr // ellipsis element type (parameter lists only); or nil
378}
379
380// A BasicLit node represents a literal of basic type.
381type BasicLit struct {
382 comments
383 ValuePos token.Pos // literal position
384 Kind token.Token // INT, FLOAT, DURATION, or STRING
385 Value string // literal string; e.g. 42, 0x7f, 3.14, 1_234_567, 1e-9, 2.4i, 'a', '\x7f', "foo", or '\m\n\o'
386}
387
388// A Interpolation node represents a string or bytes interpolation.
389type Interpolation struct { // TODO: rename to TemplateLit
390 comments
391 Elts []Expr // interleaving of strings and expressions.
392}
393
394// A StructLit node represents a literal struct.
395type StructLit struct {
396 comments
397 Lbrace token.Pos // position of "{"
398 Elts []Decl // list of elements; or nil
399 Rbrace token.Pos // position of "}"
400}
401
402// A ListLit node represents a literal list.
403type ListLit struct {
404 comments
405 Lbrack token.Pos // position of "["
406 Elts []Expr // list of composite elements; or nil
407 Ellipsis token.Pos // open list if set
408 Type Expr // type for the remaining elements
409 Rbrack token.Pos // position of "]"
410}
411
412// A ListComprehension node represents as list comprehension.
413type ListComprehension struct {
414 comments
415 Lbrack token.Pos // position of "["
416 Expr Expr
417 Clauses []Clause // Feed or Guard (TODO let)
418 Rbrack token.Pos // position of "]"
419}
420
421// A ForClause node represents a for clause in a comprehension.
422type ForClause struct {
423 comments
424 For token.Pos
425 Key *Ident // allow pattern matching?
426 Colon token.Pos
427 Value *Ident // allow pattern matching?
428 In token.Pos
429 Source Expr
430}
431
432// A IfClause node represents an if guard clause in a comprehension.
433type IfClause struct {
434 comments
435 If token.Pos
436 Condition Expr
437}
438
439// A ParenExpr node represents a parenthesized expression.
440type ParenExpr struct {
441 comments
442 Lparen token.Pos // position of "("
443 X Expr // parenthesized expression
444 Rparen token.Pos // position of ")"
445}
446
447// A SelectorExpr node represents an expression followed by a selector.
448type SelectorExpr struct {
449 comments
450 X Expr // expression
451 Sel *Ident // field selector
452}
453
454// An IndexExpr node represents an expression followed by an index.
455type IndexExpr struct {
456 comments
457 X Expr // expression
458 Lbrack token.Pos // position of "["
459 Index Expr // index expression
460 Rbrack token.Pos // position of "]"
461}
462
463// An SliceExpr node represents an expression followed by slice indices.
464type SliceExpr struct {
465 comments
466 X Expr // expression
467 Lbrack token.Pos // position of "["
468 Low Expr // begin of slice range; or nil
469 High Expr // end of slice range; or nil
470 Rbrack token.Pos // position of "]"
471}
472
473// A CallExpr node represents an expression followed by an argument list.
474type CallExpr struct {
475 comments
476 Fun Expr // function expression
477 Lparen token.Pos // position of "("
478 Args []Expr // function arguments; or nil
479 Rparen token.Pos // position of ")"
480}
481
482// A UnaryExpr node represents a unary expression.
483type UnaryExpr struct {
484 comments
485 OpPos token.Pos // position of Op
486 Op token.Token // operator
487 X Expr // operand
488}
489
490// A BinaryExpr node represents a binary expression.
491type BinaryExpr struct {
492 comments
493 X Expr // left operand
494 OpPos token.Pos // position of Op
495 Op token.Token // operator
496 Y Expr // right operand
497}
498
// Pos and End implementations for expression/type nodes.
500
501func (x *BadExpr) Pos() token.Pos { return x.From }
502func (x *Ident) Pos() token.Pos { return x.NamePos }
503func (x *TemplateLabel) Pos() token.Pos { return x.Langle }
Marcel van Lohuizenda386112018-12-10 15:27:50 +0100504func (x *Ellipsis) Pos() token.Pos { return x.Ellipsis }
505func (x *BasicLit) Pos() token.Pos { return x.ValuePos }
506func (x *Interpolation) Pos() token.Pos { return x.Elts[0].Pos() }
507func (x *StructLit) Pos() token.Pos {
508 if x.Lbrace == token.NoPos && len(x.Elts) > 0 {
509 return x.Elts[0].Pos()
510 }
511 return x.Lbrace
512}
513
514func (x *ListLit) Pos() token.Pos { return x.Lbrack }
515func (x *ListComprehension) Pos() token.Pos { return x.Lbrack }
516func (x *ForClause) Pos() token.Pos { return x.For }
517func (x *IfClause) Pos() token.Pos { return x.If }
518func (x *ParenExpr) Pos() token.Pos { return x.Lparen }
519func (x *SelectorExpr) Pos() token.Pos { return x.X.Pos() }
520func (x *IndexExpr) Pos() token.Pos { return x.X.Pos() }
521func (x *SliceExpr) Pos() token.Pos { return x.X.Pos() }
522func (x *CallExpr) Pos() token.Pos { return x.Fun.Pos() }
523func (x *UnaryExpr) Pos() token.Pos { return x.OpPos }
524func (x *BinaryExpr) Pos() token.Pos { return x.X.Pos() }
525func (x *BottomLit) Pos() token.Pos { return x.Bottom }
526
527func (x *BadExpr) End() token.Pos { return x.To }
528func (x *Ident) End() token.Pos {
529 return x.NamePos.Add(len(x.Name))
530}
531func (x *TemplateLabel) End() token.Pos { return x.Rangle }
Marcel van Lohuizenda386112018-12-10 15:27:50 +0100532func (x *Ellipsis) End() token.Pos {
533 if x.Elt != nil {
534 return x.Elt.End()
535 }
536 return x.Ellipsis + 3 // len("...")
537}
538func (x *BasicLit) End() token.Pos { return token.Pos(int(x.ValuePos) + len(x.Value)) }
539func (x *Interpolation) End() token.Pos { return x.Elts[len(x.Elts)-1].Pos() }
540func (x *StructLit) End() token.Pos {
541 if x.Rbrace == token.NoPos && len(x.Elts) > 0 {
542 return x.Elts[len(x.Elts)-1].Pos()
543 }
544 return x.Rbrace.Add(1)
545}
546func (x *ListLit) End() token.Pos { return x.Rbrack.Add(1) }
547func (x *ListComprehension) End() token.Pos { return x.Rbrack }
548func (x *ForClause) End() token.Pos { return x.Source.End() }
549func (x *IfClause) End() token.Pos { return x.Condition.End() }
550func (x *ParenExpr) End() token.Pos { return x.Rparen.Add(1) }
551func (x *SelectorExpr) End() token.Pos { return x.Sel.End() }
552func (x *IndexExpr) End() token.Pos { return x.Rbrack.Add(1) }
553func (x *SliceExpr) End() token.Pos { return x.Rbrack.Add(1) }
554func (x *CallExpr) End() token.Pos { return x.Rparen.Add(1) }
555func (x *UnaryExpr) End() token.Pos { return x.X.End() }
556func (x *BinaryExpr) End() token.Pos { return x.Y.End() }
557func (x *BottomLit) End() token.Pos { return x.Bottom.Add(1) }
558
559// ----------------------------------------------------------------------------
560// Convenience functions for Idents
561
562// NewIdent creates a new Ident without position.
563// Useful for ASTs generated by code other than the Go
564func NewIdent(name string) *Ident {
565 return &Ident{comments{}, token.NoPos, name, nil, nil}
566}
567
568func (id *Ident) String() string {
569 if id != nil {
570 return id.Name
571 }
572 return "<nil>"
573}
574
575// ----------------------------------------------------------------------------
576// Declarations
577
578// An ImportSpec node represents a single package import.
579type ImportSpec struct {
580 comments
581 Name *Ident // local package name (including "."); or nil
582 Path *BasicLit // import path
583 EndPos token.Pos // end of spec (overrides Path.Pos if nonzero)
584}
585
586// Pos and End implementations for spec nodes.
587
588func (s *ImportSpec) Pos() token.Pos {
589 if s.Name != nil {
590 return s.Name.Pos()
591 }
592 return s.Path.Pos()
593}
594
595// func (s *AliasSpec) Pos() token.Pos { return s.Name.Pos() }
596// func (s *ValueSpec) Pos() token.Pos { return s.Names[0].Pos() }
597// func (s *TypeSpec) Pos() token.Pos { return s.Name.Pos() }
598
599func (s *ImportSpec) End() token.Pos {
600 if s.EndPos != 0 {
601 return s.EndPos
602 }
603 return s.Path.End()
604}
605
606// specNode() ensures that only spec nodes can be assigned to a Spec.
607func (*ImportSpec) specNode() {}
608
609// A declaration is represented by one of the following declaration nodes.
610type (
611 // A BadDecl node is a placeholder for declarations containing
612 // syntax errors for which no correct declaration nodes can be
613 // created.
614 BadDecl struct {
615 comments
616 From, To token.Pos // position range of bad declaration
617 }
618
619 // A ImportDecl node represents a series of import declarations. A valid
620 // Lparen position (Lparen.Line > 0) indicates a parenthesized declaration.
621 ImportDecl struct {
622 comments
623 Import token.Pos
624 Lparen token.Pos // position of '(', if any
625 Specs []*ImportSpec
626 Rparen token.Pos // position of ')', if any
627 }
628
629 // An EmitDecl node represents a single expression used as a declaration.
630 // The expressions in this declaration is what will be emitted as
631 // configuration output.
632 //
633 // An EmitDecl may only appear at the top level.
634 EmitDecl struct {
635 comments
636 Expr Expr
637 }
638)
639
640// Pos and End implementations for declaration nodes.
641
642func (d *BadDecl) Pos() token.Pos { return d.From }
643func (d *ImportDecl) Pos() token.Pos { return d.Import }
644func (d *EmitDecl) Pos() token.Pos { return d.Expr.Pos() }
645
646func (d *BadDecl) End() token.Pos { return d.To }
647func (d *ImportDecl) End() token.Pos {
648 if d.Rparen.IsValid() {
649 return d.Rparen.Add(1)
650 }
651 return d.Specs[0].End()
652}
653func (d *EmitDecl) End() token.Pos { return d.Expr.End() }
654
655// ----------------------------------------------------------------------------
656// Files and packages
657
658// A File node represents a Go source file.
659//
660// The Comments list contains all comments in the source file in order of
661// appearance, including the comments that are pointed to from other nodes
662// via Doc and Comment fields.
663type File struct {
664 Filename string
665 comments
666 Package token.Pos // position of "package" pseudo-keyword
667 Name *Ident // package names
668 // TODO: Change Expr to Decl?
669 Imports []*ImportSpec // imports in this file
670 Decls []Decl // top-level declarations; or nil
671 Unresolved []*Ident // unresolved identifiers in this file
672}
673
674func (f *File) Pos() token.Pos {
675 if f.Package != token.NoPos {
676 return f.Package
677 }
678 if len(f.Decls) > 0 {
679 return f.Decls[0].Pos()
680 }
681 return token.NoPos
682}
683func (f *File) End() token.Pos {
684 if n := len(f.Decls); n > 0 {
685 return f.Decls[n-1].End()
686 }
687 if f.Name != nil {
688 return f.Name.End()
689 }
690 return token.NoPos
691}