Marcel van Lohuizen | 8a7fc45 | 2018-12-10 15:19:41 +0100 | [diff] [blame] | 1 | // Copyright 2018 The CUE Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package token |
| 16 | |
| 17 | import ( |
| 18 | "fmt" |
| 19 | "sort" |
| 20 | "sync" |
| 21 | ) |
| 22 | |
| 23 | // ----------------------------------------------------------------------------- |
| 24 | // Positions |
| 25 | |
| 26 | // Position describes an arbitrary source position |
| 27 | // including the file, line, and column location. |
| 28 | // A Position is valid if the line number is > 0. |
| 29 | type Position struct { |
| 30 | Filename string // filename, if any |
| 31 | Offset int // offset, starting at 0 |
| 32 | Line int // line number, starting at 1 |
| 33 | Column int // column number, starting at 1 (byte count) |
| 34 | // RelPos Pos // relative position information |
| 35 | } |
| 36 | |
| 37 | // IsValid reports whether the position is valid. |
| 38 | func (pos *Position) IsValid() bool { return pos.Line > 0 } |
| 39 | |
| 40 | // String returns a string in one of several forms: |
| 41 | // |
| 42 | // file:line:column valid position with file name |
| 43 | // line:column valid position without file name |
| 44 | // file invalid position with file name |
| 45 | // - invalid position without file name |
| 46 | // |
| 47 | func (pos Position) String() string { |
| 48 | s := pos.Filename |
| 49 | if pos.IsValid() { |
| 50 | if s != "" { |
| 51 | s += ":" |
| 52 | } |
| 53 | s += fmt.Sprintf("%d:%d", pos.Line, pos.Column) |
| 54 | } |
| 55 | if s == "" { |
| 56 | s = "-" |
| 57 | } |
| 58 | return s |
| 59 | } |
| 60 | |
| 61 | // Pos is a compact encoding of a source position within a file set, as well as |
| 62 | // relative positioning information. It can be converted into a Position for a |
| 63 | // more convenient, but much larger, representation. |
| 64 | // |
| 65 | // The Pos value for a given file is a number in the range [base, base+size], |
| 66 | // where base and size are specified when adding the file to the file set via |
| 67 | // AddFile. |
| 68 | // |
| 69 | // To create the Pos value for a specific source offset (measured in bytes), |
| 70 | // first add the respective file to the current file set using FileSet.AddFile |
| 71 | // and then call File.Pos(offset) for that file. Given a Pos value p for a |
| 72 | // specific file set fset, the corresponding Position value is obtained by |
| 73 | // calling fset.Position(p). |
| 74 | // |
| 75 | // Pos values can be compared directly with the usual comparison operators: If |
| 76 | // two Pos values p and q are in the same file, comparing p and q is equivalent |
| 77 | // to comparing the respective source file offsets. If p and q are in different |
| 78 | // files, p < q is true if the file implied by p was added to the respective |
| 79 | // file set before the file implied by cue. |
| 80 | type Pos int |
| 81 | |
| 82 | // NoPos is the zero value for Pos; there is no file and line information |
| 83 | // associated with it, and NoPos().IsValid() is false. NoPos is always |
| 84 | // smaller than any other Pos value. The corresponding Position value |
| 85 | // for NoPos is the zero value for Position. |
| 86 | const NoPos Pos = 0 |
| 87 | |
| 88 | // RelPos indicates the relative position of token to the previous token. |
| 89 | type RelPos int |
| 90 | |
| 91 | const ( |
| 92 | // NoRelPos indicates no relative position is specified. |
| 93 | NoRelPos RelPos = iota |
| 94 | |
| 95 | // Elided indicates that the token for which this position is defined is |
| 96 | // not rendered at all. |
| 97 | Elided |
| 98 | |
| 99 | // NoSpace indicates there is no whitespace after this token. |
| 100 | NoSpace |
| 101 | |
| 102 | // Blank means there is horizontal space after this token. |
| 103 | Blank |
| 104 | |
| 105 | // Newline means there is a single newline after this token. |
| 106 | Newline |
| 107 | |
| 108 | // NewSection means there are two or more newlines after this token. |
| 109 | NewSection |
| 110 | |
| 111 | relMask Pos = 0xf |
| 112 | relShift = 4 |
| 113 | ) |
| 114 | |
| 115 | var relNames = []string{ |
| 116 | "invalid", "elided", "nospace", "blank", "newline", "section", |
| 117 | } |
| 118 | |
| 119 | func (p RelPos) String() string { return relNames[p] } |
| 120 | |
| 121 | // HasRelPos repors whether p has a relative position. |
| 122 | func (p Pos) HasRelPos() bool { |
| 123 | return p&relMask != 0 |
| 124 | |
| 125 | } |
| 126 | |
| 127 | func (p Pos) Add(n int) Pos { |
| 128 | return p + toPos(index(n)) |
| 129 | } |
| 130 | |
| 131 | // IsValid reports whether the position is valid. |
| 132 | func (p Pos) IsValid() bool { |
| 133 | return p != NoPos |
| 134 | } |
| 135 | |
| 136 | // IsNewline reports whether the relative information suggests this node should |
| 137 | // be printed on a new lien. |
| 138 | func (p Pos) IsNewline() bool { |
| 139 | return p.RelPos() >= Newline |
| 140 | } |
| 141 | |
| 142 | func (p Pos) WithRel(rel RelPos) Pos { |
| 143 | return p&^relMask | Pos(rel) |
| 144 | } |
| 145 | |
| 146 | func (p Pos) RelPos() RelPos { |
| 147 | return RelPos(p & relMask) |
| 148 | } |
| 149 | |
| 150 | func (p Pos) index() index { |
| 151 | return index(p) >> relShift |
| 152 | } |
| 153 | |
| 154 | func toPos(x index) Pos { |
| 155 | return (Pos(x) << relShift) |
| 156 | } |
| 157 | |
| 158 | // ----------------------------------------------------------------------------- |
| 159 | // File |
| 160 | |
| 161 | type index int |
| 162 | |
| 163 | // A File is a handle for a file belonging to a FileSet. |
| 164 | // A File has a name, size, and line offset table. |
| 165 | type File struct { |
| 166 | set *FileSet |
| 167 | name string // file name as provided to AddFile |
| 168 | base index // Pos index range for this file is [base...base+size] |
| 169 | size index // file size as provided to AddFile |
| 170 | |
| 171 | // lines and infos are protected by set.mutex |
| 172 | lines []index // lines contains the offset of the first character for each line (the first entry is always 0) |
| 173 | infos []lineInfo |
| 174 | } |
| 175 | |
| 176 | // Name returns the file name of file f as registered with AddFile. |
| 177 | func (f *File) Name() string { |
| 178 | return f.name |
| 179 | } |
| 180 | |
| 181 | // Base returns the base offset of file f as registered with AddFile. |
| 182 | func (f *File) Base() int { |
| 183 | return int(f.base) |
| 184 | } |
| 185 | |
| 186 | // Size returns the size of file f as registered with AddFile. |
| 187 | func (f *File) Size() int { |
| 188 | return int(f.size) |
| 189 | } |
| 190 | |
| 191 | // LineCount returns the number of lines in file f. |
| 192 | func (f *File) LineCount() int { |
| 193 | f.set.mutex.RLock() |
| 194 | n := len(f.lines) |
| 195 | f.set.mutex.RUnlock() |
| 196 | return n |
| 197 | } |
| 198 | |
| 199 | // AddLine adds the line offset for a new line. |
| 200 | // The line offset must be larger than the offset for the previous line |
| 201 | // and smaller than the file size; otherwise the line offset is ignored. |
| 202 | // |
| 203 | func (f *File) AddLine(offset int) { |
| 204 | x := index(offset) |
| 205 | f.set.mutex.Lock() |
| 206 | if i := len(f.lines); (i == 0 || f.lines[i-1] < x) && x < f.size { |
| 207 | f.lines = append(f.lines, x) |
| 208 | } |
| 209 | f.set.mutex.Unlock() |
| 210 | } |
| 211 | |
| 212 | // MergeLine merges a line with the following line. It is akin to replacing |
| 213 | // the newline character at the end of the line with a space (to not change the |
| 214 | // remaining offsets). To obtain the line number, consult e.g. Position.Line. |
| 215 | // MergeLine will panic if given an invalid line number. |
| 216 | // |
| 217 | func (f *File) MergeLine(line int) { |
| 218 | if line <= 0 { |
| 219 | panic("illegal line number (line numbering starts at 1)") |
| 220 | } |
| 221 | f.set.mutex.Lock() |
| 222 | defer f.set.mutex.Unlock() |
| 223 | if line >= len(f.lines) { |
| 224 | panic("illegal line number") |
| 225 | } |
| 226 | // To merge the line numbered <line> with the line numbered <line+1>, |
| 227 | // we need to remove the entry in lines corresponding to the line |
| 228 | // numbered <line+1>. The entry in lines corresponding to the line |
| 229 | // numbered <line+1> is located at index <line>, since indices in lines |
| 230 | // are 0-based and line numbers are 1-based. |
| 231 | copy(f.lines[line:], f.lines[line+1:]) |
| 232 | f.lines = f.lines[:len(f.lines)-1] |
| 233 | } |
| 234 | |
| 235 | // SetLines sets the line offsets for a file and reports whether it succeeded. |
| 236 | // The line offsets are the offsets of the first character of each line; |
| 237 | // for instance for the content "ab\nc\n" the line offsets are {0, 3}. |
| 238 | // An empty file has an empty line offset table. |
| 239 | // Each line offset must be larger than the offset for the previous line |
| 240 | // and smaller than the file size; otherwise SetLines fails and returns |
| 241 | // false. |
| 242 | // Callers must not mutate the provided slice after SetLines returns. |
| 243 | // |
| 244 | func (f *File) SetLines(lines []int) bool { |
| 245 | // verify validity of lines table |
| 246 | size := f.size |
| 247 | for i, offset := range lines { |
| 248 | if i > 0 && offset <= lines[i-1] || size <= index(offset) { |
| 249 | return false |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | // set lines table |
| 254 | f.set.mutex.Lock() |
| 255 | f.lines = f.lines[:0] |
| 256 | for _, l := range lines { |
| 257 | f.lines = append(f.lines, index(l)) |
| 258 | } |
| 259 | f.set.mutex.Unlock() |
| 260 | return true |
| 261 | } |
| 262 | |
| 263 | // SetLinesForContent sets the line offsets for the given file content. |
| 264 | // It ignores position-altering //line comments. |
| 265 | func (f *File) SetLinesForContent(content []byte) { |
| 266 | var lines []index |
| 267 | line := index(0) |
| 268 | for offset, b := range content { |
| 269 | if line >= 0 { |
| 270 | lines = append(lines, line) |
| 271 | } |
| 272 | line = -1 |
| 273 | if b == '\n' { |
| 274 | line = index(offset) + 1 |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | // set lines table |
| 279 | f.set.mutex.Lock() |
| 280 | f.lines = lines |
| 281 | f.set.mutex.Unlock() |
| 282 | } |
| 283 | |
| 284 | // A lineInfo object describes alternative file and line number |
| 285 | // information (such as provided via a //line comment in a .go |
| 286 | // file) for a given file offset. |
| 287 | type lineInfo struct { |
| 288 | // fields are exported to make them accessible to gob |
| 289 | Offset int |
| 290 | Filename string |
| 291 | Line int |
| 292 | } |
| 293 | |
| 294 | // AddLineInfo adds alternative file and line number information for |
| 295 | // a given file offset. The offset must be larger than the offset for |
| 296 | // the previously added alternative line info and smaller than the |
| 297 | // file size; otherwise the information is ignored. |
| 298 | // |
| 299 | // AddLineInfo is typically used to register alternative position |
| 300 | // information for //line filename:line comments in source files. |
| 301 | // |
| 302 | func (f *File) AddLineInfo(offset int, filename string, line int) { |
| 303 | x := index(offset) |
| 304 | f.set.mutex.Lock() |
| 305 | if i := len(f.infos); i == 0 || index(f.infos[i-1].Offset) < x && x < f.size { |
| 306 | f.infos = append(f.infos, lineInfo{offset, filename, line}) |
| 307 | } |
| 308 | f.set.mutex.Unlock() |
| 309 | } |
| 310 | |
| 311 | // Pos returns the Pos value for the given file offset; |
| 312 | // the offset must be <= f.Size(). |
| 313 | // f.Pos(f.Offset(p)) == p. |
| 314 | // |
| 315 | func (f *File) Pos(offset int, rel RelPos) Pos { |
| 316 | if index(offset) > f.size { |
| 317 | panic("illegal file offset") |
| 318 | } |
| 319 | return toPos(f.base+index(offset)) + Pos(rel) |
| 320 | } |
| 321 | |
| 322 | // Offset returns the offset for the given file position p; |
| 323 | // p must be a valid Pos value in that file. |
| 324 | // f.Offset(f.Pos(offset)) == offset. |
| 325 | // |
| 326 | func (f *File) Offset(p Pos) int { |
| 327 | x := p.index() |
| 328 | if x < f.base || x > f.base+index(f.size) { |
| 329 | panic("illegal Pos value") |
| 330 | } |
| 331 | return int(x - f.base) |
| 332 | } |
| 333 | |
| 334 | // Line returns the line number for the given file position p; |
| 335 | // p must be a Pos value in that file or NoPos. |
| 336 | // |
| 337 | func (f *File) Line(p Pos) int { |
| 338 | return f.Position(p).Line |
| 339 | } |
| 340 | |
| 341 | func searchLineInfos(a []lineInfo, x int) int { |
| 342 | return sort.Search(len(a), func(i int) bool { return a[i].Offset > x }) - 1 |
| 343 | } |
| 344 | |
| 345 | // unpack returns the filename and line and column number for a file offset. |
| 346 | // If adjusted is set, unpack will return the filename and line information |
| 347 | // possibly adjusted by //line comments; otherwise those comments are ignored. |
| 348 | // |
| 349 | func (f *File) unpack(offset index, adjusted bool) (filename string, line, column int) { |
| 350 | filename = f.name |
| 351 | if i := searchInts(f.lines, offset); i >= 0 { |
| 352 | line, column = int(i+1), int(offset-f.lines[i]+1) |
| 353 | } |
| 354 | if adjusted && len(f.infos) > 0 { |
| 355 | // almost no files have extra line infos |
| 356 | if i := searchLineInfos(f.infos, int(offset)); i >= 0 { |
| 357 | alt := &f.infos[i] |
| 358 | filename = alt.Filename |
| 359 | if i := searchInts(f.lines, index(alt.Offset)); i >= 0 { |
| 360 | line += alt.Line - i - 1 |
| 361 | } |
| 362 | } |
| 363 | } |
| 364 | return |
| 365 | } |
| 366 | |
| 367 | func (f *File) position(p Pos, adjusted bool) (pos Position) { |
| 368 | offset := p.index() - f.base |
| 369 | pos.Offset = int(offset) |
| 370 | pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted) |
| 371 | return |
| 372 | } |
| 373 | |
| 374 | // PositionFor returns the Position value for the given file position p. |
| 375 | // If adjusted is set, the position may be adjusted by position-altering |
| 376 | // //line comments; otherwise those comments are ignored. |
| 377 | // p must be a Pos value in f or NoPos. |
| 378 | // |
| 379 | func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) { |
| 380 | x := p.index() |
| 381 | if p != NoPos { |
| 382 | if x < f.base || x > f.base+f.size { |
| 383 | panic("illegal Pos value") |
| 384 | } |
| 385 | pos = f.position(p, adjusted) |
| 386 | } |
| 387 | return |
| 388 | } |
| 389 | |
| 390 | // Position returns the Position value for the given file position p. |
| 391 | // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true). |
| 392 | // |
| 393 | func (f *File) Position(p Pos) (pos Position) { |
| 394 | return f.PositionFor(p, true) |
| 395 | } |
| 396 | |
| 397 | // A FileSet represents a set of source files. |
| 398 | // Methods of file sets are synchronized; multiple goroutines |
| 399 | // may invoke them concurrently. |
| 400 | type FileSet struct { |
| 401 | mutex sync.RWMutex // protects the file set |
| 402 | base int // base offset for the next file |
| 403 | files []*File // list of files in the order added to the set |
| 404 | last *File // cache of last file looked up |
| 405 | } |
| 406 | |
| 407 | // NewFileSet creates a new file set. |
| 408 | func NewFileSet() *FileSet { |
| 409 | return &FileSet{ |
| 410 | base: 1, // 0 == NoPos |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | // Base returns the minimum base offset that must be provided to |
| 415 | // AddFile when adding the next file. |
| 416 | func (s *FileSet) Base() int { |
| 417 | s.mutex.RLock() |
| 418 | b := s.base |
| 419 | s.mutex.RUnlock() |
| 420 | return b |
| 421 | |
| 422 | } |
| 423 | |
| 424 | // AddFile adds a new file with a given filename, base offset, and file size |
| 425 | // to the file set s and returns the file. Multiple files may have the same |
| 426 | // name. The base offset must not be smaller than the FileSet's Base(), and |
| 427 | // size must not be negative. As a special case, if a negative base is provided, |
| 428 | // the current value of the FileSet's Base() is used instead. |
| 429 | // |
| 430 | // Adding the file will set the file set's Base() value to base + size + 1 |
| 431 | // as the minimum base value for the next file. The following relationship |
| 432 | // exists between a Pos value p for a given file offset offs: |
| 433 | // |
| 434 | // int(p) = base + offs |
| 435 | // |
| 436 | // with offs in the range [0, size] and thus p in the range [base, base+size]. |
| 437 | // For convenience, File.Pos may be used to create file-specific position |
| 438 | // values from a file offset. |
| 439 | func (s *FileSet) AddFile(filename string, base, size int) *File { |
| 440 | s.mutex.Lock() |
| 441 | defer s.mutex.Unlock() |
| 442 | if base < 0 { |
| 443 | base = s.base |
| 444 | } |
| 445 | if base < s.base || size < 0 { |
| 446 | panic("illegal base or size") |
| 447 | } |
| 448 | // base >= s.base && size >= 0 |
| 449 | f := &File{s, filename, index(base), index(size), []index{0}, nil} |
| 450 | base += size + 1 // +1 because EOF also has a position |
| 451 | if base < 0 { |
| 452 | panic("token.Pos offset overflow (> 2G of source code in file set)") |
| 453 | } |
| 454 | // add the file to the file set |
| 455 | s.base = base |
| 456 | s.files = append(s.files, f) |
| 457 | s.last = f |
| 458 | return f |
| 459 | } |
| 460 | |
| 461 | // Iterate calls f for the files in the file set in the order they were added |
| 462 | // until f returns false. |
| 463 | // |
| 464 | func (s *FileSet) Iterate(f func(*File) bool) { |
| 465 | for i := 0; ; i++ { |
| 466 | var file *File |
| 467 | s.mutex.RLock() |
| 468 | if i < len(s.files) { |
| 469 | file = s.files[i] |
| 470 | } |
| 471 | s.mutex.RUnlock() |
| 472 | if file == nil || !f(file) { |
| 473 | break |
| 474 | } |
| 475 | } |
| 476 | } |
| 477 | |
| 478 | func searchFiles(a []*File, x index) int { |
| 479 | return sort.Search(len(a), func(i int) bool { return a[i].base > x }) - 1 |
| 480 | } |
| 481 | |
| 482 | func (s *FileSet) file(p Pos) *File { |
| 483 | x := p.index() |
| 484 | s.mutex.RLock() |
| 485 | // common case: p is in last file |
| 486 | if f := s.last; f != nil && f.base <= x && x <= f.base+f.size { |
| 487 | s.mutex.RUnlock() |
| 488 | return f |
| 489 | } |
| 490 | // p is not in last file - search all files |
| 491 | if i := searchFiles(s.files, x); i >= 0 { |
| 492 | f := s.files[i] |
| 493 | // f.base <= int(p) by definition of searchFiles |
| 494 | if x <= f.base+f.size { |
| 495 | s.mutex.RUnlock() |
| 496 | s.mutex.Lock() |
| 497 | s.last = f // race is ok - s.last is only a cache |
| 498 | s.mutex.Unlock() |
| 499 | return f |
| 500 | } |
| 501 | } |
| 502 | s.mutex.RUnlock() |
| 503 | return nil |
| 504 | } |
| 505 | |
| 506 | // File returns the file that contains the position p. |
| 507 | // If no such file is found (for instance for p == NoPos), |
| 508 | // the result is nil. |
| 509 | // |
| 510 | func (s *FileSet) File(p Pos) (f *File) { |
| 511 | if p.index() != 0 { |
| 512 | f = s.file(p) |
| 513 | } |
| 514 | return |
| 515 | } |
| 516 | |
| 517 | // PositionFor converts a Pos p in the fileset into a Position value. |
| 518 | // If adjusted is set, the position may be adjusted by position-altering |
| 519 | // //line comments; otherwise those comments are ignored. |
| 520 | // p must be a Pos value in s or NoPos. |
| 521 | // |
| 522 | func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) { |
| 523 | if p.index() != 0 { |
| 524 | if f := s.file(p); f != nil { |
| 525 | s.mutex.RLock() |
| 526 | pos = f.position(p, adjusted) |
| 527 | s.mutex.RUnlock() |
| 528 | } |
| 529 | } |
| 530 | return |
| 531 | } |
| 532 | |
| 533 | // Position converts a Pos p in the fileset into a Position value. |
| 534 | // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true). |
| 535 | // |
| 536 | func (s *FileSet) Position(p Pos) (pos Position) { |
| 537 | return s.PositionFor(p, true) |
| 538 | } |
| 539 | |
| 540 | // ----------------------------------------------------------------------------- |
| 541 | // Helper functions |
| 542 | |
| 543 | func searchInts(a []index, x index) int { |
| 544 | // This function body is a manually inlined version of: |
| 545 | // |
| 546 | // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 |
| 547 | // |
| 548 | // With better compiler optimizations, this may not be needed in the |
| 549 | // future, but at the moment this change improves the go/printer |
| 550 | // benchmark performance by ~30%. This has a direct impact on the |
| 551 | // speed of gofmt and thus seems worthwhile (2011-04-29). |
| 552 | // TODO(gri): Remove this when compilers have caught up. |
| 553 | i, j := 0, len(a) |
| 554 | for i < j { |
| 555 | h := i + (j-i)/2 // avoid overflow when computing h |
| 556 | // i ≤ h < j |
| 557 | if a[h] <= x { |
| 558 | i = h + 1 |
| 559 | } else { |
| 560 | j = h |
| 561 | } |
| 562 | } |
| 563 | return i - 1 |
| 564 | } |