1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package token 6 7 import ( 8 "fmt" 9 "sort" 10 "strconv" 11 "sync" 12 "sync/atomic" 13 ) 14 15 // ----------------------------------------------------------------------------- 16 // Positions 17 18 // Position describes an arbitrary source position 19 // including the file, line, and column location. 20 // A Position is valid if the line number is > 0. 21 type Position struct { 22 Filename string // filename, if any 23 Offset int // offset, starting at 0 24 Line int // line number, starting at 1 25 Column int // column number, starting at 1 (byte count) 26 } 27 28 // IsValid reports whether the position is valid. 29 func (pos *Position) IsValid() bool { return pos.Line > 0 } 30 31 // String returns a string in one of several forms: 32 // 33 // file:line:column valid position with file name 34 // file:line valid position with file name but no column (column == 0) 35 // line:column valid position without file name 36 // line valid position without file name and no column (column == 0) 37 // file invalid position with file name 38 // - invalid position without file name 39 func (pos Position) String() string { 40 s := pos.Filename 41 if pos.IsValid() { 42 if s != "" { 43 s += ":" 44 } 45 s += strconv.Itoa(pos.Line) 46 if pos.Column != 0 { 47 s += fmt.Sprintf(":%d", pos.Column) 48 } 49 } 50 if s == "" { 51 s = "-" 52 } 53 return s 54 } 55 56 // Pos is a compact encoding of a source position within a file set. 57 // It can be converted into a [Position] for a more convenient, but much 58 // larger, representation. 59 // 60 // The Pos value for a given file is a number in the range [base, base+size], 61 // where base and size are specified when a file is added to the file set. 
// The difference between a Pos value and the corresponding file base
// corresponds to the byte offset of that position (represented by the Pos value)
// from the beginning of the file. Thus, the file base offset is the Pos value
// representing the first byte in the file.
//
// To create the Pos value for a specific source offset (measured in bytes),
// first add the respective file to the current file set using [FileSet.AddFile]
// and then call [File.Pos](offset) for that file. Given a Pos value p
// for a specific file set fset, the corresponding [Position] value is
// obtained by calling fset.Position(p).
//
// Pos values can be compared directly with the usual comparison operators:
// If two Pos values p and q are in the same file, comparing p and q is
// equivalent to comparing the respective source file offsets. If p and q
// are in different files, p < q is true if the file implied by p was added
// to the respective file set before the file implied by q.
type Pos int

// NoPos is the zero value for [Pos]. It carries no file or line
// information, and NoPos.IsValid() is false. NoPos is always
// smaller than any other [Pos] value. The [Position] value that
// corresponds to NoPos is the zero value for [Position].
const NoPos Pos = 0

// IsValid reports whether p is a valid position, i.e. differs from [NoPos].
func (p Pos) IsValid() bool { return p != NoPos }

// -----------------------------------------------------------------------------
// File

// A File is a handle for a file belonging to a [FileSet].
// A File has a name, size, and line offset table.
96 type File struct { 97 name string // file name as provided to AddFile 98 base int // Pos value range for this file is [base...base+size] 99 size int // file size as provided to AddFile 100 101 // lines and infos are protected by mutex 102 mutex sync.Mutex 103 lines []int // lines contains the offset of the first character for each line (the first entry is always 0) 104 infos []lineInfo 105 } 106 107 // Name returns the file name of file f as registered with AddFile. 108 func (f *File) Name() string { 109 return f.name 110 } 111 112 // Base returns the base offset of file f as registered with AddFile. 113 func (f *File) Base() int { 114 return f.base 115 } 116 117 // Size returns the size of file f as registered with AddFile. 118 func (f *File) Size() int { 119 return f.size 120 } 121 122 // LineCount returns the number of lines in file f. 123 func (f *File) LineCount() int { 124 f.mutex.Lock() 125 n := len(f.lines) 126 f.mutex.Unlock() 127 return n 128 } 129 130 // AddLine adds the line offset for a new line. 131 // The line offset must be larger than the offset for the previous line 132 // and smaller than the file size; otherwise the line offset is ignored. 133 func (f *File) AddLine(offset int) { 134 f.mutex.Lock() 135 if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size { 136 f.lines = append(f.lines, offset) 137 } 138 f.mutex.Unlock() 139 } 140 141 // MergeLine merges a line with the following line. It is akin to replacing 142 // the newline character at the end of the line with a space (to not change the 143 // remaining offsets). To obtain the line number, consult e.g. [Position.Line]. 144 // MergeLine will panic if given an invalid line number. 
145 func (f *File) MergeLine(line int) { 146 if line < 1 { 147 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 148 } 149 f.mutex.Lock() 150 defer f.mutex.Unlock() 151 if line >= len(f.lines) { 152 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 153 } 154 // To merge the line numbered <line> with the line numbered <line+1>, 155 // we need to remove the entry in lines corresponding to the line 156 // numbered <line+1>. The entry in lines corresponding to the line 157 // numbered <line+1> is located at index <line>, since indices in lines 158 // are 0-based and line numbers are 1-based. 159 copy(f.lines[line:], f.lines[line+1:]) 160 f.lines = f.lines[:len(f.lines)-1] 161 } 162 163 // Lines returns the effective line offset table of the form described by [File.SetLines]. 164 // Callers must not mutate the result. 165 func (f *File) Lines() []int { 166 f.mutex.Lock() 167 lines := f.lines 168 f.mutex.Unlock() 169 return lines 170 } 171 172 // SetLines sets the line offsets for a file and reports whether it succeeded. 173 // The line offsets are the offsets of the first character of each line; 174 // for instance for the content "ab\nc\n" the line offsets are {0, 3}. 175 // An empty file has an empty line offset table. 176 // Each line offset must be larger than the offset for the previous line 177 // and smaller than the file size; otherwise SetLines fails and returns 178 // false. 179 // Callers must not mutate the provided slice after SetLines returns. 180 func (f *File) SetLines(lines []int) bool { 181 // verify validity of lines table 182 size := f.size 183 for i, offset := range lines { 184 if i > 0 && offset <= lines[i-1] || size <= offset { 185 return false 186 } 187 } 188 189 // set lines table 190 f.mutex.Lock() 191 f.lines = lines 192 f.mutex.Unlock() 193 return true 194 } 195 196 // SetLinesForContent sets the line offsets for the given file content. 197 // It ignores position-altering //line comments. 
198 func (f *File) SetLinesForContent(content []byte) { 199 var lines []int 200 line := 0 201 for offset, b := range content { 202 if line >= 0 { 203 lines = append(lines, line) 204 } 205 line = -1 206 if b == '\n' { 207 line = offset + 1 208 } 209 } 210 211 // set lines table 212 f.mutex.Lock() 213 f.lines = lines 214 f.mutex.Unlock() 215 } 216 217 // LineStart returns the [Pos] value of the start of the specified line. 218 // It ignores any alternative positions set using [File.AddLineColumnInfo]. 219 // LineStart panics if the 1-based line number is invalid. 220 func (f *File) LineStart(line int) Pos { 221 if line < 1 { 222 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 223 } 224 f.mutex.Lock() 225 defer f.mutex.Unlock() 226 if line > len(f.lines) { 227 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 228 } 229 return Pos(f.base + f.lines[line-1]) 230 } 231 232 // A lineInfo object describes alternative file, line, and column 233 // number information (such as provided via a //line directive) 234 // for a given file offset. 235 type lineInfo struct { 236 // fields are exported to make them accessible to gob 237 Offset int 238 Filename string 239 Line, Column int 240 } 241 242 // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument. 243 // It is here for backward-compatibility for code prior to Go 1.11. 244 func (f *File) AddLineInfo(offset int, filename string, line int) { 245 f.AddLineColumnInfo(offset, filename, line, 1) 246 } 247 248 // AddLineColumnInfo adds alternative file, line, and column number 249 // information for a given file offset. The offset must be larger 250 // than the offset for the previously added alternative line info 251 // and smaller than the file size; otherwise the information is 252 // ignored. 253 // 254 // AddLineColumnInfo is typically used to register alternative position 255 // information for line directives such as //line filename:line:column. 
256 func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) { 257 f.mutex.Lock() 258 if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size { 259 f.infos = append(f.infos, lineInfo{offset, filename, line, column}) 260 } 261 f.mutex.Unlock() 262 } 263 264 // Pos returns the Pos value for the given file offset; 265 // the offset must be <= f.Size(). 266 // f.Pos(f.Offset(p)) == p. 267 func (f *File) Pos(offset int) Pos { 268 if offset > f.size { 269 panic(fmt.Sprintf("invalid file offset %d (should be <= %d)", offset, f.size)) 270 } 271 return Pos(f.base + offset) 272 } 273 274 // Offset returns the offset for the given file position p; 275 // p must be a valid [Pos] value in that file. 276 // f.Offset(f.Pos(offset)) == offset. 277 func (f *File) Offset(p Pos) int { 278 if int(p) < f.base || int(p) > f.base+f.size { 279 panic(fmt.Sprintf("invalid Pos value %d (should be in [%d, %d])", p, f.base, f.base+f.size)) 280 } 281 return int(p) - f.base 282 } 283 284 // Line returns the line number for the given file position p; 285 // p must be a [Pos] value in that file or [NoPos]. 286 func (f *File) Line(p Pos) int { 287 return f.Position(p).Line 288 } 289 290 func searchLineInfos(a []lineInfo, x int) int { 291 return sort.Search(len(a), func(i int) bool { return a[i].Offset > x }) - 1 292 } 293 294 // unpack returns the filename and line and column number for a file offset. 295 // If adjusted is set, unpack will return the filename and line information 296 // possibly adjusted by //line comments; otherwise those comments are ignored. 
func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
	f.mutex.Lock()
	filename = f.name
	// Find the line containing offset: i is the index of the last line
	// start that is <= offset. If there is none, line and column keep
	// their zero values.
	if i := searchInts(f.lines, offset); i >= 0 {
		line, column = i+1, offset-f.lines[i]+1 // both are 1-based
	}
	if adjusted && len(f.infos) > 0 {
		// few files have extra line infos
		// The applicable info is the last one recorded at or before offset.
		if i := searchLineInfos(f.infos, offset); i >= 0 {
			alt := &f.infos[i]
			filename = alt.Filename
			// Note: this i shadows the info index above; it is the index
			// of the line on which the alternative info was recorded.
			if i := searchInts(f.lines, alt.Offset); i >= 0 {
				// i+1 is the line at which the alternative position was recorded
				d := line - (i + 1) // line distance from alternative position base
				line = alt.Line + d
				if alt.Column == 0 {
					// alternative column is unknown => relative column is unknown
					// (the current specification for line directives requires
					// this to apply until the next PosBase/line directive,
					// not just until the new newline)
					column = 0
				} else if d == 0 {
					// the alternative position base is on the current line
					// => column is relative to alternative column
					column = alt.Column + (offset - alt.Offset)
				}
				// Otherwise (known column, different line) the column
				// computed from the line table above is kept.
			}
		}
	}
	// TODO(mvdan): move Unlock back under Lock with a defer statement once
	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
	f.mutex.Unlock()
	return
}

// position converts p into a Position without checking that p lies in f;
// callers are expected to have established that. The offset is p relative
// to the file base; filename, line, and column come from unpack.
func (f *File) position(p Pos, adjusted bool) (pos Position) {
	offset := int(p) - f.base
	pos.Offset = offset
	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
	return
}

// PositionFor returns the Position value for the given file position p.
// If adjusted is set, the position may be adjusted by position-altering
// //line comments; otherwise those comments are ignored.
// p must be a Pos value in f or NoPos.
343 func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) { 344 if p != NoPos { 345 if int(p) < f.base || int(p) > f.base+f.size { 346 panic(fmt.Sprintf("invalid Pos value %d (should be in [%d, %d])", p, f.base, f.base+f.size)) 347 } 348 pos = f.position(p, adjusted) 349 } 350 return 351 } 352 353 // Position returns the Position value for the given file position p. 354 // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true). 355 func (f *File) Position(p Pos) (pos Position) { 356 return f.PositionFor(p, true) 357 } 358 359 // ----------------------------------------------------------------------------- 360 // FileSet 361 362 // A FileSet represents a set of source files. 363 // Methods of file sets are synchronized; multiple goroutines 364 // may invoke them concurrently. 365 // 366 // The byte offsets for each file in a file set are mapped into 367 // distinct (integer) intervals, one interval [base, base+size] 368 // per file. [FileSet.Base] represents the first byte in the file, and size 369 // is the corresponding file size. A [Pos] value is a value in such 370 // an interval. By determining the interval a [Pos] value belongs 371 // to, the file, its file base, and thus the byte offset (position) 372 // the [Pos] value is representing can be computed. 373 // 374 // When adding a new file, a file base must be provided. That can 375 // be any integer value that is past the end of any interval of any 376 // file already in the file set. For convenience, [FileSet.Base] provides 377 // such a value, which is simply the end of the Pos interval of the most 378 // recently added file, plus one. Unless there is a need to extend an 379 // interval later, using the [FileSet.Base] should be used as argument 380 // for [FileSet.AddFile]. 381 // 382 // A [File] may be removed from a FileSet when it is no longer needed. 383 // This may reduce memory usage in a long-running application. 
384 type FileSet struct { 385 mutex sync.RWMutex // protects the file set 386 base int // base offset for the next file 387 files []*File // list of files in the order added to the set 388 last atomic.Pointer[File] // cache of last file looked up 389 } 390 391 // NewFileSet creates a new file set. 392 func NewFileSet() *FileSet { 393 return &FileSet{ 394 base: 1, // 0 == NoPos 395 } 396 } 397 398 // Base returns the minimum base offset that must be provided to 399 // [FileSet.AddFile] when adding the next file. 400 func (s *FileSet) Base() int { 401 s.mutex.RLock() 402 b := s.base 403 s.mutex.RUnlock() 404 return b 405 } 406 407 // AddFile adds a new file with a given filename, base offset, and file size 408 // to the file set s and returns the file. Multiple files may have the same 409 // name. The base offset must not be smaller than the [FileSet.Base], and 410 // size must not be negative. As a special case, if a negative base is provided, 411 // the current value of the [FileSet.Base] is used instead. 412 // 413 // Adding the file will set the file set's [FileSet.Base] value to base + size + 1 414 // as the minimum base value for the next file. The following relationship 415 // exists between a [Pos] value p for a given file offset offs: 416 // 417 // int(p) = base + offs 418 // 419 // with offs in the range [0, size] and thus p in the range [base, base+size]. 420 // For convenience, [File.Pos] may be used to create file-specific position 421 // values from a file offset. 422 func (s *FileSet) AddFile(filename string, base, size int) *File { 423 // Allocate f outside the critical section. 
424 f := &File{name: filename, size: size, lines: []int{0}} 425 426 s.mutex.Lock() 427 defer s.mutex.Unlock() 428 if base < 0 { 429 base = s.base 430 } 431 if base < s.base { 432 panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base)) 433 } 434 f.base = base 435 if size < 0 { 436 panic(fmt.Sprintf("invalid size %d (should be >= 0)", size)) 437 } 438 // base >= s.base && size >= 0 439 base += size + 1 // +1 because EOF also has a position 440 if base < 0 { 441 panic("token.Pos offset overflow (> 2G of source code in file set)") 442 } 443 // add the file to the file set 444 s.base = base 445 s.files = append(s.files, f) 446 s.last.Store(f) 447 return f 448 } 449 450 // RemoveFile removes a file from the [FileSet] so that subsequent 451 // queries for its [Pos] interval yield a negative result. 452 // This reduces the memory usage of a long-lived [FileSet] that 453 // encounters an unbounded stream of files. 454 // 455 // Removing a file that does not belong to the set has no effect. 456 func (s *FileSet) RemoveFile(file *File) { 457 s.last.CompareAndSwap(file, nil) // clear last file cache 458 459 s.mutex.Lock() 460 defer s.mutex.Unlock() 461 462 if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file { 463 last := &s.files[len(s.files)-1] 464 s.files = append(s.files[:i], s.files[i+1:]...) 465 *last = nil // don't prolong lifetime when popping last element 466 } 467 } 468 469 // Iterate calls f for the files in the file set in the order they were added 470 // until f returns false. 
471 func (s *FileSet) Iterate(f func(*File) bool) { 472 for i := 0; ; i++ { 473 var file *File 474 s.mutex.RLock() 475 if i < len(s.files) { 476 file = s.files[i] 477 } 478 s.mutex.RUnlock() 479 if file == nil || !f(file) { 480 break 481 } 482 } 483 } 484 485 func searchFiles(a []*File, x int) int { 486 return sort.Search(len(a), func(i int) bool { return a[i].base > x }) - 1 487 } 488 489 func (s *FileSet) file(p Pos) *File { 490 // common case: p is in last file. 491 if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size { 492 return f 493 } 494 495 s.mutex.RLock() 496 defer s.mutex.RUnlock() 497 498 // p is not in last file - search all files 499 if i := searchFiles(s.files, int(p)); i >= 0 { 500 f := s.files[i] 501 // f.base <= int(p) by definition of searchFiles 502 if int(p) <= f.base+f.size { 503 // Update cache of last file. A race is ok, 504 // but an exclusive lock causes heavy contention. 505 s.last.Store(f) 506 return f 507 } 508 } 509 return nil 510 } 511 512 // File returns the file that contains the position p. 513 // If no such file is found (for instance for p == [NoPos]), 514 // the result is nil. 515 func (s *FileSet) File(p Pos) (f *File) { 516 if p != NoPos { 517 f = s.file(p) 518 } 519 return 520 } 521 522 // PositionFor converts a [Pos] p in the fileset into a [Position] value. 523 // If adjusted is set, the position may be adjusted by position-altering 524 // //line comments; otherwise those comments are ignored. 525 // p must be a [Pos] value in s or [NoPos]. 526 func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) { 527 if p != NoPos { 528 if f := s.file(p); f != nil { 529 return f.position(p, adjusted) 530 } 531 } 532 return 533 } 534 535 // Position converts a [Pos] p in the fileset into a Position value. 536 // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true). 
537 func (s *FileSet) Position(p Pos) (pos Position) { 538 return s.PositionFor(p, true) 539 } 540 541 // ----------------------------------------------------------------------------- 542 // Helper functions 543 544 func searchInts(a []int, x int) int { 545 // This function body is a manually inlined version of: 546 // 547 // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 548 // 549 // With better compiler optimizations, this may not be needed in the 550 // future, but at the moment this change improves the go/printer 551 // benchmark performance by ~30%. This has a direct impact on the 552 // speed of gofmt and thus seems worthwhile (2011-04-29). 553 // TODO(gri): Remove this when compilers have caught up. 554 i, j := 0, len(a) 555 for i < j { 556 h := int(uint(i+j) >> 1) // avoid overflow when computing h 557 // i ≤ h < j 558 if a[h] <= x { 559 i = h + 1 560 } else { 561 j = h 562 } 563 } 564 return i - 1 565 } 566