pos_scanner.go - hashicorp/hcl/v2 - Git at Google

 package hcl

 import (
 	"bufio"
 	"bytes"

 	"github.com/apparentlymart/go-textseg/v13/textseg"
 )

 // RangeScanner walks an in-memory buffer with a bufio.SplitFunc and reports
 // the source range covered by each token that the SplitFunc yields.
 //
 // Pairing it with bufio.ScanLines, for instance, produces one range per line
 // of the file with the line terminators left out, which is convenient when
 // rendering source code snippets in diagnostic messages.
 //
 // Lines are tracked by counting newline characters and columns by counting
 // grapheme clusters, mirroring how a parser is expected to compute ranges.
 type RangeScanner struct {
 	// filename is copied into the Filename field of every reported range.
 	filename string

 	// b holds the input being scanned.
 	b []byte

 	// cb is the SplitFunc used to find token boundaries in the input.
 	cb bufio.SplitFunc

 	// pos is the source position of the next byte that Scan will process.
 	pos Pos

 	// cur is the range most recently recorded by Scan.
 	cur Range

 	// tok is the token covered by cur.
 	tok []byte

 	// err is the error from the last scan, if any.
 	err error
 }

 // NewRangeScanner builds a RangeScanner over b, which is taken to be the
 // content of an entire file. Reported ranges carry the given filename and
 // positions start at InitialPos.
 //
 // Ranges are measured in grapheme clusters rather than bytes, so a SplitFunc
 // that ends a token in the middle of a grapheme cluster makes the scanner
 // produce incorrect results. bufio.ScanRunes is one such function: it emits
 // one token per UTF-8 sequence and therefore splits any grapheme cluster
 // made of several sequences, so it must not be used here.
 func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
 	// Same construction as NewRangeScannerFragment, anchored at InitialPos.
 	return &RangeScanner{
 		filename: filename,
 		b:        b,
 		cb:       cb,
 		pos:      InitialPos,
 	}
 }

 // NewRangeScannerFragment behaves like NewRangeScanner except that the
 // reported ranges are offset by start. That makes it suitable for a
 // sub-slice of a file, whereas NewRangeScanner assumes the buffer is an
 // entire file.
 func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner {
 	sc := new(RangeScanner)
 	sc.filename = filename
 	sc.b = b
 	sc.cb = cb
 	sc.pos = start
 	return sc
 }

 // Scan advances the scanner to the next token, making its range available
 // through Range and its bytes through Bytes. It returns false when the input
 // is exhausted or when scanning cannot continue; call Err afterwards to tell
 // the two cases apart.
 //
 // The position held by the scanner is used only for reporting. The buffer
 // itself is consumed from its first byte regardless of the starting position
 // given to NewRangeScannerFragment, so a fragment whose start has a non-zero
 // Byte offset is still scanned in full.
 //
 // A SplitFunc that reports an advance that is negative or that runs past the
 // end of the remaining input stops the scan with bufio.ErrNegativeAdvance or
 // bufio.ErrAdvanceTooFar respectively, instead of causing a slice-bounds
 // panic.
 func (sc *RangeScanner) Scan() bool {
 	if len(sc.b) == 0 || sc.err != nil {
 		// All done
 		return false
 	}

 	// fail records err together with an empty range at the current position
 	// and reports that scanning has stopped.
 	fail := func(err error) bool {
 		sc.err = err
 		sc.cur = Range{
 			Filename: sc.filename,
 			Start:    sc.pos,
 			End:      sc.pos,
 		}
 		sc.tok = nil
 		return false
 	}

 	// Since we're operating on an in-memory buffer, we always pass the whole
 	// remainder of the buffer to our SplitFunc and set isEOF to let it know
 	// that it has the whole thing. sc.b is re-sliced after every token, so
 	// it always is that remainder.
 	advance, token, err := sc.cb(sc.b, true)

 	// Since we are setting isEOF to true this should never happen, but
 	// if it does we will just abort and assume the SplitFunc is misbehaving.
 	if advance == 0 && token == nil && err == nil {
 		return false
 	}

 	switch {
 	case err != nil:
 		return fail(err)
 	case advance < 0:
 		return fail(bufio.ErrNegativeAdvance)
 	case advance > len(sc.b):
 		return fail(bufio.ErrAdvanceTooFar)
 	}

 	start := sc.pos
 	end := sc.pos
 	next := sc.pos

 	// adv covers the token plus any additional bytes the SplitFunc asked us
 	// to skip over by way of advance.
 	adv := sc.b[:advance]

 	// We now need to scan over adv to count the grapheme clusters so we can
 	// correctly advance Column, and count the newlines so we can correctly
 	// advance Line.
 	gsc := bufio.NewScanner(bytes.NewReader(adv))
 	gsc.Split(textseg.ScanGraphemeClusters)
 	advanced := 0
 	for gsc.Scan() {
 		gr := gsc.Bytes()
 		next.Byte += len(gr)
 		next.Column++

 		// We rely here on the fact that \r\n is considered a grapheme cluster
 		// and so we don't need to worry about miscounting additional lines
 		// on files with Windows-style line endings.
 		if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') {
 			next.Column = 1
 			next.Line++
 		}

 		if advanced < len(token) {
 			// If we've not yet found the end of our token then we'll
 			// also push our "end" marker along.
 			// (if advance > len(token) then we'll stop moving "end" early
 			// so that the caller only sees the range covered by token.)
 			end = next
 		}
 		advanced += len(gr)
 	}
 	if gerr := gsc.Err(); gerr != nil {
 		// The grapheme scan stopped early, so the computed position would
 		// not match the bytes consumed; surface the failure instead.
 		return fail(gerr)
 	}

 	sc.tok = token
 	sc.cur = Range{
 		Filename: sc.filename,
 		Start:    start,
 		End:      end,
 	}
 	sc.pos = next
 	sc.b = sc.b[advance:]
 	return true
 }

 // Range returns the range recorded by the most recent call to Scan. After a
 // call that returned true it covers the token just scanned. After a call
 // that stopped because the SplitFunc returned an error it is an empty range
 // located at the position where that scan began.
 func (sc *RangeScanner) Range() Range {
 	return sc.cur
 }

 // Bytes returns the token that corresponds to the range that would be
 // returned by Range. The slice is handed back as stored, without copying.
 // It is nil after a scan that stopped because the SplitFunc returned an
 // error.
 func (sc *RangeScanner) Bytes() []byte {
 	return sc.tok
 }

 // Err can be called after Scan returns false to find out whether scanning
 // stopped because of an error. It returns that error, or nil when no error
 // has been recorded.
 func (sc *RangeScanner) Err() error {
 	return sc.err
 }
	package hcl

	import (
	"bufio"
	"bytes"

	"github.com/apparentlymart/go-textseg/v13/textseg"
	)

	// RangeScanner walks an in-memory buffer with a bufio.SplitFunc and reports
	// the source range covered by each token that the SplitFunc yields.
	//
	// Pairing it with bufio.ScanLines, for instance, produces one range per line
	// of the file with the line terminators left out, which is convenient when
	// rendering source code snippets in diagnostic messages.
	//
	// Lines are tracked by counting newline characters and columns by counting
	// grapheme clusters, mirroring how a parser is expected to compute ranges.
	type RangeScanner struct {
		// filename is copied into the Filename field of every reported range.
		filename string

		// b holds the input being scanned.
		b []byte

		// cb is the SplitFunc used to find token boundaries in the input.
		cb bufio.SplitFunc

		// pos is the source position of the next byte that Scan will process.
		pos Pos

		// cur is the range most recently recorded by Scan.
		cur Range

		// tok is the token covered by cur.
		tok []byte

		// err is the error from the last scan, if any.
		err error
	}

	// NewRangeScanner builds a RangeScanner over b, which is taken to be the
	// content of an entire file. Reported ranges carry the given filename and
	// positions start at InitialPos.
	//
	// Ranges are measured in grapheme clusters rather than bytes, so a SplitFunc
	// that ends a token in the middle of a grapheme cluster makes the scanner
	// produce incorrect results. bufio.ScanRunes is one such function: it emits
	// one token per UTF-8 sequence and therefore splits any grapheme cluster
	// made of several sequences, so it must not be used here.
	func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
		// Same construction as NewRangeScannerFragment, anchored at InitialPos.
		return &RangeScanner{
			filename: filename,
			b:        b,
			cb:       cb,
			pos:      InitialPos,
		}
	}

	// NewRangeScannerFragment behaves like NewRangeScanner except that the
	// reported ranges are offset by start. That makes it suitable for a
	// sub-slice of a file, whereas NewRangeScanner assumes the buffer is an
	// entire file.
	func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner {
		sc := new(RangeScanner)
		sc.filename = filename
		sc.b = b
		sc.cb = cb
		sc.pos = start
		return sc
	}

	func (sc *RangeScanner) Scan() bool {
	if sc.pos.Byte >= len(sc.b) \|\| sc.err != nil {
	// All done
	return false
	}

	// Since we're operating on an in-memory buffer, we always pass the whole
	// remainder of the buffer to our SplitFunc and set isEOF to let it know
	// that it has the whole thing.
	advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true)

	// Since we are setting isEOF to true this should never happen, but
	// if it does we will just abort and assume the SplitFunc is misbehaving.
	if advance == 0 && token == nil && err == nil {
	return false
	}

	if err != nil {
	sc.err = err
	sc.cur = Range{
	Filename: sc.filename,
	Start: sc.pos,
	End: sc.pos,
	}
	sc.tok = nil
	return false
	}

	sc.tok = token
	start := sc.pos
	end := sc.pos
	new := sc.pos

	// adv is similar to token but it also includes any subsequent characters
	// we're being asked to skip over by the SplitFunc.
	// adv is a slice covering any additional bytes we are skipping over, based
	// on what the SplitFunc told us to do with advance.
	adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance]

	// We now need to scan over our token to count the grapheme clusters
	// so we can correctly advance Column, and count the newlines so we
	// can correctly advance Line.
	advR := bytes.NewReader(adv)
	gsc := bufio.NewScanner(advR)
	advanced := 0
	gsc.Split(textseg.ScanGraphemeClusters)
	for gsc.Scan() {
	gr := gsc.Bytes()
	new.Byte += len(gr)
	new.Column++

	// We rely here on the fact that \r\n is considered a grapheme cluster
	// and so we don't need to worry about miscounting additional lines
	// on files with Windows-style line endings.
	if len(gr) != 0 && (gr[0] == '\r' \|\| gr[0] == '\n') {
	new.Column = 1
	new.Line++
	}

	if advanced < len(token) {
	// If we've not yet found the end of our token then we'll
	// also push our "end" marker along.
	// (if advance > len(token) then we'll stop moving "end" early
	// so that the caller only sees the range covered by token.)
	end = new
	}
	advanced += len(gr)
	}

	sc.cur = Range{
	Filename: sc.filename,
	Start: start,
	End: end,
	}
	sc.pos = new
	return true
	}

	// Range returns the range recorded by the most recent call to Scan. After a
	// call that returned true it covers the token just scanned. After a call
	// that stopped because the SplitFunc returned an error it is an empty range
	// located at the position where that scan began.
	func (sc *RangeScanner) Range() Range {
		return sc.cur
	}

	// Bytes returns the token that corresponds to the range that would be
	// returned by Range. The slice is handed back as stored, without copying.
	// It is nil after a scan that stopped because the SplitFunc returned an
	// error.
	func (sc *RangeScanner) Bytes() []byte {
		return sc.tok
	}

	// Err can be called after Scan returns false to find out whether scanning
	// stopped because of an error. It returns that error, or nil when no error
	// has been recorded.
	func (sc *RangeScanner) Err() error {
		return sc.err
	}