v1.14.6/helper/random/string_generator.go - hashicorp/vault - Git at Google

 // Copyright (c) HashiCorp, Inc.
 // SPDX-License-Identifier: MPL-2.0

 package random

 import (
 	"context"
 	"crypto/rand"
 	"fmt"
 	"io"
 	"math"
 	"sort"
 	"sync"
 	"time"
 	"unicode"

 	"github.com/hashicorp/go-multierror"
 )

 var (
 	// Base character sets. Each literal is passed through sortCharset before being stored.
 	LowercaseCharset   = sortCharset("abcdefghijklmnopqrstuvwxyz")
 	UppercaseCharset   = sortCharset("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
 	NumericCharset     = sortCharset("0123456789")
 	FullSymbolCharset  = sortCharset("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
 	ShortSymbolCharset = sortCharset("-")

 	// Composite character sets, built by concatenating the base sets above and re-sorting the result.
 	AlphabeticCharset              = sortCharset(UppercaseCharset + LowercaseCharset)
 	AlphaNumericCharset            = sortCharset(AlphabeticCharset + NumericCharset)
 	AlphaNumericShortSymbolCharset = sortCharset(AlphaNumericCharset + ShortSymbolCharset)
 	AlphaNumericFullSymbolCharset  = sortCharset(AlphaNumericCharset + FullSymbolCharset)

 	// Rune-slice forms of the base character sets.
 	LowercaseRuneset   = []rune(LowercaseCharset)
 	UppercaseRuneset   = []rune(UppercaseCharset)
 	NumericRuneset     = []rune(NumericCharset)
 	FullSymbolRuneset  = []rune(FullSymbolCharset)
 	ShortSymbolRuneset = []rune(ShortSymbolCharset)

 	// Rune-slice forms of the composite character sets.
 	AlphabeticRuneset              = []rune(AlphabeticCharset)
 	AlphaNumericRuneset            = []rune(AlphaNumericCharset)
 	AlphaNumericShortSymbolRuneset = []rune(AlphaNumericShortSymbolCharset)
 	AlphaNumericFullSymbolRuneset  = []rune(AlphaNumericFullSymbolCharset)

 	// DefaultStringGenerator has reasonable default rules for generating strings:
 	// a length of 20 and one CharsetRule (MinChars: 1) for each of the lowercase,
 	// uppercase, numeric and short-symbol runesets.
 	DefaultStringGenerator = &StringGenerator{
 		Length: 20,
 		Rules: []Rule{
 			CharsetRule{
 				Charset:  LowercaseRuneset,
 				MinChars: 1,
 			},
 			CharsetRule{
 				Charset:  UppercaseRuneset,
 				MinChars: 1,
 			},
 			CharsetRule{
 				Charset:  NumericRuneset,
 				MinChars: 1,
 			},
 			CharsetRule{
 				Charset:  ShortSymbolRuneset,
 				MinChars: 1,
 			},
 		},
 	}
 )

 // sortCharset returns chars with its runes reordered by sort.Sort, using the
 // ordering defined by the package's runes type.
 func sortCharset(chars string) string {
 	ordered := runes(chars)
 	sort.Sort(ordered)

 	return string(ordered)
 }

 // StringGenerator generates random strings from the provided charset & adhering to a set of rules. The set of rules
 // are things like CharsetRule which requires a certain number of characters from a sub-charset.
 type StringGenerator struct {
 	// Length of the string to generate.
 	Length int `mapstructure:"length" json:"length"`

 	// Rules the generated strings must adhere to.
 	Rules serializableRules `mapstructure:"-" json:"rule"` // This is "rule" in JSON so it matches the HCL property type

 	// charset is the set of runes candidates are drawn from. It is computed from the rules
 	// (see validateConfig) and is not directly configurable.
 	charset     runes
 	// charsetLock guards reads and writes of charset.
 	charsetLock sync.RWMutex
 }

 // Generate produces a random string drawn from the generator's charset that satisfies
 // every configured rule.
 //
 // rng is optional; when nil, the reader from crypto/rand is used further down the call chain.
 // If ctx carries no deadline, a one second timeout is applied so the retry loop always ends.
 // The configuration is validated first and any validation error is returned unchanged.
 func (g *StringGenerator) Generate(ctx context.Context, rng io.Reader) (str string, err error) {
 	_, hasDeadline := ctx.Deadline()
 	if !hasDeadline {
 		// Ensure there's a timeout on the context
 		bounded, cancel := context.WithTimeout(ctx, 1*time.Second)
 		defer cancel()
 		ctx = bounded
 	}

 	// The generator may have been built by hand rather than parsed from HCL, so check it here.
 	if cfgErr := g.validateConfig(); cfgErr != nil {
 		return "", cfgErr
 	}

 	// Keep producing candidates until one passes the rules or the context ends.
 	for {
 		select {
 		case <-ctx.Done():
 			return "", fmt.Errorf("timed out generating string")
 		default:
 		}

 		attempt, genErr := g.generate(rng)
 		if genErr != nil {
 			return "", genErr
 		}
 		if attempt != "" {
 			return attempt, nil
 		}
 		// An empty result means the candidate failed a rule; try again.
 	}
 }

 // generate makes a single attempt at producing a string. It returns ("", nil) when the
 // candidate fails any rule, so the caller can retry, and a non-nil error only when the
 // random characters could not be produced.
 func (g *StringGenerator) generate(rng io.Reader) (str string, err error) {
 	// If performance improvements need to be made, this can be changed to read a batch of
 	// potential strings at once rather than one at a time. This will significantly
 	// improve performance, but at the cost of added complexity.

 	// Take a snapshot of the charset under the read lock.
 	g.charsetLock.RLock()
 	available := g.charset
 	g.charsetLock.RUnlock()

 	attempt, genErr := randomRunes(rng, available, g.Length)
 	if genErr != nil {
 		return "", fmt.Errorf("unable to generate random characters: %w", genErr)
 	}

 	for i := range g.Rules {
 		if passed := g.Rules[i].Pass(attempt); !passed {
 			return "", nil
 		}
 	}

 	// Every rule accepted the candidate.
 	return string(attempt), nil
 }

 const (
 	// maxCharsetLen is the maximum length a charset is allowed to be when generating a candidate string.
 	// Every charset index is derived from a single random byte, so this is the number of distinct
 	// values one byte can hold (256).
 	maxCharsetLen = math.MaxUint8 + 1
 )

 // randomRunes creates a random slice of runes of the requested length, drawn from charset.
 //
 // rng supplies the random bytes; when nil, crypto/rand.Reader is used. The charset is limited to
 // maxCharsetLen (256) characters, but could be expanded if needed. Expanding the maximum charset
 // size will decrease performance because it will need to combine bytes into a larger integer
 // using binary.BigEndian.Uint16() function.
 //
 // An error is returned when charset is empty or longer than maxCharsetLen, when length is not
 // positive, or when reading from rng fails.
 func randomRunes(rng io.Reader, charset []rune, length int) (candidate []rune, err error) {
 	if len(charset) == 0 {
 		return nil, fmt.Errorf("no charset specified")
 	}
 	if len(charset) > maxCharsetLen {
 		return nil, fmt.Errorf("charset is too long: limited to %d characters", maxCharsetLen)
 	}
 	if length <= 0 {
 		return nil, fmt.Errorf("unable to generate a zero or negative length runeset")
 	}

 	// This can't always select indexes from [0-maxCharsetLen) because it could introduce bias to the character selection.
 	// For instance, if the length of the charset is [a-zA-Z0-9-] (length of 63):
 	// RNG ranges: [0-62][63-125][126-188][189-251] will equally select from the entirety of the charset. However,
 	// the RNG values [252-255] will select the first 4 characters of the charset while ignoring the remaining 59.
 	// This results in a bias towards the front of the charset.
 	//
 	// To avoid this, we determine the largest integer multiplier of the charset length that is <= maxCharsetLen
 	// For instance, if the maxCharsetLen is 256 (the size of one byte) and the charset is length 63, the multiplier
 	// equals 4:
 	//   256/63 => 4.06
 	//   Trunc(4.06) => 4
 	// Multiply by the charset length
 	// Subtract 1 to account for 0-based counting and you get the max index value: 251
 	maxAllowedRNGValue := (maxCharsetLen/len(charset))*len(charset) - 1

 	// rngBufferMultiplier increases the size of the RNG buffer to account for lost
 	// indexes due to the maxAllowedRNGValue
 	rngBufferMultiplier := 1.0

 	// Only over-read when some byte values will actually be rejected. maxAllowedRNGValue never
 	// exceeds maxCharsetLen-1, so it must be compared against that bound; comparing against
 	// maxCharsetLen itself is always true.
 	if maxAllowedRNGValue < maxCharsetLen-1 {
 		// Anything more complicated than an arbitrary percentage appears to have little practical performance benefit
 		rngBufferMultiplier = 1.5
 	}

 	// Default to the standard crypto reader if one isn't provided
 	if rng == nil {
 		rng = rand.Reader
 	}

 	// maxAllowedRNGValue is at most 255 here, so the conversion to byte cannot overflow.
 	largestUsable := byte(maxAllowedRNGValue)
 	charsetLen := len(charset)

 	picked := make([]rune, 0, length)

 	// One buffer is reused for every read; only the first numBytes of it are consumed each round.
 	data := make([]byte, int(float64(length)*rngBufferMultiplier))

 	for len(picked) < length {
 		// Generate a bunch of indexes
 		numBytes, readErr := rng.Read(data)
 		if readErr != nil {
 			return nil, readErr
 		}

 		// Append characters until either we're out of indexes or the length is long enough
 		for _, b := range data[:numBytes] {
 			// Values past the last full multiple of the charset length would bias the selection.
 			if b > largestUsable {
 				continue
 			}

 			// The modulo is done in int so a 256-character charset needs no special case.
 			picked = append(picked, charset[int(b)%charsetLen])

 			if len(picked) == length {
 				break
 			}
 		}
 	}

 	return picked, nil
 }

 // validateConfig checks that the generator can successfully produce a string. It verifies
 // that Length is positive and large enough for the rules' combined minimum length, fills in
 // the cached charset from the rules when it is empty, and requires every charset rune to be
 // printable. All problems found are collected and returned together; nil means valid.
 func (g *StringGenerator) validateConfig() (err error) {
 	problems := &multierror.Error{}

 	// The rules' minimum lengths must fit within the requested length.
 	switch required := getMinLength(g.Rules); {
 	case g.Length <= 0:
 		problems = multierror.Append(problems, fmt.Errorf("length must be > 0"))
 	case g.Length < required:
 		problems = multierror.Append(problems, fmt.Errorf("specified rules require at least %d characters but %d is specified", required, g.Length))
 	}

 	g.charsetLock.Lock()
 	defer g.charsetLock.Unlock()

 	// This mutates the generator, but caching the charset avoids recomputing it on every generation.
 	if len(g.charset) == 0 {
 		g.charset = getChars(g.Rules)
 	}

 	if len(g.charset) == 0 {
 		problems = multierror.Append(problems, fmt.Errorf("no charset specified"))
 		return problems.ErrorOrNil()
 	}

 	// Report a non-printable rune once, no matter how many there are.
 	for _, r := range g.charset {
 		if unicode.IsPrint(r) {
 			continue
 		}
 		problems = multierror.Append(problems, fmt.Errorf("non-printable character in charset"))
 		break
 	}

 	return problems.ErrorOrNil()
 }

 // getMinLength sums the values reported by every rule that implements the optional
 // `MinLength() int` method. Rules without that method contribute nothing.
 func getMinLength(rules []Rule) (minLen int) {
 	for _, rule := range rules {
 		if provider, ok := rule.(interface{ MinLength() int }); ok {
 			minLen += provider.MinLength()
 		}
 	}

 	return minLen
 }

 // getChars gathers the runes reported by every rule that implements the optional
 // `Chars() []rune` method and returns them after passing through deduplicateRunes.
 func getChars(rules []Rule) (chars []rune) {
 	var collected []rune

 	for _, rule := range rules {
 		if provider, ok := rule.(interface{ Chars() []rune }); ok {
 			collected = append(collected, provider.Chars()...)
 		}
 	}

 	return deduplicateRunes(collected)
 }

 // deduplicateRunes returns a new slice holding each distinct rune of original exactly once,
 // ordered by sort.Sort over the package's runes type. An empty input yields nil.
 func deduplicateRunes(original []rune) (deduped []rune) {
 	if len(original) == 0 {
 		return nil
 	}

 	seen := make(map[rune]struct{})
 	var unique []rune

 	for _, r := range original {
 		if _, dup := seen[r]; !dup {
 			seen[r] = struct{}{}
 			unique = append(unique, r)
 		}
 	}

 	// Sorting is not required for correctness; it just makes the charset easier to read.
 	sort.Sort(runes(unique))

 	return unique
 }
	// Copyright (c) HashiCorp, Inc.
	// SPDX-License-Identifier: MPL-2.0

	package random

	import (
	"context"
	"crypto/rand"
	"fmt"
	"io"
	"math"
	"sort"
	"sync"
	"time"
	"unicode"

	"github.com/hashicorp/go-multierror"
	)

	var (
	LowercaseCharset = sortCharset("abcdefghijklmnopqrstuvwxyz")
	UppercaseCharset = sortCharset("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
	NumericCharset = sortCharset("0123456789")
	FullSymbolCharset = sortCharset("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{\|}~")
	ShortSymbolCharset = sortCharset("-")

	AlphabeticCharset = sortCharset(UppercaseCharset + LowercaseCharset)
	AlphaNumericCharset = sortCharset(AlphabeticCharset + NumericCharset)
	AlphaNumericShortSymbolCharset = sortCharset(AlphaNumericCharset + ShortSymbolCharset)
	AlphaNumericFullSymbolCharset = sortCharset(AlphaNumericCharset + FullSymbolCharset)

	LowercaseRuneset = []rune(LowercaseCharset)
	UppercaseRuneset = []rune(UppercaseCharset)
	NumericRuneset = []rune(NumericCharset)
	FullSymbolRuneset = []rune(FullSymbolCharset)
	ShortSymbolRuneset = []rune(ShortSymbolCharset)

	AlphabeticRuneset = []rune(AlphabeticCharset)
	AlphaNumericRuneset = []rune(AlphaNumericCharset)
	AlphaNumericShortSymbolRuneset = []rune(AlphaNumericShortSymbolCharset)
	AlphaNumericFullSymbolRuneset = []rune(AlphaNumericFullSymbolCharset)

	// DefaultStringGenerator has reasonable default rules for generating strings
	DefaultStringGenerator = &StringGenerator{
	Length: 20,
	Rules: []Rule{
	CharsetRule{
	Charset: LowercaseRuneset,
	MinChars: 1,
	},
	CharsetRule{
	Charset: UppercaseRuneset,
	MinChars: 1,
	},
	CharsetRule{
	Charset: NumericRuneset,
	MinChars: 1,
	},
	CharsetRule{
	Charset: ShortSymbolRuneset,
	MinChars: 1,
	},
	},
	}
	)

	// sortCharset returns chars with its runes reordered by sort.Sort, using the
	// ordering defined by the package's runes type.
	func sortCharset(chars string) string {
		ordered := runes(chars)
		sort.Sort(ordered)

		return string(ordered)
	}

	// StringGenerator generates random strings from the provided charset & adhering to a set of rules. The set of rules
	// are things like CharsetRule which requires a certain number of characters from a sub-charset.
	type StringGenerator struct {
	// Length of the string to generate.
	Length int `mapstructure:"length" json:"length"`

	// Rules the generated strings must adhere to.
	Rules serializableRules `mapstructure:"-" json:"rule"` // This is "rule" in JSON so it matches the HCL property type

	// charset is the set of runes candidates are drawn from. It is computed from the rules
	// (see validateConfig) and is not directly configurable.
	charset runes
	// charsetLock guards reads and writes of charset.
	charsetLock sync.RWMutex
	}

	// Generate produces a random string drawn from the generator's charset that satisfies
	// every configured rule.
	//
	// rng is optional; when nil, the reader from crypto/rand is used further down the call chain.
	// If ctx carries no deadline, a one second timeout is applied so the retry loop always ends.
	// The configuration is validated first and any validation error is returned unchanged.
	func (g *StringGenerator) Generate(ctx context.Context, rng io.Reader) (str string, err error) {
		_, hasDeadline := ctx.Deadline()
		if !hasDeadline {
			// Ensure there's a timeout on the context
			bounded, cancel := context.WithTimeout(ctx, 1*time.Second)
			defer cancel()
			ctx = bounded
		}

		// The generator may have been built by hand rather than parsed from HCL, so check it here.
		if cfgErr := g.validateConfig(); cfgErr != nil {
			return "", cfgErr
		}

		// Keep producing candidates until one passes the rules or the context ends.
		for {
			select {
			case <-ctx.Done():
				return "", fmt.Errorf("timed out generating string")
			default:
			}

			attempt, genErr := g.generate(rng)
			if genErr != nil {
				return "", genErr
			}
			if attempt != "" {
				return attempt, nil
			}
			// An empty result means the candidate failed a rule; try again.
		}
	}

	// generate makes a single attempt at producing a string. It returns ("", nil) when the
	// candidate fails any rule, so the caller can retry, and a non-nil error only when the
	// random characters could not be produced.
	func (g *StringGenerator) generate(rng io.Reader) (str string, err error) {
		// If performance improvements need to be made, this can be changed to read a batch of
		// potential strings at once rather than one at a time. This will significantly
		// improve performance, but at the cost of added complexity.

		// Take a snapshot of the charset under the read lock.
		g.charsetLock.RLock()
		available := g.charset
		g.charsetLock.RUnlock()

		attempt, genErr := randomRunes(rng, available, g.Length)
		if genErr != nil {
			return "", fmt.Errorf("unable to generate random characters: %w", genErr)
		}

		for i := range g.Rules {
			if passed := g.Rules[i].Pass(attempt); !passed {
				return "", nil
			}
		}

		// Every rule accepted the candidate.
		return string(attempt), nil
	}

	const (
		// maxCharsetLen is the maximum length a charset is allowed to be when generating a candidate string.
		// Every charset index is derived from a single random byte, so this is the number of distinct
		// values one byte can hold (256).
		maxCharsetLen = math.MaxUint8 + 1
	)

	// randomRunes creates a random slice of runes of the requested length, drawn from charset.
	//
	// rng supplies the random bytes; when nil, crypto/rand.Reader is used. The charset is limited to
	// maxCharsetLen (256) characters, but could be expanded if needed. Expanding the maximum charset
	// size will decrease performance because it will need to combine bytes into a larger integer
	// using binary.BigEndian.Uint16() function.
	//
	// An error is returned when charset is empty or longer than maxCharsetLen, when length is not
	// positive, or when reading from rng fails.
	func randomRunes(rng io.Reader, charset []rune, length int) (candidate []rune, err error) {
		if len(charset) == 0 {
			return nil, fmt.Errorf("no charset specified")
		}
		if len(charset) > maxCharsetLen {
			return nil, fmt.Errorf("charset is too long: limited to %d characters", maxCharsetLen)
		}
		if length <= 0 {
			return nil, fmt.Errorf("unable to generate a zero or negative length runeset")
		}

		// This can't always select indexes from [0-maxCharsetLen) because it could introduce bias to the character selection.
		// For instance, if the length of the charset is [a-zA-Z0-9-] (length of 63):
		// RNG ranges: [0-62][63-125][126-188][189-251] will equally select from the entirety of the charset. However,
		// the RNG values [252-255] will select the first 4 characters of the charset while ignoring the remaining 59.
		// This results in a bias towards the front of the charset.
		//
		// To avoid this, we determine the largest integer multiplier of the charset length that is <= maxCharsetLen
		// For instance, if the maxCharsetLen is 256 (the size of one byte) and the charset is length 63, the multiplier
		// equals 4:
		//   256/63 => 4.06
		//   Trunc(4.06) => 4
		// Multiply by the charset length
		// Subtract 1 to account for 0-based counting and you get the max index value: 251
		maxAllowedRNGValue := (maxCharsetLen/len(charset))*len(charset) - 1

		// rngBufferMultiplier increases the size of the RNG buffer to account for lost
		// indexes due to the maxAllowedRNGValue
		rngBufferMultiplier := 1.0

		// Only over-read when some byte values will actually be rejected. maxAllowedRNGValue never
		// exceeds maxCharsetLen-1, so it must be compared against that bound; comparing against
		// maxCharsetLen itself is always true.
		if maxAllowedRNGValue < maxCharsetLen-1 {
			// Anything more complicated than an arbitrary percentage appears to have little practical performance benefit
			rngBufferMultiplier = 1.5
		}

		// Default to the standard crypto reader if one isn't provided
		if rng == nil {
			rng = rand.Reader
		}

		// maxAllowedRNGValue is at most 255 here, so the conversion to byte cannot overflow.
		largestUsable := byte(maxAllowedRNGValue)
		charsetLen := len(charset)

		picked := make([]rune, 0, length)

		// One buffer is reused for every read; only the first numBytes of it are consumed each round.
		data := make([]byte, int(float64(length)*rngBufferMultiplier))

		for len(picked) < length {
			// Generate a bunch of indexes
			numBytes, readErr := rng.Read(data)
			if readErr != nil {
				return nil, readErr
			}

			// Append characters until either we're out of indexes or the length is long enough
			for _, b := range data[:numBytes] {
				// Values past the last full multiple of the charset length would bias the selection.
				if b > largestUsable {
					continue
				}

				// The modulo is done in int so a 256-character charset needs no special case.
				picked = append(picked, charset[int(b)%charsetLen])

				if len(picked) == length {
					break
				}
			}
		}

		return picked, nil
	}

	// validateConfig checks that the generator can successfully produce a string. It verifies
	// that Length is positive and large enough for the rules' combined minimum length, fills in
	// the cached charset from the rules when it is empty, and requires every charset rune to be
	// printable. All problems found are collected and returned together; nil means valid.
	func (g *StringGenerator) validateConfig() (err error) {
		problems := &multierror.Error{}

		// The rules' minimum lengths must fit within the requested length.
		switch required := getMinLength(g.Rules); {
		case g.Length <= 0:
			problems = multierror.Append(problems, fmt.Errorf("length must be > 0"))
		case g.Length < required:
			problems = multierror.Append(problems, fmt.Errorf("specified rules require at least %d characters but %d is specified", required, g.Length))
		}

		g.charsetLock.Lock()
		defer g.charsetLock.Unlock()

		// This mutates the generator, but caching the charset avoids recomputing it on every generation.
		if len(g.charset) == 0 {
			g.charset = getChars(g.Rules)
		}

		if len(g.charset) == 0 {
			problems = multierror.Append(problems, fmt.Errorf("no charset specified"))
			return problems.ErrorOrNil()
		}

		// Report a non-printable rune once, no matter how many there are.
		for _, r := range g.charset {
			if unicode.IsPrint(r) {
				continue
			}
			problems = multierror.Append(problems, fmt.Errorf("non-printable character in charset"))
			break
		}

		return problems.ErrorOrNil()
	}

	// getMinLength sums the values reported by every rule that implements the optional
	// `MinLength() int` method. Rules without that method contribute nothing.
	func getMinLength(rules []Rule) (minLen int) {
		for _, rule := range rules {
			if provider, ok := rule.(interface{ MinLength() int }); ok {
				minLen += provider.MinLength()
			}
		}

		return minLen
	}

	// getChars gathers the runes reported by every rule that implements the optional
	// `Chars() []rune` method and returns them after passing through deduplicateRunes.
	func getChars(rules []Rule) (chars []rune) {
		var collected []rune

		for _, rule := range rules {
			if provider, ok := rule.(interface{ Chars() []rune }); ok {
				collected = append(collected, provider.Chars()...)
			}
		}

		return deduplicateRunes(collected)
	}

	// deduplicateRunes returns a new slice holding each distinct rune of original exactly once,
	// ordered by sort.Sort over the package's runes type. An empty input yields nil.
	func deduplicateRunes(original []rune) (deduped []rune) {
		if len(original) == 0 {
			return nil
		}

		seen := make(map[rune]struct{})
		var unique []rune

		for _, r := range original {
			if _, dup := seen[r]; !dup {
				seen[r] = struct{}{}
				unique = append(unique, r)
			}
		}

		// Sorting is not required for correctness; it just makes the charset easier to read.
		sort.Sort(runes(unique))

		return unique
	}