blob: 48f08ed8924afe49a86cdc809b8fdc445c0d59de [file] [log] [blame]
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package random
import (
"context"
"crypto/rand"
"fmt"
"io"
"math"
"sort"
"sync"
"time"
"unicode"
"github.com/hashicorp/go-multierror"
)
var (
LowercaseCharset = sortCharset("abcdefghijklmnopqrstuvwxyz")
UppercaseCharset = sortCharset("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
NumericCharset = sortCharset("0123456789")
FullSymbolCharset = sortCharset("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
ShortSymbolCharset = sortCharset("-")
AlphabeticCharset = sortCharset(UppercaseCharset + LowercaseCharset)
AlphaNumericCharset = sortCharset(AlphabeticCharset + NumericCharset)
AlphaNumericShortSymbolCharset = sortCharset(AlphaNumericCharset + ShortSymbolCharset)
AlphaNumericFullSymbolCharset = sortCharset(AlphaNumericCharset + FullSymbolCharset)
LowercaseRuneset = []rune(LowercaseCharset)
UppercaseRuneset = []rune(UppercaseCharset)
NumericRuneset = []rune(NumericCharset)
FullSymbolRuneset = []rune(FullSymbolCharset)
ShortSymbolRuneset = []rune(ShortSymbolCharset)
AlphabeticRuneset = []rune(AlphabeticCharset)
AlphaNumericRuneset = []rune(AlphaNumericCharset)
AlphaNumericShortSymbolRuneset = []rune(AlphaNumericShortSymbolCharset)
AlphaNumericFullSymbolRuneset = []rune(AlphaNumericFullSymbolCharset)
// DefaultStringGenerator has reasonable default rules for generating strings
DefaultStringGenerator = &StringGenerator{
Length: 20,
Rules: []Rule{
CharsetRule{
Charset: LowercaseRuneset,
MinChars: 1,
},
CharsetRule{
Charset: UppercaseRuneset,
MinChars: 1,
},
CharsetRule{
Charset: NumericRuneset,
MinChars: 1,
},
CharsetRule{
Charset: ShortSymbolRuneset,
MinChars: 1,
},
},
}
)
func sortCharset(chars string) string {
r := runes(chars)
sort.Sort(r)
return string(r)
}
// StringGenerator generates random strings from the provided charset & adhering to a set of rules. The set of rules
// are things like CharsetRule which requires a certain number of characters from a sub-charset.
type StringGenerator struct {
// Length of the string to generate.
Length int `mapstructure:"length" json:"length"`
// Rules the generated strings must adhere to.
Rules serializableRules `mapstructure:"-" json:"rule"` // This is "rule" in JSON so it matches the HCL property type
// CharsetRule to choose runes from. This is computed from the rules, not directly configurable
charset runes
charsetLock sync.RWMutex
}
// Generate a random string from the charset and adhering to the provided rules.
// The io.Reader is optional. If not provided, it will default to the reader from crypto/rand
func (g *StringGenerator) Generate(ctx context.Context, rng io.Reader) (str string, err error) {
if _, hasTimeout := ctx.Deadline(); !hasTimeout {
var cancel func()
ctx, cancel = context.WithTimeout(ctx, 1*time.Second) // Ensure there's a timeout on the context
defer cancel()
}
// Ensure the generator is configured well since it may be manually created rather than parsed from HCL
err = g.validateConfig()
if err != nil {
return "", err
}
LOOP:
for {
select {
case <-ctx.Done():
return "", fmt.Errorf("timed out generating string")
default:
str, err = g.generate(rng)
if err != nil {
return "", err
}
if str == "" {
continue LOOP
}
return str, err
}
}
}
func (g *StringGenerator) generate(rng io.Reader) (str string, err error) {
// If performance improvements need to be made, this can be changed to read a batch of
// potential strings at once rather than one at a time. This will significantly
// improve performance, but at the cost of added complexity.
g.charsetLock.RLock()
charset := g.charset
g.charsetLock.RUnlock()
candidate, err := randomRunes(rng, charset, g.Length)
if err != nil {
return "", fmt.Errorf("unable to generate random characters: %w", err)
}
for _, rule := range g.Rules {
if !rule.Pass(candidate) {
return "", nil
}
}
// Passed all rules
return string(candidate), nil
}
const (
// maxCharsetLen is the maximum length a charset is allowed to be when generating a candidate string.
// This is the total number of numbers available for selecting an index out of the charset slice.
maxCharsetLen = 256
)
// randomRunes creates a random string based on the provided charset. The charset is limited to 255 characters, but
// could be expanded if needed. Expanding the maximum charset size will decrease performance because it will need to
// combine bytes into a larger integer using binary.BigEndian.Uint16() function.
func randomRunes(rng io.Reader, charset []rune, length int) (candidate []rune, err error) {
if len(charset) == 0 {
return nil, fmt.Errorf("no charset specified")
}
if len(charset) > maxCharsetLen {
return nil, fmt.Errorf("charset is too long: limited to %d characters", math.MaxUint8)
}
if length <= 0 {
return nil, fmt.Errorf("unable to generate a zero or negative length runeset")
}
// This can't always select indexes from [0-maxCharsetLen) because it could introduce bias to the character selection.
// For instance, if the length of the charset is [a-zA-Z0-9-] (length of 63):
// RNG ranges: [0-62][63-125][126-188][189-251] will equally select from the entirety of the charset. However,
// the RNG values [252-255] will select the first 4 characters of the charset while ignoring the remaining 59.
// This results in a bias towards the front of the charset.
//
// To avoid this, we determine the largest integer multiplier of the charset length that is <= maxCharsetLen
// For instance, if the maxCharsetLen is 256 (the size of one byte) and the charset is length 63, the multiplier
// equals 4:
// 256/63 => 4.06
// Trunc(4.06) => 4
// Multiply by the charset length
// Subtract 1 to account for 0-based counting and you get the max index value: 251
maxAllowedRNGValue := (maxCharsetLen/len(charset))*len(charset) - 1
// rngBufferMultiplier increases the size of the RNG buffer to account for lost
// indexes due to the maxAllowedRNGValue
rngBufferMultiplier := 1.0
// Don't set a multiplier if we are able to use the entire range of indexes
if maxAllowedRNGValue < maxCharsetLen {
// Anything more complicated than an arbitrary percentage appears to have little practical performance benefit
rngBufferMultiplier = 1.5
}
// Default to the standard crypto reader if one isn't provided
if rng == nil {
rng = rand.Reader
}
charsetLen := byte(len(charset))
runes := make([]rune, 0, length)
for len(runes) < length {
// Generate a bunch of indexes
data := make([]byte, int(float64(length)*rngBufferMultiplier))
numBytes, err := rng.Read(data)
if err != nil {
return nil, err
}
// Append characters until either we're out of indexes or the length is long enough
for i := 0; i < numBytes; i++ {
// Be careful to ensure that maxAllowedRNGValue isn't >= 256 as it will overflow and this
// comparison will prevent characters from being selected from the charset
if data[i] > byte(maxAllowedRNGValue) {
continue
}
index := data[i]
if len(charset) != maxCharsetLen {
index = index % charsetLen
}
r := charset[index]
runes = append(runes, r)
if len(runes) == length {
break
}
}
}
return runes, nil
}
// validateConfig of the generator to ensure that we can successfully generate a string.
func (g *StringGenerator) validateConfig() (err error) {
merr := &multierror.Error{}
// Ensure the sum of minimum lengths in the rules doesn't exceed the length specified
minLen := getMinLength(g.Rules)
if g.Length <= 0 {
merr = multierror.Append(merr, fmt.Errorf("length must be > 0"))
} else if g.Length < minLen {
merr = multierror.Append(merr, fmt.Errorf("specified rules require at least %d characters but %d is specified", minLen, g.Length))
}
g.charsetLock.Lock()
defer g.charsetLock.Unlock()
// Ensure we have a charset & all characters are printable
if len(g.charset) == 0 {
// Yes this is mutating the generator but this is done so we don't have to compute this on every generation
g.charset = getChars(g.Rules)
}
if len(g.charset) == 0 {
merr = multierror.Append(merr, fmt.Errorf("no charset specified"))
} else {
for _, r := range g.charset {
if !unicode.IsPrint(r) {
merr = multierror.Append(merr, fmt.Errorf("non-printable character in charset"))
break
}
}
}
return merr.ErrorOrNil()
}
// getMinLength from the rules using the optional interface: `MinLength() int`
func getMinLength(rules []Rule) (minLen int) {
type minLengthProvider interface {
MinLength() int
}
for _, rule := range rules {
mlp, ok := rule.(minLengthProvider)
if !ok {
continue
}
minLen += mlp.MinLength()
}
return minLen
}
// getChars from the rules using the optional interface: `Chars() []rune`
func getChars(rules []Rule) (chars []rune) {
type charsetProvider interface {
Chars() []rune
}
for _, rule := range rules {
cp, ok := rule.(charsetProvider)
if !ok {
continue
}
chars = append(chars, cp.Chars()...)
}
return deduplicateRunes(chars)
}
// deduplicateRunes returns a new slice of sorted & de-duplicated runes
func deduplicateRunes(original []rune) (deduped []rune) {
if len(original) == 0 {
return nil
}
m := map[rune]bool{}
dedupedRunes := []rune(nil)
for _, r := range original {
if m[r] {
continue
}
m[r] = true
dedupedRunes = append(dedupedRunes, r)
}
// They don't have to be sorted, but this is being done to make the charset easier to visualize
sort.Sort(runes(dedupedRunes))
return dedupedRunes
}