blob: 8430e3c7dfc40465024eac7eee5f213d816ec201 [file] [log] [blame]
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package regsrc
import (
"regexp"
"strings"
svchost "github.com/hashicorp/terraform-svchost"
)
var (
	// InvalidHostString is the placeholder handed back when a raw host cannot
	// be converted according to the IDNA spec. A host for which Valid() is
	// true never produces this value.
	InvalidHostString = "<invalid host>"

	// urlLabelEndSubRe is a sub-expression matching any character that RFC1123
	// permits as the first or last character of a URL label.
	urlLabelEndSubRe = `[0-9A-Za-z]`

	// urlLabelMidSubRe is a sub-expression matching any character that RFC1123
	// permits in the interior of a URL label, i.e. anywhere other than its
	// first or last position.
	urlLabelMidSubRe = `[0-9A-Za-z-]`

	// urlLabelUnicodeSubRe is a sub-expression matching any non-ASCII
	// character in an IDN (Unicode) display URL. It is deliberately loose:
	// the IDN RFC only admits roughly 15k code points (some conditionally),
	// but we match liberally here and rely on the later punycode conversion,
	// which validates characters fully, to report errors. Being restricted to
	// non-ASCII input, it at least guarantees that ASCII characters
	// disallowed by the RFC1123 classes above do not become legal again.
	urlLabelUnicodeSubRe = `[^[:ascii:]]`

	// hostLabelSubRe is the sub-expression matching a single valid hostname
	// label: one start character (RFC1123 end class or Unicode), optionally
	// followed by 0 to 61 interior characters and one closing character. It is
	// left unanchored so it can be composed into the larger expressions
	// below. Raw punycode is intentionally not rejected here (re2 has no
	// negative lookbehind); its presence is matched so it can be checked
	// later.
	hostLabelSubRe = `(?:` + urlLabelEndSubRe + `|` + urlLabelUnicodeSubRe + `)` +
		`(?:(?:` + urlLabelMidSubRe + `|` + urlLabelUnicodeSubRe + `){0,61}` +
		`(?:` + urlLabelEndSubRe + `|` + urlLabelUnicodeSubRe + `))?`

	// hostSubRe is the sub-expression matching a valid host prefix: two or
	// more dot-separated labels, optionally followed by a custom port.
	hostSubRe = hostLabelSubRe + `(?:\.` + hostLabelSubRe + `)+(?::\d+)?`

	// hostRe is the compiled, fully anchored form of hostSubRe. Matches that
	// contain Unicode still need additional validation.
	hostRe = regexp.MustCompile(`^` + hostSubRe + `$`)
)
// FriendlyHost describes a registry instance identified in source strings by a
// simple bare hostname like registry.terraform.io.
type FriendlyHost struct {
// Raw is the host string as originally supplied. It is stored unmodified;
// the Display, Normalized and Equal methods derive their results from it.
Raw string
}
// NewFriendlyHost returns a pointer to a new FriendlyHost whose Raw field is
// set to host. The string is stored as given; no parsing or validation is
// performed here.
func NewFriendlyHost(host string) *FriendlyHost {
return &FriendlyHost{Raw: host}
}
// ParseFriendlyHost tries to split a valid "friendly host" prefix off the front
// of source. The candidate prefix is everything before the first / character
// (or the entire string when there is no /), and it must match hostRe in full.
//
// On a match, host wraps the prefix and rest holds whatever followed the first
// / (without the delimiter), or "" if nothing did. Otherwise host is nil and
// rest is the unmodified source string.
//
// Hostnames containing punycode WILL parse successfully, since they may come
// from an internally normalized source string. For strings that came directly
// from a user they should be treated as invalid, which callers must check
// explicitly by calling Valid() on the returned host.
func ParseFriendlyHost(source string) (host *FriendlyHost, rest string) {
	// Separate the would-be host from the remainder at the first slash.
	prefix, tail := source, ""
	if slash := strings.Index(source, "/"); slash >= 0 {
		prefix, tail = source[:slash], source[slash+1:]
	}

	if !hostRe.MatchString(prefix) {
		// No match: hand back the whole string as rest with a nil host.
		return nil, source
	}
	return &FriendlyHost{Raw: prefix}, tail
}
// Valid returns whether the host prefix is considered valid in any case. The
// decision is delegated entirely to svchost.IsValid applied to the raw host
// string.
// Example of invalid prefixes might include ones that don't conform to the host
// name specifications. Note that IDN prefixes containing punycode are not valid
// input, which we expect to always be in user-input or normalised display form.
// NOTE(review): whether punycode is actually rejected depends on
// svchost.IsValid, which is not visible here — confirm against that package.
func (h *FriendlyHost) Valid() bool {
return svchost.IsValid(h.Raw)
}
// Display returns the host formatted for display to the user in CLI or web
// output. The formatting is delegated to svchost.ForDisplay applied to the raw
// host string.
func (h *FriendlyHost) Display() string {
return svchost.ForDisplay(h.Raw)
}
// Normalized returns the host in the form used for internal reference or
// comparison, obtained by passing the raw host through svchost.ForComparison.
// If that conversion reports an error, InvalidHostString is returned instead.
func (h *FriendlyHost) Normalized() string {
	if normalized, err := svchost.ForComparison(h.Raw); err == nil {
		return string(normalized)
	}
	return InvalidHostString
}
// String returns the host formatted as the user originally typed it assuming it
// was parsed from user input. It returns the Raw field unchanged.
func (h *FriendlyHost) String() string {
return h.Raw
}
// Equal reports whether h and other refer to the same host once both raw
// strings have been converted with svchost.ForComparison. A nil other, or a
// host on either side that fails that conversion, is never equal to anything,
// so false is returned in those cases.
func (h *FriendlyHost) Equal(other *FriendlyHost) bool {
	if other == nil {
		return false
	}

	// Convert the argument first, then the receiver.
	theirs, err := svchost.ForComparison(other.Raw)
	if err != nil {
		return false
	}

	ours, err := svchost.ForComparison(h.Raw)
	return err == nil && theirs == ours
}