gitea/vendor/golang.org/x/tools/internal/module/module.go

541 lines
17 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package module defines the module.Version type
// along with support code.
package module
// IMPORTANT NOTE
//
// This file essentially defines the set of valid import paths for the go command.
// There are many subtle considerations, including Unicode ambiguity,
// security, network, and file system representations.
//
// This file also defines the set of valid module path and version combinations,
// another topic with many subtle considerations.
//
// Changes to the semantics in this file require approval from rsc.
import (
"fmt"
"sort"
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/tools/internal/semver"
)
// A Version is defined by a module path and version pair.
type Version struct {
Path string
// Version is usually a semantic version in canonical form.
// There are two exceptions to this general rule.
// First, the top-level target of a build has no specific version
// and uses Version = "".
// Second, during MVS calculations the version "none" is used
// to represent the decision to take no version of a given module.
Version string `json:",omitempty"`
}
// Check checks that a given module path, version pair is valid.
// In addition to the path being a valid module path
// and the version being a valid semantic version,
// the two must correspond.
// For example, the path "yaml/v2" only corresponds to
// semantic versions beginning with "v2.".
func Check(path, version string) error {
if err := CheckPath(path); err != nil {
return err
}
if !semver.IsValid(version) {
return fmt.Errorf("malformed semantic version %v", version)
}
_, pathMajor, _ := SplitPathVersion(path)
if !MatchPathMajor(version, pathMajor) {
if pathMajor == "" {
pathMajor = "v0 or v1"
}
if pathMajor[0] == '.' { // .v1
pathMajor = pathMajor[1:]
}
return fmt.Errorf("mismatched module path %v and version %v (want %v)", path, version, pathMajor)
}
return nil
}
// firstPathOK reports whether r can appear in the first element of a module path.
// The first element of the path must be an LDH domain name, at least for now.
// To avoid case ambiguity, the domain name must be entirely lower case.
func firstPathOK(r rune) bool {
return r == '-' || r == '.' ||
'0' <= r && r <= '9' ||
'a' <= r && r <= 'z'
}
// pathOK reports whether r can appear in an import path element.
// Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: + - . _ and ~.
// This matches what "go get" has historically recognized in import paths.
// TODO(rsc): We would like to allow Unicode letters, but that requires additional
// care in the safe encoding (see note below).
func pathOK(r rune) bool {
if r < utf8.RuneSelf {
return r == '+' || r == '-' || r == '.' || r == '_' || r == '~' ||
'0' <= r && r <= '9' ||
'A' <= r && r <= 'Z' ||
'a' <= r && r <= 'z'
}
return false
}
// fileNameOK reports whether r can appear in a file name.
// For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
// If we expand the set of allowed characters here, we have to
// work harder at detecting potential case-folding and normalization collisions.
// See note about "safe encoding" below.
func fileNameOK(r rune) bool {
if r < utf8.RuneSelf {
// Entire set of ASCII punctuation, from which we remove characters:
// ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
// We disallow some shell special characters: " ' * < > ? ` |
// (Note that some of those are disallowed by the Windows file system as well.)
// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
// We allow spaces (U+0020) in file names.
const allowed = "!#$%&()+,-.=@[]^_{}~ "
if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
return true
}
for i := 0; i < len(allowed); i++ {
if rune(allowed[i]) == r {
return true
}
}
return false
}
// It may be OK to add more ASCII punctuation here, but only carefully.
// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
return unicode.IsLetter(r)
}
// CheckPath checks that a module path is valid.
func CheckPath(path string) error {
if err := checkPath(path, false); err != nil {
return fmt.Errorf("malformed module path %q: %v", path, err)
}
i := strings.Index(path, "/")
if i < 0 {
i = len(path)
}
if i == 0 {
return fmt.Errorf("malformed module path %q: leading slash", path)
}
if !strings.Contains(path[:i], ".") {
return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
}
if path[0] == '-' {
return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
}
for _, r := range path[:i] {
if !firstPathOK(r) {
return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
}
}
if _, _, ok := SplitPathVersion(path); !ok {
return fmt.Errorf("malformed module path %q: invalid version", path)
}
return nil
}
// CheckImportPath checks that an import path is valid.
func CheckImportPath(path string) error {
if err := checkPath(path, false); err != nil {
return fmt.Errorf("malformed import path %q: %v", path, err)
}
return nil
}
// checkPath checks that a general path is valid.
// It returns an error describing why but not mentioning path.
// Because these checks apply to both module paths and import paths,
// the caller is expected to add the "malformed ___ path %q: " prefix.
// fileName indicates whether the final element of the path is a file name
// (as opposed to a directory name).
func checkPath(path string, fileName bool) error {
if !utf8.ValidString(path) {
return fmt.Errorf("invalid UTF-8")
}
if path == "" {
return fmt.Errorf("empty string")
}
if strings.Contains(path, "..") {
return fmt.Errorf("double dot")
}
if strings.Contains(path, "//") {
return fmt.Errorf("double slash")
}
if path[len(path)-1] == '/' {
return fmt.Errorf("trailing slash")
}
elemStart := 0
for i, r := range path {
if r == '/' {
if err := checkElem(path[elemStart:i], fileName); err != nil {
return err
}
elemStart = i + 1
}
}
if err := checkElem(path[elemStart:], fileName); err != nil {
return err
}
return nil
}
// checkElem checks whether an individual path element is valid.
// fileName indicates whether the element is a file name (not a directory name).
func checkElem(elem string, fileName bool) error {
if elem == "" {
return fmt.Errorf("empty path element")
}
if strings.Count(elem, ".") == len(elem) {
return fmt.Errorf("invalid path element %q", elem)
}
if elem[0] == '.' && !fileName {
return fmt.Errorf("leading dot in path element")
}
if elem[len(elem)-1] == '.' {
return fmt.Errorf("trailing dot in path element")
}
charOK := pathOK
if fileName {
charOK = fileNameOK
}
for _, r := range elem {
if !charOK(r) {
return fmt.Errorf("invalid char %q", r)
}
}
// Windows disallows a bunch of path elements, sadly.
// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
short := elem
if i := strings.Index(short, "."); i >= 0 {
short = short[:i]
}
for _, bad := range badWindowsNames {
if strings.EqualFold(bad, short) {
return fmt.Errorf("disallowed path element %q", elem)
}
}
return nil
}
// CheckFilePath checks whether a slash-separated file path is valid.
func CheckFilePath(path string) error {
if err := checkPath(path, true); err != nil {
return fmt.Errorf("malformed file path %q: %v", path, err)
}
return nil
}
// badWindowsNames are the reserved file path elements on Windows.
// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
var badWindowsNames = []string{
"CON",
"PRN",
"AUX",
"NUL",
"COM1",
"COM2",
"COM3",
"COM4",
"COM5",
"COM6",
"COM7",
"COM8",
"COM9",
"LPT1",
"LPT2",
"LPT3",
"LPT4",
"LPT5",
"LPT6",
"LPT7",
"LPT8",
"LPT9",
}
// SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
// and version is either empty or "/vN" for N >= 2.
// As a special case, gopkg.in paths are recognized directly;
// they require ".vN" instead of "/vN", and for all N, not just N >= 2.
func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) {
if strings.HasPrefix(path, "gopkg.in/") {
return splitGopkgIn(path)
}
i := len(path)
dot := false
for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') {
if path[i-1] == '.' {
dot = true
}
i--
}
if i <= 1 || i == len(path) || path[i-1] != 'v' || path[i-2] != '/' {
return path, "", true
}
prefix, pathMajor = path[:i-2], path[i-2:]
if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" {
return path, "", false
}
return prefix, pathMajor, true
}
// splitGopkgIn is like SplitPathVersion but only for gopkg.in paths.
func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) {
if !strings.HasPrefix(path, "gopkg.in/") {
return path, "", false
}
i := len(path)
if strings.HasSuffix(path, "-unstable") {
i -= len("-unstable")
}
for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9') {
i--
}
if i <= 1 || path[i-1] != 'v' || path[i-2] != '.' {
// All gopkg.in paths must end in vN for some N.
return path, "", false
}
prefix, pathMajor = path[:i-2], path[i-2:]
if len(pathMajor) <= 2 || pathMajor[2] == '0' && pathMajor != ".v0" {
return path, "", false
}
return prefix, pathMajor, true
}
// MatchPathMajor reports whether the semantic version v
// matches the path major version pathMajor.
func MatchPathMajor(v, pathMajor string) bool {
if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
}
if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" {
// Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1.
// For example, gopkg.in/yaml.v2@v2.2.1's go.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405.
return true
}
m := semver.Major(v)
if pathMajor == "" {
return m == "v0" || m == "v1" || semver.Build(v) == "+incompatible"
}
return (pathMajor[0] == '/' || pathMajor[0] == '.') && m == pathMajor[1:]
}
// CanonicalVersion returns the canonical form of the version string v.
// It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible".
func CanonicalVersion(v string) string {
cv := semver.Canonical(v)
if semver.Build(v) == "+incompatible" {
cv += "+incompatible"
}
return cv
}
// Sort sorts the list by Path, breaking ties by comparing Versions.
func Sort(list []Version) {
sort.Slice(list, func(i, j int) bool {
mi := list[i]
mj := list[j]
if mi.Path != mj.Path {
return mi.Path < mj.Path
}
// To help go.sum formatting, allow version/file.
// Compare semver prefix by semver rules,
// file by string order.
vi := mi.Version
vj := mj.Version
var fi, fj string
if k := strings.Index(vi, "/"); k >= 0 {
vi, fi = vi[:k], vi[k:]
}
if k := strings.Index(vj, "/"); k >= 0 {
vj, fj = vj[:k], vj[k:]
}
if vi != vj {
return semver.Compare(vi, vj) < 0
}
return fi < fj
})
}
// Safe encodings
//
// Module paths appear as substrings of file system paths
// (in the download cache) and of web server URLs in the proxy protocol.
// In general we cannot rely on file systems to be case-sensitive,
// nor can we rely on web servers, since they read from file systems.
// That is, we cannot rely on the file system to keep rsc.io/QUOTE
// and rsc.io/quote separate. Windows and macOS don't.
// Instead, we must never require two different casings of a file path.
// Because we want the download cache to match the proxy protocol,
// and because we want the proxy protocol to be possible to serve
// from a tree of static files (which might be stored on a case-insensitive
// file system), the proxy protocol must never require two different casings
// of a URL path either.
//
// One possibility would be to make the safe encoding be the lowercase
// hexadecimal encoding of the actual path bytes. This would avoid ever
// needing different casings of a file path, but it would be fairly illegible
// to most programmers when those paths appeared in the file system
// (including in file paths in compiler errors and stack traces)
// in web server logs, and so on. Instead, we want a safe encoding that
// leaves most paths unaltered.
//
// The safe encoding is this:
// replace every uppercase letter with an exclamation mark
// followed by the letter's lowercase equivalent.
//
// For example,
// github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go.
// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
//
// Import paths that avoid upper-case letters are left unchanged.
// Note that because import paths are ASCII-only and avoid various
// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
// and avoids the same problematic punctuation.
//
// Import paths have never allowed exclamation marks, so there is no
// need to define how to encode a literal !.
//
// Although paths are disallowed from using Unicode (see pathOK above),
// the eventual plan is to allow Unicode letters as well, to assume that
// file systems and URLs are Unicode-safe (storing UTF-8), and apply
// the !-for-uppercase convention. Note however that not all runes that
// are different but case-fold equivalent are an upper/lower pair.
// For example, U+004B ('K'), U+006B ('k'), and U+212A ('' for Kelvin)
// are considered to case-fold to each other. When we do add Unicode
// letters, we must not assume that upper/lower are the only case-equivalent pairs.
// Perhaps the Kelvin symbol would be disallowed entirely, for example.
// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
//
// Also, it would be nice to allow Unicode marks as well as letters,
// but marks include combining marks, and then we must deal not
// only with case folding but also normalization: both U+00E9 ('é')
// and U+0065 U+0301 ('e' followed by combining acute accent)
// look the same on the page and are treated by some file systems
// as the same path. If we do allow Unicode marks in paths, there
// must be some kind of normalization to allow only one canonical
// encoding of any character used in an import path.
// EncodePath returns the safe encoding of the given module path.
// It fails if the module path is invalid.
func EncodePath(path string) (encoding string, err error) {
if err := CheckPath(path); err != nil {
return "", err
}
return encodeString(path)
}
// EncodeVersion returns the safe encoding of the given module version.
// Versions are allowed to be in non-semver form but must be valid file names
// and not contain exclamation marks.
func EncodeVersion(v string) (encoding string, err error) {
if err := checkElem(v, true); err != nil || strings.Contains(v, "!") {
return "", fmt.Errorf("disallowed version string %q", v)
}
return encodeString(v)
}
func encodeString(s string) (encoding string, err error) {
haveUpper := false
for _, r := range s {
if r == '!' || r >= utf8.RuneSelf {
// This should be disallowed by CheckPath, but diagnose anyway.
// The correctness of the encoding loop below depends on it.
return "", fmt.Errorf("internal error: inconsistency in EncodePath")
}
if 'A' <= r && r <= 'Z' {
haveUpper = true
}
}
if !haveUpper {
return s, nil
}
var buf []byte
for _, r := range s {
if 'A' <= r && r <= 'Z' {
buf = append(buf, '!', byte(r+'a'-'A'))
} else {
buf = append(buf, byte(r))
}
}
return string(buf), nil
}
// DecodePath returns the module path of the given safe encoding.
// It fails if the encoding is invalid or encodes an invalid path.
func DecodePath(encoding string) (path string, err error) {
path, ok := decodeString(encoding)
if !ok {
return "", fmt.Errorf("invalid module path encoding %q", encoding)
}
if err := CheckPath(path); err != nil {
return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
}
return path, nil
}
// DecodeVersion returns the version string for the given safe encoding.
// It fails if the encoding is invalid or encodes an invalid version.
// Versions are allowed to be in non-semver form but must be valid file names
// and not contain exclamation marks.
func DecodeVersion(encoding string) (v string, err error) {
v, ok := decodeString(encoding)
if !ok {
return "", fmt.Errorf("invalid version encoding %q", encoding)
}
if err := checkElem(v, true); err != nil {
return "", fmt.Errorf("disallowed version string %q", v)
}
return v, nil
}
func decodeString(encoding string) (string, bool) {
var buf []byte
bang := false
for _, r := range encoding {
if r >= utf8.RuneSelf {
return "", false
}
if bang {
bang = false
if r < 'a' || 'z' < r {
return "", false
}
buf = append(buf, byte(r+'A'-'a'))
continue
}
if r == '!' {
bang = true
continue
}
if 'A' <= r && r <= 'Z' {
return "", false
}
buf = append(buf, byte(r))
}
if bang {
return "", false
}
return string(buf), true
}