mirror of
https://github.com/dutchcoders/transfer.sh.git
synced 2024-11-30 16:10:19 +01:00
244 lines
5.4 KiB
Go
244 lines
5.4 KiB
Go
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||
|
//
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file or at
|
||
|
// https://developers.google.com/open-source/licenses/bsd.
|
||
|
|
||
|
package database
|
||
|
|
||
|
import (
|
||
|
"path"
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
|
||
|
"github.com/golang/gddo/doc"
|
||
|
"github.com/golang/gddo/gosrc"
|
||
|
)
|
||
|
|
||
|
// isStandardPackage reports whether path is treated as a standard library
// import path. The only criterion is that the path contains no "." character.
func isStandardPackage(path string) bool {
	return !strings.Contains(path, ".")
}
|
||
|
|
||
|
// isTermSep reports whether r separates search terms. White space, symbols
// and all punctuation other than '.' are separators; '.' is kept as part of
// a term.
func isTermSep(r rune) bool {
	return unicode.IsSpace(r) ||
		(r != '.' && unicode.IsPunct(r)) ||
		unicode.IsSymbol(r)
}
|
||
|
|
||
|
// normalizeProjectRoot returns projectRoot unchanged, except that an empty
// project root is replaced by "go".
func normalizeProjectRoot(projectRoot string) string {
	if projectRoot == "" {
		return "go"
	}
	return projectRoot
}
|
||
|
|
||
|
// synonyms maps a lower-cased term to the term that replaces it before
// stemming.
var synonyms = map[string]string{
	"redis":    "redisdb", // append db to avoid stemming to 'red'
	"rand":     "random",
	"postgres": "postgresql",
	"mongo":    "mongodb",
}
|
||
|
|
||
|
func term(s string) string {
|
||
|
s = strings.ToLower(s)
|
||
|
if x, ok := synonyms[s]; ok {
|
||
|
s = x
|
||
|
}
|
||
|
|
||
|
// Trim the trailing period at the end of any sentence.
|
||
|
return stem(strings.TrimSuffix(s, "."))
|
||
|
}
|
||
|
|
||
|
// httpPat matches an http:// or https:// URL up to the next white space
// character.
var httpPat = regexp.MustCompile(`https?://\S+`)
|
||
|
|
||
|
func collectSynopsisTerms(terms map[string]bool, synopsis string) {
|
||
|
|
||
|
synopsis = httpPat.ReplaceAllLiteralString(synopsis, "")
|
||
|
|
||
|
fields := strings.FieldsFunc(synopsis, isTermSep)
|
||
|
for i := range fields {
|
||
|
fields[i] = strings.ToLower(fields[i])
|
||
|
}
|
||
|
|
||
|
// Ignore boilerplate in the following common patterns:
|
||
|
// Package foo ...
|
||
|
// Command foo ...
|
||
|
// Package foo implements ... (and provides, contains)
|
||
|
// The foo package ...
|
||
|
// The foo package implements ...
|
||
|
// The foo command ...
|
||
|
|
||
|
checkPackageVerb := false
|
||
|
switch {
|
||
|
case len(fields) >= 1 && fields[0] == "package":
|
||
|
fields = fields[1:]
|
||
|
checkPackageVerb = true
|
||
|
case len(fields) >= 1 && fields[0] == "command":
|
||
|
fields = fields[1:]
|
||
|
case len(fields) >= 3 && fields[0] == "the" && fields[2] == "package":
|
||
|
fields[2] = fields[1]
|
||
|
fields = fields[2:]
|
||
|
checkPackageVerb = true
|
||
|
case len(fields) >= 3 && fields[0] == "the" && fields[2] == "command":
|
||
|
fields[2] = fields[1]
|
||
|
fields = fields[2:]
|
||
|
}
|
||
|
|
||
|
if checkPackageVerb && len(fields) >= 2 &&
|
||
|
(fields[1] == "implements" || fields[1] == "provides" || fields[1] == "contains") {
|
||
|
fields[1] = fields[0]
|
||
|
fields = fields[1:]
|
||
|
}
|
||
|
|
||
|
for _, s := range fields {
|
||
|
if !stopWord[s] {
|
||
|
terms[term(s)] = true
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// termSlice returns the keys of terms as a slice. Every key is included
// regardless of its value, and the order follows map iteration and is
// therefore unspecified. The result is non-nil even when terms is empty.
func termSlice(terms map[string]bool) []string {
	result := make([]string, 0, len(terms))
	for t := range terms {
		result = append(result, t)
	}
	return result
}
|
||
|
|
||
|
func documentTerms(pdoc *doc.Package, score float64) []string {
|
||
|
|
||
|
terms := make(map[string]bool)
|
||
|
|
||
|
// Project root
|
||
|
|
||
|
projectRoot := normalizeProjectRoot(pdoc.ProjectRoot)
|
||
|
terms["project:"+projectRoot] = true
|
||
|
|
||
|
if strings.HasPrefix(pdoc.ImportPath, "golang.org/x/") {
|
||
|
terms["project:subrepo"] = true
|
||
|
}
|
||
|
|
||
|
// Imports
|
||
|
|
||
|
for _, path := range pdoc.Imports {
|
||
|
if gosrc.IsValidPath(path) {
|
||
|
terms["import:"+path] = true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if score > 0 {
|
||
|
|
||
|
for _, term := range parseQuery(pdoc.ImportPath) {
|
||
|
terms[term] = true
|
||
|
}
|
||
|
if !isStandardPackage(pdoc.ImportPath) {
|
||
|
terms["all:"] = true
|
||
|
for _, term := range parseQuery(pdoc.ProjectName) {
|
||
|
terms[term] = true
|
||
|
}
|
||
|
for _, term := range parseQuery(pdoc.Name) {
|
||
|
terms[term] = true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Synopsis
|
||
|
|
||
|
collectSynopsisTerms(terms, pdoc.Synopsis)
|
||
|
|
||
|
}
|
||
|
|
||
|
return termSlice(terms)
|
||
|
}
|
||
|
|
||
|
// vendorPat matches the path of a vendored package: one of the known
// vendoring directories followed by a path element that contains a dot.
var vendorPat = regexp.MustCompile(
	// match directories used by tools to vendor packages.
	`/(?:_?third_party|vendors|Godeps/_workspace/src)/` +
		// match a domain name.
		`[^./]+\.[^/]+`)
|
||
|
|
||
|
func documentScore(pdoc *doc.Package) float64 {
|
||
|
if pdoc.Name == "" ||
|
||
|
pdoc.Status != gosrc.Active ||
|
||
|
len(pdoc.Errors) > 0 ||
|
||
|
strings.HasSuffix(pdoc.ImportPath, ".go") ||
|
||
|
strings.HasPrefix(pdoc.ImportPath, "gist.github.com/") ||
|
||
|
strings.HasSuffix(pdoc.ImportPath, "/internal") ||
|
||
|
strings.Contains(pdoc.ImportPath, "/internal/") ||
|
||
|
vendorPat.MatchString(pdoc.ImportPath) {
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
for _, p := range pdoc.Imports {
|
||
|
if strings.HasSuffix(p, ".go") {
|
||
|
return 0
|
||
|
}
|
||
|
}
|
||
|
|
||
|
r := 1.0
|
||
|
if pdoc.IsCmd {
|
||
|
if pdoc.Doc == "" {
|
||
|
// Do not include command in index if it does not have documentation.
|
||
|
return 0
|
||
|
}
|
||
|
if !importsGoPackages(pdoc) {
|
||
|
// Penalize commands that don't use the "go/*" packages.
|
||
|
r *= 0.9
|
||
|
}
|
||
|
} else {
|
||
|
if !pdoc.Truncated &&
|
||
|
len(pdoc.Consts) == 0 &&
|
||
|
len(pdoc.Vars) == 0 &&
|
||
|
len(pdoc.Funcs) == 0 &&
|
||
|
len(pdoc.Types) == 0 &&
|
||
|
len(pdoc.Examples) == 0 {
|
||
|
// Do not include package in index if it does not have exports.
|
||
|
return 0
|
||
|
}
|
||
|
if pdoc.Doc == "" {
|
||
|
// Penalty for no documentation.
|
||
|
r *= 0.95
|
||
|
}
|
||
|
if path.Base(pdoc.ImportPath) != pdoc.Name {
|
||
|
// Penalty for last element of path != package name.
|
||
|
r *= 0.9
|
||
|
}
|
||
|
for i := 0; i < strings.Count(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/"); i++ {
|
||
|
// Penalty for deeply nested packages.
|
||
|
r *= 0.99
|
||
|
}
|
||
|
if strings.Index(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/src/") > 0 {
|
||
|
r *= 0.95
|
||
|
}
|
||
|
for _, p := range pdoc.Imports {
|
||
|
if vendorPat.MatchString(p) {
|
||
|
// Penalize packages that import vendored packages.
|
||
|
r *= 0.1
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
func parseQuery(q string) []string {
|
||
|
var terms []string
|
||
|
q = strings.ToLower(q)
|
||
|
for _, s := range strings.FieldsFunc(q, isTermSep) {
|
||
|
if !stopWord[s] {
|
||
|
terms = append(terms, term(s))
|
||
|
}
|
||
|
}
|
||
|
return terms
|
||
|
}
|
||
|
|
||
|
func importsGoPackages(pdoc *doc.Package) bool {
|
||
|
for _, m := range pdoc.Imports {
|
||
|
if strings.HasPrefix(m, "go/") {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
return false
|
||
|
}
|