Source File
search.go
Belonging Package
golang.org/x/pkgsite/internal/postgres
package postgres
import (
)
searchLatency = stats.Float64(
"go-discovery/search/latency",
"Latency of a search query.",
stats.UnitMilliseconds,
SearchLatencyDistribution = &view.View{
Name: "go-discovery/search/latency",
Measure: searchLatency,
Aggregation: ochttp.DefaultLatencyDistribution,
Description: "Search latency, by result source query type.",
TagKeys: []tag.Key{keySearchSource},
SearchResponseCount = &view.View{
Name: "go-discovery/search/count",
Measure: searchLatency,
Aggregation: view.Count(),
Description: "Search count, by result source query type.",
TagKeys: []tag.Key{keySearchSource},
}
)
// pkgSearchers maps a search source name to the DB method that implements
// that package search. The keys ("popular", "deep") match the source field
// set on the searchResponse each of those methods returns.
var pkgSearchers = map[string]searcher{
"popular": (*DB).popularSearch,
"deep": (*DB).deepSearch,
}
// symbolSearchers maps a search source name to the DB method that implements
// that symbol search. It is selected instead of pkgSearchers only when both
// the ExperimentSearchGrouping and ExperimentSymbolSearch experiments are
// active (see the experiment.IsActive checks in the search entry point).
var symbolSearchers = map[string]searcher{
"symbol": (*DB).symbolSearch,
}
*= 5
}
var map[string]searcher
if &&
experiment.IsActive(, internal.ExperimentSearchGrouping) &&
experiment.IsActive(, internal.ExperimentSymbolSearch) {
= symbolSearchers
} else {
= pkgSearchers
}
, := .hedgedSearch(, , , , , , nil)
if != nil {
return nil,
var []*internal.SearchResult
for , := range .results {
, := .IsExcluded(, .PackagePath)
if != nil {
return nil,
}
if ! {
= append(, )
}
}
if experiment.IsActive(, internal.ExperimentSearchGrouping) && ! {
= groupSearchResults()
}
if len() > {
= [:]
}
return , nil
}
noGoModPenalty = 0.8
)
// scoreExpr is the SQL expression used to rank a search_documents row
// against the query string bound to $1. It is the product of:
//   - ts_rank of tsv_search_tokens against websearch_to_tsquery($1), using
//     the weight array '{0.1, 0.2, 1.0, 1.0}' (PostgreSQL orders these as
//     {D, C, B, A});
//   - ln(e + imported_by_count), so the factor is exactly 1 when
//     imported_by_count is 0;
//   - nonRedistributablePenalty when redistributable is false;
//   - noGoModPenalty when has_go_mod is false (a NULL has_go_mod is treated
//     as true).
//
// The two penalties are formatted into the string with %f, so they are SQL
// literals rather than bind parameters.
var scoreExpr = fmt.Sprintf(`
ts_rank('{0.1, 0.2, 1.0, 1.0}', tsv_search_tokens, websearch_to_tsquery($1)) *
ln(exp(1)+imported_by_count) *
CASE WHEN redistributable THEN 1 ELSE %f END *
CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE %f END
`, nonRedistributablePenalty, noGoModPenalty)
, := context.WithCancel()
defer ()
:= float64(time.Since()) / float64(time.Millisecond)
stats.RecordWithTags(,
[]tag.Mutator{tag.Upsert(keySearchSource, .source)},
if := .addPackageDataToSearchResults(, .results); != nil {
return nil,
}
return &, nil
}
// hllRegisterCount is formatted into upsertSearchStatement, where
// hll_hash(path) & (hllRegisterCount - 1) produces the value stored in the
// hll_register column. Because it is used as a bit mask, it must be a power
// of two for the result to cover 0..hllRegisterCount-1.
// NOTE(review): "hll" presumably stands for HyperLogLog — confirm.
const hllRegisterCount = 128
// deepSearch (the "deep" entry of pkgSearchers) runs a full-text query over
// search_documents: rows whose tsv_search_tokens match
// websearch_to_tsquery($1) are ranked with scoreExpr, rows scoring 0.1 or
// less are dropped, and the LIMIT ($2) and OFFSET ($3) bind parameters are
// applied.
//
// The outer SELECT also returns COUNT(*) OVER() as "total", i.e. the number
// of rows that pass the score filter before LIMIT/OFFSET; it is scanned into
// every result's NumResults.
//
// Errors are not returned directly: they are carried in the err field of the
// returned searchResponse, whose source is always "deep".
//
// NOTE(review): local and parameter identifiers are missing from this
// listing; confirm names against the original file.
func ( *DB) ( context.Context, string, , , int) searchResponse {
:= fmt.Sprintf(`
SELECT *, COUNT(*) OVER() AS total
FROM (
SELECT
package_path,
version,
module_path,
commit_time,
imported_by_count,
(%s) AS score
FROM
search_documents
WHERE tsv_search_tokens @@ websearch_to_tsquery($1)
ORDER BY
score DESC,
commit_time DESC,
package_path
) r
WHERE r.score > 0.1
LIMIT $2
OFFSET $3`, scoreExpr)
var []*internal.SearchResult
// Row callback: scan one row into a fresh SearchResult and append a pointer
// to it to the results slice. The scan order must match the SELECT column
// order, with "total" last.
:= func( *sql.Rows) error {
var internal.SearchResult
if := .Scan(&.PackagePath, &.Version, &.ModulePath, &.CommitTime,
&.NumImportedBy, &.Score, &.NumResults); != nil {
return fmt.Errorf("rows.Scan(): %v", )
}
= append(, &)
return nil
}
:= .db.RunQuery(, , , , , )
// On a query error a variable is reset to nil — presumably the collected
// results, so that a failed response carries no partial rows.
if != nil {
= nil
}
// Cap the reported total: when the first row's NumResults exceeds a bound,
// every row's NumResults is overwritten with a uint64 value.
// NOTE(review): the operands are not visible here — presumably the
// caller-supplied maximum result count.
if len() > 0 && [0].NumResults > uint64() {
for , := range {
.NumResults = uint64()
}
}
return searchResponse{
source: "deep",
results: ,
err: ,
}
}
// popularSearch (the "popular" entry of pkgSearchers) delegates the search to
// the popular_search SQL function, which takes five bind parameters; the
// last two passed are nonRedistributablePenalty and noGoModPenalty.
//
// The query returns no total-count column, so NumResults is assigned on the
// Go side after the query, using a locally computed value.
//
// Errors are carried in the err field of the returned searchResponse, whose
// source is always "popular".
//
// NOTE(review): identifiers, and at least one line (the condition guarding
// the NumResults adjustment), are missing from this listing; confirm against
// the original file.
func ( *DB) ( context.Context, string, , , int) searchResponse {
:= `
SELECT
package_path,
version,
module_path,
commit_time,
imported_by_count,
score
FROM popular_search($1, $2, $3, $4, $5)`
var []*internal.SearchResult
// Row callback: scan one row into a fresh SearchResult and append a pointer
// to it to the results slice.
:= func( *sql.Rows) error {
var internal.SearchResult
if := .Scan(&.PackagePath, &.Version, &.ModulePath, &.CommitTime,
&.NumImportedBy, &.Score); != nil {
return fmt.Errorf("rows.Scan(): %v", )
}
= append(, &)
return nil
}
:= .db.RunQuery(, , , , , , nonRedistributablePenalty, noGoModPenalty)
// On a query error a variable is reset to nil — presumably the collected
// results.
if != nil {
= nil
}
// Compute the value reported as NumResults. The visible fragment adds
// len(...) of a slice to another value inside a conditional whose header is
// missing here — TODO confirm the exact rule.
:=
= + len()
}
for , := range {
.NumResults = uint64()
}
return searchResponse{
source: "popular",
results: ,
err: ,
}
}
// symbolSearch (the "symbol" entry of symbolSearchers) searches for packages
// by symbol name. It joins search_documents with symbol_search_documents and
// symbol_names, keeps rows whose tsv_symbol_tokens match
// to_tsquery('simple', $1), ranks them with symbolScoreExpr, drops rows
// scoring 0.1 or less, and groups by package so that the matching symbol
// names are aggregated into one array per package. LIMIT ($2) and OFFSET
// ($3) are applied to the grouped rows.
//
// COUNT(*) OVER() is returned as "total" and scanned into each result's
// NumResults.
//
// Errors are carried in the err field of the returned searchResponse, whose
// source is always "symbol".
//
// NOTE(review): local and parameter identifiers are missing from this
// listing; confirm names against the original file.
func ( *DB) ( context.Context, string, , , int) searchResponse {
:= fmt.Sprintf(`
SELECT
package_path,
version,
module_path,
commit_time,
imported_by_count,
ARRAY_AGG(name) AS symbol_names,
COUNT(*) OVER() AS total
FROM (
SELECT
sd.package_path,
sd.version,
sd.module_path,
sd.commit_time,
sd.imported_by_count,
(%s) AS score,
s.name
FROM search_documents sd
INNER JOIN symbol_search_documents ssd
ON sd.package_path_id = ssd.package_path_id
INNER JOIN symbol_names s
ON s.id = ssd.symbol_name_id
WHERE
ssd.tsv_symbol_tokens @@ to_tsquery('simple', $1)
ORDER BY
score DESC,
commit_time DESC,
package_path
) r
WHERE r.score > 0.1
GROUP BY 1, 2, 3, 4, 5
LIMIT $2
OFFSET $3`, symbolScoreExpr)
var []*internal.SearchResult
// Row callback: scan one grouped row, then copy the non-NULL entries of the
// aggregated symbol-name array into the result's Symbols.
:= func( *sql.Rows) error {
var (
internal.SearchResult
[]sql.NullString
)
if := .Scan(&.PackagePath, &.Version, &.ModulePath, &.CommitTime,
&.NumImportedBy, pq.Array(&), &.NumResults); != nil {
return fmt.Errorf("symbolSearch: rows.Scan(): %v", )
}
for , := range {
if .Valid {
.Symbols = append(.Symbols, .String)
}
}
= append(, &)
return nil
}
// Rewrite the space-separated query into tsquery OR syntax
// ("a b" becomes "a | b") before binding it to $1.
= strings.Join(strings.Split(, " "), " | ")
:= .db.RunQuery(, , , , , )
// On a query error a variable is reset to nil — presumably the collected
// results.
if != nil {
= nil
}
// Cap the reported total, as in deepSearch.
// NOTE(review): the bound's identifier is not visible here.
if len() > 0 && [0].NumResults > uint64() {
for , := range {
.NumResults = uint64()
}
}
return searchResponse{
source: "symbol",
results: ,
err: ,
}
}
// symbolScoreExpr is the SQL expression symbolSearch uses to rank a row. It
// has the same shape as scoreExpr (text rank, times ln(e + imported_by_count),
// times the non-redistributable and no-go.mod penalties) but ranks
// ssd.tsv_symbol_tokens against to_tsquery('simple', $1), so it is only valid
// in a query that aliases symbol_search_documents as ssd.
var symbolScoreExpr = fmt.Sprintf(`
ts_rank('{0.1, 0.2, 1.0, 1.0}', ssd.tsv_symbol_tokens, to_tsquery('simple', $1)) *
ln(exp(1)+imported_by_count) *
CASE WHEN redistributable THEN 1 ELSE %f END *
CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE %f END
`, nonRedistributablePenalty, noGoModPenalty)
= make(map[string]*internal.SearchResult)
)
for , := range {
[.PackagePath] =
:= fmt.Sprintf("(%s, %s, %s)", pq.QuoteLiteral(.PackagePath),
pq.QuoteLiteral(.Version), pq.QuoteLiteral(.ModulePath))
= append(, )
}
:= fmt.Sprintf(`
SELECT
p.path,
u.name,
d.synopsis,
u.license_types,
u.redistributable
FROM
units u
INNER JOIN
paths p
ON u.path_id = p.id
INNER JOIN
modules m
ON u.module_id = m.id
LEFT JOIN
documentation d
ON u.id = d.unit_id
WHERE
(p.path, m.version, m.module_path) IN (%s)`, strings.Join(, ","))
:= func( *sql.Rows) error {
var (
, , string
[]string
bool
)
if := .Scan(&, &, database.NullIsEmpty(&), pq.Array(&), &); != nil {
return fmt.Errorf("rows.Scan(): %v", )
}
, := []
if ! {
return fmt.Errorf("BUG: unexpected package path: %q", )
}
.Name =
if || .bypassLicenseCheck {
.Synopsis =
}
for , := range {
if != "" {
.Licenses = append(.Licenses, )
}
}
.Licenses = sortAndDedup(.Licenses)
return nil
}
return .db.RunQuery(, , )
}
// sortAndDedup returns the distinct strings of its argument in ascending
// order (sort.Strings). The input slice is not modified. For an empty or nil
// input the result is a nil slice.
// NOTE(review): the function name is not visible in this listing; it is
// taken from the sortAndDedup call made when collecting licenses.
func ( []string) []string {
var []string
// Use a map as a set to collapse duplicates.
:= map[string]bool{}
for , := range {
[] = true
}
// Map iteration order is random, so the keys are sorted before returning.
for := range {
= append(, )
}
sort.Strings()
return
}
sort.Slice(, func(, int) bool {
, := internal.SeriesPathAndMajorVersion([].ModulePath)
, := internal.SeriesPathAndMajorVersion([].ModulePath)
if != {
return [].Score > [].Score
}
return >
})
:= map[string]*internal.SearchResult{} // from module path to first result
:= map[string]*internal.SearchResult{} // for series path to first result
var []*internal.SearchResult
for , := range {
:= [.ModulePath]
[.ModulePath] =
= append(, )
.SameModule = append(.SameModule, )
}
:= internal.SeriesPathForModule(.ModulePath)
= []
[] =
.LowerMajor = append(.LowerMajor, )
}
}
return
}
// upsertSearchStatement inserts or updates the search_documents row for one
// package. Bind parameters:
//
//	$1  package path
//	$2  module path
//	$3  module version
//	$4  text indexed with the 'path_tokens' configuration, weight A
//	$5  text indexed with weight B
//	$6  text indexed with weight C
//	$7  text indexed with weight D
//
// The row's other columns are read from units, modules, paths and
// documentation for the unit identified by ($1, $2, $3); LIMIT 1 picks a
// single row when several match.
//
// On a package_path conflict the existing row is updated. version_updated_at
// is kept when the version is unchanged and set to CURRENT_TIMESTAMP
// otherwise. The hll_* columns are not updated, and neither are
// package_path_id and module_path_id.
//
// hllRegisterCount is formatted into the statement as a literal.
var upsertSearchStatement = fmt.Sprintf(`
INSERT INTO search_documents (
package_path,
package_path_id,
version,
module_path,
module_path_id,
name,
synopsis,
license_types,
redistributable,
version_updated_at,
commit_time,
has_go_mod,
tsv_search_tokens,
hll_register,
hll_leading_zeros
)
SELECT
p1.path,
p1.id,
m.version,
m.module_path,
p2.id,
u.name,
d.synopsis,
u.license_types,
u.redistributable,
CURRENT_TIMESTAMP,
m.commit_time,
m.has_go_mod,
(
SETWEIGHT(TO_TSVECTOR('path_tokens', $4), 'A') ||
SETWEIGHT(TO_TSVECTOR($5), 'B') ||
SETWEIGHT(TO_TSVECTOR($6), 'C') ||
SETWEIGHT(TO_TSVECTOR($7), 'D')
),
hll_hash(p1.path) & (%d - 1),
hll_zeros(hll_hash(p1.path))
FROM units u
INNER JOIN modules m ON u.module_id = m.id
INNER JOIN paths p1 ON p1.id = u.path_id
LEFT JOIN paths p2 ON p2.path = m.module_path
LEFT JOIN documentation d ON u.id = d.unit_id
WHERE
p1.path = $1
AND m.module_path = $2
AND m.version = $3
LIMIT 1 -- could be multiple build contexts
ON CONFLICT (package_path)
DO UPDATE SET
package_path=excluded.package_path,
version=excluded.version,
module_path=excluded.module_path,
name=excluded.name,
synopsis=excluded.synopsis,
license_types=excluded.license_types,
redistributable=excluded.redistributable,
commit_time=excluded.commit_time,
has_go_mod=excluded.has_go_mod,
tsv_search_tokens=excluded.tsv_search_tokens,
-- the hll fields are functions of path, so they don't change
version_updated_at=(
CASE WHEN excluded.version = search_documents.version
THEN search_documents.version_updated_at
ELSE CURRENT_TIMESTAMP
END)
;`, hllRegisterCount)
// upsertSearchDocuments writes a search document, and its symbol search
// documents, for every non-internal package of the given module. It stops at
// and returns the first error; errors are wrapped by derrors.WrapStack with
// the module path and version.
//
// NOTE(review): identifiers, and the condition guarding the
// Documentation[0] access, are missing from this listing; confirm against
// the original file.
func ( context.Context, *database.DB, *internal.Module) ( error) {
defer derrors.WrapStack(&, "upsertSearchDocuments(ctx, %q, %q)", .ModulePath, .Version)
, := trace.StartSpan(, "UpsertSearchDocuments")
defer .End()
for , := range .Packages() {
// Packages for which isInternalPackage reports true get no search document.
if isInternalPackage(.Path) {
continue
}
:= UpsertSearchDocumentArgs{
PackagePath: .Path,
ModulePath: .ModulePath,
Version: .Version,
}
// The synopsis comes from the first Documentation entry.
.Synopsis = .Documentation[0].Synopsis
}
// README fields are filled in only when a README is present.
if .Readme != nil {
.ReadmeFilePath = .Readme.Filepath
.ReadmeContents = .Readme.Contents
}
if := UpsertSearchDocument(, , ); != nil {
return
}
if := upsertSearchDocumentSymbols(, , .Path, .ModulePath, .Version); != nil {
return
}
}
return nil
}
// UpsertSearchDocumentArgs holds the inputs UpsertSearchDocument needs to
// build one search_documents row.
type UpsertSearchDocumentArgs struct {
// PackagePath, ModulePath and Version identify the unit to index; they are
// bound to $1, $2 and $3 of upsertSearchStatement.
PackagePath string
ModulePath string
Version string
// Synopsis, ReadmeFilePath and ReadmeContents are passed to
// SearchDocumentSections to derive the indexed text. UpsertSearchDocument
// clears the two README fields when PackagePath differs from ModulePath.
Synopsis string
ReadmeFilePath string
ReadmeContents string
}
// UpsertSearchDocument inserts or updates the search_documents row for the
// package described by the args, by executing upsertSearchStatement.
//
// The statement's text parameters are the space-joined output of
// GeneratePathTokens for the package path, followed by the three values
// returned by SearchDocumentSections.
//
// Errors are wrapped by derrors.WrapStack with the package and module paths.
func ( context.Context, *database.DB, UpsertSearchDocumentArgs) ( error) {
defer derrors.WrapStack(&, "DB.UpsertSearchDocument(ctx, ddb, %q, %q)", .PackagePath, .ModulePath)
// Only index the README for the package at the module root; for any other
// package the README fields are discarded.
if .PackagePath != .ModulePath {
.ReadmeFilePath = ""
.ReadmeContents = ""
}
:= strings.Join(GeneratePathTokens(.PackagePath), " ")
, , := SearchDocumentSections(.Synopsis, .ReadmeFilePath, .ReadmeContents)
_, = .Exec(, upsertSearchStatement, .PackagePath, .ModulePath, .Version, , , , )
return
}
// GetPackagesForSearchDocumentUpsert returns UpsertSearchDocumentArgs for
// existing search documents whose updated_at is earlier than the given time
// ($1), up to the given row limit ($2). Each row is joined to its unit's
// README, if any; missing README columns are read as empty strings.
//
// For rows that are not redistributable, the synopsis and README fields are
// blanked unless the DB was opened with bypassLicenseCheck.
//
// Errors are wrapped by derrors.WrapStack with the time and limit.
func ( *DB) ( context.Context, time.Time, int) ( []UpsertSearchDocumentArgs, error) {
defer derrors.WrapStack(&, "GetPackagesForSearchDocumentUpsert(ctx, %s, %d)", , )
:= `
SELECT
sd.package_path,
sd.module_path,
sd.version,
sd.synopsis,
sd.redistributable,
r.file_path,
r.contents
FROM modules m
INNER JOIN units u
ON m.id = u.module_id
INNER JOIN paths p
ON p.id = u.path_id
LEFT JOIN readmes r
ON u.id = r.unit_id
INNER JOIN search_documents sd
ON sd.package_path = p.path
AND sd.module_path = m.module_path
AND sd.version = m.version
WHERE sd.updated_at < $1
LIMIT $2`
// Row callback: scan one row, apply the license check, and append the args
// to the result slice.
:= func( *sql.Rows) error {
var (
UpsertSearchDocumentArgs
bool
)
if := .Scan(&.PackagePath, &.ModulePath, &.Version, &.Synopsis, &,
database.NullIsEmpty(&.ReadmeFilePath), database.NullIsEmpty(&.ReadmeContents)); != nil {
return
}
// Do not hand back text from non-redistributable packages unless license
// checks are bypassed.
if ! && !.bypassLicenseCheck {
.Synopsis = ""
.ReadmeFilePath = ""
.ReadmeContents = ""
}
= append(, )
return nil
}
if := .db.RunQuery(, , , , ); != nil {
return nil,
}
return , nil
}
// UpdateSearchDocumentsImportedByCount recomputes imported-by counts and
// writes them to search_documents. It returns the int64 produced by
// updateImportedByCounts (presumably the number of rows updated — TODO
// confirm), or 0 and an error if an earlier step fails.
//
// The set of search packages and the counts are computed first, outside any
// transaction. The temporary-table insert, the comparison logging and the
// update then run inside a single transaction.
func ( *DB) ( context.Context) ( int64, error) {
defer derrors.WrapStack(&, "UpdateSearchDocumentsImportedByCount(ctx)")
, := .getSearchPackages()
if != nil {
return 0,
}
, := .computeImportedByCounts(, )
if != nil {
return 0,
}
= .db.Transact(, sql.LevelDefault, func( *database.DB) error {
// insertImportedByCounts creates a temporary table that is dropped on
// commit, so the steps that read it must share this transaction.
if := insertImportedByCounts(, , ); != nil {
return
}
if := compareImportedByCounts(, ); != nil {
return
}
, = updateImportedByCounts(, )
return
})
return ,
}
// getSearchPackages returns the set of all package paths present in
// search_documents, as a map whose values are all true. On a query or scan
// error it returns a nil map and the error, wrapped by derrors.WrapStack.
func ( *DB) ( context.Context) ( map[string]bool, error) {
defer derrors.WrapStack(&, "DB.getSearchPackages(ctx)")
= map[string]bool{}
= .db.RunQuery(, `SELECT package_path FROM search_documents`, func( *sql.Rows) error {
var string
if := .Scan(&); != nil {
return
}
[] = true
return nil
})
if != nil {
return nil,
}
return , nil
}
// computeImportedByCounts takes a set of package paths (map[string]bool) and
// returns a map[string]int of counts, or a nil map and an error.
//
// NOTE(review): most of this function's body — the query and the start of
// the row loop — is missing from this listing, so only the tail of the loop
// is described below. Confirm everything against the original file.
func ( *DB) ( context.Context, map[string]bool) ( map[string]int, error) {
defer derrors.WrapStack(&, "db.computeImportedByCounts(ctx)")
// Skip the row when a lookup in a map[...]bool is false — presumably the
// path is not in the supplied set of search packages.
if ![] {
continue
// Skip the row when the standard-library check matches or when one path is
// a prefix of the other. Appending "/" to both sides makes the prefix test
// match whole path elements only.
if ( == stdlib.ModulePath && stdlib.Contains()) || strings.HasPrefix(+"/", +"/") {
continue
}
[]++
}
if := .Err(); != nil {
return nil,
}
return , nil
}
// insertImportedByCounts creates the temporary table
// computed_imported_by_counts and bulk-inserts one (package_path,
// imported_by_count) row per entry of the counts map.
//
// The table is declared ON COMMIT DROP, so it only lives until the enclosing
// transaction commits; the function is meant to be called with a
// transaction-bound *database.DB.
func ( context.Context, *database.DB, map[string]int) ( error) {
defer derrors.WrapStack(&, "insertImportedByCounts(ctx, db, counts)")
const = `
CREATE TEMPORARY TABLE computed_imported_by_counts (
package_path TEXT NOT NULL,
imported_by_count INTEGER DEFAULT 0 NOT NULL
) ON COMMIT DROP;
`
if , := .Exec(, ); != nil {
return fmt.Errorf("CREATE TABLE: %v", )
}
// Flatten the map into a single value slice, two values per row, in the
// same order as the column list below.
:= make([]interface{}, 0, 2*len())
for , := range {
= append(, , )
}
:= []string{"package_path", "imported_by_count"}
return .BulkInsert(, "computed_imported_by_counts", , , "")
}
// compareImportedByCounts joins search_documents with
// computed_imported_by_counts on package_path and logs, via log.Infof, how
// the stored and newly computed imported-by counts differ. It logs the number
// of joined rows, how many will change, how many currently have a zero count,
// and how many non-zero rows change by more than a fixed fraction (0.05).
// It modifies no data; it returns an error only if the query or a row scan
// fails.
//
// NOTE(review): in this listing the closing backtick of the SQL string and
// all local identifiers are missing; confirm against the original file.
func ( context.Context, *database.DB) ( error) {
defer derrors.WrapStack(&, "compareImportedByCounts(ctx, tx)")
:= `
SELECT
s.package_path,
s.imported_by_count,
c.imported_by_count
FROM
search_documents s
INNER JOIN
computed_imported_by_counts c
ON
s.package_path = c.package_path
const = 0.05 // count how many counts change by at least this fraction
var , , , int
= .RunQuery(, , func( *sql.Rows) error {
var string
var , int
if := .Scan(&, &, &); != nil {
return
}
++
if != {
++
}
if == 0 {
++
return nil
}
:= math.Abs(float64(-)) / float64()
if > {
++
}
return nil
})
if != nil {
return
}
log.Infof(, "%6d total rows in search_documents match computed_imported_by_counts", )
log.Infof(, "%6d will change", )
log.Infof(, "%6d currently have a zero imported-by count", )
log.Infof(, "%6d of the non-zero rows will change by more than %d%%", , int(*100))
return nil
}
const = `
LOCK TABLE search_documents IN SHARE ROW EXCLUSIVE MODE;
UPDATE search_documents s
SET
imported_by_count = c.imported_by_count,
imported_by_count_updated_at = CURRENT_TIMESTAMP
FROM computed_imported_by_counts c
WHERE s.package_path = c.package_path;`
, := .Exec(, )
if != nil {
return 0, fmt.Errorf("error updating imported_by_count and imported_by_count_updated_at for search documents: %v", )
}
return , nil
}
var (
// commonHostnames is a set of widely used code-hosting and vanity-import
// hostnames. The path-token code later in this file consults it and skips
// a path element found here when an index equals 0 — presumably the first
// path element; confirm against the original file.
commonHostnames = map[string]bool{
"bitbucket.org": true,
"code.cloudfoundry.org": true,
"gitea.com": true,
"gitee.com": true,
"github.com": true,
"gitlab.com": true,
"go.etcd.io": true,
"go.googlesource.com": true,
"golang.org": true,
"google.golang.org": true,
"gopkg.in": true,
}
// commonHostParts is a set of generic hostname components.
// NOTE(review): its use is not visible in this listing — presumably these
// parts are excluded when a hostname is split into search tokens.
commonHostParts = map[string]bool{
"code": true,
"git": true,
"gitlab": true,
"go": true,
"google": true,
"www": true,
}
)
func ( string) []string {
= strings.Trim(, "/")
:= make(map[string]bool)
:= strings.Split(, "/")
for , := range {
:= strings.Split(, "-")
if len() > 1 {
for , := range {
[] = true
}
}
for := + 2; <= len(); ++ {
:= strings.Join([:], "/")
= strings.Trim(, "/")
[] = true
}
if == 0 && commonHostnames[] {
continue
func ( *DB) ( context.Context, UpsertSearchDocumentArgs, int) ( error) {
defer derrors.WrapStack(&, "DB.UpsertSearchDocumentWithImportedByCount(ctx, ddb, %q, %q)", .PackagePath, .ModulePath)
if := UpsertSearchDocument(, .db, ); != nil {
return
}
_, = .db.Exec(,
`UPDATE search_documents SET imported_by_count=$1 WHERE package_path=$2;`,
, .PackagePath)
return
![]() |
The pages are generated with Golds v0.3.2-preview (GOOS=darwin GOARCH=amd64). Golds is a Go 101 project developed by Tapir Liu. PRs and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the QR code on the left) to get the latest news about Golds. |