Copyright 2019 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.

package postgres

import (
	
	
	
	
	
	
	

	
	
	
	
	
	
	
	
	
	
	
	
	
)

	// searchLatency holds observed latency in individual search queries.
	searchLatency = stats.Float64(
		"go-discovery/search/latency",
		"Latency of a search query.",
		stats.UnitMilliseconds,
	// keySearchSource is a census tag for search query types.
	// SearchLatencyDistribution aggregates search request latency by search
	// query type.
	SearchLatencyDistribution = &view.View{
		Name:        "go-discovery/search/latency",
		Measure:     searchLatency,
		Aggregation: ochttp.DefaultLatencyDistribution,
		Description: "Search latency, by result source query type.",
		TagKeys:     []tag.Key{keySearchSource},
	// SearchResponseCount counts search responses by search query type.
	SearchResponseCount = &view.View{
		Name:        "go-discovery/search/count",
		Measure:     searchLatency,
		Aggregation: view.Count(),
		Description: "Search count, by result source query type.",
		TagKeys:     []tag.Key{keySearchSource},
	}
)
// searchResponse is used for internal bookkeeping when fanning out a search
// request to multiple different search queries.
//
// source is a unique identifier for the search query type (e.g. 'deep',
// 'popular'), to be used in logging and reporting.
//
// results are partially filled out from only the search_documents table.
//
// err indicates a technical failure of the search query, or that results are
// not provably complete.
// searchEvent is used to log structured information about search events for
// later analysis. A 'search event' occurs when a searcher or count estimate
// returns.
//
// Type is either the searcher name or 'estimate' (the count estimate).
//
// Latency is the duration that the operation took.
//
// Err is the error returned by the operation, if any.
// A searcher is used to execute a single search request.
// searcher is the function signature shared by every search strategy stored
// in pkgSearchers and symbolSearchers. The *DB receiver is an explicit first
// parameter so that method expressions such as (*DB).deepSearch can be used as
// values of this type.
type searcher func(
	db *DB,
	ctx context.Context,
	q string,
	limit, offset, maxResultCount int,
) searchResponse
// The pkgSearchers used by Search.
var (
	// pkgSearchers holds the package-level search strategies. Each key is
	// the same string that the strategy reports as its response source.
	pkgSearchers = map[string]searcher{
		"deep":    (*DB).deepSearch,
		"popular": (*DB).popularSearch,
	}

	// symbolSearchers holds the symbol-level search strategies, keyed the
	// same way as pkgSearchers.
	symbolSearchers = map[string]searcher{
		"symbol": (*DB).symbolSearch,
	}
)
// Search executes two search requests concurrently:
//   - a sequential scan of packages in descending order of popularity.
//   - all packages ("deep" search) using an inverted index to filter to search
//     terms.
//
// The sequential scan takes significantly less time when searching for very
// common terms (e.g. "errors", "cloud", or "kubernetes"), due to its ability to
// exit early once the requested page of search results is provably complete.
//
// Because 0 <= ts_rank() <= 1, we know that the highest score of any unscanned
// package is ln(e+N), where N is imported_by_count of the package we are
// currently considering. Therefore if the lowest scoring result of popular
// search is greater than ln(e+N), we know that we haven't missed any results
// and can return the search result immediately, cancelling other searches.
//
// On the other hand, if the popular search is slow, it is likely that the
// search term is infrequent, and deep search will be fast due to our inverted
// gin index on search tokens.
//
// The gap in this optimization is search terms that are very frequent, but
// rarely relevant: "int" or "package", for example. In these cases we'll pay
// the penalty of a deep search that scans nearly every package.
// Errors from this method are labelled "DB.Search(ctx, %q, %d, %d)" through
// derrors.WrapStack. The searcher set is symbolSearchers only when the boolean
// argument is true and both ExperimentSearchGrouping and
// ExperimentSymbolSearch are active; in every other case it is pkgSearchers.
// After the hedged search returns, results are filtered with IsExcluded,
// optionally regrouped with groupSearchResults, and finally capped in length.
//
// NOTE(review): identifiers in this function are blank in the reviewed text,
// so argument roles are inferred from position only; confirm against the
// original source.
func ( *DB) ( context.Context,  string, , ,  int,  bool) ( []*internal.SearchResult,  error) {
	defer derrors.WrapStack(&, "DB.Search(ctx, %q, %d, %d)", , , )

	 := 
Gather extra results for better grouping by module and series.
		 *= 5
	}

	var  map[string]searcher
	if  &&
		experiment.IsActive(, internal.ExperimentSearchGrouping) &&
		experiment.IsActive(, internal.ExperimentSymbolSearch) {
		 = symbolSearchers
	} else {
		 = pkgSearchers
	}
	// The trailing nil is the optional test hook of hedgedSearch; production
	// calls run without it.
	,  := .hedgedSearch(, , , , , , nil)
	if  != nil {
		return nil, 
Filter out excluded paths.
	var  []*internal.SearchResult
	for ,  := range .results {
		,  := .IsExcluded(, .PackagePath)
		if  != nil {
			return nil, 
		}
		if ! {
			 = append(, )
		}
	}
	// Grouping is applied only to package searches: it is skipped when the
	// boolean argument is set.
	if experiment.IsActive(, internal.ExperimentSearchGrouping) && ! {
		 = groupSearchResults()
	}
	if len() >  {
		 = [:]
	}
	return , nil
}
// Penalties to search scores, applied as multipliers to the score.
	// Module license is non-redistributable.
	// Module does not have a go.mod file.
	// Start this off gently (close to 1), but consider lowering
	// it as time goes by and more of the ecosystem converts to modules.
	noGoModPenalty = 0.8
)
// scoreExpr is the expression that computes the search score.
// It is the product of:
//   - The Postgres ts_rank score, based on the relevance of the document to
//     the query.
//   - The log of the module's popularity, estimated by the number of importing
//     packages. The log factor contains exp(1) so that it is always >= 1.
//     Taking the log of imported_by_count instead of using it directly makes
//     the effect less dramatic: being 2x as popular only has an additive
//     effect.
//   - A penalty factor for non-redistributable modules, since a lot of details
//     cannot be displayed.
//
// The first argument to ts_rank is an array of weights for the four tsvector
// sections, in the order D, C, B, A. The weights below match the defaults
// except for B.
// This value is a SQL fragment rather than a full statement: the deep search
// query splices it in through a %s verb as its "score" column. It refers to
// bind parameter $1 (the query text given to websearch_to_tsquery), so any
// statement that embeds it must bind the query as its first parameter. The
// two %f verbs are filled once, at package initialization, with
// nonRedistributablePenalty and noGoModPenalty, in that order.
var scoreExpr = fmt.Sprintf(`
		ts_rank('{0.1, 0.2, 1.0, 1.0}', tsv_search_tokens, websearch_to_tsquery($1)) *
		ln(exp(1)+imported_by_count) *
		CASE WHEN redistributable THEN 1 ELSE %f END *
		CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE %f END
	`, nonRedistributablePenalty, noGoModPenalty)
// hedgedSearch executes multiple search methods and returns the first
// available result.
// The optional guardTestResult func may be used to allow tests to control the
// order in which search results are returned.
// Errors are labelled "hedgedSearch(ctx, %q, %d, %d, %d)" through
// derrors.WrapStack. One goroutine is started per entry of the searcher map
// and each logs a searchEvent with its source, latency and error. Only the
// first response received is consumed: if it carries an error, that error is
// returned immediately without waiting for another searcher. A successful
// response is enriched by addPackageDataToSearchResults before being returned.
//
// NOTE(review): identifiers and several lines (channel creation, closing
// braces, metric recording) are missing from the reviewed text.
func ( *DB) ( context.Context,  string, , ,  int,  map[string]searcher,  func(string) func()) ( *searchResponse,  error) {
	defer derrors.WrapStack(&, "hedgedSearch(ctx, %q, %d, %d, %d)", , , , )

	 := time.Now()
cancel all unfinished searches when a result (or error) is returned. The effectiveness of this depends on the database driver.
	,  := context.WithCancel()
	defer ()
Fan out our search requests.
	for ,  := range  {
		 := 
		go func() {
			 := time.Now()
			 := (, , , , , )
			log.Debug(, searchEvent{
				Type:    .source,
				Latency: time.Since(),
				Err:     .err,
			})
			// Test-only hook (presumably guardTestResult): the func it
			// returns for this source is deferred, so it runs when this
			// goroutine's closure returns, i.e. after the send below.
			if  != nil {
				defer (.source)()
			}
			 <- 
		}()
Note for future readers: in previous iterations of this code we kept reading responses if the first one had an error, with the goal to minimize error ratio. That didn't behave well if Postgres was overloaded.
	 := <-
	if .err != nil {
		return nil, fmt.Errorf("%q search failed: %v", .source, .err)
cancel proactively here: we've got the search result we need.
latency is only recorded for valid search results, as fast failures could skew the latency distribution. Note that this latency measurement might differ meaningfully from the resp.Latency, if time was spent waiting for the result count estimate.
To avoid fighting with the query planner, our searches only hit the search_documents table and we enrich after getting the results. In the future, we may want to fully denormalize and put all search data in the search_documents table.
	if  := .addPackageDataToSearchResults(, .results);  != nil {
		return nil, 
	}
	return &, nil
}

// hllRegisterCount is substituted into upsertSearchStatement, where the
// expression `hll_hash(p1.path) & (hllRegisterCount - 1)` selects the value
// stored in the hll_register column. That bit mask only acts as a modulo when
// the count is a power of two, which is why it is spelled as a shift (1<<7 is
// 128).
const hllRegisterCount = 1 << 7
// deepSearch searches all packages for the query. It is slower, but results
// are always valid.
// The response built here is tagged with source "deep". The query ranks rows
// of search_documents that match websearch_to_tsquery($1) using scoreExpr,
// keeps only rows scoring above 0.1, and returns COUNT(*) OVER() as the total
// that is scanned into NumResults on every row.
//
// NOTE(review): identifiers are blank in the reviewed text; the parameter
// roles noted below are inferred from the SQL placeholders and call positions.
func ( *DB) ( context.Context,  string, , ,  int) searchResponse {
	 := fmt.Sprintf(`
		SELECT *, COUNT(*) OVER() AS total
		FROM (
			SELECT
				package_path,
				version,
				module_path,
				commit_time,
				imported_by_count,
				(%s) AS score
				FROM
					search_documents
				WHERE tsv_search_tokens @@ websearch_to_tsquery($1)
				ORDER BY
					score DESC,
					commit_time DESC,
					package_path
		) r
		WHERE r.score > 0.1
		LIMIT $2
		OFFSET $3`, scoreExpr)
	var  []*internal.SearchResult
	 := func( *sql.Rows) error {
		var  internal.SearchResult
		if  := .Scan(&.PackagePath, &.Version, &.ModulePath, &.CommitTime,
			&.NumImportedBy, &.Score, &.NumResults);  != nil {
			return fmt.Errorf("rows.Scan(): %v", )
		}
		 = append(, &)
		return nil
	}
	 := .db.RunQuery(, , , , , )
	// On a query error a value is reset to nil; presumably this drops any
	// partially collected rows so results never accompany an error.
	if  != nil {
		 = nil
	}
	// When the reported total exceeds a bound (presumably maxResultCount),
	// every row's NumResults is overwritten with that bound.
	if len() > 0 && [0].NumResults > uint64() {
		for ,  := range  {
			.NumResults = uint64()
		}
	}
	return searchResponse{
		source:  "deep",
		results: ,
		err:     ,
	}
}

// The response built here is tagged with source "popular". The work is done
// by the SQL function popular_search, called with five bind parameters; the
// last two are nonRedistributablePenalty and noGoModPenalty. Unlike the deep
// search, no total column is scanned: NumResults is assigned in Go to every
// returned row.
//
// NOTE(review): identifiers and the `if` header around the partial-results
// adjustment are missing from the reviewed text.
func ( *DB) ( context.Context,  string, , ,  int) searchResponse {
	 := `
		SELECT
			package_path,
			version,
			module_path,
			commit_time,
			imported_by_count,
			score
		FROM popular_search($1, $2, $3, $4, $5)`
	var  []*internal.SearchResult
	 := func( *sql.Rows) error {
		var  internal.SearchResult
		if  := .Scan(&.PackagePath, &.Version, &.ModulePath, &.CommitTime,
			&.NumImportedBy, &.Score);  != nil {
			return fmt.Errorf("rows.Scan(): %v", )
		}
		 = append(, &)
		return nil
	}
	 := .db.RunQuery(, , , , , , nonRedistributablePenalty, noGoModPenalty)
	// On a query error a value is reset to nil; presumably this drops any
	// partially collected rows.
	if  != nil {
		 = nil
	}
	 := 
It is practically impossible that len(results) < limit, because popular search will never linearly scan everything before deep search completes, but just to be slightly more theoretically correct, if our search results are partial we know that we have exhausted all results.
		 =  + len()
	}
	for ,  := range  {
		.NumResults = uint64()
	}
	return searchResponse{
		source:  "popular",
		results: ,
		err:     ,
	}
}
// symbolSearch searches all symbols in the symbol_search_documents table for
// the query.
//
// TODO(https://golang.org/issue/44142): factor out common code between
// symbolSearch and deepSearch.
// The response built here is tagged with source "symbol". The query joins
// search_documents with symbol_search_documents and symbol_names, scores rows
// with symbolScoreExpr, and aggregates the matching symbol names per package
// with ARRAY_AGG. NULL entries of that array are skipped when filling Symbols.
//
// NOTE(review): identifiers are blank in the reviewed text; roles are
// inferred from SQL placeholders and call positions.
func ( *DB) ( context.Context,  string, , ,  int) searchResponse {
	 := fmt.Sprintf(`
		SELECT
			package_path,
			version,
			module_path,
			commit_time,
			imported_by_count,
			ARRAY_AGG(name) AS symbol_names,
			COUNT(*) OVER() AS total
		FROM (
			SELECT
				sd.package_path,
				sd.version,
				sd.module_path,
				sd.commit_time,
				sd.imported_by_count,
				(%s) AS score,
				s.name
			FROM search_documents sd
			INNER JOIN symbol_search_documents ssd
				ON sd.package_path_id = ssd.package_path_id
			INNER JOIN symbol_names s
				ON s.id = ssd.symbol_name_id
			WHERE
				ssd.tsv_symbol_tokens @@ to_tsquery('simple', $1)
			ORDER BY
				score DESC,
				commit_time DESC,
				package_path
		) r
		WHERE r.score > 0.1
		GROUP BY 1, 2, 3, 4, 5
		LIMIT $2
		OFFSET $3`, symbolScoreExpr)

	var  []*internal.SearchResult
	 := func( *sql.Rows) error {
		var (
			    internal.SearchResult
			 []sql.NullString
		)
		if  := .Scan(&.PackagePath, &.Version, &.ModulePath, &.CommitTime,
			&.NumImportedBy, pq.Array(&), &.NumResults);  != nil {
			return fmt.Errorf("symbolSearch: rows.Scan(): %v", )
		}
		for ,  := range  {
			if .Valid {
				.Symbols = append(.Symbols, .String)
			}
		}
		 = append(, &)
		return nil
	}
Search for an OR of the terms, so that if the user searches for "db begin", queries matching "db" and "begin" will be returned.
	// NOTE(review): splitting on a single space turns repeated, leading or
	// trailing spaces into empty terms (e.g. "a  b" becomes "a |  | b"),
	// which to_tsquery is likely to reject. strings.Fields would avoid that;
	// confirm whether callers already normalize whitespace.
	 = strings.Join(strings.Split(, " "), " | ")
	 := .db.RunQuery(, , , , , )
	// On a query error a value is reset to nil; presumably this drops any
	// partially collected rows.
	if  != nil {
		 = nil
	}
	// When the reported total exceeds a bound (presumably maxResultCount),
	// every row's NumResults is overwritten with that bound.
	if len() > 0 && [0].NumResults > uint64() {
		for ,  := range  {
			.NumResults = uint64()
		}
	}
	return searchResponse{
		source:  "symbol",
		results: ,
		err:     ,
	}
}

// symbolScoreExpr is the SQL fragment that the symbol search query splices in
// as its "score" column. It has the same shape as scoreExpr but ranks against
// ssd.tsv_symbol_tokens with to_tsquery('simple', $1), so the embedding
// statement must alias symbol_search_documents as ssd and bind the query text
// as $1. The two %f verbs are filled with nonRedistributablePenalty and
// noGoModPenalty, in that order.
var symbolScoreExpr = fmt.Sprintf(`
		ts_rank('{0.1, 0.2, 1.0, 1.0}', ssd.tsv_symbol_tokens, to_tsquery('simple', $1)) *
		ln(exp(1)+imported_by_count) *
		CASE WHEN redistributable THEN 1 ELSE %f END *
		CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE %f END
	`, nonRedistributablePenalty, noGoModPenalty)
// addPackageDataToSearchResults adds package information to SearchResults that
// is not stored in the search_documents table.
// Errors are labelled "DB.addPackageDataToSearchResults(results)" through
// derrors.WrapStack. An empty input returns nil without querying. Otherwise
// one "(path, version, module_path)" tuple is built per result, each component
// escaped with pq.QuoteLiteral, and all tuples are matched by a single
// IN (...) query over units, paths, modules and documentation. Each returned
// row updates the result with the same package path: Name is always set,
// Synopsis only when the row is redistributable or bypassLicenseCheck is set,
// and Licenses is extended with the non-empty license types, then sorted and
// de-duplicated.
func ( *DB) ( context.Context,  []*internal.SearchResult) ( error) {
	defer derrors.WrapStack(&, "DB.addPackageDataToSearchResults(results)")
	if len() == 0 {
		return nil
	}
	var (
resultMap tracks PackagePath->SearchResult, to allow joining with the returned package data.
		 = make(map[string]*internal.SearchResult)
	)
	for ,  := range  {
		[.PackagePath] = 
		 := fmt.Sprintf("(%s, %s, %s)", pq.QuoteLiteral(.PackagePath),
			pq.QuoteLiteral(.Version), pq.QuoteLiteral(.ModulePath))
		 = append(, )
	}
	 := fmt.Sprintf(`
		SELECT
			p.path,
			u.name,
			d.synopsis,
			u.license_types,
			u.redistributable
		FROM
			units u
		INNER JOIN
			paths p
		ON u.path_id = p.id
		INNER JOIN
			modules m
		ON u.module_id = m.id
		LEFT JOIN
			documentation d
		ON u.id = d.unit_id
		WHERE
			(p.path, m.version, m.module_path) IN (%s)`, strings.Join(, ","))
	 := func( *sql.Rows) error {
		var (
			, ,  string
			         []string
			               bool
		)
		// documentation is LEFT JOINed, so synopsis may be NULL;
		// database.NullIsEmpty is used for that column only.
		if  := .Scan(&, &, database.NullIsEmpty(&), pq.Array(&), &);  != nil {
			return fmt.Errorf("rows.Scan(): %v", )
		}
		// A row whose path was never requested indicates a logic error and
		// aborts the scan.
		,  := []
		if ! {
			return fmt.Errorf("BUG: unexpected package path: %q", )
		}
		.Name = 
		if  || .bypassLicenseCheck {
			.Synopsis = 
		}
		for ,  := range  {
			if  != "" {
				.Licenses = append(.Licenses, )
			}
		}
		.Licenses = sortAndDedup(.Licenses)
		return nil
	}
	return .db.RunQuery(, , )
}

// Returns the distinct strings of the input in ascending order (sort.Strings).
// Duplicates are removed by passing the values through a map used as a set.
// An empty or nil input yields a nil slice, because the output is only
// appended to.
//
// NOTE(review): the function name is blank in the reviewed text; this is
// presumably sortAndDedup, the only helper with this signature that is called
// elsewhere in the file.
func ( []string) []string {
	var  []string
	 := map[string]bool{}
	for ,  := range  {
		[] = true
	}
	for  := range  {
		 = append(, )
	}
	sort.Strings()
	return 
}
// groupSearchResults groups and re-orders the list of SearchResults by module
// and series path and returns a new list of SearchResults.
//
// The second and later packages from a module are grouped under the first
// package, and removed from the top-level list.
//
// Higher major versions of a module are put before lower ones.
//
// Packages from lower major versions of the module are grouped under the first
// package of the highest major version. But they are not removed from the
// top-level list.
	// Put higher major versions first, otherwise observing score.
	sort.Slice(, func(,  int) bool {
		,  := internal.SeriesPathAndMajorVersion([].ModulePath)
		,  := internal.SeriesPathAndMajorVersion([].ModulePath)
		if  !=  {
			return [].Score > [].Score
		}
		return  > 
	})

	 := map[string]*internal.SearchResult{} // from module path to first result
	 := map[string]*internal.SearchResult{}  // for series path to first result
	var  []*internal.SearchResult
	for ,  := range  {
		 := [.ModulePath]
First result with this module path; remember it and keep it.
			[.ModulePath] = 
			 = append(, )
Record this result under the first result.
			.SameModule = append(.SameModule, )
		}

		 := internal.SeriesPathForModule(.ModulePath)
		 = []
First time we've seen anything from this series: remember it.
			[] = 
Result is from a different (lower) major version. Record this result under the first.
			.LowerMajor = append(.LowerMajor, )
		}
	}
	return 
}

// upsertSearchStatement inserts or updates one search_documents row.
//
// Bind parameters:
//
//	$1  package path  (selects the unit via paths.path)
//	$2  module path   (selects the module)
//	$3  version       (selects the module)
//	$4  text indexed with the 'path_tokens' configuration, weight A
//	$5  text indexed with weight B
//	$6  text indexed with weight C
//	$7  text indexed with weight D
//
// The single %d verb is filled with hllRegisterCount. On a package_path
// conflict the existing row is overwritten, except that package_path_id,
// module_path_id and the hll_* columns are left untouched, and
// version_updated_at is refreshed only when the version actually changes.
var upsertSearchStatement = fmt.Sprintf(`
	INSERT INTO search_documents (
		package_path,
		package_path_id,
		version,
		module_path,
		module_path_id,
		name,
		synopsis,
		license_types,
		redistributable,
		version_updated_at,
		commit_time,
		has_go_mod,
		tsv_search_tokens,
		hll_register,
		hll_leading_zeros
	)
	SELECT
		p1.path,
		p1.id,
		m.version,
		m.module_path,
		p2.id,
		u.name,
		d.synopsis,
		u.license_types,
		u.redistributable,
		CURRENT_TIMESTAMP,
		m.commit_time,
		m.has_go_mod,
		(
			SETWEIGHT(TO_TSVECTOR('path_tokens', $4), 'A') ||
			SETWEIGHT(TO_TSVECTOR($5), 'B') ||
			SETWEIGHT(TO_TSVECTOR($6), 'C') ||
			SETWEIGHT(TO_TSVECTOR($7), 'D')
		),
		hll_hash(p1.path) & (%d - 1),
		hll_zeros(hll_hash(p1.path))
	FROM units u
	INNER JOIN modules m ON u.module_id = m.id
	INNER JOIN paths p1 ON p1.id = u.path_id
	LEFT JOIN paths p2 ON p2.path = m.module_path
	LEFT JOIN documentation d ON u.id = d.unit_id
	WHERE
		p1.path = $1
		AND m.module_path = $2
		AND m.version = $3
	LIMIT 1 -- could be multiple build contexts
	ON CONFLICT (package_path)
	DO UPDATE SET
		package_path=excluded.package_path,
		version=excluded.version,
		module_path=excluded.module_path,
		name=excluded.name,
		synopsis=excluded.synopsis,
		license_types=excluded.license_types,
		redistributable=excluded.redistributable,
		commit_time=excluded.commit_time,
		has_go_mod=excluded.has_go_mod,
		tsv_search_tokens=excluded.tsv_search_tokens,
		-- the hll fields are functions of path, so they don't change
		version_updated_at=(
			CASE WHEN excluded.version = search_documents.version
			THEN search_documents.version_updated_at
			ELSE CURRENT_TIMESTAMP
			END)
	;`, hllRegisterCount)
// upsertSearchDocuments adds search information for mod to the
// search_documents table. It assumes that all non-redistributable data has
// been removed from mod.
// Errors are labelled "upsertSearchDocuments(ctx, %q, %q)" with the module
// path and version. The work runs inside a trace span named
// "UpsertSearchDocuments". Every package returned by Packages() is processed
// except those for which isInternalPackage is true; for each one
// UpsertSearchDocument is called, followed by upsertSearchDocumentSymbols, and
// the first error stops the loop.
//
// NOTE(review): identifiers and the `if` header guarding the Documentation[0]
// access are missing from the reviewed text.
func ( context.Context,  *database.DB,  *internal.Module) ( error) {
	defer derrors.WrapStack(&, "upsertSearchDocuments(ctx, %q, %q)", .ModulePath, .Version)
	,  := trace.StartSpan(, "UpsertSearchDocuments")
	defer .End()
	for ,  := range .Packages() {
		if isInternalPackage(.Path) {
			continue
		}
		 := UpsertSearchDocumentArgs{
			PackagePath: .Path,
			ModulePath:  .ModulePath,
			Version:     .Version,
		}
Use the synopsis of the first GOOS/GOARCH pair.
			.Synopsis = .Documentation[0].Synopsis
		}
		// README data is optional; the fields stay empty when absent.
		if .Readme != nil {
			.ReadmeFilePath = .Readme.Filepath
			.ReadmeContents = .Readme.Contents
		}
		if  := UpsertSearchDocument(, , );  != nil {
			return 
		}
		if  := upsertSearchDocumentSymbols(, , .Path, .ModulePath, .Version);  != nil {
			return 
		}
	}
	return nil
}

// UpsertSearchDocumentArgs bundles the per-package inputs of
// UpsertSearchDocument. Field order is part of the type's layout and is kept
// as PackagePath, ModulePath, Version, Synopsis, ReadmeFilePath,
// ReadmeContents.
type UpsertSearchDocumentArgs struct {
	// Identity of the search_documents row: package path, module path and
	// module version.
	PackagePath, ModulePath, Version string

	// Synopsis is the package synopsis text.
	Synopsis string

	// README file path and contents. UpsertSearchDocument clears both
	// when PackagePath differs from ModulePath.
	ReadmeFilePath, ReadmeContents string
}
// UpsertSearchDocument inserts a row in search_documents for the given
// package. The given module should have already been validated via a call to
// validateModule.
// Errors are labelled "DB.UpsertSearchDocument(ctx, ddb, %q, %q)" with the
// package and module paths. The statement executed is upsertSearchStatement
// with seven arguments: package path, module path, version, the
// GeneratePathTokens output joined with single spaces, and the three values
// returned by SearchDocumentSections. The args struct is received by value,
// so clearing the README fields below does not affect the caller's copy.
func ( context.Context,  *database.DB,  UpsertSearchDocumentArgs) ( error) {
	defer derrors.WrapStack(&, "DB.UpsertSearchDocument(ctx, ddb, %q, %q)", .PackagePath, .ModulePath)
Only summarize the README if the package and module have the same path. If this changes, fix DB.ReInsertLatestVersion.
	if .PackagePath != .ModulePath {
		.ReadmeFilePath = ""
		.ReadmeContents = ""
	}
	 := strings.Join(GeneratePathTokens(.PackagePath), " ")
	, ,  := SearchDocumentSections(.Synopsis, .ReadmeFilePath, .ReadmeContents)
	_,  = .Exec(, upsertSearchStatement, .PackagePath, .ModulePath, .Version, , , , )
	return 
}
// GetPackagesForSearchDocumentUpsert fetches search information for packages
// in search_documents whose update time is before the given time.
// Errors are labelled "GetPackagesForSearchDocumentUpsert(ctx, %s, %d)" with
// the cutoff time and the row limit, which are bound as $1 and $2. Each row
// becomes one UpsertSearchDocumentArgs; README columns come from a LEFT JOIN,
// so NULLs are read as empty strings. For rows that are not redistributable,
// Synopsis and both README fields are blanked unless bypassLicenseCheck is
// set on the DB.
func ( *DB) ( context.Context,  time.Time,  int) ( []UpsertSearchDocumentArgs,  error) {
	defer derrors.WrapStack(&, "GetPackagesForSearchDocumentUpsert(ctx, %s, %d)", , )

	 := `
		SELECT
			sd.package_path,
			sd.module_path,
			sd.version,
			sd.synopsis,
			sd.redistributable,
			r.file_path,
			r.contents
		FROM modules m
		INNER JOIN units u
		ON m.id = u.module_id
		INNER JOIN paths p
		ON p.id = u.path_id
		LEFT JOIN readmes r
		ON u.id = r.unit_id
		INNER JOIN search_documents sd
		ON sd.package_path = p.path
		    AND sd.module_path = m.module_path
		    AND sd.version = m.version
		WHERE sd.updated_at < $1
		LIMIT $2`

	 := func( *sql.Rows) error {
		var (
			      UpsertSearchDocumentArgs
			 bool
		)
		if  := .Scan(&.PackagePath, &.ModulePath, &.Version, &.Synopsis, &,
			database.NullIsEmpty(&.ReadmeFilePath), database.NullIsEmpty(&.ReadmeContents));  != nil {
			return 
		}
		// Strip license-restricted text before handing the row back.
		if ! && !.bypassLicenseCheck {
			.Synopsis = ""
			.ReadmeFilePath = ""
			.ReadmeContents = ""
		}
		 = append(, )
		return nil
	}
	if  := .db.RunQuery(, , , , );  != nil {
		return nil, 
	}
	return , nil
}
// UpdateSearchDocumentsImportedByCount updates imported_by_count and
// imported_by_count_updated_at.
//
// It does so by completely recalculating the imported-by counts from the
// imports_unique table.
//
// UpdateSearchDocumentsImportedByCount returns the number of rows updated.
// Errors are labelled "UpdateSearchDocumentsImportedByCount(ctx)". The set of
// searchable packages and the recomputed counts are produced before the
// transaction starts (getSearchPackages, computeImportedByCounts). A single
// transaction at sql.LevelDefault then runs, in order,
// insertImportedByCounts, compareImportedByCounts and updateImportedByCounts;
// the first error aborts the closure.
func ( *DB) ( context.Context) ( int64,  error) {
	defer derrors.WrapStack(&, "UpdateSearchDocumentsImportedByCount(ctx)")

	,  := .getSearchPackages()
	if  != nil {
		return 0, 
	}
	,  := .computeImportedByCounts(, )
	if  != nil {
		return 0, 
	}
	 = .db.Transact(, sql.LevelDefault, func( *database.DB) error {
		if  := insertImportedByCounts(, , );  != nil {
			return 
		}
		if  := compareImportedByCounts(, );  != nil {
			return 
		}
		// Plain assignment (not :=): the results are stored in variables
		// declared outside this closure, presumably the named results.
		,  = updateImportedByCounts(, )
		return 
	})
	return , 
}
// getSearchPackages returns the set of package paths that are in the
// search_documents table.
// Errors are labelled "DB.getSearchPackages(ctx)". The whole package_path
// column of search_documents is read, without a WHERE clause, into a
// map[string]bool used as a set. On a query or scan error the partially
// filled map is discarded and nil is returned with the error.
func ( *DB) ( context.Context) ( map[string]bool,  error) {
	defer derrors.WrapStack(&, "DB.getSearchPackages(ctx)")

	 = map[string]bool{}
	 = .db.RunQuery(, `SELECT package_path FROM search_documents`, func( *sql.Rows) error {
		var  string
		if  := .Scan(&);  != nil {
			return 
		}
		[] = true
		return nil
	})
	if  != nil {
		return nil, 
	}
	return , nil
}

// Errors are labelled "db.computeImportedByCounts(ctx)". The function scans
// the deduplicated (from_path, from_module_path, to_path) rows of
// imports_unique and increments an integer counter in the result map for each
// row that is kept. A row is skipped when the map lookup against the supplied
// set fails, or when the stdlib/module-prefix condition on the
// strings.HasPrefix line holds. Rows are closed via defer, and the iteration
// error from Err() is checked after the loop.
//
// NOTE(review): identifiers, the result-map initialization and two closing
// braces are missing from the reviewed text.
func ( *DB) ( context.Context,  map[string]bool) ( map[string]int,  error) {
	defer derrors.WrapStack(&, "db.computeImportedByCounts(ctx)")

Get all (from_path, to_path) pairs, deduped. Also get the from_path's module path.
	,  := .db.Query(, `
		SELECT
			from_path, from_module_path, to_path
		FROM
			imports_unique
		GROUP BY
			from_path, from_module_path, to_path;
	`)
	if  != nil {
		return nil, 
	}
	defer .Close()
	for .Next() {
		var , ,  string
		if  := .Scan(&, &, &);  != nil {
			return nil, 
Don't count an importer if it's not in search_documents.
		if ![] {
			continue
Don't count an importer if it's in the same module as what it's importing. Approximate that check by seeing if from_module_path is a prefix of to_path. (In some cases, e.g. when to_path is in a nested module, that is not correct.)
		if ( == stdlib.ModulePath && stdlib.Contains()) || strings.HasPrefix(+"/", +"/") {
			continue
		}
		[]++
	}
	if  := .Err();  != nil {
		return nil, 
	}
	return , nil
}

// Errors are labelled "insertImportedByCounts(ctx, db, counts)". The function
// creates the temporary table computed_imported_by_counts and bulk-inserts
// one (package_path, imported_by_count) row per entry of the counts map. The
// table is declared ON COMMIT DROP, so it only exists until the surrounding
// transaction ends; this function is therefore meant to be called inside one.
func ( context.Context,  *database.DB,  map[string]int) ( error) {
	defer derrors.WrapStack(&, "insertImportedByCounts(ctx, db, counts)")

	const  = `
		CREATE TEMPORARY TABLE computed_imported_by_counts (
			package_path      TEXT NOT NULL,
			imported_by_count INTEGER DEFAULT 0 NOT NULL
		) ON COMMIT DROP;
    `
	if ,  := .Exec(, );  != nil {
		return fmt.Errorf("CREATE TABLE: %v", )
	}
	// Flatten the map into [path, count, path, count, ...]; capacity is
	// reserved for exactly two values per entry.
	 := make([]interface{}, 0, 2*len())
	for ,  := range  {
		 = append(, , )
	}
	 := []string{"package_path", "imported_by_count"}
	return .BulkInsert(, "computed_imported_by_counts", , , "")
}

// Errors are labelled "compareImportedByCounts(ctx, tx)". This function is
// diagnostic only: it joins search_documents with
// computed_imported_by_counts, tallies four counters while scanning, and
// writes them with log.Infof. It issues no INSERT, UPDATE or DELETE.
//
// NOTE(review): identifiers and the line closing the SQL string are missing
// from the reviewed text, so which counter is which is inferred from the log
// messages at the end.
func ( context.Context,  *database.DB) ( error) {
	defer derrors.WrapStack(&, "compareImportedByCounts(ctx, tx)")

	 := `
		SELECT
			s.package_path,
			s.imported_by_count,
			c.imported_by_count
		FROM
			search_documents s
		INNER JOIN
			computed_imported_by_counts c
		ON
			s.package_path = c.package_path
Compute some info about the changes to import-by counts.
	const  = 0.05 // count how many counts change by at least this fraction
	var , , ,  int
	 = .RunQuery(, , func( *sql.Rows) error {
		var  string
		var ,  int
		if  := .Scan(&, &, &);  != nil {
			return 
		}
		++
		if  !=  {
			++
		}
		if  == 0 {
			++
			return nil
		}
		 := math.Abs(float64(-)) / float64()
		if  >  {
			++
		}
		return nil
	})
	if  != nil {
		return 
	}
	log.Infof(, "%6d total rows in search_documents match computed_imported_by_counts", )
	log.Infof(, "%6d will change", )
	log.Infof(, "%6d currently have a zero imported-by count", )
	log.Infof(, "%6d of the non-zero rows will change by more than %d%%", , int(*100))
	return nil
}
// updateImportedByCounts updates the imported_by_count column in
// search_documents for every package in computed_imported_by_counts.
//
// A row is updated even if the value doesn't change, so that the
// imported_by_count_updated_at column is set.
//
// Note that if a package is never imported, its imported_by_count column will
// be the default (0) and its imported_by_count_updated_at column will never be
// set.
	// Lock the entire table to avoid deadlock. Without the lock, the update
	// can fail because module inserts are concurrently modifying rows of
	// search_documents.
	// See https://www.postgresql.org/docs/11/explicit-locking.html for what
	// locks mean.
	// See https://www.postgresql.org/docs/11/sql-lock.html for the LOCK
	// statement, notably the paragraph beginning "If a transaction of this
	// sort is going to change the data...".
	const  = `
		LOCK TABLE search_documents IN SHARE ROW EXCLUSIVE MODE;
		UPDATE search_documents s
		SET
			imported_by_count = c.imported_by_count,
			imported_by_count_updated_at = CURRENT_TIMESTAMP
		FROM computed_imported_by_counts c
		WHERE s.package_path = c.package_path;`

	,  := .Exec(, )
	if  != nil {
		return 0, fmt.Errorf("error updating imported_by_count and imported_by_count_updated_at for search documents: %v", )
	}
	return , nil
}

var (
	// commonHostnames is the set of hosting domains that are too widespread
	// to be useful as search tokens. GeneratePathTokens consults it for the
	// first element of a package path.
	commonHostnames = func() map[string]bool {
		hosts := []string{
			"bitbucket.org",
			"code.cloudfoundry.org",
			"gitea.com",
			"gitee.com",
			"github.com",
			"gitlab.com",
			"go.etcd.io",
			"go.googlesource.com",
			"golang.org",
			"google.golang.org",
			"gopkg.in",
		}
		set := make(map[string]bool, len(hosts))
		for _, host := range hosts {
			set[host] = true
		}
		return set
	}()

	// commonHostParts is the set of dot-separated host name labels that are
	// too generic to be useful as search tokens.
	commonHostParts = func() map[string]bool {
		parts := []string{"code", "git", "gitlab", "go", "google", "www"}
		set := make(map[string]bool, len(parts))
		for _, part := range parts {
			set[part] = true
		}
		return set
	}()
)
// GeneratePathTokens returns the subPaths and path token parts that will be
// indexed for search, which includes (1) the packagePath (2) all sub-paths of
// the packagePath (3) all parts for a path element that is delimited by a dash
// and (4) all parts of a path element that is delimited by a dot, except for
// the last element.
// The input is trimmed of leading and trailing slashes and split on "/".
// Tokens are collected in a map used as a set, so duplicates collapse; the
// result is sorted with sort.Strings and never contains an empty string.
// For each path element, dash-separated parts are added only when there is
// more than one, and joined sub-paths are added before the common-hostname
// check, so they are recorded even when the first element is a key of
// commonHostnames. Only the element itself and its dot-separated parts are
// skipped in that case.
//
// NOTE(review): identifiers, one `}` and the `if` header of the
// commonHostParts check are missing from the reviewed text.
func ( string) []string {
	 = strings.Trim(, "/")

	 := make(map[string]bool)
	 := strings.Split(, "/")
	for ,  := range  {
		 := strings.Split(, "-")
		if len() > 1 {
			for ,  := range  {
				[] = true
			}
		}
		for  :=  + 2;  <= len(); ++ {
			 := strings.Join([:], "/")
			 = strings.Trim(, "/")
			[] = true
		}

		if  == 0 && commonHostnames[] {
			continue
Only index host names if they are not part of commonHostnames.
		[] = true
		 := strings.Split(, ".")
		if len() > 1 {
			for ,  := range [:len()-1] {
If the host is not in commonHostnames, we want to index each element up to the extension. For example, if the host is sigs.k8s.io, we want to index sigs and k8s. Skip common host parts.
					[] = true
				}
			}
		}
	}

	var  []string
	for  := range  {
		if len() > 0 {
			 = append(, )
		}
	}
	sort.Strings()
	return 
}
// isInternalPackage reports whether the path represents an internal directory.
// The path is split on "/" and the result is true as soon as any element is
// exactly "internal". Elements that merely contain the word (for example
// "internals" or "myinternal") do not match, and the empty string yields
// false.
func ( string) bool {
	for ,  := range strings.Split(, "/") {
		if  == "internal" {
			return true
		}
	}
	return false
}
// DeleteOlderVersionFromSearchDocuments deletes from search_documents every
// package with the given module path whose version is older than the given
// version. It is used when fetching a module with an alternative path. See
// internal/worker/fetch.go:fetchAndUpdateState.
// Errors are labelled "DeleteOlderVersionFromSearchDocuments(ctx, %q, %q)".
// All (package_path, version) rows for the module path are read, and those
// whose semver.Compare result against the reference version is negative are
// collected. If none qualify the function returns nil without issuing a
// DELETE; otherwise one DELETE removes them and the affected-row count is
// logged.
//
// NOTE(review): the DELETE statement is assembled by joining package paths
// between single quotes instead of using bind parameters. The paths come from
// the database rather than from a request, but a path containing a single
// quote would still break the statement; consider a parameterized form.
//
// NOTE(review): identifiers and the line opening the enclosing closure
// (matching the final "})") are missing from the reviewed text.
func ( *DB) ( context.Context, ,  string) ( error) {
	defer derrors.WrapStack(&, "DeleteOlderVersionFromSearchDocuments(ctx, %q, %q)", , )

Collect all package paths in search_documents with the given module path and an older version. (package_path is the primary key of search_documents.)
		var  []string
		 := `
			SELECT package_path, version
			FROM search_documents
			WHERE module_path = $1
		`
		 := .RunQuery(, , func( *sql.Rows) error {
			var ,  string
			if  := .Scan(&, &);  != nil {
				return 
			}
			if semver.Compare(, ) < 0 {
				 = append(, )
			}
			return nil
		}, )
		if  != nil {
			return 
		}
		if len() == 0 {
			return nil
		}
Delete all of those paths.
		 := fmt.Sprintf(`DELETE FROM search_documents WHERE package_path IN ('%s')`, strings.Join(, `', '`))
		,  := .Exec(, )
		if  != nil {
			return 
		}
		log.Infof(, "deleted %d rows from search_documents", )
		return nil
	})
}
// UpsertSearchDocumentWithImportedByCount is the same as UpsertSearchDocument,
// except it also updates the imported by count. This is only used for testing.
func ( *DB) ( context.Context,  UpsertSearchDocumentArgs,  int) ( error) {
	defer derrors.WrapStack(&, "DB.UpsertSearchDocumentWithImportedByCount(ctx, ddb, %q, %q)", .PackagePath, .ModulePath)

	if  := UpsertSearchDocument(, .db, );  != nil {
		return 
	}
	_,  = .db.Exec(,
		`UPDATE search_documents SET imported_by_count=$1 WHERE package_path=$2;`,
		, .PackagePath)
	return