Copyright 2020 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.

package frontend

import (
	
	
	
	
	
	
	
	

	
	
	
	
	
	
	
	
	
	
	
	
	
)

errModuleDoesNotExist indicates that we have attempted to fetch the module, and the proxy returned a status 404/410. There is a row for this module version in version_map.
errPathDoesNotExistInModule indicates that a module for the path prefix exists, but within that module version, this fullPath could not be found.
	errPathDoesNotExistInModule = errors.New("path does not exist in module")
	fetchTimeout                = 30 * time.Second
	pollEvery                   = 1 * time.Second
keyFetchStatus is a census tag for frontend fetch status types.
frontendFetchLatency holds observed latency in individual frontend fetch queries.
	frontendFetchLatency = stats.Float64(
		"go-discovery/frontend-fetch/latency",
		"Latency of a frontend fetch request.",
		stats.UnitMilliseconds,
	)
FetchLatencyDistribution aggregates frontend fetch request latency by status code.
	FetchLatencyDistribution = &view.View{
		Name:    "go-discovery/frontend-fetch/latency",
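Modified from ochttp.DefaultLatencyDistribution to remove high values. Because our unit is seconds rather than milliseconds, the high values are too large (100000 = 27 hours). The main consequence is that the Fetch Latency heatmap on the dashboard is less informative. NOTE(review): frontendFetchLatency is declared with stats.UnitMilliseconds and recordFrontendFetchMetric records the latency in milliseconds, so either this comment or the bucket boundaries below (which top out at 60*60) must be reconciled with the unit that is actually recorded.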
		Aggregation: view.Distribution(
			1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65, 80, 100,
			130, 160, 200, 250, 300, 400, 500, 650, 800, 1000,
			30*60, // half hour: the max time an HTTP task can run
			60*60),
		Description: "FrontendFetch latency, by result source query type.",
		TagKeys:     []tag.Key{keyFetchStatus},
FetchResponseCount counts frontend fetch responses by response type.
	FetchResponseCount = &view.View{
		Name:        "go-discovery/frontend-fetch/count",
		Measure:     frontendFetchLatency,
		Aggregation: view.Count(),
		Description: "Frontend fetch request count",
		TagKeys:     []tag.Key{keyFetchStatus},
statusNotFoundInVersionMap indicates that a row does not exist in version_map for the module_path and requested_version.
serveFetch checks if a requested path and version exists in the database. If not, it will enqueue potential module versions that could contain the requested path and version to a task queue, to be fetched by the worker. Meanwhile, the request will poll the database until a row is found, or a timeout occurs. A status and responseText will be returned based on the result of the request.
// serveFetch handles requests whose URL path begins with "/fetch". It strips
// that prefix, parses the remainder with extractURLPathInfo, and passes the
// parsed module path, full path and requested version to fetchAndPoll.
//
// It returns nil when fetchAndPoll reports http.StatusOK. A path that cannot
// be parsed yields a 400 serverError, as does (among the visible conditions) a
// standard-library path requested at internal.LatestVersion. Any other non-OK
// outcome is returned as a serverError carrying the status and response text
// produced by fetchAndPoll. The returned error is wrapped with the request
// path by the deferred derrors.Wrap.
//
// NOTE(review): the declared identifiers and some statements (including the
// first half of the version check below) are missing from this copy of the
// function; restore them before building.
func ( *Server) ( http.ResponseWriter,  *http.Request,  internal.DataSource) ( error) {
	defer derrors.Wrap(&, "serveFetch(%q)", .URL.Path)
There's no reason for the proxydatasource to need this codepath.
If the experiment flag is not on, or the user makes a GET request, treat this as a request for the "fetch" package, which does not exist.
fetchHander accepts a requests following the same URL format as the detailsHandler.
	,  := extractURLPathInfo(strings.TrimPrefix(.URL.Path, "/fetch"))
	if  != nil {
		// The URL could not be parsed into path information.
		return &serverError{status: http.StatusBadRequest}
	}
TODO(https://golang.org/issue/39973): add support for fetching the latest and master versions of the standard library.
		(stdlib.Contains(.fullPath) && .requestedVersion == internal.LatestVersion) {
		return &serverError{status: http.StatusBadRequest}
	}
	,  := .fetchAndPoll(.Context(), , .modulePath, .fullPath, .requestedVersion)
	if  != http.StatusOK {
		// Surface the status and user-facing text chosen by fetchAndPoll.
		return &serverError{status: , responseText: }
	}
	return nil
}

// fetchResult describes the outcome of checking (and possibly fetching and
// polling for) a single candidate module path.
type fetchResult struct {
	// modulePath is the candidate module path this result belongs to.
	// goModPath is the module path reported for an alternative module; it is
	// used to build the "Were you looking for ..." response text.
	modulePath, goModPath string

	// status is an HTTP-style status code for this candidate. Besides the
	// net/http constants it may hold statusNotFoundInVersionMap or a value
	// produced by derrors.ToStatus.
	status int

	// err is the error, if any, that accompanies status.
	err error

	// responseText is the message shown to the user for this result.
	responseText string

	// updatedAt is not read or written in the visible part of this file.
	// NOTE(review): presumably the time the version_map row was last
	// updated; confirm against the code that populates it.
	updatedAt time.Time
}

// fetchAndPoll resolves the candidate module paths for a request with
// modulePathsToFetch, checks each of them through checkPossibleModulePaths
// (called with its final argument set to true), and reduces the per-path
// results to a single HTTP status and response text with
// resultFromFetchRequest.
//
// All failures are reported through the returned status rather than an error:
// a *serverError from modulePathsToFetch contributes its own status, any other
// error becomes http.StatusInternalServerError, and a final status equal to
// derrors.ToStatus(derrors.AlternativeModule) is reported as
// http.StatusNotFound.
//
// NOTE(review): the declared identifiers are missing from this copy of the
// function; restore them before building.
func ( *Server) ( context.Context,  internal.DataSource, , ,  string) ( int,  string) {
	 := time.Now()
	// On every return path, log the outcome and record a latency metric
	// measured from the start time captured above.
	defer func() {
		log.Infof(, "fetchAndPoll(ctx, ds, q, %q, %q, %q): status=%d, responseText=%q",
			, , , , )
		recordFrontendFetchMetric(, , time.Since())
	}()

TODO(https://golang.org/issue/39973): add support for fetching the latest and master versions of the standard library
Generate all possible module paths for the fullPath.
	// Single-value type assertion: this panics if the data source is not a
	// *postgres.DB.
	 := .(*postgres.DB)
	,  := modulePathsToFetch(, , , )
	if  != nil {
		var  *serverError
		if errors.As(, &) {
			// A serverError already carries the status to report.
			return .status, http.StatusText(.status)
		}
		log.Errorf(, "fetchAndPoll(ctx, ds, q, %q, %q, %q): %v", , , , )
		return http.StatusInternalServerError, http.StatusText(http.StatusInternalServerError)
	}
	 := .checkPossibleModulePaths(, , , , , true)
	,  := resultFromFetchRequest(, , )
	if  != nil {
		log.Errorf(, "fetchAndPoll(ctx, ds, q, %q, %q, %q): %v", , , , )
		return http.StatusInternalServerError, http.StatusText(http.StatusInternalServerError)
	}
	// The alternative-module status is internal; report it as a plain 404.
	if .status == derrors.ToStatus(derrors.AlternativeModule) {
		.status = http.StatusNotFound
	}
	return .status, .responseText
}
checkPossibleModulePaths checks all modulePaths at the requestedVersion, to see if the fullPath exists. For each module path, it first checks version_map to see if we already attempted to fetch the module. If not, and shouldQueue is true, it will enqueue the module to the frontend task queue to be fetched. checkPossibleModulePaths will then poll the database for each module path, until a result is returned or the request times out. If shouldQueue is false, it will return the fetchResult, regardless of what the status is.
// checkPossibleModulePaths starts one goroutine per candidate module path and
// waits for all of them before returning. The returned slice has the same
// length as the input slice of paths, and every goroutine runs under a context
// bounded by fetchTimeout.
//
// Each goroutine first calls checkForPath. That result is stored and the
// goroutine ends unless the boolean guard below holds (presumably the
// shouldQueue flag; the identifier is missing) and the status is
// statusNotFoundInVersionMap. Otherwise the module is scheduled on the queue
// and pollForPath is used to wait for the outcome, which is logged at error
// level when its status is http.StatusInternalServerError and at info level
// otherwise.
//
// NOTE(review): the declared identifiers are missing from this copy of the
// function; restore them before building.
func ( *Server) ( context.Context,  *postgres.DB,
	,  string,  []string,  bool) []*fetchResult {
	var  sync.WaitGroup
	,  := context.WithTimeout(, fetchTimeout)
	defer ()
	 := make([]*fetchResult, len())
	for ,  := range  {
		.Add(1)
		// NOTE(review): presumably per-iteration copies of the loop variables
		// for the goroutine below; the names are missing, so confirm.
		 := 
		 := 
		go func() {
			defer .Done()
			 := time.Now()
Before enqueuing the module version to be fetched, check if we have already attempted to fetch it in the past. If so, just return the result from that fetch process.
			 := checkForPath(, , , , , .taskIDChangeInterval)
			log.Debugf(, "initial checkForPath(ctx, db, %q, %q, %q, %d): status=%d, err=%v", , , , .taskIDChangeInterval, .status, .err)
			if ! || .status != statusNotFoundInVersionMap {
				[] = 
				return
			}
A row for this modulePath and requestedVersion combination does not exist in version_map. Enqueue the module version to be fetched.
			// NOTE(review): there is no return after a scheduling failure, so
			// the "queued" message below is logged regardless, and the err and
			// status set here appear to be replaced by the pollForPath result
			// assigned afterwards. Confirm whether that is intended.
			if ,  := .queue.ScheduleFetch(, , , "", false);  != nil {
				.err = 
				.status = http.StatusInternalServerError
			}
			log.Debugf(, "queued %s@%s to frontend-fetch task queue", , )
After the fetch request is enqueued, poll the database until it has been inserted or the request times out.
			 = pollForPath(, , pollEvery, , , , .taskIDChangeInterval)
			// Choose the log level from the final status.
			 := log.Infof
			if .status == http.StatusInternalServerError {
				 = log.Errorf
			}
			(, "fetched %s@%s for %s: status=%d, err=%v; took %.3fs", , , , .status, .err, time.Since().Seconds())
			[] = 
		}()
	}
	.Wait()
	return 
}
resultFromFetchRequest returns the HTTP status code and response text from the results of fetching possible module paths for fullPath at the requestedVersion. It is assumed the results are sorted in order of decreasing modulePath length, so the first result that is not a StatusNotFound is returned. If all of the results are 404, but a module path was found that shares the path prefix of fullPath, the responseText will contain that information. The status and responseText will be displayed to the user.
// resultFromFetchRequest reduces the per-candidate results to the single
// result whose status and responseText are reported to the user.
//
// It returns an error when the input is empty. Otherwise it walks the results
// in order and returns as soon as one of the handled statuses is seen: an OK
// result with no error is returned unchanged, and the other visible cases
// fill in a user-facing responseText (and in some cases rewrite the status to
// http.StatusNotFound) before returning. If the loop finishes without
// returning, the first result is reused as a 404: with a pointer to the
// remembered module path when some result carried
// errPathDoesNotExistInModule, and with a plain "could not be found" message
// otherwise. The returned error is wrapped by the deferred derrors.Wrap.
//
// NOTE(review): the declared identifiers, the switch header and at least two
// case labels are missing from this copy of the function; restore them before
// building.
func ( []*fetchResult, ,  string) ( *fetchResult,  error) {
	defer derrors.Wrap(&, "resultFromFetchRequest(results, %q, %q)", , )
	if len() == 0 {
		return nil, fmt.Errorf("no results")
	}

	var  string
	for ,  := range  {
Results are in order of longest module path first. Once an appropriate result is found, return. Otherwise, look at the next path.
		case http.StatusOK:
			if .err == nil {
				return , nil
			}
If the context timed out or was canceled before all of the requests finished, return an error letting the user to check back later. The worker will still be processing the modules in the background.
			// NOTE(review): the quote characters in this message look
			// mis-encoded; confirm against the intended text.
			.responseText = fmt.Sprintf("We're still working on ā€œ%sā€. Check back in a few minutes!", displayPath(, ))
			return , nil
		case http.StatusInternalServerError:
			.responseText = "Oops! Something went wrong."
			return , nil
		case derrors.ToStatus(derrors.AlternativeModule):
			// An invalid go.mod path cannot be offered as a link; report a
			// plain 404 instead.
			if  := module.CheckPath(.goModPath);  != nil {
				.status = http.StatusNotFound
				.responseText = fmt.Sprintf(`%q does not have a valid module path (%q).`, , .goModPath)
				return , nil
			}
			 := template.Must(template.New("").Parse(`{{.}}`))
			// NOTE(review): the quote characters in this message look
			// mis-encoded; confirm against the intended text.
			,  := .ExecuteToHTML(fmt.Sprintf("%s is not a valid path. Were you looking for ā€œ<a href='https://pkg.go.dev/%s'>%s</a>ā€?",
				displayPath(, ), .goModPath, .goModPath))
			if  != nil {
				.status = http.StatusInternalServerError
				return , 
			}
			.responseText = .String()
			return , nil
There are 3 categories of 490 errors that we see: - module contains 0 packages - empty commit time - zip.NewReader: zip: not a valid zip file: bad module (only seen for foo.maxj.us/oops.fossil) Provide a specific message for the first error.
			.status = http.StatusNotFound
			 := 
			if  != internal.LatestVersion {
				 =  + "@" + 
			}
			.responseText = fmt.Sprintf("%s could not be processed.", )
			// The no-packages case is detected by matching the error text.
			if .err != nil && strings.Contains(.err.Error(), fetch.ErrModuleContainsNoPackages.Error()) {
				.responseText = fmt.Sprintf("There are no packages in module %s.", )
			}
			return , nil
		}
A module was found for a prefix of the path, but the path did not exist in that module. Note the longest possible modulePath in this case, and let the user know that it exists. For example, if the request was for github.com/hashicorp/vault/@master/api, github.com/hashicorp/vault/api does not exist at master, but it does in older versions of github.com/hashicorp/vault.
		// Only the first matching module path is remembered.
		if errors.Is(.err, errPathDoesNotExistInModule) &&  == "" {
			 = .modulePath
		}
	}

	// No result caused an early return: reuse the first result for the 404.
	 := [0]
	if  != "" {
		 := template.Must(template.New("").Parse(`{{.}}`))
		,  := .ExecuteToHTML(fmt.Sprintf(`
		    <div class="Error-message">%s could not be found.</div>
		    <div class="Error-message">However, you can view <a href="https://pkg.go.dev/%s">module %s</a>.</div>`,
			displayPath(, ),
			displayPath(, ),
			displayPath(, ),
		))
		if  != nil {
			.status = http.StatusInternalServerError
			return , 
		}
		.status = http.StatusNotFound
		.responseText = .String()
		return , nil
	}
	 := 
	if  != internal.LatestVersion {
		 =  + "@" + 
	}
	.status = http.StatusNotFound
	.responseText = fmt.Sprintf("%q could not be found.", )
	return , nil
}

// Formats a path together with a version for display: one argument is
// returned unchanged when the compared argument equals internal.LatestVersion;
// otherwise the two are joined as "<first>@<second>".
//
// NOTE(review): the function and parameter names are missing from this copy.
// This is presumably the displayPath helper called from
// resultFromFetchRequest; confirm when restoring the identifiers.
func (,  string) string {
	if  == internal.LatestVersion {
		return 
	}
	return fmt.Sprintf("%s@%s", , )
}
pollForPath polls the database until a row for fullPath is found.
// pollForPath calls checkForPath on every tick of a ticker until the result's
// status is something other than statusNotFoundInVersionMap, and returns that
// result. The other select branch, whose case header is missing here, returns
// a result with http.StatusRequestTimeout and an error taken from an Err()
// call.
//
// NOTE(review): the declared identifiers and the first select case are
// missing from this copy of the function; restore them before building.
func ( context.Context,  *postgres.DB,  time.Duration,
	, ,  string,  time.Duration) *fetchResult {
	 := &fetchResult{modulePath: }
	// NOTE(review): the wrap label says "pollForRedirectURL", which does not
	// match any function visible in this file; it looks like a stale name.
	defer derrors.Wrap(&.err, "pollForRedirectURL(%q, %q, %q)", , , )
	 := time.NewTicker()
	defer .Stop()
	for {
		select {
The request timed out before the fetch process completed.
			.status = http.StatusRequestTimeout
			.err = .Err()
			return 
		case <-.C:
			// NOTE(review): this defer sits inside the loop, so each tick adds
			// another deferred call that only runs when the function returns.
			,  := context.WithTimeout(, )
			defer ()
			 = checkForPath(, , , , , )
			if .status != statusNotFoundInVersionMap {
				return 
			}
		}
	}
}
checkForPath checks for the existence of fullPath, modulePath, and requestedVersion in the database. If the modulePath does not exist in version_map, it returns a result whose status is statusNotFoundInVersionMap, signaling that the fetch process that was initiated is not yet complete. If the row exists in version_map but not paths, it means that a module was found at the requestedVersion, but not the fullPath, so errPathDoesNotExistInModule is returned. Note that if an error occurs while writing to the version_map table, checkForPath will not know. Instead, it will keep running until the request times out.
// checkForPath looks up a version_map row with GetVersionMap and, when that
// row does not rule the module out, calls GetUnitMeta at the row's
// ResolvedVersion. It always reports its outcome through the returned
// fetchResult rather than a separate error value.
//
// Outcomes visible in this copy:
//   - a GetVersionMap error produces a result whose status is
//     derrors.ToStatus of that error, replaced by statusNotFoundInVersionMap
//     when the error is derrors.NotFound;
//   - remaining statuses of 400 or above are reported as http.StatusNotFound
//     with errModuleDoesNotExist;
//   - a GetUnitMeta failure is reported as http.StatusInternalServerError;
//   - otherwise the status is http.StatusOK.
//
// Before returning, a cancelled context or a "pq: canceling statement due to
// user request" error is converted into http.StatusRequestTimeout with an
// error wrapping context.DeadlineExceeded, and the error is then wrapped with
// the function's arguments.
//
// NOTE(review): the declared identifiers and many statements (the opening of
// the deferred closure, several if/switch headers and case labels) are missing
// from this copy of the function; restore them before building.
func ( context.Context,  *postgres.DB,
	, ,  string,  time.Duration) ( *fetchResult) {
Based on https://github.com/lib/pq/issues/577#issuecomment-298341053, it seems that ctx.Err() will return nil because this error is coming from postgres. This is also how github.com/lib/pq currently handles the error in their tests: https://github.com/lib/pq/blob/e53edc9b26000fec4c4e357122d56b0f66ace6ea/go18_test.go#L89
		if .Err() != nil || (.err != nil && strings.Contains(.err.Error(), "pq: canceling statement due to user request")) {
			.err = fmt.Errorf("%v: %w", .err, context.DeadlineExceeded)
			.status = http.StatusRequestTimeout
		}
		derrors.Wrap(&.err, "checkForPath(%q, %q, %q)", , , )
	}()
Check the version_map table to see if a row exists for modulePath and requestedVersion.
	,  := .GetVersionMap(, , )
If an error is returned, there are two possibilities: (1) A row for this modulePath and version does not exist. This means that the fetch request is not done yet, so return statusNotFoundInVersionMap so the fetchHandler will call checkForPath again in a few seconds. (2) Something went wrong, so return that error.
		 = &fetchResult{
			modulePath: ,
			status:     derrors.ToStatus(),
			err:        ,
		}
		if errors.Is(, derrors.NotFound) {
			.status = statusNotFoundInVersionMap
		}
		return 
	}
We successfully retrieved a row in version_map for the modulePath and requestedVersion. Look at the status of that row to determine whether an error should be returned.
If the duration of taskIDChangeInterval has passed since a module_path was last inserted into version_map with a failed status, treat that data as expired. It is possible that the module has appeared in the Go Module Mirror during that time, the failure was transient, or the error has been fixed but the module version has not yet been reprocessed. Return statusNotFoundInVersionMap here, so that the fetch request will try to fetch this module version again. Since the taskIDChangeInterval has passed, it is now possible to enqueue that module version to the frontend task queue again.
The version_map indicates that the proxy returned a 404/410.
			.err = errModuleDoesNotExist
		}
		return 
The row indicates that the provided module path did not match the module path returned by a request to /<modulePath>/@v/<requestedPath>.mod.
		.err = derrors.AlternativeModule
		return 
The module was marked for reprocessing by the worker. Return statusNotFoundInVersionMap here, so that the tasks gets enqueued to frontend tasks, and we don't return a result to the user until that is complete.
All remaining non-200 statuses will be in the 40x range. In that case, just return a not found error.
		if .status >= 400 {
			.status = http.StatusNotFound
			.err = errModuleDoesNotExist
			return
		}
	}
The row in version_map indicated that the module version exists (status was 200 or 290). Now check the paths table to see if the fullPath exists. vm.status for the module version was either a 200 or 290. Now determine if the fullPath exists in that module.
	if ,  := .GetUnitMeta(, , , .ResolvedVersion);  != nil {
The module version exists, but the fullPath does not exist in that module version.
Something went wrong when we made the DB request to ds.GetUnitMeta.
		.status = http.StatusInternalServerError
		.err = 
		return 
Success! The fullPath exists in the requested module version.
	.status = http.StatusOK
	return 
}
modulePathsToFetch returns the slice of module paths that we should check for the path. If modulePath is known, only check that modulePath. If a row for the fullPath already exists, check that modulePath. Otherwise, check all possible module paths based on the elements for the fullPath. Resulting paths are returned in reverse length order.
// modulePathsToFetch chooses the module paths that should be checked for a
// request, trying three sources in order:
//
//  1. if the compared argument is not internal.UnknownModulePath, a
//     one-element slice is returned without touching the data source;
//  2. otherwise GetUnitMeta is queried at internal.LatestVersion, and when it
//     succeeds the ModulePath of the returned unit is the only candidate;
//  3. when GetUnitMeta reports derrors.NotFound, the result of
//     candidateModulePaths is returned.
//
// Any other GetUnitMeta error is returned as a serverError with
// http.StatusInternalServerError. The returned error is wrapped by the
// deferred derrors.Wrap.
//
// NOTE(review): the declared identifiers are missing from this copy of the
// function; restore them before building.
func ( context.Context,  internal.DataSource, ,  string) ( []string,  error) {
	defer derrors.Wrap(&, "modulePathsToFetch(ctx, ds, %q, %q)", , )
	if  != internal.UnknownModulePath {
		return []string{}, nil
	}
	,  := .GetUnitMeta(, , , internal.LatestVersion)
	// Only a "not found" error falls through to candidate generation.
	if  != nil && !errors.Is(, derrors.NotFound) {
		return nil, &serverError{
			status: http.StatusInternalServerError,
			err:    ,
		}
	}
	if  == nil {
		return []string{.ModulePath}, nil
	}
	return candidateModulePaths()
}

// vcsHostsWithThreeElementRepoName is a set of version-control hosting
// domains, keyed by host name; every listed host maps to true and any other
// host reads as false.
//
// NOTE(review): going by the name, repository paths on these hosts presumably
// consist of three elements (host/owner/repo). The map is not referenced in
// the visible part of this file, so confirm against its callers.
var vcsHostsWithThreeElementRepoName = func() map[string]bool {
	hosts := []string{
		"bitbucket.org",
		"gitea.com",
		"gitee.com",
		"github.com",
		"gitlab.com",
	}
	set := make(map[string]bool, len(hosts))
	for _, host := range hosts {
		set[host] = true
	}
	return set
}()
maxPathsToFetch is the maximum number of modulePaths that are fetched from a single fetch request. The longest module path we've seen in our database had 7 path elements. maxPathsToFetch is set to 10 as a buffer.
// maxPathsToFetch caps the number of candidate module paths returned by
// candidateModulePaths: when more candidates exist, only the final
// maxPathsToFetch entries of the candidate slice are kept.
var maxPathsToFetch = 10
candidateModulePaths returns the potential module paths that could contain the fullPath. The paths are returned in reverse length order.
// candidateModulePaths returns the module paths that could contain the given
// path, as computed by internal.CandidateModulePaths and capped at
// maxPathsToFetch entries.
//
// It returns a serverError with http.StatusBadRequest when isValidPath rejects
// the path or when internal.CandidateModulePaths returns nil.
//
// NOTE(review): the declared identifiers are missing from this copy of the
// function; restore them before building.
func ( string) ( []string,  error) {
	if !isValidPath() {
		return nil, &serverError{
			status: http.StatusBadRequest,
			err:    fmt.Errorf("isValidPath(%q): false", ),
		}
	}
	 := internal.CandidateModulePaths()
	if  == nil {
		return nil, &serverError{
			status: http.StatusBadRequest,
			err:    fmt.Errorf("invalid path: %q", ),
		}
	}
	// When over the cap, keep the entries at the end of the slice and drop
	// those at the front.
	if len() > maxPathsToFetch {
		return [len()-maxPathsToFetch:], nil
	}
	return , nil
}
FetchAndUpdateState is used by the InMemory queue for testing in internal/frontend and running cmd/frontend locally. It is a copy of worker.FetchAndUpdateState that does not update module_version_states, so that we don't have to import internal/worker here. It is not meant to be used when running on AppEngine.
// FetchAndUpdateState fetches a module with fetch.FetchModule, inserts the
// fetched module with InsertModule, and records the outcome in version_map
// with UpsertVersionMap.
//
// It returns http.StatusInternalServerError together with the error when
// UpsertVersionMap fails, the fetch result's own Status and Error when the
// result carries an error, and http.StatusOK with a nil error otherwise. The
// deferred function logs completion and wraps the returned error with the
// function's arguments.
//
// NOTE(review): the declared identifiers and the condition that guards the
// InsertModule call are missing from this copy of the function; restore them
// before building.
func ( context.Context, ,  string,  *proxy.Client,  *source.Client,  *postgres.DB) ( int,  error) {
	defer func() {
		if  != nil {
			log.Infof(, "FetchAndUpdateState(%q, %q) completed with err: %v. ", , , )
		} else {
			log.Infof(, "FetchAndUpdateState(%q, %q) succeeded", , )
		}
		derrors.Wrap(&, "FetchAndUpdateState(%q, %q)", , )
	}()

	 := fetch.FetchModule(, , , , )
	defer .Defer()
Only attempt to insert the module into module_version_states if the fetch process was successful.
		// NOTE(review): the "succeeded" message below is logged even when
		// InsertModule fails. In the visible code a failure only sets Status,
		// not Error, so unless Error is set elsewhere the function still
		// returns http.StatusOK with a nil error after a failed insert.
		// Confirm whether that is intended.
		if ,  := .InsertModule(, .Module, nil);  != nil {
			.Status = http.StatusInternalServerError
			log.Errorf(, "FetchAndUpdateState(%q, %q): db.InsertModule failed: %v", , , )
		}
		log.Infof(, "FetchAndUpdateState(%q, %q): db.InsertModule succeeded", , )
	}

	// Flatten the fetch error, if any, into a string for the version_map row.
	var  string
	if .Error != nil {
		 = .Error.Error()
	}
	 := &internal.VersionMap{
		ModulePath:       .ModulePath,
		RequestedVersion: .RequestedVersion,
		ResolvedVersion:  .ResolvedVersion,
		GoModPath:        .GoModPath,
		Status:           .Status,
		Error:            ,
	}
	if  := .UpsertVersionMap(, );  != nil {
		return http.StatusInternalServerError, 
	}
	if .Error != nil {
		return .Status, .Error
	}
	return http.StatusOK, nil
}

func ( context.Context,  int,  time.Duration) {
	 := float64() / float64(time.Millisecond)

	stats.RecordWithTags(, []tag.Mutator{
		tag.Upsert(keyFetchStatus, strconv.Itoa()),
	}, frontendFetchLatency.M())