Source: queue.go in package golang.org/x/pkgsite/internal/queue

Package queue provides queue implementations that can be used for asynchronous scheduling of fetch actions.

package queue

import (
	"context"
	"errors"
	"fmt"
	"hash/fnv"
	"io"
	"math"
	"strings"
	"time"

	cloudtasks "cloud.google.com/go/cloudtasks/apiv2"
	"github.com/golang/protobuf/ptypes"
	"golang.org/x/pkgsite/internal"
	"golang.org/x/pkgsite/internal/config"
	"golang.org/x/pkgsite/internal/derrors"
	"golang.org/x/pkgsite/internal/experiment"
	"golang.org/x/pkgsite/internal/log"
	"golang.org/x/pkgsite/internal/middleware"
	taskspb "google.golang.org/genproto/googleapis/cloud/tasks/v2"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

A Queue provides an interface for asynchronous scheduling of fetch actions.

// Queue is the scheduling abstraction shared by the Cloud Tasks-backed (GCP)
// and in-process (InMemory) implementations in this file.
type Queue interface {
	// ScheduleFetch requests an asynchronous fetch of modulePath at version.
	// The boolean result reports whether a task was enqueued; the GCP
	// implementation returns (false, nil) when the task already exists.
	//
	// suffix and disableProxyFetch are honored by the GCP implementation
	// (suffix is appended to the task name to defeat de-duplication;
	// disableProxyFetch adds a query parameter to the task URL) and are
	// ignored by InMemory.
	ScheduleFetch(ctx context.Context, modulePath, version, suffix string, disableProxyFetch bool) (bool, error)
}

New creates a new Queue with name queueName based on the configuration in cfg. When running locally, Queue uses numWorkers concurrent workers.

// New returns the Queue implementation appropriate for cfg.
//
// When cfg reports that the process is not on GCP, New returns an InMemory
// queue with numWorkers workers that calls processFunc for each task; the
// experiments obtained from expGetter whose Rollout is positive are passed to
// that queue. Otherwise it returns a GCP queue that enqueues tasks on the
// Cloud Tasks queue named queueName, and numWorkers, expGetter and
// processFunc are not used.
//
// An error is returned if the experiments cannot be read, if the Cloud Tasks
// client cannot be created, or if cfg lacks a setting the GCP queue needs.
func New(ctx context.Context, cfg *config.Config, queueName string, numWorkers int, expGetter middleware.ExperimentGetter, processFunc inMemoryProcessFunc) (Queue, error) {
	if !cfg.OnGCP() {
		experiments, err := expGetter(ctx)
		if err != nil {
			return nil, err
		}
		var names []string
		for _, e := range experiments {
			if e.Rollout > 0 {
				names = append(names, e.Name)
			}
		}
		return NewInMemory(ctx, numWorkers, names, processFunc), nil
	}

	client, err := cloudtasks.NewClient(ctx)
	if err != nil {
		return nil, err
	}
	g, err := newGCP(cfg, client, queueName)
	if err != nil {
		// Nothing else holds the client at this point, so release its
		// connection here. The close error is deliberately dropped: the
		// configuration error is the one the caller needs to see.
		_ = client.Close()
		return nil, err
	}
	log.Infof(ctx, "enqueuing at %s with queueURL=%q", g.queueName, g.queueURL)
	return g, nil
}

GCP provides a Queue implementation backed by the Google Cloud Tasks API.

type GCP struct {
	client    *cloudtasks.Client
	queueName string // full GCP name of the queue

token holds information that lets the task queue construct an authorized request to the worker. Since the worker sits behind the IAP, the queue needs an identity token that includes the identity of a service account that has access, and the client ID for the IAP. We use the service account of the current process.

	token *taskspb.HttpRequest_OidcToken
}

// newGCP returns a new GCP queue that can be used to enqueue tasks using the Cloud Tasks API. The given queueID should be the name of the queue in the Cloud Tasks console.

// newGCP checks, in a fixed order, that queueID and every cfg setting the
// queue depends on are non-empty, and fails with an error naming the first
// empty one. On success it returns a GCP whose queueName is the
// fully-qualified Cloud Tasks name derived from cfg.ProjectID,
// cfg.LocationID and queueID, and whose token carries cfg.ServiceAccount and
// cfg.QueueAudience.
func newGCP(cfg *config.Config, client *cloudtasks.Client, queueID string) (_ *GCP, err error) {
	defer derrors.Wrap(&err, "newGCP(cfg, client, %q)", queueID)

	// Cases are evaluated top to bottom and stop at the first match, so the
	// settings are inspected one at a time in the listed order.
	var missing string
	switch "" {
	case queueID:
		missing = "queueID"
	case cfg.ProjectID:
		missing = "ProjectID"
	case cfg.LocationID:
		missing = "LocationID"
	case cfg.QueueURL:
		missing = "QueueURL"
	case cfg.ServiceAccount:
		missing = "ServiceAccount"
	case cfg.QueueAudience:
		missing = "QueueAudience"
	}
	if missing != "" {
		return nil, errors.New("empty " + missing)
	}

	fullName := fmt.Sprintf("projects/%s/locations/%s/queues/%s", cfg.ProjectID, cfg.LocationID, queueID)
	oidc := &taskspb.OidcToken{
		ServiceAccountEmail: cfg.ServiceAccount,
		Audience:            cfg.QueueAudience,
	}
	return &GCP{
		client:    client,
		queueName: fullName,
		queueURL:  cfg.QueueURL,
		token:     &taskspb.HttpRequest_OidcToken{OidcToken: oidc},
	}, nil
}

// ScheduleFetch enqueues a task on GCP to fetch the given modulePath and version. It returns an error if modulePath is the unknown module path or if pushing the task to GCP fails. If the task was a duplicate, it returns (false, nil).

func (q *GCP) ScheduleFetch(ctx context.Context, modulePath, version, suffix string, disableProxyFetch bool) (enqueued bool, err error) {
	defer derrors.WrapStack(&err, "queue.ScheduleFetch(%q, %q, %q)", modulePath, version, suffix)

Cloud Tasks enforces an RPC timeout of at most 30s. I couldn't find this in the documentation, but using a larger value, or no timeout, results in an InvalidArgument error with the text "The deadline cannot be more than 30s in the future."

	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	if modulePath == internal.UnknownModulePath {
		return false, errors.New("given unknown module path")
	}
	req := q.newTaskRequest(modulePath, version, suffix, disableProxyFetch)
	enqueued = true
	if _, err := q.client.CreateTask(ctx, req); err != nil {
		if status.Code(err) == codes.AlreadyExists {
			log.Debugf(ctx, "ignoring duplicate task ID %s: %s@%s", req.Task.Name, modulePath, version)
			enqueued = false
		} else {
			return false, fmt.Errorf("q.client.CreateTask(ctx, req): %v", err)
		}
	}
	return enqueued, nil
}

Maximum timeout for HTTP tasks. See https://cloud.google.com/tasks/docs/creating-http-target-tasks.

const maxCloudTasksTimeout = 30 * time.Minute // used as the DispatchDeadline of every task built by newTaskRequest

// DisableProxyFetchParam and DisableProxyFetchValue are the query parameter
// name and value that the GCP queue appends to a task's URL when
// ScheduleFetch is called with disableProxyFetch set.
const (
	DisableProxyFetchParam = "proxyfetch"
	DisableProxyFetchValue = "off"
)

func (q *GCP) newTaskRequest(modulePath, version, suffix string, disableProxyFetch bool) *taskspb.CreateTaskRequest {
	taskID := newTaskID(modulePath, version)
	relativeURI := fmt.Sprintf("/fetch/%s/@v/%s", modulePath, version)
	if disableProxyFetch {
		relativeURI += fmt.Sprintf("?%s=%s", DisableProxyFetchParam, DisableProxyFetchValue)
	}
	task := &taskspb.Task{
		Name:             fmt.Sprintf("%s/tasks/%s", q.queueName, taskID),
		DispatchDeadline: ptypes.DurationProto(maxCloudTasksTimeout),
	}
	task.MessageType = &taskspb.Task_HttpRequest{
		HttpRequest: &taskspb.HttpRequest{
			HttpMethod:          taskspb.HttpMethod_POST,
			Url:                 q.queueURL + relativeURI,
			AuthorizationHeader: q.token,
		},
	}
	req := &taskspb.CreateTaskRequest{
		Parent: q.queueName,
		Task:   task,

If suffix is non-empty, append it to the task name. This lets us force reprocessing of tasks that would normally be de-duplicated.

	if suffix != "" {
		req.Task.Name += "-" + suffix
	}
	return req
}

Create a task ID for the given module path and version. Task IDs can contain only letters ([A-Za-z]), numbers ([0-9]), hyphens (-), or underscores (_).

// newTaskID returns the Cloud Tasks ID for modulePath at version.
//
// The ID has the form "<hash>-<escaped>", where <hash> is four hex digits
// derived from the FNV-1 hash of "modulePath@version" and <escaped> is that
// same string rewritten to use only letters, digits, hyphens and underscores:
// letters, digits and '-' are kept; '_' becomes "__", '/' becomes "_-", '@'
// becomes "_v", '.' becomes "_o", and any other rune becomes '_' followed by
// its code point in hex (at least four digits).
func newTaskID(modulePath, version string) string {
	mv := modulePath + "@" + version

	// Compute a hash to use as a prefix, so the task IDs are distributed
	// uniformly. See
	// https://cloud.google.com/tasks/docs/reference/rpc/google.cloud.tasks.v2#task
	// under "Task De-duplication".
	hasher := fnv.New32()
	io.WriteString(hasher, mv) // a hash.Hash Write never returns an error
	// Reduce the sum so it always prints as exactly four hex digits.
	hash := hasher.Sum32() % math.MaxUint16

	// Escape the name so it contains only valid characters. Do our best to
	// make it readable.
	var b strings.Builder
	for _, r := range mv {
		switch {
		case r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z' || r >= '0' && r <= '9' || r == '-':
			b.WriteRune(r)
		case r == '_':
			b.WriteString("__")
		case r == '/':
			b.WriteString("_-")
		case r == '@':
			b.WriteString("_v")
		case r == '.':
			b.WriteString("_o")
		default:
			fmt.Fprintf(&b, "_%04x", r)
		}
	}
	return fmt.Sprintf("%04x-%s", hash, b.String())
}

// moduleVersion is the element type of the InMemory queue's channel: a module
// path paired with the version to process.
type moduleVersion struct {
	modulePath, version string
}

InMemory is a Queue implementation that schedules in-process fetch operations. Unlike the GCP task queue, it will not automatically retry tasks on failure. This should only be used for local development.

// InMemory holds the state shared between ScheduleFetch and the dispatcher
// goroutine started by NewInMemory.
type InMemory struct {
	queue       chan moduleVersion // pending tasks; NewInMemory gives it a buffer of 1000
	sem         chan struct{}      // semaphore with one slot per worker; a held slot is a running task
	experiments []string           // experiment names passed to NewInMemory
}

// inMemoryProcessFunc is the callback the InMemory queue runs for each task.
// It is called with a context and the task's module path and version, in that
// order. The queue logs a non-nil error and discards the int result.
// NOTE(review): the int looks like a status code — confirm with callers.
type inMemoryProcessFunc func(context.Context, string, string) (int, error)

// NewInMemory creates a new InMemory that asynchronously calls processFunc for each scheduled module version. It uses workerCount parallelism to execute these calls.

func NewInMemory(ctx context.Context, workerCount int, experiments []string, processFunc inMemoryProcessFunc) *InMemory {
	q := &InMemory{
		queue:       make(chan moduleVersion, 1000),
		sem:         make(chan struct{}, workerCount),
		experiments: experiments,
	}
	go func() {
		for v := range q.queue {
			select {
			case <-ctx.Done():
				return
			case q.sem <- struct{}{}:
			}

If a worker is available, make a request to the fetch service inside a goroutine and wait for it to finish.

			go func(v moduleVersion) {
				defer func() { <-q.sem }()

				log.Infof(ctx, "Fetch requested: %q %q (workerCount = %d)", v.modulePath, v.version, cap(q.sem))

				fetchCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
				fetchCtx = experiment.NewContext(fetchCtx, experiments...)
				defer cancel()

				if _, err := processFunc(fetchCtx, v.modulePath, v.version); err != nil {
					log.Error(fetchCtx, err)
				}
			}(v)
		}
	}()
	return q
}

ScheduleFetch pushes a fetch task into the local queue to be processed asynchronously.

// ScheduleFetch places modulePath at version on the local queue and reports
// it as enqueued; it always returns (true, nil). The suffix and
// disable-proxy-fetch arguments are ignored, and ctx is not consulted, so the
// send blocks while the queue's buffer is full.
func (q *InMemory) ScheduleFetch(ctx context.Context, modulePath, version, _ string, _ bool) (bool, error) {
	task := moduleVersion{
		modulePath: modulePath,
		version:    version,
	}
	q.queue <- task
	return true, nil
}

WaitForTesting waits for all queued requests to finish. It should only be used by test code.

func (q InMemory) WaitForTesting(ctx context.Context) {
	for i := 0; i < cap(q.sem); i++ {
		select {
		case <-ctx.Done():
			return
		case q.sem <- struct{}{}:
		}
	}
	close(q.queue)