// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package postgres

import (
	"path/filepath"
	"strings"
	"unicode"

	"github.com/russross/blackfriday/v2"
)

// Default limits passed by SearchDocumentSections to searchDocumentSections.
const (
	// maxSectionWords is the maximum number of words kept in each section
	// of the search document.
	maxSectionWords   = 50
	// maxReadmeFraction additionally limits the D section to this initial
	// fraction of the README.
	maxReadmeFraction = 0.5
)
SearchDocumentSections computes the B and C sections of a Postgres search document from a package synopsis and a README. By "B section" and "C section" we mean the portion of the tsvector with weight "B" and "C", respectively. The B section consists of the synopsis. The C section consists of the first sentence of the README. The D section consists of the remainder of the README. All sections are split into words and processed for replacements. Each section is limited to maxSectionWords words, and in addition the D section is limited to an initial fraction of the README, determined by maxReadmeFraction.
func (, ,  string) (, ,  string) {
	return searchDocumentSections(, , , maxSectionWords, maxReadmeFraction)
}

func (, ,  string,  int,  float64) (, ,  string) {
	var ,  string
	if isMarkdown() {
		 = processMarkdown()
	}
	if  := sentenceEndIndex();  > 0 {
		,  = [:+1], [+1:]
	} else {
		 = 
	}
	 := processWords()
	 := processWords()
	 := processWords()

	,  := split(, )
section D is the part of the readme that is not in sectionC.
Keep maxSecWords of section D, but not more than maxReadmeFrac.
	 := int( * float64(len()))
	 := 
	if  >  {
		 = 
	}
	,  := split(, )
If there is no synopsis, use first sentence of the README. But do not promote the rest of the README to section C.
	if len() == 0 {
		 = 
		 = nil
	}

	 := func( []string) string {
		return makeValidUnicode(strings.Join(, " "))
	}

	return (), (), ()
}
// split splits a slice of strings into two parts. The first has length <= n,
// and the second is the rest of the slice. If n is negative, the first part is
// nil and the second part is the entire slice.
func split(a []string, n int) ([]string, []string) {
	switch {
	case n < 0:
		// Slicing with a negative bound would panic; honor the documented
		// contract instead.
		return nil, a
	case n >= len(a):
		return a, nil
	}
	return a[:n], a[n:]
}
// sentenceEndIndex returns the index in s of the end of the first sentence, or
// -1 if no end can be found. A sentence ends at a '.', '!' or '?' that is
// followed by a space (or ends the string), and is not preceded by an
// uppercase letter.
func sentenceEndIndex(s string) int {
	// prev1 is the rune just before the current one; prev2 is the one
	// before that. Both start as the zero rune.
	var prev1, prev2 rune

	// end reports whether prev1 is sentence-ending punctuation that does not
	// directly follow an uppercase letter (as in an initialism like "U.S.").
	end := func() bool {
		return !unicode.IsUpper(prev2) && (prev1 == '.' || prev1 == '!' || prev1 == '?')
	}

	for i, r := range s {
		if unicode.IsSpace(r) && end() {
			// The punctuation is a single byte, so it sits at i-1.
			return i - 1
		}
		prev2 = prev1
		prev1 = r
	}
	if end() {
		return len(s) - 1
	}
	return -1
}
processWords splits s into words at whitespace, then processes each word.
func ( string) []string {
	 := strings.Fields(strings.ToLower())
	var  []string
	for ,  := range  {
		 = append(, processWord()...)
	}
	return 
}
// summaryReplacements is used to replace words with other words.
// It is used by processWord, below.
// Example key-value pairs:
//
//	"deleteMe": nil                        // removes "deleteMe"
//	"rand": []string{"random"}             // replace "rand" with "random"
//	"utf-8": []string{"utf-8", "utf8"}     // add "utf8" whenever "utf-8" is seen
var summaryReplacements = map[string][]string{
	// Pure replacement: the key is not kept in the output.
	"rand": {"random"},

	// Database names: either spelling yields both.
	"mongo":      {"mongo", "mongodb"},
	"mongodb":    {"mongo", "mongodb"},
	"postgres":   {"postgres", "postgresql"},
	"postgresql": {"postgres", "postgresql"},
	"redis":      {"redis", "redisdb"},
	"redisdb":    {"redis", "redisdb"},

	// Postgres stemmer does not handle -er, so the stem is added explicitly.
	"logger": {"logger", "log"},
	"parser": {"parser", "parse"},

	// Spelling variant added alongside the original.
	"utf-8": {"utf-8", "utf8"},
}
processWord performs processing on s, returning zero or more words. Its main purpose is to apply summaryReplacements to replace certain words with synonyms or additional search terms.
func ( string) []string {
	 = strings.TrimFunc(, unicode.IsPunct)
	if  == "" {
		return nil
	}
	if ,  := summaryReplacements[];  {
		return 
	}
	if !hyphenSplit() {
		return []string{}
Apply replacements to parts of hyphenated words.
	 := strings.Split(, "-")
	if len() == 1 {
		return 
	}
	 := []string{} // Include the full hyphenated word.
	for ,  := range  {
		if ,  := summaryReplacements[];  {
			 = append(, ...)
We don't need to include the parts; the Postgres text-search processor will do that.
	}
	return 
}
// hyphenSplit reports whether s should be split on hyphens.
// Strings that start with "http://" or "https://" are left intact.
func hyphenSplit(s string) bool {
	return !(strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://"))
}
// isMarkdown reports whether filename says that the file contains markdown.
// Only the file extension is examined, and the comparison ignores case.
func isMarkdown(filename string) bool {
	ext := strings.ToLower(filepath.Ext(filename))
	// https://tools.ietf.org/html/rfc7763 mentions both extensions.
	return ext == ".md" || ext == ".markdown"
}
processMarkdown returns the text of a markdown document. It omits all formatting and images.
func ( string) string {
	 := blackfriday.New(blackfriday.WithExtensions(blackfriday.CommonExtensions))
	 := .Parse([]byte())
	 := walkMarkdown(, nil, 0)
	return string()
}
// walkMarkdown traverses a blackfriday parse tree, extracting text.
func ( *blackfriday.Node,  []byte,  int) []byte {
	if  == nil {
		return 
	}
	switch .Type {
Skip images because they usually are irrelevant to the package (badges and such).
		return 
Skip code blocks because they have a wide variety of unrelated symbols.
		return 
	case blackfriday.Paragraph, blackfriday.Heading:
		if len() > 0 {
			 = append(, ' ')
		}
	default:
		 = append(, .Literal...)
	}
	for  := .FirstChild;  != nil;  = .Next {
		 = (, , +1)
	}
	return