Copyright 2019 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.

package frontend

import (
	
	
	
	
	
	
	
	

	
	
	
	
	
	
	
	
	
	
)

Render HTML similar to blackfriday.Run(), but here we implement a custom Walk function in order to modify image paths in the rendered HTML.
	 := &bytes.Buffer{}
	 := bytes.ReplaceAll([]byte(.Contents), []byte("\r"), nil)
	 := .Parse()
	var  error
	.Walk(func( *blackfriday.Node,  bool) blackfriday.WalkStatus {
		switch .Type {
		case blackfriday.Heading:
Prefix HeadingID with "readme-" on the unit page to prevent a namespace clash with the documentation section.
				.HeadingID = "readme-" + .HeadingID
			}
		case blackfriday.Image, blackfriday.Link:
			 := .Type == blackfriday.Image
			if  := translateLink(string(.LinkData.Destination), .SourceInfo, , );  != "" {
				.LinkData.Destination = []byte()
			}
		case blackfriday.HTMLBlock, blackfriday.HTMLSpan:
			,  := translateHTML(.Literal, .SourceInfo, )
			if  != nil {
				 = fmt.Errorf("couldn't transform html block(%s): %w", .Literal, )
				return blackfriday.Terminate
			}
			.Literal = 
		}
		return .RenderNode(, , )
	})
	if  != nil {
		return safehtml.HTML{}, 
	}
	return legacySanitizeHTML(), nil
}
LegacyReadmeHTML sanitizes readmeContents based on bluemonday.UGCPolicy and returns a safehtml.HTML. If readmeFilePath indicates that this is a markdown file, it will also render the markdown contents using blackfriday. This function is exported for use in an external tool that uses this package to compare readme files to see how changes in processing will affect them.
// Converts a readme into safehtml.HTML: non-markdown files are wrapped
// verbatim in a <pre class="readme"> element, markdown files are delegated to
// blackfridayReadmeHTML.
//
// NOTE(review): the function, parameter and local identifiers are missing from
// this copy of the file; the comments below describe only what the remaining
// tokens show. The name is presumably LegacyReadmeHTML (taken from the wrap
// format string) — confirm against the upstream file.
func ( context.Context,  *internal.ModuleInfo,  *internal.Readme) ( safehtml.HTML,  error) {
	// Deferred call receives the address of a value plus a
	// "LegacyReadmeHTML(path@version)" label; presumably it annotates the
	// returned error with that label — confirm against derrors.Wrap.
	defer derrors.Wrap(&, "LegacyReadmeHTML(%s@%s)", .ModulePath, .Version)
	// A nil value or empty Contents yields the zero safehtml.HTML and no error.
	if  == nil || .Contents == "" {
		return safehtml.HTML{}, nil
	}
	// Files that isMarkdown rejects are not rendered as markdown: Contents is
	// executed through a fixed <pre class="readme"> template instead.
	if !isMarkdown(.Filepath) {
		// template.Must panics if the constant template fails to parse.
		 := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
		,  := .ExecuteToHTML(.Contents)
		if  != nil {
			return safehtml.HTML{}, 
		}
		return , nil
	}

	// Markdown path: rendering is handled by blackfridayReadmeHTML.
	return blackfridayReadmeHTML(, )
}
legacySanitizeHTML reads HTML from r and sanitizes it to ensure it is safe.
bluemonday.UGCPolicy allows a broad selection of HTML elements and attributes that are safe for user generated content. This policy does not allow iframes, object, embed, styles, script, etc.
Allow width and align attributes on img, div, and p tags. This is used to center elements in a readme as well as to size images appropriately, like the gin-gonic/logo/color.png image in the github.com/gin-gonic/gin README.
	.AllowAttrs("width", "align").OnElements("img")
	.AllowAttrs("width", "align").OnElements("div")
	.AllowAttrs("width", "align").OnElements("p")
Trust that bluemonday properly sanitizes the HTML.
isMarkdown reports whether filename says that the file contains markdown.
// Takes a single string and returns a bool.
//
// NOTE(review): identifiers are missing from this copy, and the statement that
// derives the compared value from the argument is not visible; presumably it
// is the file's extension — confirm against the upstream file.
func ( string) bool {
https://tools.ietf.org/html/rfc7763 mentions both extensions.
	// True only for an exact match with ".md" or ".markdown".
	return  == ".md" ||  == ".markdown"
}
translateLink converts image links so that they will work on pkgsite. README files sometimes use relative image paths to image files inside the repository. As the discovery site doesn't host the full repository content, in order for the image to render, we need to convert the relative path to an absolute URL to a hosted image. In addition, GitHub will translate absolute non-raw links to image files to raw links. For example, when GitHub renders a README with <img src="https://github.com/gobuffalo/buffalo/blob/master/logo.svg"> it rewrites it to <img src="https://github.com/gobuffalo/buffalo/raw/master/logo.svg"> (replacing "blob" with "raw"). We do that too.
// Rewrites a link destination and returns the new destination as a string.
// The visible callers treat an empty result as "leave the link unchanged".
//
// NOTE(review): identifiers are missing from this copy of the file, and at
// least one guarding statement has been dropped (see below); the comments
// describe only what the remaining tokens show.
func ( string,  *source.Info,  bool,  *internal.Readme) string {
	// A destination that url.Parse rejects is not translated.
	,  := url.Parse()
	if  != nil {
		return ""
	}
	if .IsAbs() {
		// Absolute URLs are only rewritten when the host is github.com.
		if .Host != "github.com" {
			return ""
		}
		// Split the path on "/" and require that the element at index 3 is
		// "blob"; anything else is left alone.
		 := strings.Split(.Path, "/")
		if len() < 4 || [3] != "blob" {
			return ""
		}
		// Swap "blob" for "raw" and return the re-serialized URL.
		[3] = "raw"
		.Path = strings.Join(, "/")
		return .String()
	}
	// NOTE(review): the condition guarding the next return is not visible in
	// this copy; presumably it tests for an empty path (fragment-only link) —
	// confirm. The fragment is returned with a "readme-" prefix.
This is a fragment; leave it.
		return "#readme-" + .Fragment
Paths are relative to the README location.
	// Join the directory of Filepath with the cleaned result of
	// trimmedEscapedPath.
	 := path.Join(path.Dir(.Filepath), path.Clean(trimmedEscapedPath()))
	// The boolean chooses between RawURL and FileURL for the joined path;
	// presumably it is the bool parameter — confirm.
	if  {
		return .RawURL()
	}
	return .FileURL()
}
trimmedEscapedPath trims surrounding whitespace from u's path, then returns it escaped.
translateHTML parses html text into parsed html nodes. It then iterates through the nodes and replaces the src key with a value that properly represents the source of the image from the repo.
// Parses a byte slice as an HTML fragment, passes the children of each parsed
// body node to walkHTML, and returns either the re-rendered HTML or the
// original bytes.
//
// NOTE(review): identifiers are missing from this copy of the file, and the
// header of the loop over the parsed nodes plus some closing braces have been
// dropped; the comments describe only what the remaining tokens show.
func ( []byte,  *source.Info,  *internal.Readme) ( []byte,  error) {
	// Deferred call receives the address of a value plus a label containing
	// the readme Filepath; presumably it annotates the returned error —
	// confirm against derrors.Wrap.
	defer derrors.Wrap(&, "translateHTML(readme.Filepath=%s)", .Filepath)

	// Parse with a nil context node; a parse failure is returned to the caller.
	 := bytes.NewReader()
	,  := html.ParseFragment(, nil)
	if  != nil {
		return nil, 
	}
	// Buffer that receives the re-rendered nodes, and a flag recording whether
	// walkHTML reported any change.
	var  bytes.Buffer
	 := false
We expect every parsed node to begin with <html><head></head><body>.
		// A node whose DataAtom is not atom.Html is reported as an error.
		if .DataAtom != atom.Html {
			return nil, fmt.Errorf("top-level node is %q, expected 'html'", .DataAtom)
When the parsed html nodes don't have a valid structure (i.e: an html comment), then just return the original text.
		// The second child of the node must exist and be the body element;
		// otherwise the function returns early without an error.
		if .FirstChild == nil || .FirstChild.NextSibling == nil || .FirstChild.NextSibling.DataAtom != atom.Body {
			return , nil
		}
n is now the body node. Walk all its children.
		for  := .FirstChild;  != nil;  = .NextSibling {
			// walkHTML reports true when it changed something; remember that.
			if walkHTML(, , ) {
				 = true
			}
			// Every child is rendered into the buffer, changed or not.
			if  := html.Render(&, );  != nil {
				return nil, 
			}
		}
	}
	// Return the buffer contents only when the flag is set.
	if  {
		return .Bytes(), nil
If there were no changes, return the original.
	return , nil
}
walkHTML crawls through an html node and its descendants and replaces the src attribute of each img element with a link that properly represents the image from the repo source. It reports whether it made a change.
func ( *html.Node,  *source.Info,  *internal.Readme) bool {
	 := false
	if .Type == html.ElementNode && .DataAtom == atom.Img {
		var  []html.Attribute
		for ,  := range .Attr {
			if .Key == "src" {
				if  := translateLink(.Val, , true, );  != "" {
					.Val = 
					 = true
				}
			}
			 = append(, )
		}
		.Attr = 
	}
	for  := .FirstChild;  != nil;  = .NextSibling {
		if (, , ) {
			 = true
		}
	}
	return