// Copyright (c) 2014, David Kitchen // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, this // list of conditions and the following disclaimer. // // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * Neither the name of the organisation (Microcosm) nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package bluemonday import ( "regexp" ) // StrictPolicy returns an empty policy, which will effectively strip all HTML // elements and their attributes from a document. func StrictPolicy() *Policy { return NewPolicy() } // StripTagsPolicy is DEPRECATED. Use StrictPolicy instead. func StripTagsPolicy() *Policy { return StrictPolicy() } // UGCPolicy returns a policy aimed at user generated content that is a result // of HTML WYSIWYG tools and Markdown conversions. // // This is expected to be a fairly rich document where as much markup as // possible should be retained. Markdown permits raw HTML so we are basically // providing a policy to sanitise HTML5 documents safely but with the // least intrusion on the formatting expectations of the user. func UGCPolicy() *Policy { p := NewPolicy() /////////////////////// // Global attributes // /////////////////////// // "class" is not permitted as we are not allowing users to style their own // content p.AllowStandardAttributes() ////////////////////////////// // Global URL format policy // ////////////////////////////// p.AllowStandardURLs() //////////////////////////////// // Declarations and structure // //////////////////////////////// // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are // expecting user generated content to be a fragment of HTML and not a full // document. ////////////////////////// // Sectioning root tags // ////////////////////////// // "article" and "aside" are permitted and takes no attributes p.AllowElements("article", "aside") // "body" is not permitted as we are expecting user generated content to be a fragment // of HTML and not a full document. // "details" is permitted, including the "open" attribute which can either // be blank or the value "open". p.AllowAttrs( "open", ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details") // "fieldset" is not permitted as we are not allowing forms to be created. // "figure" is permitted and takes no attributes p.AllowElements("figure") // "nav" is not permitted as it is assumed that the site (and not the user) // has defined navigation elements // "section" is permitted and takes no attributes p.AllowElements("section") // "summary" is permitted and takes no attributes p.AllowElements("summary") ////////////////////////// // Headings and footers // ////////////////////////// // "footer" is not permitted as we expect user content to be a fragment and // not structural to this extent // "h1" through "h6" are permitted and take no attributes p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6") // "header" is not permitted as we expect user content to be a fragment and // not structural to this extent // "hgroup" is permitted and takes no attributes p.AllowElements("hgroup") ///////////////////////////////////// // Content grouping and separating // ///////////////////////////////////// // "blockquote" is permitted, including the "cite" attribute which must be // a standard URL. p.AllowAttrs("cite").OnElements("blockquote") // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes p.AllowElements("br", "div", "hr", "p", "span", "wbr") /////////// // Links // /////////// // "a" is permitted p.AllowAttrs("href").OnElements("a") // "area" is permitted along with the attributes that map image maps work p.AllowAttrs("name").Matching( regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`), ).OnElements("map") p.AllowAttrs("alt").Matching(Paragraph).OnElements("area") p.AllowAttrs("coords").Matching( regexp.MustCompile(`^([0-9]+,)+[0-9]+$`), ).OnElements("area") p.AllowAttrs("href").OnElements("area") p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area") p.AllowAttrs("shape").Matching( regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`), ).OnElements("area") p.AllowAttrs("usemap").Matching( regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`), ).OnElements("img") // "link" is not permitted ///////////////////// // Phrase elements // ///////////////////// // The following are all inline phrasing elements p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em", "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var") // "q" is permitted and "cite" is a URL and handled by URL policies p.AllowAttrs("cite").OnElements("q") // "time" is permitted p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time") //////////////////// // Style elements // //////////////////// // block and inline elements that impart no semantic meaning but style the // document p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u") // "style" is not permitted as we are not yet sanitising CSS and it is an // XSS attack vector ////////////////////// // HTML5 Formatting // ////////////////////// // "bdi" "bdo" are permitted p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo") // "rp" "rt" "ruby" are permitted p.AllowElements("rp", "rt", "ruby") /////////////////////////// // HTML5 Change tracking // /////////////////////////// // "del" "ins" are permitted p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins") p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins") /////////// // Lists // /////////// p.AllowLists() //////////// // Tables // //////////// p.AllowTables() /////////// // Forms // /////////// // By and large, forms are not permitted. However there are some form // elements that can be used to present data, and we do permit those // // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist" // "textarea" "optgroup" "option" are all not permitted // "meter" is permitted p.AllowAttrs( "value", "min", "max", "low", "high", "optimum", ).Matching(Number).OnElements("meter") // "progress" is permitted p.AllowAttrs("value", "max").Matching(Number).OnElements("progress") ////////////////////// // Embedded content // ////////////////////// // Vast majority not permitted // "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track" // "video" are all not permitted p.AllowImages() return p }