// Copyright 2013 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview The SafeUrl type and its builders.
 *
 * TODO(xtof): Link to document stating type contract.
 */

goog.provide('goog.html.SafeUrl');

goog.require('goog.asserts');
goog.require('goog.fs.url');
goog.require('goog.i18n.bidi.Dir');
goog.require('goog.i18n.bidi.DirectionalString');
goog.require('goog.string');
goog.require('goog.string.Const');
goog.require('goog.string.TypedString');



/**
 * A string-like wrapper whose value is safe to use wherever a browser
 * expects a URL, both in DOM APIs and in HTML documents.
 *
 * The security contract carried by a SafeUrl is that its string value will
 * not cause untrusted script execution when a browser evaluates it as a
 * hyperlink URL. Consequently, assigning the value to a URL-valued DOM
 * property, or interpolating it into a HTML template in URL context (e.g.,
 * inside a href attribute), will not result in a Cross-Site-Scripting
 * vulnerability.
 *
 * The contract has two deliberate limits:
 * - As documented in {@code goog.html.SafeUrl.unwrap}, it does not guarantee
 *   that instances are safe to interpolate into HTML without appropriate
 *   escaping.
 * - It implies nothing about the resource the URL refers to. In particular,
 *   a SafeUrl is not safe to use where the referred-to resource is
 *   interpreted as trusted code, e.g., as the src of a script tag.
 *
 * Instances must be obtained through the factory methods
 * ({@code goog.html.SafeUrl.fromConstant}, {@code goog.html.SafeUrl.sanitize},
 * etc.), never by invoking the constructor. The constructor intentionally
 * takes no parameters and the type is immutable; hence invoking it directly
 * can only ever yield the default instance wrapping the empty string.
 *
 * @see goog.html.SafeUrl#fromConstant
 * @see goog.html.SafeUrl#sanitize
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeUrl = function() {
  /**
   * The string wrapped by this SafeUrl. The name is ugly on purpose, so that
   * (non-compiled) code reaching into this field directly stands out.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * Type marker consulted by the additional run-time type check.
   * @see goog.html.SafeUrl#unwrap
   * @const
   * @private
   */
  this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;
};


/**
 * The innocuous string that the SafeUrl builders wrap in place of an input
 * they reject (for instance, goog.html.SafeUrl.sanitize when passed an unsafe
 * URL).
 *
 * about:invalid is registered in
 * http://www.w3.org/TR/css3-values/#about-invalid.
 * http://tools.ietf.org/html/rfc6694#section-2.2.1 permits about URLs to
 * contain a fragment, which is not to be considered when determining if an
 * about URL is well-known.
 *
 * about:invalid seems preferable to a fixed data URL: browsers might choose
 * not to report CSP violations on it, since legitimate CSS function calls to
 * attr() can result in this URL being produced. It is also a standard URL
 * whose semantics are exactly the ones needed here:
 * "The about:invalid URI references a non-existent document with a generic
 * error condition. It can be used when a URI is necessary, but the default
 * value shouldn't be resolveable as any type of document".
 *
 * @const {string}
 */
goog.html.SafeUrl.INNOCUOUS_STRING = 'about:invalid#zClosurez';


/**
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogStringTypedString = true;


/**
 * Returns this SafeUrl's value as a string.
 *
 * IMPORTANT: Wherever it is security relevant that an object's type is
 * indeed {@code SafeUrl}, call {@code goog.html.SafeUrl.unwrap} rather than
 * this method. If in doubt, assume that it's security relevant. In particular,
 * note that goog.html functions which return a goog.html type do not guarantee
 * that the returned instance is of the right type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml instanceof
 * // goog.html.SafeHtml.
 * </pre>
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. Values of SafeUrl objects MUST be appropriately escaped
 * before being embedded in a HTML document, and the required escaping is
 * context-sensitive (e.g. embedding a URL in a style property within a style
 * attribute needs a different escaping than embedding it in a href
 * attribute).
 *
 * @see goog.html.SafeUrl#unwrap
 * @override
 */
goog.html.SafeUrl.prototype.getTypedStringValue = function() {
  var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  return wrappedValue;
};


/**
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString = true;


/**
 * Returns this URL's directionality, which is always {@code LTR}.
 * @override
 */
goog.html.SafeUrl.prototype.getDirection = function() {
  return goog.i18n.bidi.Dir.LTR;
};


if (goog.DEBUG) {
  /**
   * Returns a debug string-representation of this value.
   *
   * This is not the wrapped string itself; to obtain the actual string value
   * wrapped in a SafeUrl, use {@code goog.html.SafeUrl.unwrap}.
   *
   * @see goog.html.SafeUrl#unwrap
   * @override
   */
  goog.html.SafeUrl.prototype.toString = function() {
    var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
    return 'SafeUrl{' + wrappedValue + '}';
  };
}


/**
 * Verifies at run time that the provided object really is a SafeUrl, and
 * returns the string it wraps.
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. Values of SafeUrl objects MUST be appropriately escaped
 * before being embedded in a HTML document, and the required escaping is
 * context-sensitive (e.g. embedding a URL in a style property within a style
 * attribute needs a different escaping than embedding it in a href
 * attribute).
 *
 * @param {!goog.html.SafeUrl} safeUrl The object to extract from.
 * @return {string} The SafeUrl object's contained string, unless the run-time
 *     type check fails. In that case, {@code unwrap} returns an innocuous
 *     string, or, if assertions are enabled, throws
 *     {@code goog.asserts.AssertionError}.
 */
goog.html.SafeUrl.unwrap = function(safeUrl) {
  // The run-time check below adds a layer of protection against security
  // bugs caused by application code that disables type checks. It requires
  // all of the following:
  // 1. The object is an instance of the expected type.
  // 2. The object is not an instance of a subclass.
  // 3. The object carries the type marker of the expected type. "Faking" an
  //    object therefore requires a reference to the type marker, whose names
  //    are intended to stand out in code reviews.
  var isGenuineSafeUrl = safeUrl instanceof goog.html.SafeUrl &&
      safeUrl.constructor === goog.html.SafeUrl &&
      safeUrl.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;

  if (!isGenuineSafeUrl) {
    goog.asserts.fail('expected object of type SafeUrl, got \'' +
                      safeUrl + '\' of type ' + goog.typeOf(safeUrl));
    // Only reached when the assertion above did not throw.
    return 'type_error:SafeUrl';
  }

  return safeUrl.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
};


/**
 * Builds a SafeUrl from a compile-time constant string.
 *
 * Compile-time constant strings are inherently program-controlled and hence
 * trusted, so the value is wrapped without any validation.
 *
 * @param {!goog.string.Const} url A compile-time-constant string from which to
 *     create a SafeUrl.
 * @return {!goog.html.SafeUrl} A SafeUrl object initialized to {@code url}.
 */
goog.html.SafeUrl.fromConstant = function(url) {
  var constantValue = goog.string.Const.unwrap(url);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      constantValue);
};


/**
 * Matches the MIME types of Blobs or data: URIs for which a SafeUrl may be
 * created (via URL.createObjectURL(blob) or from the data: URI itself).
 * Currently only image and video types are matched.
 * @const
 * @private
 */
goog.html.SAFE_MIME_TYPE_PATTERN_ =
    /^(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm))$/i;


/**
 * Builds a SafeUrl wrapping a blob URL for the given {@code blob}.
 *
 * The blob URL is created with {@code URL.createObjectURL}. When the MIME type
 * of {@code blob} is not one of the known safe image or video MIME types, no
 * blob URL is created and the SafeUrl wraps {@link #INNOCUOUS_STRING} instead.
 *
 * @see http://www.w3.org/TR/FileAPI/#url
 * @param {!Blob} blob
 * @return {!goog.html.SafeUrl} The blob URL, or an innocuous string wrapped
 *   as a SafeUrl.
 */
goog.html.SafeUrl.fromBlob = function(blob) {
  var hasSafeMimeType = goog.html.SAFE_MIME_TYPE_PATTERN_.test(blob.type);
  if (!hasSafeMimeType) {
    return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
        goog.html.SafeUrl.INNOCUOUS_STRING);
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      goog.fs.url.createObjectUrl(blob));
};


/**
 * Matches a base-64 data URL; the first match group captures the MIME type.
 * @const
 * @private
 */
goog.html.DATA_URL_PATTERN_ = /^data:([^;,]*);base64,[a-z0-9+\/]+=*$/i;


/**
 * Builds a SafeUrl wrapping a data: URL, provided the URL is a base64 data URL
 * whose MIME type is one of the known-safe image or video types.
 *
 * @param {string} dataUrl A valid base64 data URL with one of the whitelisted
 *     image or video MIME types.
 * @return {!goog.html.SafeUrl} A matching safe URL, or {@link INNOCUOUS_STRING}
 *     wrapped as a SafeUrl if it does not pass.
 */
goog.html.SafeUrl.fromDataUrl = function(dataUrl) {
  // Residual risk: a browser that doesn't know the MIME type might sniff the
  // content type and execute HTML within the data: URL. To cause XSS it would
  // additionally have to execute that HTML in the same origin as the page
  // with the link. It seems unlikely that both of these will happen,
  // particularly in not really old IEs.
  var parts = dataUrl.match(goog.html.DATA_URL_PATTERN_);
  var wrappedValue = goog.html.SafeUrl.INNOCUOUS_STRING;
  if (parts && goog.html.SAFE_MIME_TYPE_PATTERN_.test(parts[1])) {
    wrappedValue = dataUrl;
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      wrappedValue);
};


/**
 * Builds a SafeUrl wrapping a tel: URL.
 *
 * @param {string} telUrl A tel URL.
 * @return {!goog.html.SafeUrl} A matching safe URL, or {@link INNOCUOUS_STRING}
 *     wrapped as a SafeUrl if it does not pass.
 */
goog.html.SafeUrl.fromTelUrl = function(telUrl) {
  // A tel: URL carries the risk of immediately placing a call once clicked,
  // without requiring user confirmation. That is why tel: URLs get their own
  // builder rather than being accepted elsewhere.
  var hasTelScheme = goog.string.caseInsensitiveStartsWith(telUrl, 'tel:');
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      hasTelScheme ? telUrl : goog.html.SafeUrl.INNOCUOUS_STRING);
};


/**
 * A pattern that recognizes a commonly useful subset of URLs that satisfy
 * the SafeUrl contract.
 *
 * The URLs matched by this regular expression will not cause script execution
 * if used in URL context within a HTML document. Specifically, it matches if
 * (comment from here on and regex copied from Soy's EscapingConventions):
 * (1) Either a protocol in a whitelist (http, https, mailto or ftp).
 * (2) or no protocol.  A protocol must be followed by a colon. The below
 *     allows that by allowing colons only after one of the characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern disallows &, used in HTML entity declarations before
 * one of the characters in [/?#]. This disallows HTML entities used in the
 * protocol name, which should never happen, e.g. "h&#116;tp" for "http".
 * It also disallows HTML entities in the first path part of a relative path,
 * e.g. "foo&lt;bar/baz".  Our existing escaping functions should not produce
 * that. More importantly, it disallows masking of a colon,
 * e.g. "javascript&#58;...".
 *
 * @private
 * @const {!RegExp}
 */
goog.html.SAFE_URL_PATTERN_ =
    /^(?:(?:https?|mailto|ftp):|[^&:/?#]*(?:[/?#]|$))/i;


/**
 * Builds a SafeUrl from {@code url}. A value that already is a
 * goog.html.SafeUrl is returned as is. Any other input is converted to a
 * string and checked against a pattern of commonly used safe URLs; when it
 * does not match, the returned SafeUrl wraps the innocuous string instead.
 *
 * {@code url} may be a URL with the http, https, mailto or ftp scheme,
 * or a relative URL (i.e., a URL without a scheme; specifically, a
 * scheme-relative, absolute-path-relative, or path-relative URL).
 *
 * @see http://url.spec.whatwg.org/#concept-relative-url
 * @param {string|!goog.string.TypedString} url The URL to validate.
 * @return {!goog.html.SafeUrl} The validated URL, wrapped as a SafeUrl.
 */
goog.html.SafeUrl.sanitize = function(url) {
  if (url instanceof goog.html.SafeUrl) {
    return url;
  }

  // Typed strings expose their value through the TypedString interface;
  // everything else is coerced to a string.
  var candidate = url.implementsGoogStringTypedString ?
      url.getTypedStringValue() :
      String(url);

  var isSafe = goog.html.SAFE_URL_PATTERN_.test(candidate);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      isSafe ? candidate : goog.html.SafeUrl.INNOCUOUS_STRING);
};


/**
 * Type marker for the SafeUrl type. Every instance stores a reference to this
 * object, and the additional run-time type check compares against it.
 * @const {!Object}
 * @private
 */
goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};


/**
 * Package-internal utility method to create SafeUrl instances. The given
 * string is stored without any validation.
 *
 * @param {string} url The string to initialize the SafeUrl object with.
 * @return {!goog.html.SafeUrl} The initialized SafeUrl object.
 * @package
 */
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse = function(
    url) {
  var instance = new goog.html.SafeUrl();
  instance.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = url;
  return instance;
};


/**
 * A SafeUrl corresponding to the special about:blank url.
 * @const {!goog.html.SafeUrl}
 */
goog.html.SafeUrl.ABOUT_BLANK =
    goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
        'about:blank');