diff --git a/CHANGELOG.md b/CHANGELOG.md index 302dd4834..24a78addb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,36 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.5.1 + +[Commits](https://github.com/xmldom/xmldom/compare/0.5.0...0.5.1) + +### Fixes: + +- Security: Misinterpretation of malicious XML input [`CVE-2021-32796`](https://github.com/xmldom/xmldom/security/advisories/GHSA-5fg8-2547-mr8q) + ## 0.5.0 +[Commits](https://github.com/xmldom/xmldom/compare/0.4.0...0.5.0) + ### Fixes -- Avoid misinterpretation of malicious XML input - `GHSA-h6q6-9hqw-rwfv` (CVE-2021-21366) +- Avoid misinterpretation of malicious XML input - [`GHSA-h6q6-9hqw-rwfv`](https://github.com/xmldom/xmldom/security/advisories/GHSA-h6q6-9hqw-rwfv) (CVE-2021-21366) + - Improve error reporting; throw on duplicate attribute\ + BREAKING CHANGE: It is currently not clear how to consistently deal with duplicate attributes, so it's also safer for our users to fail when detecting them. + It's possible to configure the `DOMParser.errorHandler` before parsing, to handle those errors differently. + + To accomplish this and also be able to verify it in tests I needed to + - create a new `Error` type `ParseError` and export it + - Throw `ParseError` from `errorHandler.fatalError` and prevent those from being caught in `XMLReader`. + - export `DOMHandler` constructor as `__DOMHandler` + - Preserve quotes in DOCTYPE declaration + Since the only purpose of parsing the DOCTYPE is to be able to restore it when serializing, we decided that it would be best to leave the parsed `publicId` and `systemId` as is, including any quotes. + BREAKING CHANGE: If somebody relies on the actual unquoted values of those ids, they will need to take care of either single or double quotes and the right escaping. + (Without this change this would not have been possible because the SAX parser already dropped the information about the quotes that have been used in the source.) + + https://www.w3.org/TR/2006/REC-xml11-20060816/#dtd + https://www.w3.org/TR/2006/REC-xml11-20060816/#IDAX1KS (External Entity Declaration) + - Fix breaking preprocessors' directives when parsing attributes [`#171`](https://github.com/xmldom/xmldom/pull/171) - fix(dom): Escape `]]>` when serializing CharData [`#181`](https://github.com/xmldom/xmldom/pull/181) - Switch to (only) MIT license (drop problematic LGPL license option) [`#178`](https://github.com/xmldom/xmldom/pull/178) diff --git a/lib/dom.js b/lib/dom.js index 78ab77091..afb383149 100644 --- a/lib/dom.js +++ b/lib/dom.js @@ -973,6 +973,16 @@ function needNamespaceDefine(node,isHTML, visibleNamespaces) { //console.error(3,true,node.flag,node.prefix,node.namespaceURI) return true; } +/** + * Well-formed constraint: No < in Attribute Values + * The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a <. + * @see https://www.w3.org/TR/xml/#CleanAttrVals + * @see https://www.w3.org/TR/xml/#NT-AttValue + */ +function addSerializedAttribute(buf, qualifiedName, value) { + buf.push(' ', qualifiedName, '="', value ? value.replace(/[<&"]/g,_xmlEncoder) : '', '"') +} + function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ if(nodeFilter){ node = nodeFilter(node); @@ -1014,8 +1024,7 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ if (needNamespaceDefine(attr,isHTML, visibleNamespaces)) { var prefix = attr.prefix||''; var uri = attr.namespaceURI; - var ns = prefix ? ' xmlns:' + prefix : " xmlns"; - buf.push(ns, '="' , uri , '"'); + addSerializedAttribute(buf, prefix ? 'xmlns:' + prefix : "xmlns", uri); visibleNamespaces.push({ prefix: prefix, namespace:uri }); } serializeToString(attr,buf,isHTML,nodeFilter,visibleNamespaces); @@ -1024,8 +1033,7 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ if (needNamespaceDefine(node,isHTML, visibleNamespaces)) { var prefix = node.prefix||''; var uri = node.namespaceURI; - var ns = prefix ? ' xmlns:' + prefix : " xmlns"; - buf.push(ns, '="' , uri , '"'); + addSerializedAttribute(buf, prefix ? 'xmlns:' + prefix : "xmlns", uri); visibleNamespaces.push({ prefix: prefix, namespace:uri }); } @@ -1064,7 +1072,7 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ } return; case ATTRIBUTE_NODE: - return buf.push(' ',node.name,'="',node.value.replace(/[&"]/g,_xmlEncoder),'"'); + return addSerializedAttribute(buf, node.name, node.value); case TEXT_NODE: /** * The ampersand character (&) and the left angle bracket (<) must not appear in their literal form, diff --git a/test/dom/serializer.test.js b/test/dom/serializer.test.js index 7ae9f8785..20c37c020 100644 --- a/test/dom/serializer.test.js +++ b/test/dom/serializer.test.js @@ -1,6 +1,7 @@ 'use strict' const { DOMParser } = require('../../lib/dom-parser') +const { XMLSerializer } = require('../../lib/dom') describe('XML Serializer', () => { it('supports text node containing "]]>"', () => { @@ -17,4 +18,23 @@ describe('XML Serializer', () => { '' ) }) + describe('properly escapes attribute values', () => { + it('should escape special characters in namespace attributes', () => { + const input = `` + const doc = new DOMParser().parseFromString(input, 'text/xml') + + // in this case the explicit attribute nodes are serialized + expect(new XMLSerializer().serializeToString(doc)).toBe( + '' + ) + + // in this case the namespace attributes are "inherited" from the parent, + // which is not serialized + expect( + new XMLSerializer().serializeToString(doc.documentElement.firstChild) + ).toBe( + '' + ) + }) + }) }) diff --git a/test/html/__snapshots__/normalize.test.js.snap b/test/html/__snapshots__/normalize.test.js.snap index 68351824f..a612ecf4d 100644 --- a/test/html/__snapshots__/normalize.test.js.snap +++ b/test/html/__snapshots__/normalize.test.js.snap @@ -44,13 +44,13 @@ Object { exports[`html normalizer
1`] = ` Object { - "actual": "
d\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", + "actual": "
d\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", } `; exports[`html normalizer
1`] = ` Object { - "actual": "
')\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", + "actual": "
')\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", } `; @@ -90,7 +90,7 @@ Object { exports[`html normalizer 1`] = ` Object { - "actual": "b && '&&&'\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">", + "actual": "b && '&&&'\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">", } `; diff --git a/test/parse/__snapshots__/locator.test.js.snap b/test/parse/__snapshots__/locator.test.js.snap index be4371f19..cc247f5bd 100644 --- a/test/parse/__snapshots__/locator.test.js.snap +++ b/test/parse/__snapshots__/locator.test.js.snap @@ -2,7 +2,7 @@ exports[`DOMLocator attribute position 1`] = ` Object { - "actual": "
<;test", + "actual": "
<;test", } `; diff --git a/test/parse/__snapshots__/simple.test.js.snap b/test/parse/__snapshots__/simple.test.js.snap index 28e5448b6..7478b6f71 100644 --- a/test/parse/__snapshots__/simple.test.js.snap +++ b/test/parse/__snapshots__/simple.test.js.snap @@ -12,7 +12,7 @@ Object { exports[`parse simple 1`] = ` Object { - "actual": "", + "actual": "", } `; @@ -49,6 +49,6 @@ Object { exports[`parse wrong closing tag 1`] = ` Object { - "actual": "
<;test", + "actual": "
<;test", } `; diff --git a/test/xmltest/__snapshots__/not-wf.test.js.snap b/test/xmltest/__snapshots__/not-wf.test.js.snap index 400dd6e40..34eba2916 100644 --- a/test/xmltest/__snapshots__/not-wf.test.js.snap +++ b/test/xmltest/__snapshots__/not-wf.test.js.snap @@ -112,7 +112,7 @@ Object { exports[`xmltest/not-wellformed standalone should match 014.xml with snapshot 1`] = ` Object { - "actual": "\\"/>", + "actual": "\\"/>", } `; diff --git a/test/xmltest/__snapshots__/valid.test.js.snap b/test/xmltest/__snapshots__/valid.test.js.snap index 085aa7531..fdf34ca9f 100644 --- a/test/xmltest/__snapshots__/valid.test.js.snap +++ b/test/xmltest/__snapshots__/valid.test.js.snap @@ -293,7 +293,7 @@ Object { exports[`xmltest/valid standalone should match 040.xml with snapshot 1`] = ` Object { - "actual": "'\\"/>", + "actual": "'\\"/>", "expected": "", } `;