fix: replace \u2029 as part of normalizeLineEndings by kboshold · Pull Request #839 · xmldom/xmldom · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

fix: replace \u2029 as part of normalizeLineEndings #839

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1527,12 +1527,15 @@ declare module '@xmldom/xmldom' {
readonly locator?: boolean;

/**
* used to replace line endings before parsing, defaults to `normalizeLineEndings`,
* which normalizes line endings according to <https://www.w3.org/TR/xml11/#sec-line-ends>.
* used to replace line endings before parsing, defaults to exported `normalizeLineEndings`,
* which normalizes line endings according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
* including some Unicode "newline" characters.
*
* @see {@link normalizeLineEndings}
*/
readonly normalizeLineEndings?: (source: string) => string;
/**
* A function that is invoked for every error that occurs during parsing.
* A function invoked for every error that occurs during parsing.
*
* If it is not provided, all errors are reported to `console.error`
* and only `fatalError`s are thrown as a `ParseError`,
Expand Down Expand Up @@ -1572,6 +1575,29 @@ declare module '@xmldom/xmldom' {
): void;
}

/**
* Normalizes line ending according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
* including some Unicode "newline" characters:
*
* > XML parsed entities are often stored in computer files which,
* > for editing convenience, are organized into lines.
* > These lines are typically separated by some combination
* > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
* >
* > To simplify the tasks of applications, the XML processor must behave
* > as if it normalized all line breaks in external parsed entities (including the document entity)
* > on input, before parsing, by translating the following to a single #xA character:
* >
* > 1. the two-character sequence #xD #xA,
* > 2. the two-character sequence #xD #x85,
* > 3. the single character #x85,
* > 4. the single character #x2028,
* > 5. the single character #x2029,
* > 6. any #xD character that is not immediately followed by #xA or #x85.
*
* @prettierignore
*/
function normalizeLineEndings(input: string): string;
/**
* A method that prevents any further parsing when an `error`
* with level `error` is reported during parsing.
Expand Down
16 changes: 10 additions & 6 deletions lib/dom-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ var ParseError = errors.ParseError;
var XMLReader = sax.XMLReader;

/**
* Normalizes line ending according to <https://www.w3.org/TR/xml11/#sec-line-ends>:
* Normalizes line ending according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
* including some Unicode "newline" characters:
*
* > XML parsed entities are often stored in computer files which,
* > for editing convenience, are organized into lines.
Expand All @@ -27,20 +28,21 @@ var XMLReader = sax.XMLReader;
* >
* > To simplify the tasks of applications, the XML processor must behave
* > as if it normalized all line breaks in external parsed entities (including the document entity)
* > on input, before parsing, by translating all of the following to a single #xA character:
* > on input, before parsing, by translating the following to a single #xA character:
* >
* > 1. the two-character sequence #xD #xA,
* > 2. the two-character sequence #xD #x85,
* > 3. the single character #x85,
* > 4. the single character #x2028,
* > 5. any #xD character that is not immediately followed by #xA or #x85.
* > 5. the single character #x2029,
* > 6. any #xD character that is not immediately followed by #xA or #x85.
*
* @param {string} input
* @returns {string}
* @prettierignore
*/
function normalizeLineEndings(input) {
// Both variants below run two passes: the first collapses the two-character sequences
// #xD #xA and #xD #x85 into a single '\n', so the second pass (single characters)
// does not turn them into two line feeds.
// Diff rendering, pre-change line: the second pass replaces #xD, #x85 and #x2028,
// but leaves #x2029 untouched.
return input.replace(/\r[\n\u0085]/g, '\n').replace(/[\r\u0085\u2028]/g, '\n');
// Diff rendering, post-change line: identical except that \u2029 is added to the
// second character class, so #x2029 is also replaced by '\n'.
// NOTE(review): read literally, this statement is unreachable after the `return` above;
// presumably only this line exists in the real lib/dom-parser.js — confirm against the repository.
return input.replace(/\r[\n\u0085]/g, '\n').replace(/[\r\u0085\u2028\u2029]/g, '\n');
}

/**
Expand All @@ -63,7 +65,7 @@ function normalizeLineEndings(input) {
* DEPRECATED! use `onError` instead.
* @property {function(level:ErrorLevel, message:string, context: DOMHandler):void}
* [onError]
* A function that is invoked for every error that occurs during parsing.
* A function invoked for every error that occurs during parsing.
*
* If it is not provided, all errors are reported to `console.error`
* and only `fatalError`s are thrown as a `ParseError`,
Expand All @@ -78,7 +80,9 @@ function normalizeLineEndings(input) {
* attribute describing their location in the XML string.
* Default is true.
* @property {(string) => string} [normalizeLineEndings]
* used to replace line endings before parsing, defaults to `normalizeLineEndings`
* used to replace line endings before parsing, defaults to exported `normalizeLineEndings`,
* which normalizes line endings according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
* including some Unicode "newline" characters.
* @property {Object} [xmlns]
* The XML namespaces that should be assumed when parsing.
* The default namespace can be provided by the key that is the empty string.
Expand Down
1 change: 1 addition & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ exports.XMLSerializer = dom.XMLSerializer;

var domParser = require('./dom-parser');
exports.DOMParser = domParser.DOMParser;
exports.normalizeLineEndings = domParser.normalizeLineEndings;
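exports.onErrorStopParsing = domParser.onErrorStopParsing;
exports.onWarningStopParsing = domParser.onWarningStopParsing;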
8 changes: 8 additions & 0 deletions test/dom-parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,14 @@ describe('DOMParser', () => {
const doc = parser.parseFromString(source, MIME_TYPE.XML_TEXT);
expect(new XMLSerializer().serializeToString(doc)).toEqual(source);
});
test('should be able to open documents with alternative whitespace without creating a bottleneck and replacing them with \\n', () => {
// issue: https://github.com/xmldom/xmldom/issues/838
const onError = jest.fn();
const { parser } = getTestParser({ onError });
const source = `<root>${'A'.repeat(50000)}\u2029${'A'.repeat(50000)}\u0085${'A'.repeat(50000)}\u2028${'A'.repeat(50000)}\u2029</root>`;
const doc = parser.parseFromString(source, MIME_TYPE.XML_TEXT);
expect(new XMLSerializer().serializeToString(doc)).toEqual(source.replace(/[\u0085\u2028\u2029]/g, '\n'));
}, 500);
});
});

Expand Down
6 changes: 6 additions & 0 deletions test/parse/normalize-line-endings.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
'use strict';

const { describe, expect, test } = require('@jest/globals');
const { DOMParser, normalizeLineEndings } = require('../../lib/dom-parser');
const { MIME_TYPE } = require('../../lib/conventions');

const whitespaceToHex = (str) => str.replace(/\s/g, (c) => `#x${c.charCodeAt(0).toString(16)}`);

describe('DOMParser constructor option normalizeLineEndings', () => {
Expand Down Expand Up @@ -41,6 +43,10 @@ describe('normalizeLineEndings', () => {
expect(whitespaceToHex(normalizeLineEndings('\u2028'))).toBe('#xa');
});

test('should normalize the single character #x2029', () => {
expect(whitespaceToHex(normalizeLineEndings('\u2029'))).toBe('#xa');
});

test('should normalize any #xD character that is not immediately followed by #xA or #x85', () => {
expect(whitespaceToHex(normalizeLineEndings('\r \n'))).toBe('#xa#x20#xa');
expect(whitespaceToHex(normalizeLineEndings(' \r\r'))).toBe('#x20#xa#xa');
Expand Down
1 change: 1 addition & 0 deletions test/parse/parse-element.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict';

const { describe, expect, test } = require('@jest/globals');
const { getTestParser } = require('../get-test-parser');
const { DOMParser } = require('../../lib');
const { MIME_TYPE } = require('../../lib/conventions');
Expand Down
0