Refs #178 - plain text version for non-html contents. by n1k0 · Pull Request #926 · casperjs/casperjs · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
This repository was archived by the owner on Jun 19, 2020. It is now read-only.

Refs #178 - plain text version for non-html contents. #926

Merged
merged 1 commit into from
May 19, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 2 additions & 28 deletions modules/casper.js
Original file line number Diff line number Diff line change
Expand Up @@ -943,36 +943,10 @@ Casper.prototype.getPageContent = function getPageContent() {
"use strict";
this.checkStarted();
var contentType = utils.getPropertyPath(this, 'currentResponse.contentType');
- if (!utils.isString(contentType)) {
+ if (!utils.isString(contentType) || contentType.indexOf("text/html") !== -1) {
return this.page.frameContent;
}
- // for some reason (qt)webkit/Gecko will always enclose non text/html body contents within an html
- // structure like this:
- // webkit: <html><head></head><body><pre style="(...)">content</pre></body></html>
- // gecko: <html><head><link rel="alternate stylesheet" type="text/css" href="resource://gre-resources/plaintext.css" title="..."></head><body><pre>document.write('foo');\n</pre></body></html>
- var sanitizedHtml = this.evaluate(function checkHtml() {
- var head = __utils__.findOne('head'),
- body = __utils__.findOne('body');
- if (!head || !body) {
- return null;
- }
- // for content in Webkit
- if (head.childNodes.length === 0 &&
- body.childNodes.length === 1 &&
- __utils__.findOne('body pre[style]')) {
- return __utils__.findOne('body pre').textContent.trim();
- }
- // for content in Gecko
- if (head.childNodes.length === 1 &&
- body.childNodes.length === 1 &&
- head.childNodes[0].localName === 'link' &&
- head.childNodes[0].getAttribute('href') === 'resource://gre-resources/plaintext.css' &&
- body.childNodes[0].localName === 'pre' ) {
- return body.childNodes[0].textContent.trim();
- }
- return null;
- });
- return sanitizedHtml ? sanitizedHtml : this.page.frameContent;
+ return this.page.framePlainText;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion tests/suites/casper/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ casper.test.begin("Casper.getPageContent() text/html content", 1, function(test)

casper.test.begin("Casper.getPageContent() non text/html content", 1, function(test) {
casper.start("tests/site/dummy.js", function() {
- test.assertEquals(this.getPageContent(), "document.write('foo');",
+ test.assertEquals(this.getPageContent(), "document.write('foo');\n",
"Casper.getPageContent() retrieves non text/html content");
}).run(function() {
test.done();
Expand Down
0