2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
\r
4 // Licensed under the Apache License, Version 2.0 (the License);
\r
5 // you may not use this file except in compliance with the License.
\r
6 // You may obtain a copy of the License at
\r
8 // http://www.apache.org/licenses/LICENSE-2.0
\r
10 // Unless required by applicable law or agreed to in writing, software
\r
11 // distributed under the License is distributed on an "AS IS" BASIS,
\r
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
13 // See the License for the specific language governing permissions and
\r
14 // limitations under the License.
\r
// Builds a "base" URL for the current article from window.location: the path is
// split into segments, each segment is cleaned of file extensions and page
// markers, and the kept segments are re-joined behind protocol and host.
// NOTE(review): several original lines of this function (closing braces and the
// bodies of some conditionals) are not visible in this view.
28 function findBaseUrl() {
\r
// Drop anything after a "?" and walk the path segments from last to first.
29 var noUrlParams = window.location.pathname.split("?")[0],
\r
30 urlSlashes = noUrlParams.split("/").reverse(),
\r
31 cleanedSegments = [],
\r
34 for (var i = 0, slashLen = urlSlashes.length; i < slashLen; i+=1) {
\r
35 var segment = urlSlashes[i];
\r
// A segment containing "." may carry a file extension; when the part after the
// first dot is letters only, keep just the part before the dot.
37 if (segment.indexOf(".") !== -1) {
\r
38 possibleType = segment.split(".")[1];
\r
40 if(!possibleType.match(/[^a-zA-Z]/)) {
\r
41 segment = segment.split(".")[0];
\r
// Remove the first ",00" occurrence from the segment.
45 if(segment.indexOf(',00') !== -1) {
\r
46 segment = segment.replace(',00', '');
\r
// In the last or second-to-last segment, strip a trailing page marker such as
// "-p2", "_page3" or "-12" (one or two digits).
49 if (segment.match(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i) && ((i === 1) || (i === 0))) {
\r
50 segment = segment.replace(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i, "");
\r
// The next three checks detect: a purely numeric one/two digit segment near the
// end, a final "index" segment, and a very short segment when the last segment
// contains no letters. NOTE(review): their bodies are not visible here;
// presumably they mark the segment to be dropped - confirm against the full file.
56 if (i < 2 && segment.match(/^\d{1,2}$/)) {
\r
60 if(i === 0 && segment.toLowerCase() === "index") {
\r
64 if(i < 2 && segment.length < 3 && !urlSlashes[0].match(/[a-z]/i)) {
\r
69 cleanedSegments.push(segment);
\r
// Segments were collected last-to-first, so reverse them before joining.
73 return window.location.protocol + "//" + window.location.host + cleanedSegments.reverse().join("/");
\r
// Looks through every <a> inside elem, scores each distinct href on how likely it
// is to be the "next page" of the current article, and selects the best one.
// elem: DOM element whose links are examined.
// NOTE(review): the skip/continue bodies, closing braces and the final return
// are not visible in this view.
77 function findNextPageLink(elem) {
\r
78 var possiblePages = {},
\r
79 allLinks = elem.getElementsByTagName('a'),
\r
80 articleBaseUrl = findBaseUrl();
\r
82 for(var i = 0, il = allLinks.length; i < il; i+=1) {
\r
83 var link = allLinks[i],
\r
// Normalise the href: drop the fragment and a single trailing slash.
84 linkHref = allLinks[i].href.replace(/#.*$/, '').replace(/\/$/, '');
\r
// Detects empty hrefs, the article base URL, the current page and pages already
// recorded in parsedPages (body not visible; presumably skips the link).
86 if(linkHref === "" || linkHref === articleBaseUrl || linkHref === window.location.href || linkHref in parsedPages) {
\r
// Detects links whose host differs from the current host.
90 if(window.location.host !== linkHref.split(/\/+/g)[1]) {
\r
94 var linkText = getInnerText(link);
\r
// Detects link text matching regexps.extraneous (print, comment, share, ...).
96 if(linkText.match(regexps.extraneous)) {
\r
// Detects links whose URL, once the base URL is removed, contains no digit.
100 var linkHrefLeftover = linkHref.replace(articleBaseUrl, '');
\r
101 if(!linkHrefLeftover.match(/\d/)) {
\r
// Record the candidate once per href; later links to the same href only append
// their text to the stored linkText.
105 if(!(linkHref in possiblePages)) {
\r
106 possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref};
\r
108 possiblePages[linkHref].linkText += ' | ' + linkText;
\r
111 var linkObj = possiblePages[linkHref];
\r
// Penalise hrefs that do not start with the article base URL.
113 if(linkHref.indexOf(articleBaseUrl) !== 0) {
\r
114 linkObj.score -= 25;
\r
// Score on the link's own text, class, id and markup.
117 var linkData = linkText + ' ' + link.className + ' ' + link.id + link.innerHTML;
\r
118 if(linkData.match(regexps.nextLink)) {
\r
119 linkObj.score += 50;
\r
121 if(linkData.match(/pag(e|ing|inat)/i)) {
\r
122 linkObj.score += 25;
\r
// "first"/"last" links are penalised unless the accumulated link text also looks
// like a next link.
124 if(linkData.match(/(first|last)/i)) {
\r
125 if(!linkObj.linkText.match(regexps.nextLink)) {
\r
126 linkObj.score -= 65;
\r
129 if(linkData.match(regexps.negative) || linkData.match(regexps.extraneous)) {
\r
130 linkObj.score -= 50;
\r
132 if(linkData.match(regexps.prevLink)) {
\r
133 linkObj.score -= 200;
\r
// Walk up the ancestors: at most one bonus for a pagination-like class/id and at
// most one penalty for a negative class/id that is not also positive.
136 var parentNode = link.parentNode,
\r
137 positiveNodeMatch = false,
\r
138 negativeNodeMatch = false;
\r
139 while(parentNode) {
\r
140 var parentNodeClassAndId = parentNode.className + ' ' + parentNode.id;
\r
141 if(!positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(/pag(e|ing|inat)/i)) {
\r
142 positiveNodeMatch = true;
\r
143 linkObj.score += 15;
\r
145 if(!negativeNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(regexps.negative)) {
\r
146 if(!parentNodeClassAndId.match(regexps.positive)) {
\r
147 linkObj.score -= 25;
\r
148 negativeNodeMatch = true;
\r
152 parentNode = parentNode.parentNode;
\r
// Bonus when the URL itself looks paginated (e.g. "page=2", "p/3", "paging").
155 if (linkHref.match(/p(a|g|ag)?(e|ing|ination)?(=|\/)[0-9]{1,2}/i) || linkHref.match(/(page|paging)/i)) {
\r
156 linkObj.score += 25;
\r
159 if (linkHref.match(regexps.extraneous)) {
\r
160 linkObj.score -= 15;
\r
// Numeric link text: "1" is penalised; the visible bonus shrinks as the number
// grows and is zero from 10 upwards.
163 var linkTextAsNumber = parseInt(linkText, 10);
\r
164 if(linkTextAsNumber) {
\r
165 if (linkTextAsNumber === 1) {
\r
166 linkObj.score -= 10;
\r
169 linkObj.score += Math.max(0, 10 - linkTextAsNumber);
\r
// Pick the highest scoring candidate whose score is strictly above 100.
174 var topPage = null;
\r
175 for(var page in possiblePages) {
\r
176 if(possiblePages.hasOwnProperty(page)) {
\r
177 if(possiblePages[page].score > 100 && (!topPage || topPage.score < possiblePages[page].score)) {
\r
178 topPage = possiblePages[page];
\r
// Remember the chosen href in parsedPages (consulted by the check at the top of
// the link loop) and log the outcome.
184 var nextHref = topPage.href.replace(/\/$/,'');
\r
186 parsedPages[nextHref] = true;
\r
188 console.log("Reader :: reader.js :: INFO - Second Page link = "+nextHref);
\r
191 console.log("Reader :: reader.js :: INFO - Second Page link = null");
\r
// NOTE(review): the enclosing function header is not visible in this view;
// ajax() calls xhr(), so this is presumably the body of xhr() - confirm.
// Use the native XMLHttpRequest when it exists, except when the page is loaded
// over file: and ActiveXObject is available.
197 if (typeof XMLHttpRequest !== 'undefined' && (window.location.protocol !== 'file:' || !window.ActiveXObject)) {
\r
198 return new XMLHttpRequest();
\r
// Otherwise try the MSXML ActiveX controls in turn; a failure to construct one
// is deliberately ignored so the next candidate can be tried.
201 try { return new ActiveXObject('Msxml2.XMLHTTP.6.0'); } catch(sixerr) { }
\r
202 try { return new ActiveXObject('Msxml2.XMLHTTP.3.0'); } catch(threrr) { }
\r
203 try { return new ActiveXObject('Msxml2.XMLHTTP'); } catch(err) { }
\r
/**
 * Decide whether a completed request should be treated as successful.
 *
 * A request succeeds when its status is 2xx or 304, or when the status is 0 and
 * a non-empty response body was received.
 *
 * @param {{status: number, responseText: string}} request Finished request object.
 * @returns {boolean} true on success, false otherwise (always a real boolean,
 *     never the response text itself).
 */
function successfulRequest(request) {
    var status = request.status;

    if ((status >= 200 && status < 300) || status === 304) {
        return true;
    }

    // Status 0 carries no HTTP verdict, so fall back to "did we get any body?".
    return status === 0 && Boolean(request.responseText);
}
\r
// Issues an asynchronous GET for url and reports the outcome through the
// optional callbacks options.success(request) and options.error(request).
// NOTE(review): some original lines (closing braces and the statements between
// the visible ones, e.g. around send) are not visible in this view.
213 function ajax(url, options) {
\r
214 var request = xhr();
\r
// readystatechange handler: acts only once the request is complete
// (readyState 4) and dispatches on successfulRequest(request).
216 function respondToReadyState(readyState) {
\r
217 if (request.readyState === 4) {
\r
218 if (successfulRequest(request)) {
\r
219 if (options.success) { options.success(request); }
\r
222 if (options.error) { options.error(request); }
\r
// Allow callers to omit the options argument entirely.
227 if (typeof options === 'undefined') { options = {}; }
\r
229 request.onreadystatechange = respondToReadyState;
\r
230 request.open('get', url, true);
\r
231 request.setRequestHeader('Accept', 'text/html');
\r
234 request.send(options.postBody);
\r
// Here the error callback is invoked without a request argument.
// NOTE(review): presumably inside a catch around send() - confirm.
237 if (options.error) { options.error(); }
\r
// Creates a container for the next page of the article, fetches nextPageLink,
// extracts its article content and appends it to the reader content element.
// nextPageLink: URL of the page to fetch.
// NOTE(review): many original lines (counters, the ajax call line, closing
// braces, early returns) are not visible in this view.
245 function appendNextPage(nextPageLink) {
\r
// Container and visible "Page N" separator for the page being appended.
249 var articlePage = document.createElement("DIV");
\r
250 articlePage.id = 'readability-page-' + curPageNum;
\r
251 articlePage.className = 'SISO_page';
\r
252 articlePage.innerHTML = '<p class="SISO_page-separator" title="Page ' + curPageNum + '">Page '+curPageNum+'</p>';
\r
// Past maxPages, a plain "View Next Page" link is added to the container.
255 if(curPageNum > maxPages) {
\r
256 var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>";
\r
258 articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;
\r
// The immediately-invoked function captures the URL and container for this page.
261 (function(pageUrl, thisPage) {
\r
263 success: function(r) {
\r
// Use the ETag response header to detect a page that was already seen; in that
// case the container is hidden.
265 var eTag = r.getResponseHeader('ETag');
\r
267 if(eTag in pageETags) {
\r
268 dbg("Exact duplicate page found via ETag. Aborting.");
\r
269 articlePage.style.display = 'none';
\r
272 pageETags[eTag] = 1;
\r
276 var page = document.createElement("DIV");
\r
// Newlines are temporarily replaced by \uffff so that the script-stripping regex
// can match across lines; the second statement repeats the same replacements.
278 var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
\r
279 responseHtml = responseHtml.replace(/\n/g,'\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
\r
// Restore newlines and turn <noscript> tags into <div> tags.
280 responseHtml = responseHtml.replace(/\uffff/g,'\n').replace(/<(\/?)noscript/gi, '<$1div');
\r
281 responseHtml = responseHtml.replace(regexps.replaceBrs, '</p><p>');
\r
282 responseHtml = responseHtml.replace(regexps.replaceFonts, '<$1span>');
\r
284 page.innerHTML = responseHtml;
\r
// Sets the three low bits of `flags` (declared outside this view).
286 flags = 0x1 | 0x2 | 0x4;
\r
// Find the following page first, then extract this page's article content.
288 var nextPageLink = findNextPageLink(page),
\r
289 content = grabArticle(page);
\r
295 thisPage.innerHTML += content;
\r
296 document.getElementById("reader_content_div").appendChild(thisPage);
\r
// Recurse into the page found above.
299 appendNextPage(nextPageLink);
\r
303 }(nextPageLink, articlePage));
\r
// Regular expressions shared by the functions in this file.
// NOTE(review): the line opening this object literal is not visible in this
// view; the functions reference these entries as regexps.<name>.
// Class/id fragments of elements unlikely to hold article content
// (tested against className + id in grabArticle).
307 unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
\r
308 okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
\r
// Class/id fragments tested by getClassWeight and findNextPageLink.
309 positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story|date/i,
\r
310 negative: /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|script|tool|widget|scbox|reply|div_dispalyslide|galleryad|disqus_thread|cnn_strylftcntnt|topRightNarrow|fs-stylelist-thumbnails|replText|ttalk_layer|disqus_post_message|disqus_post_title|cnn_strycntntrgt|wpadvert|sharedaddy sd-like-enabled sd-sharing-enabled|fs-slideshow-wrapper|fs-stylelist-launch|reply_box|contentHeader|jive-paginator lw-label/i,
\r
// Link text/URL fragments that findNextPageLink treats as not-a-next-page.
311 extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single/i,
\r
// Opening tags searched for in a DIV's innerHTML by grabArticle.
312 divToPElements: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul|article)/i,
\r
// Two or more consecutive <br> tags (appendNextPage replaces them with </p><p>).
313 replaceBrs: /(<br[^>]*>[ \n\r\t]*){2,}/gi,
\r
// Opening or closing <font> tag (appendNextPage replaces it with a span tag).
314 replaceFonts: /<(\/?)font[^>]*>/gi,
\r
// Leading/trailing whitespace, and runs of two or more whitespace characters.
315 trim: /^\s+|\s+$/g,
\r
316 normalize: /\s{2,}/g,
\r
317 killBreaks: /(<br\s*\/?>(\s| ?)*){1,}/g,
\r
// YouTube/Vimeo URLs; clean() and cleanConditionally() search for these in
// embedded content.
318 videos: /http:\/\/(www\.)?(youtube|vimeo)\.com/i,
\r
319 skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
\r
// Text hints used by findNextPageLink: nextLink adds to a link's score, prevLink
// subtracts from it.
320 nextLink: /(next|right|weiter|continue|>([^\|]|$)|\u00BB([^\|]|$))/i,
\r
321 prevLink: /(prev|earl|old|<|«)/i,
\r
// Class name tested on nodes appended by grabArticle.
322 retainDiv: /whois_record/i
\r
/**
 * Remove <h1> and <h2> headers that look like boilerplate rather than content.
 *
 * A header is removed when its class weight is negative or when more than a
 * third of its text sits inside links.
 *
 * @param {Element} e Element whose descendant headers are inspected (mutated).
 */
function cleanHeaders(e) {
    for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) {
        var headers = e.getElementsByTagName('h' + headerIndex);

        // Walk backwards: the collection may be live, so removing an entry must
        // not shift the indexes that are still to be visited.
        for (var i = headers.length - 1; i >= 0; i -= 1) {
            var header = headers[i];
            if (getClassWeight(header) < 0 || getLinkDensity(header) > 0.33) {
                header.parentNode.removeChild(header);
            }
        }
    }
}
\r
// Removes every descendant of e with the given tag name. For "object" and
// "embed" tags the attributes and inner HTML are checked against regexps.videos.
// e: element to clean (mutated); tag: tag name to remove.
// NOTE(review): the lines that use isEmbed and the bodies of the two video
// checks are not visible in this view; presumably matching embeds are kept.
336 function clean(e, tag) {
\r
337 var targetList = e.getElementsByTagName( tag );
\r
338 var isEmbed = (tag === 'object' || tag === 'embed');
\r
// Iterate backwards so removals do not disturb the remaining indexes.
340 for (var y=targetList.length-1; y >= 0; y-=1) {
\r
// Concatenate all attribute values, "|"-separated, to search them in one go.
342 var attributeValues = "";
\r
343 for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) {
\r
344 attributeValues += targetList[y].attributes[i].value + '|';
\r
347 if (attributeValues.search(regexps.videos) !== -1) {
\r
351 if (targetList[y].innerHTML.search(regexps.videos) !== -1) {
\r
356 targetList[y].parentNode.removeChild(targetList[y]);
\r
/**
 * Remove every descendant of `e` that has the given tag name.
 *
 * @param {Element} e Element to clean (mutated).
 * @param {string} tag Tag name of the elements to remove, e.g. "video".
 */
function cleanHTML5(e, tag) {
    var targetList = e.getElementsByTagName(tag);

    // Walk backwards: the collection may be live, so removing an entry must not
    // shift the indexes that are still to be visited.
    for (var y = targetList.length - 1; y >= 0; y -= 1) {
        var target = targetList[y];
        target.parentNode.removeChild(target);
    }
}
\r
/**
 * Hide (rather than remove) every descendant of `e` with the given tag name by
 * setting its inline `display` style to "none".
 *
 * @param {Element} e Element whose descendants are hidden (mutated).
 * @param {string} tag Tag name of the elements to hide, e.g. "select".
 */
function cleanSelect(e, tag) {
    var targetList = e.getElementsByTagName(tag);

    for (var y = targetList.length - 1; y >= 0; y -= 1) {
        targetList[y].style.display = 'none';
    }
}
\r
/**
 * Count how many times the separator `s` occurs in the text of `e`.
 *
 * @param {Element} e Element whose text (via getInnerText) is searched.
 * @param {string} [s=","] Separator to count; defaults to a comma so that a
 *     missing argument no longer silently yields 0.
 * @returns {number} Number of occurrences of `s`.
 */
function getCharCount(e, s) {
    var separator = s || ",";

    // Splitting on the separator yields one more piece than there are separators.
    return getInnerText(e).split(separator).length - 1;
}
\r
// Computes a weight for element e from its className and id by testing both
// against regexps.negative and regexps.positive.
// NOTE(review): the weight variable, the amounts added/subtracted and the return
// statement are not visible in this view.
375 function getClassWeight(e) {
\r
// Only a non-empty string className is examined.
379 if (typeof(e.className) === 'string' && e.className !== '')
\r
381 if(e.className.search(regexps.negative) !== -1) {
\r
384 if(e.className.search(regexps.positive) !== -1) {
\r
// Same two checks for a non-empty string id.
388 if (typeof(e.id) === 'string' && e.id !== '')
\r
390 if(e.id.search(regexps.negative) !== -1) {
\r
393 if(e.id.search(regexps.positive) !== -1) {
\r
/**
 * Tell whether the first character of a string is a Chinese, Japanese or Korean
 * character.
 *
 * Range bounds are inclusive so that the first and last code point of every
 * block count (e.g. U+4E00 and U+AC00, which exclusive bounds would skip).
 *
 * @param {string} innerCharacter String whose first character is tested.
 * @returns {boolean} true when the first character lies in one of the CJK
 *     ranges below; false otherwise, including for empty or missing input.
 */
function ChineseJapneseKorean(innerCharacter) {
    if (!innerCharacter || innerCharacter.length === 0) {
        return false;
    }

    // charCodeAt only yields UTF-16 code units (<= 0xFFFF), which would make the
    // supplementary-plane ranges unreachable; prefer the full code point when
    // the runtime offers it.
    var code = (typeof innerCharacter.codePointAt === 'function')
        ? innerCharacter.codePointAt(0)
        : innerCharacter.charCodeAt(0);

    var ranges = [
        [11904, 12031],   // CJK Radicals Supplement
        [12352, 12543],   // Hiragana (and Katakana)
        [12736, 19903],   // CJK Strokes .. CJK Unified Ideographs Extension A
        [19968, 40959],   // CJK Unified Ideographs
        [44032, 55215],   // Hangul Syllables
        [63744, 64255],   // CJK Compatibility Ideographs
        [65072, 65103],   // CJK Compatibility Forms
        [131072, 173791], // CJK Unified Ideographs Extension B
        [194560, 195103]  // CJK Compatibility Ideographs Supplement
    ];

    for (var r = 0; r < ranges.length; r += 1) {
        if (code >= ranges[r][0] && code <= ranges[r][1]) {
            return true;
        }
    }
    return false;
}
\r
// Removes descendants of e with the given tag name when they look like
// non-content, judged by class weight, content score, comma count, link density
// and counts of nested p/img/li/input/embed elements.
// e: element to clean (mutated); tag: tag name of the candidates.
// NOTE(review): many original lines (branch bodies that set toRemove, closing
// braces, the site-specific exception bodies) are not visible in this view.
413 function cleanConditionally(e, tag) {
\r
416 var tagsList = e.getElementsByTagName(tag);
\r
417 var curTagsLength = tagsList.length;
\r
// Iterate backwards so removals do not disturb the remaining indexes.
419 for (var i=curTagsLength-1; i >= 0; i-=1) {
\r
420 var toRemove = false;
\r
421 var weight = getClassWeight(tagsList[i]);
\r
// Nodes that were never scored count as contentScore 0.
422 var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0;
\r
// Negative combined score: the node is removed below, after document-level
// lookups for a "view_cnt" class and a "description" id on div/article tags
// (what those lookups change is not visible here).
424 if(weight+contentScore < 0)
\r
426 if(tag === "div" || tag === "article") {
\r
427 var readerDivClass5 = document.getElementsByClassName("view_cnt");
\r
428 var readerDivClass6 = document.getElementById("description");
\r
430 if(readerDivClass5.length > 0) {
\r
433 if(readerDivClass6!==null && readerDivClass6.innerHTML.length > 0) {
\r
438 tagsList[i].parentNode.removeChild(tagsList[i]);
\r
// Fewer than 10 commas: apply the structural heuristics.
441 else if ( getCharCount(tagsList[i],',') < 10) {
\r
442 var p = tagsList[i].getElementsByTagName("p").length;
\r
443 var img = tagsList[i].getElementsByTagName("img").length;
\r
// The li count is offset by -100 before it is compared with p below.
444 var li = tagsList[i].getElementsByTagName("li").length-100;
\r
445 var input = tagsList[i].getElementsByTagName("input").length;
\r
// Inspect embeds whose src does not match regexps.videos (loop body not visible;
// presumably these are what embedCount counts).
447 var embedCount = 0;
\r
448 var embeds = tagsList[i].getElementsByTagName("embed");
\r
449 for(var ei=0,il=embeds.length; ei < il; ei+=1) {
\r
450 if (embeds[ei].src.search(regexps.videos) === -1) {
\r
455 var linkDensity = getLinkDensity(tagsList[i]);
\r
456 var contentLength = getInnerText(tagsList[i]).length;
\r
457 var finalWord = null;
\r
458 var checkCJK = null;
\r
459 var checkCJKText = null;
\r
// Helper that scans the node's text for a CJK character and stores the match in
// finalWord (the rest of its body is not visible).
460 var checkWordOfTag = function(){
\r
461 checkCJKText = getInnerText(tagsList[i]);
\r
462 for(var h = 0;h < contentLength; h +=1){
\r
463 if(ChineseJapneseKorean(checkCJKText[h]) === true) {
\r
464 finalWord = checkCJKText[h];
\r
// Chain of removal conditions (bodies not visible): more list items than
// paragraphs, many inputs, very short text with no or several images, high link
// density relative to weight, and embeds with little text.
473 } else if(li > p && tag !== "ul" && tag !== "ol") {
\r
475 } else if( input > Math.floor(p/3) ) {
\r
477 } else if(contentLength < 25 && (img > 2 || img === 0) ) {
\r
479 } else if(weight < 25 && linkDensity > 0.2) {
\r
481 } else if(weight >= 25 && linkDensity > 0.5) {
\r
483 } else if((embedCount === 1 && contentLength < 75) || embedCount > 1) {
\r
// Extra handling for CJK text that was marked for removal.
// NOTE(review): the comparison is against the string 'undefined', not the
// undefined value - confirm this is intended.
486 if((checkWordOfTag() !== 'undefined') && checkCJK && toRemove){
\r
487 if(weight < 25 && linkDensity > 0.3){
\r
// Looks for an image of at least 200x150 inside the node (body not visible).
494 var images = tagsList[i].getElementsByTagName("img");
\r
495 for(var im=0,il=images.length; im < il; im+=1) {
\r
496 if ((images[im].height >= 150) && (images[im].width >= 200)) {
\r
// For div/article tags, a series of document-level lookups for specific class
// names and the "description" id (bodies not visible; presumably exceptions for
// particular sites).
501 if(tag === "div" || tag === "article") {
\r
502 var readerDivClass = document.getElementsByClassName("whois_record");
\r
503 if(readerDivClass.length > 0) {
\r
506 var readerDivClass1 = document.getElementsByClassName("sixteen columns hreview hentry hnews full");
\r
507 if(readerDivClass1.length > 0) {
\r
510 var readerDivClass2 = document.getElementsByClassName("list");
\r
511 if(readerDivClass2.length > 0) {
\r
514 var readerDivClass3 = document.getElementsByClassName("articleContent");
\r
515 if(readerDivClass3.length > 0) {
\r
518 var readerDivClass4 = document.getElementsByClassName("content");
\r
519 if(readerDivClass4.length > 0) {
\r
522 var readerDivClass5 = document.getElementsByClassName("view_cnt");
\r
523 if(readerDivClass5.length > 0) {
\r
526 var readerDivClass6 = document.getElementById("description");
\r
527 if(readerDivClass6!==null && readerDivClass6.innerHTML.length > 0) {
\r
533 tagsList[i].parentNode.removeChild(tagsList[i]);
\r
// Collapses every run matched by regexps.killBreaks (one or more <br> tags with
// trailing whitespace) in e's markup into a single '<br />'.
// e: element whose innerHTML is rewritten.
// NOTE(review): the lines around the statement are not visible in this view.
538 function killBreaks(e) {
\r
540 e.innerHTML = e.innerHTML.replace(regexps.killBreaks,'<br />');
\r
// Recursively removes the inline style attribute from e and its descendant
// elements, leaving nodes whose className is exactly "readability-styled" alone.
// e: root node to clean (mutated).
// NOTE(review): some original lines near the top and the closing braces are not
// visible in this view.
545 function cleanStyles(e) {
\r
547 var cur = e.firstChild;
\r
// Clean the root itself when it supports removeAttribute.
552 if(typeof e.removeAttribute === 'function' && e.className !== 'readability-styled') {
\r
553 e.removeAttribute('style'); }
\r
// Visit every child; only element nodes (nodeType 1) are cleaned and recursed into.
555 while ( cur !== null ) {
\r
556 if ( cur.nodeType === 1 ) {
\r
557 if(cur.className !== "readability-styled") {
\r
558 cur.removeAttribute("style");
\r
560 cleanStyles( cur );
\r
562 cur = cur.nextSibling;
\r
/**
 * Strip the `href` attribute from every <a> inside `e`, leaving the link text
 * in place but making the links inert.
 *
 * @param {Element} e Element whose descendant links are modified (mutated).
 */
function cleanLinkHrefs(e) {
    var links = e.getElementsByTagName("a");

    for (var i = 0, il = links.length; i < il; i += 1) {
        links[i].removeAttribute("href");
    }
}
\r
// Post-processes the extracted article: strips inline styles, removes or hides
// unwanted elements, drops empty paragraphs and tidies <br> tags before <p>.
// articleContent: element holding the extracted article (mutated).
// NOTE(review): a few original lines (closing braces and lines around the final
// statement) are not visible in this view.
574 function prepArticle(articleContent) {
\r
575 cleanStyles(articleContent);
\r
576 cleanConditionally(articleContent, "form");
\r
577 clean(articleContent, "object");
\r
578 cleanHTML5(articleContent, "video");
\r
579 cleanSelect(articleContent, "select");
\r
// A single <h2> is removed.
// NOTE(review): presumably because it duplicates the article title - confirm.
581 if(articleContent.getElementsByTagName('h2').length === 1) {
\r
582 clean(articleContent, "h2");
\r
584 clean(articleContent, "iframe");
\r
585 clean(articleContent, "script");
\r
586 clean(articleContent, "style");
\r
587 clean(articleContent, "textarea");
\r
588 clean(articleContent, "input");
\r
// NOTE(review): "\n" is passed as a tag name here - confirm this is intended.
589 clean(articleContent, "\n");
\r
590 clean(articleContent, "noscript");
\r
591 cleanLinkHrefs(articleContent);
\r
592 cleanHeaders(articleContent);
\r
593 cleanConditionally(articleContent, "table");
\r
594 cleanConditionally(articleContent, "ul");
\r
595 cleanConditionally(articleContent, "div");
\r
// Remove paragraphs that contain no image, embed or object and no text.
597 var articleParagraphs = articleContent.getElementsByTagName('p');
\r
598 for(var i = articleParagraphs.length-1; i >= 0; i-=1) {
\r
599 var imgCount = articleParagraphs[i].getElementsByTagName('img').length;
\r
600 var embedCount = articleParagraphs[i].getElementsByTagName('embed').length;
\r
601 var objectCount = articleParagraphs[i].getElementsByTagName('object').length;
\r
603 if(imgCount === 0 && embedCount === 0 && objectCount === 0 && getInnerText(articleParagraphs[i], false) === '') {
\r
604 articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
\r
// Drop a <br> that directly precedes a paragraph opening tag.
609 articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');
\r
/**
 * Compute the fraction of an element's text that sits inside links.
 *
 * @param {Element} e Element to measure.
 * @returns {number} Link text length divided by total text length; 0 when the
 *     element has no text at all (instead of the NaN that 0/0 would give, which
 *     would poison every score it is multiplied into).
 */
function getLinkDensity(e) {
    var links = e.getElementsByTagName("a");
    var textLength = getInnerText(e).length;

    if (textLength === 0) {
        return 0;
    }

    var linkLength = 0;
    for (var i = 0, il = links.length; i < il; i += 1) {
        linkLength += getInnerText(links[i]).length;
    }

    return linkLength / textLength;
}
\r
// Attaches a fresh readability record ({contentScore: 0}) to node, adjusts the
// score according to the node's tag name and finally adds its class weight.
// node: element to initialise (mutated).
// NOTE(review): the case labels of the switch are not visible in this view, so
// which tags receive which adjustment cannot be told from here.
625 function initializeNode(node) {
\r
626 node.readability = {"contentScore": 0};
\r
628 switch(node.tagName) {
\r
630 node.readability.contentScore += 5;
\r
633 node.readability.contentScore +=25;
\r
638 node.readability.contentScore += 3;
\r
648 node.readability.contentScore -= 3;
\r
658 node.readability.contentScore -= 5;
\r
// Add the class/id based weight on top of the tag-based adjustment.
662 node.readability.contentScore += getClassWeight(node);
\r
// Returns the trimmed text of node e with <script> descendants excluded.
// e: node to read; normalizeSpaces: when true or omitted, runs of two or more
// whitespace characters are collapsed into a single space.
// NOTE(review): a few original lines (the body of the first check, an else and
// closing braces) are not visible in this view.
665 function getInnerText(e, normalizeSpaces) {
\r
666 var textContent = "";
\r
// Nodes exposing neither textContent nor innerText (body not visible).
668 if(typeof(e.textContent) === "undefined" && typeof(e.innerText) === "undefined") {
\r
672 normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
\r
// Work on a deep clone so that removing scripts does not alter the original.
673 e = e.cloneNode(true);
\r
// Text nodes (nodeType 3) have no descendants to clean.
674 if (e.nodeType != 3) {
\r
675 clean(e, "script");
\r
// Internet Explorer is read through innerText, other browsers via textContent.
678 if (navigator.appName === "Microsoft Internet Explorer") {
\r
679 textContent = e.innerText.replace( regexps.trim, "" ); }
\r
681 textContent = e.textContent.replace( regexps.trim, "" ); }
\r
682 if(normalizeSpaces) {
\r
683 return textContent.replace( regexps.normalize, " "); }
\r
685 return textContent; }
\r
// Extracts the main article from a page: scores candidate containers by the text
// of their paragraphs, picks the best one, gathers it with related siblings
// into a new DIV, cleans that DIV and returns its HTML.
// argumentPage: optional element to analyse; document.body is used when omitted.
// Returns the innerHTML of the assembled article element.
// NOTE(review): many original lines (branch bodies, closing braces, the handling
// of an empty result) are not visible in this view.
688 function grabArticle(argumentPage) {
\r
690 page = argumentPage ? argumentPage : document.body;
\r
// All analysis runs on a deep clone of the page.
692 page = page.cloneNode(true);
\r
696 var allElements = page.getElementsByTagName('*');
\r
// First pass: collect the nodes whose text will be scored.
699 var nodesToScore = [];
\r
700 for(var nodeIndex = 0; (node = allElements[nodeIndex]); nodeIndex+=1) {
\r
// Detect unlikely candidates by class+id (body not visible).
701 var unlikelyMatchString = node.className + node.id;
\r
702 if (unlikelyMatchString !== "undefined") {
\r
703 if (unlikelyMatchString.search(regexps.unlikelyCandidates) !== -1 && node.tagName !== "BODY") {
\r
708 if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE") {
\r
709 nodesToScore[nodesToScore.length] = node;
\r
// A DIV with none of the elements listed in regexps.divToPElements is scored;
// for other DIVs the direct text-node children are queued.
712 if (node.tagName === "DIV") {
\r
713 if (node.innerHTML.search(regexps.divToPElements) === -1) {
\r
715 nodesToScore[nodesToScore.length] = node;
\r
722 for(var i = 0, il = node.childNodes.length; i < il; i+=1) {
\r
723 var childNode = node.childNodes[i];
\r
724 if(childNode.nodeType === 3) {
\r
725 nodesToScore[nodesToScore.length] = childNode;
\r
// Second pass: each scored node contributes to its parent and grandparent.
732 var candidates = [];
\r
733 for (var pt=0; pt < nodesToScore.length; pt+=1) {
\r
734 var parentNode = nodesToScore[pt].parentNode;
\r
735 var grandParentNode = parentNode ? parentNode.parentNode : null;
\r
736 var innerText = getInnerText(nodesToScore[pt]);
\r
// Checks for a missing parent and for text shorter than 25 characters (bodies
// not visible; presumably such nodes are skipped).
738 if(!parentNode || typeof(parentNode.tagName) === 'undefined') {
\r
742 if(innerText.length < 25) {
\r
// Initialise parent and grandparent on first sight and register them as candidates.
745 if(typeof parentNode.readability === 'undefined') {
\r
746 initializeNode(parentNode);
\r
747 candidates.push(parentNode);
\r
750 if(grandParentNode && typeof(grandParentNode.readability) === 'undefined' && typeof(grandParentNode.tagName) !== 'undefined') {
\r
751 initializeNode(grandParentNode);
\r
752 candidates.push(grandParentNode);
\r
// Visible contributions: the number of comma-separated pieces, plus one point per
// 100 characters capped at 3.
755 var contentScore = 0;
\r
759 contentScore += innerText.split(',').length;
\r
761 contentScore += Math.min(Math.floor(innerText.length / 100), 3);
\r
// The parent receives the full score, the grandparent half of it.
763 parentNode.readability.contentScore += contentScore;
\r
765 if(grandParentNode) {
\r
766 grandParentNode.readability.contentScore += contentScore/2;
\r
// Scale each candidate's score by (1 - link density) and keep the best one.
770 var topCandidate = null;
\r
771 for(var c=0, cl=candidates.length; c < cl; c+=1)
\r
773 candidates[c].readability.contentScore = candidates[c].readability.contentScore * (1-getLinkDensity(candidates[c]));
\r
775 if(!topCandidate || candidates[c].readability.contentScore > topCandidate.readability.contentScore) {
\r
776 topCandidate = candidates[c]; }
\r
// No candidate, or BODY itself: wrap the whole page content in a new DIV and
// use that as the top candidate.
779 if (topCandidate === null || topCandidate.tagName === "BODY")
\r
781 topCandidate = document.createElement("DIV");
\r
782 topCandidate.innerHTML = page.innerHTML;
\r
783 page.innerHTML = "";
\r
784 page.appendChild(topCandidate);
\r
785 initializeNode(topCandidate);
\r
// Assemble the article from the top candidate and its siblings. The sibling
// threshold is 20% of the top score, but never below 10.
788 var articleContent = document.createElement("DIV");
\r
789 articleContent.id = "readability-content";
\r
790 var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
\r
791 var siblingNodes = topCandidate.parentNode.childNodes;
\r
794 for(var s=0, sl=siblingNodes.length; s < sl; s+=1) {
\r
795 var siblingNode = siblingNodes[s];
\r
796 var append = false;
\r
803 if(siblingNode === topCandidate)
\r
// Siblings sharing the top candidate's non-empty class name get a bonus of 20%
// of the top score.
808 var contentBonus = 0;
\r
809 if(siblingNode.className === topCandidate.className && topCandidate.className !== "") {
\r
810 contentBonus += topCandidate.readability.contentScore * 0.2;
\r
813 if(typeof siblingNode.readability !== 'undefined' && (siblingNode.readability.contentScore+contentBonus) >= siblingScoreThreshold)
\r
// Paragraph siblings are judged on length and link density: long with few
// links, or short, link-free and containing a sentence-ending period.
818 if(siblingNode.nodeName === "P") {
\r
819 var linkDensity = getLinkDensity(siblingNode);
\r
820 var nodeContent = getInnerText(siblingNode);
\r
821 var nodeLength = nodeContent.length;
\r
823 if(nodeLength > 80 && linkDensity < 0.25)
\r
827 else if(nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1)
\r
// Siblings that are neither DIV nor P are copied into a new DIV (id and inner
// HTML carried over).
834 var nodeToAppend = null;
\r
835 if(siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") {
\r
836 nodeToAppend = document.createElement("DIV");
\r
838 nodeToAppend.id = siblingNode.id;
\r
839 nodeToAppend.innerHTML = siblingNode.innerHTML;
\r
842 nodeToAppend = siblingNode;
\r
847 nodeToAppend = siblingNode;
\r
// Check against regexps.retainDiv (body not visible), then append a deep clone.
850 if(nodeToAppend.className.search(regexps.retainDiv) !== -1) {
\r
852 articleContent.appendChild(nodeToAppend.cloneNode(true));
\r
857 prepArticle(articleContent);
\r
// Remove the scoring records from the parents and grandparents of scored nodes.
858 for (var pt=0; pt < nodesToScore.length; pt+=1) {
\r
859 var parentNode = nodesToScore[pt].parentNode;
\r
860 var grandParentNode = parentNode ? parentNode.parentNode : null;
\r
861 delete parentNode.readability;
\r
862 if (grandParentNode) {
\r
863 delete grandParentNode.readability;
\r
// Empty result check (body not visible).
867 if( articleContent.innerHTML == '' )
\r
872 return articleContent.innerHTML;
\r
// Derives a display title for the article from document.title, trimming site
// names separated by " | ", " - " or ": " and falling back to a lone <h1>.
// NOTE(review): the variable declarations, closing braces and the return
// statement are not visible in this view.
875 function getArticleTitle() {
\r
880 curTitle = origTitle = document.title;
\r
// When document.title is not a string, read the text of the first <title> element.
881 if(typeof curTitle !== "string") {
\r
882 curTitle = origTitle = getInnerText(document.getElementsByTagName('title')[0]);
\r
// Title with a " | " or " - " separator: keep the part before the last
// separator; when fewer than 3 words remain, use the part after the first one.
887 if(curTitle.match(/ [\|\-] /))
\r
889 curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1');
\r
891 if(curTitle.split(' ').length < 3) {
\r
892 curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
\r
// Title with ": ": keep the part after the last colon; when fewer than 3 words
// remain, use the part after the first colon.
895 else if(curTitle.indexOf(': ') !== -1)
\r
897 curTitle = origTitle.replace(/.*:(.*)/gi, '$1');
\r
899 if(curTitle.split(' ').length < 3) {
\r
900 curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
\r
// Very long or very short title: use the page's only <h1>, if there is exactly one.
903 else if(curTitle.length > 150 || curTitle.length < 15)
\r
905 var hOnes = document.getElementsByTagName('h1');
\r
906 if(hOnes.length === 1)
\r
908 curTitle = getInnerText(hOnes[0]);
\r
912 curTitle = curTitle.replace( regexps.trim, "" );
\r
// A result of four words or fewer is discarded in favour of the original title.
914 if(curTitle.split(' ').length <= 4) {
\r
915 curTitle = origTitle;
\r
/**
 * Entry point: extract the article and wrap it into the reader markup.
 *
 * @returns {string} A viewport <meta> tag, the article title as <h1> and the
 *     article HTML when the extracted article is longer than 1000 characters;
 *     otherwise the literal string "undefined", which callers use as the
 *     "no article found" marker.
 */
function initReader() {
    var article_block = grabArticle();
    var article_title = getArticleTitle();

    if (article_block && article_block.length > 1000) {
        // The title is plain text, so escape it before embedding it in markup;
        // otherwise a title containing "<" or "&" corrupts (or injects into)
        // the generated HTML.
        var safe_title = String(article_title)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
        var title = '<h1>' + safe_title + '</h1>';
        var meta = '<meta name=\"viewport\" content=\"width=0, initial-scale=1.0, maximum-scale=2.0, minimum-scale=1.0, user-scalable=no, target-densitydpi=medium-dpi\">\n';
        return meta + title + article_block;
    }

    return "undefined";
}
\r
935 console.log("Reader Error - Reader.js");
\r