Initialize Tizen 2.3
[apps/osp/Internet.git] / data / reader.js
1 // \r
2 // Copyright (c) 2012 Samsung Electronics Co., Ltd. \r
3 // \r
4 // Licensed under the Apache License, Version 2.0 (the License);\r
5 // you may not use this file except in compliance with the License.\r
6 // You may obtain a copy of the License at\r
7 //\r
8 //     http://www.apache.org/licenses/LICENSE-2.0\r
9 //\r
10 // Unless required by applicable law or agreed to in writing, software\r
11 // distributed under the License is distributed on an "AS IS" BASIS,\r
12 // WITHOUT  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
13 // See the License for the specific language governing permissions and\r
14 // limitations under the License.\r
15 //\r
16 \r
17 \r
18 try {\r
// Shared script-level state for the reader functions below.
// Every name is declared explicitly: the original created totalPageNum,
// parsedPages and pageETags as implicit globals, which throws a
// ReferenceError as soon as the script is evaluated in strict mode.
var test;
var sample = null;
var totalPageNum = 1;
var readability = {};
// Reassigned by grabArticle with a clone of the page being parsed.
var page = null;
// Keys are hrefs already selected as a "next page" (see findNextPageLink).
var parsedPages = {};
// Keys are ETag header values of pages already fetched (see appendNextPage).
var pageETags = {};
27 \r
/**
 * Derives the article's base URL from window.location by stripping
 * pagination hints from the path.
 *
 * The path is walked from its last segment backwards. A purely alphabetic
 * file extension is cut off, a ",00" marker is removed, and on the last two
 * segments a trailing page suffix (e.g. "-p2", "_3", "page2") is removed.
 * A segment is then dropped entirely when it is a bare 1-2 digit number
 * (last two segments), is "index" (last segment), or is shorter than three
 * characters while the last raw segment contains no letters.
 *
 * @returns {string} protocol + "//" + host + cleaned path
 */
function findBaseUrl() {
    var loc = window.location;
    var pathOnly = loc.pathname.split("?")[0];
    var parts = pathOnly.split("/").reverse();
    var pagingSuffix = /((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i;
    var kept = [];

    for (var idx = 0; idx < parts.length; idx += 1) {
        var piece = parts[idx];
        // Only the final two path segments are candidates for page markers.
        var nearEnd = idx < 2;

        // "story.html" -> "story", but only for a letters-only extension.
        var dotParts = piece.split(".");
        if (dotParts.length > 1 && !dotParts[1].match(/[^a-zA-Z]/)) {
            piece = dotParts[0];
        }

        // Both replacements leave the segment untouched when nothing matches.
        piece = piece.replace(',00', '');
        if (nearEnd) {
            piece = piece.replace(pagingSuffix, "");
        }

        var isBarePageNumber = nearEnd && /^\d{1,2}$/.test(piece);
        var isIndexFile = idx === 0 && piece.toLowerCase() === "index";
        var isShortNoise = nearEnd && piece.length < 3 && !parts[0].match(/[a-z]/i);

        if (!(isBarePageNumber || isIndexFile || isShortNoise)) {
            kept.push(piece);
        }
    }

    return loc.protocol + "//" + loc.host + kept.reverse().join("/");
}
75 \r
76 \r
/**
 * Looks through every <a> under `elem` for the most likely "next page" link
 * of a multi-page article.
 *
 * Each same-host link whose href differs from the article base URL and
 * contains a digit is scored with a set of heuristics (link text, class/id,
 * ancestors' class/id, href shape, numeric link text). The best link is
 * returned only if its score is above 100; it is then recorded in the
 * shared `parsedPages` map so it is not selected again.
 *
 * Relies on findBaseUrl, getInnerText, regexps and parsedPages from this file.
 *
 * @param {Element} elem - root element whose links are examined
 * @returns {?string} href of the next page (trailing "/" removed), or null
 */
function findNextPageLink(elem) {
    var possiblePages = {},
        allLinks = elem.getElementsByTagName('a'),
        articleBaseUrl = findBaseUrl();

    for (var i = 0, il = allLinks.length; i < il; i += 1) {
        var link     = allLinks[i],
            // Compare hrefs without fragment and without trailing slash.
            linkHref = link.href.replace(/#.*$/, '').replace(/\/$/, '');

        // Skip empty links, links to the current article and already used pages.
        if (linkHref === "" || linkHref === articleBaseUrl || linkHref === window.location.href || linkHref in parsedPages) {
            continue;
        }

        // Skip links that point to another host.
        if (window.location.host !== linkHref.split(/\/+/g)[1]) {
            continue;
        }

        var linkText = getInnerText(link);

        if (linkText.match(regexps.extraneous)) {
            continue;
        }

        // What is left after removing the base URL must contain a digit.
        var linkHrefLeftover = linkHref.replace(articleBaseUrl, '');
        if (!linkHrefLeftover.match(/\d/)) {
            continue;
        }

        if (!(linkHref in possiblePages)) {
            possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref};
        } else {
            possiblePages[linkHref].linkText += ' | ' + linkText;
        }

        var linkObj = possiblePages[linkHref];

        // Links that do not start with the base URL are less likely candidates.
        if (linkHref.indexOf(articleBaseUrl) !== 0) {
            linkObj.score -= 25;
        }

        var linkData = linkText + ' ' + link.className + ' ' + link.id + link.innerHTML;
        if (linkData.match(regexps.nextLink)) {
            linkObj.score += 50;
        }
        if (linkData.match(/pag(e|ing|inat)/i)) {
            linkObj.score += 25;
        }
        if (linkData.match(/(first|last)/i)) {
            // "first"/"last" only counts against the link when the accumulated
            // text does not also look like a "next" link.
            if (!linkObj.linkText.match(regexps.nextLink)) {
                linkObj.score -= 65;
            }
        }
        if (linkData.match(regexps.negative) || linkData.match(regexps.extraneous)) {
            linkObj.score -= 50;
        }
        if (linkData.match(regexps.prevLink)) {
            linkObj.score -= 200;
        }

        // Walk the ancestors; each kind of match is counted at most once.
        var parentNode = link.parentNode,
            positiveNodeMatch = false,
            negativeNodeMatch = false;
        while (parentNode) {
            var parentNodeClassAndId = parentNode.className + ' ' + parentNode.id;
            if (!positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(/pag(e|ing|inat)/i)) {
                positiveNodeMatch = true;
                linkObj.score += 15;
            }
            if (!negativeNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(regexps.negative)) {
                if (!parentNodeClassAndId.match(regexps.positive)) {
                    linkObj.score -= 25;
                    negativeNodeMatch = true;
                }
            }

            parentNode = parentNode.parentNode;
        }

        if (linkHref.match(/p(a|g|ag)?(e|ing|ination)?(=|\/)[0-9]{1,2}/i) || linkHref.match(/(page|paging)/i)) {
            linkObj.score += 25;
        }

        if (linkHref.match(regexps.extraneous)) {
            linkObj.score -= 15;
        }

        // Numeric link text: "1" is penalised, small numbers get a bonus.
        var linkTextAsNumber = parseInt(linkText, 10);
        if (linkTextAsNumber) {
            if (linkTextAsNumber === 1) {
                linkObj.score -= 10;
            } else {
                linkObj.score += Math.max(0, 10 - linkTextAsNumber);
            }
        }
    }

    // Pick the highest scoring candidate above the threshold of 100.
    var topPage = null;
    for (var candidateHref in possiblePages) {
        if (possiblePages.hasOwnProperty(candidateHref)) {
            if (possiblePages[candidateHref].score > 100 && (!topPage || topPage.score < possiblePages[candidateHref].score)) {
                topPage = possiblePages[candidateHref];
            }
        }
    }

    if (topPage) {
        var nextHref = topPage.href.replace(/\/$/, '');

        // Log before returning; the original placed this call after the
        // return statement, where it could never run.
        console.log("Reader :: reader.js :: INFO - Second Page link = " + nextHref);
        parsedPages[nextHref] = true;
        return nextHref;
    }

    console.log("Reader :: reader.js :: INFO - Second Page link = null");
    return null;
}
195 \r
/**
 * Creates a request object for ajax().
 *
 * The native XMLHttpRequest is used when it exists, unless the page was
 * loaded from file: and window.ActiveXObject is available. Otherwise the
 * MSXML ActiveX variants are tried from newest to oldest.
 *
 * @returns {(XMLHttpRequest|ActiveXObject|boolean)} a request object, or
 *          false when none could be created
 */
function xhr() {
    var useNative = typeof XMLHttpRequest !== 'undefined' &&
        !(window.location.protocol === 'file:' && window.ActiveXObject);

    if (useNative) {
        return new XMLHttpRequest();
    }

    var progIds = ['Msxml2.XMLHTTP.6.0', 'Msxml2.XMLHTTP.3.0', 'Msxml2.XMLHTTP'];
    for (var k = 0; k < progIds.length; k += 1) {
        try {
            return new ActiveXObject(progIds[k]);
        } catch (unavailable) {
            // This variant is not installed; fall through to the next one.
        }
    }

    return false;
}
208 \r
/**
 * Tells whether a finished request should be treated as successful.
 *
 * @param {Object} request - object exposing `status` and `responseText`
 * @returns {*} true for 2xx and 304; for status 0 the response text itself
 *          (truthy only when non-empty); false otherwise
 */
function successfulRequest(request) {
    var code = request.status;

    if (code >= 200 && code < 300) {
        return true;
    }
    if (code === 304) {
        return true;
    }
    // Status 0 counts only when a body was actually delivered.
    return code === 0 && request.responseText;
}
212 \r
/**
 * Issues an asynchronous GET request for `url` with an "Accept: text/html"
 * header.
 *
 * @param {string} url - address to fetch
 * @param {Object} [options]
 * @param {function(Object)} [options.success] - called with the request when
 *        it completes and successfulRequest() accepts it
 * @param {function(Object=)} [options.error] - called with the request when
 *        it completes unsuccessfully, or without arguments when the request
 *        could not be created or sent
 * @param {*} [options.postBody] - passed through to send()
 * @returns {(Object|boolean)} the request object, or false when xhr() could
 *          not create one
 */
function ajax(url, options) {
    if (typeof options === 'undefined') { options = {}; }

    var request = xhr();

    // xhr() returns false when no transport is available. The original went
    // on to call request.open() and threw a TypeError; report it through the
    // error callback instead.
    if (!request) {
        if (options.error) { options.error(); }
        return request;
    }

    request.onreadystatechange = function () {
        // 4 is the "done" ready state; earlier states are ignored.
        if (request.readyState !== 4) {
            return;
        }
        if (successfulRequest(request)) {
            if (options.success) { options.success(request); }
        } else {
            if (options.error) { options.error(request); }
        }
    };

    request.open('get', url, true);
    request.setRequestHeader('Accept', 'text/html');

    try {
        request.send(options.postBody);
    } catch (e) {
        if (options.error) { options.error(); }
    }

    return request;
}
242 \r
// Paging state used by appendNextPage: curPageNum is incremented for every
// page it is asked to append, and once it exceeds maxPages no further page
// is fetched. Declared with var because the original implicit globals throw
// a ReferenceError in strict mode.
var curPageNum = 1;
var maxPages = 30;
/**
 * Fetches the page at `nextPageLink`, extracts its article content and
 * appends it to the element with id "reader_content_div" as a new
 * "SISO_page" block; then repeats for the next page link found in the
 * fetched document.
 *
 * Once more than `maxPages` pages have been requested, nothing is fetched
 * any more and a "View Next Page" link is appended instead.
 *
 * Relies on ajax, findNextPageLink, grabArticle, regexps, pageETags,
 * curPageNum, maxPages and the global `flags` from this file.
 *
 * @param {string} nextPageLink - URL of the page to append
 */
function appendNextPage(nextPageLink) {
    curPageNum += 1;

    var articlePage       = document.createElement("DIV");
    articlePage.id        = 'readability-page-' + curPageNum;
    articlePage.className = 'SISO_page';
    articlePage.innerHTML = '<p class="SISO_page-separator" title="Page ' + curPageNum + '">Page ' + curPageNum + '</p>';

    if (curPageNum > maxPages) {
        // The link comes from the fetched page, so escape it before it is
        // placed inside an HTML attribute.
        var safeHref = String(nextPageLink)
            .replace(/&/g, '&amp;')
            .replace(/'/g, '&#39;')
            .replace(/"/g, '&quot;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
        var nextPageMarkup = "<div style='text-align: center'><a href='" + safeHref + "'>View Next Page</a></div>";

        articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;

        // The original built this block and returned without attaching it,
        // so the "View Next Page" link was never shown.
        var container = document.getElementById("reader_content_div");
        if (container) {
            container.appendChild(articlePage);
        }
        return;
    }

    ajax(nextPageLink, {
        success: function (r) {
            // An ETag seen before means this exact page was already appended.
            var eTag = r.getResponseHeader('ETag');
            if (eTag) {
                if (eTag in pageETags) {
                    // NOTE(review): dbg is not defined in the visible part of
                    // this file; confirm it exists, otherwise this throws.
                    dbg("Exact duplicate page found via ETag. Aborting.");
                    articlePage.style.display = 'none';
                    return;
                } else {
                    pageETags[eTag] = 1;
                }
            }

            var fetchedPage = document.createElement("DIV");

            // Newlines are swapped for \uffff so that ".*?" can span lines
            // while <script> blocks are stripped, then swapped back. The
            // stripping pass runs twice, as in the original.
            var responseHtml = r.responseText.replace(/\n/g, '\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
            responseHtml = responseHtml.replace(/\n/g, '\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
            responseHtml = responseHtml.replace(/\uffff/g, '\n').replace(/<(\/?)noscript/gi, '<$1div');
            responseHtml = responseHtml.replace(regexps.replaceBrs, '</p><p>');
            responseHtml = responseHtml.replace(regexps.replaceFonts, '<$1span>');

            fetchedPage.innerHTML = responseHtml;

            // Global `flags` is reset before grabArticle runs; its
            // definition and meaning are outside the visible source.
            flags = 0x1 | 0x2 | 0x4;

            var followingLink = findNextPageLink(fetchedPage),
                content = grabArticle(fetchedPage);

            if (!content) {
                return;
            }

            articlePage.innerHTML += content;
            document.getElementById("reader_content_div").appendChild(articlePage);

            if (followingLink) {
                appendNextPage(followingLink);
            }
        }
    });
}
305 \r
// Regular expressions shared by the reader functions. Declared with var and
// terminated with a semicolon: the original was an implicit global, which
// throws a ReferenceError in strict mode. All patterns are unchanged.
var regexps = {
    // class+id patterns that make grabArticle skip a node
    unlikelyCandidates:    /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
    okMaybeItsACandidate:  /and|article|body|column|main|shadow/i,
    // class/id patterns worth +25 / -25 in getClassWeight
    positive:              /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story|date/i,
    negative:              /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|script|tool|widget|scbox|reply|div_dispalyslide|galleryad|disqus_thread|cnn_strylftcntnt|topRightNarrow|fs-stylelist-thumbnails|replText|ttalk_layer|disqus_post_message|disqus_post_title|cnn_strycntntrgt|wpadvert|sharedaddy sd-like-enabled sd-sharing-enabled|fs-slideshow-wrapper|fs-stylelist-launch|reply_box|contentHeader|jive-paginator lw-label/i,
    // link text/href patterns that disqualify or penalise a next-page candidate
    extraneous:            /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single/i,
    // a DIV whose markup contains none of these tags is scored like a paragraph
    divToPElements:        /<(a|blockquote|dl|div|img|ol|p|pre|table|ul|article)/i,
    // runs of two or more <br> become paragraph breaks (appendNextPage)
    replaceBrs:            /(<br[^>]*>[ \n\r\t]*){2,}/gi,
    // <font> tags are rewritten to <span> (appendNextPage)
    replaceFonts:          /<(\/?)font[^>]*>/gi,
    // whitespace trimming / collapsing used by getInnerText
    trim:                  /^\s+|\s+$/g,
    normalize:             /\s{2,}/g,
    // any run of <br> plus whitespace collapses to one <br /> (killBreaks)
    killBreaks:            /(<br\s*\/?>(\s|&nbsp;?)*){1,}/g,
    // embeds pointing at these hosts survive clean()/cleanConditionally()
    videos:                /http:\/\/(www\.)?(youtube|vimeo)\.com/i,
    skipFootnoteLink:      /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
    // link text that raises (+50) / lowers (-200) a next-page score
    nextLink:              /(next|right|weiter|continue|>([^\|]|$)|\u00BB([^\|]|$))/i,
    prevLink:              /(prev|earl|old|<|«)/i,
    retainDiv:              /whois_record/i
};
324 \r
/**
 * Removes h1 and h2 elements under `e` that look like page chrome rather
 * than article headings: a negative class weight, or a link density above
 * 0.33.
 *
 * @param {Element} e - root element to clean
 */
function cleanHeaders(e) {
    var headingTags = ['h1', 'h2'];

    for (var t = 0; t < headingTags.length; t += 1) {
        var found = e.getElementsByTagName(headingTags[t]);

        // Walk backwards so removals do not disturb the remaining indices.
        var pos = found.length;
        while (pos > 0) {
            pos -= 1;
            var heading = found[pos];
            var unwanted = getClassWeight(heading) < 0 || getLinkDensity(heading) > 0.33;
            if (unwanted) {
                heading.parentNode.removeChild(heading);
            }
        }
    }
}
335 \r
/**
 * Removes every element with tag name `tag` under `e`.
 *
 * For "object" and "embed" an element is kept when any of its attribute
 * values, or its inner HTML, matches regexps.videos.
 *
 * @param {Element} e - root element to clean
 * @param {string} tag - tag name to remove
 */
function clean(e, tag) {
    var matches = e.getElementsByTagName(tag);
    var sparesVideos = (tag === 'object' || tag === 'embed');

    function mentionsVideo(str) {
        return str.search(regexps.videos) !== -1;
    }

    // Backwards, so removing a node does not shift the ones still to visit.
    for (var pos = matches.length - 1; pos >= 0; pos -= 1) {
        var node = matches[pos];

        if (sparesVideos) {
            var attrs = node.attributes;
            var joined = "";
            for (var a = 0, count = attrs.length; a < count; a += 1) {
                joined += attrs[a].value + '|';
            }

            if (mentionsVideo(joined) || mentionsVideo(node.innerHTML)) {
                continue;
            }
        }

        node.parentNode.removeChild(node);
    }
}
/**
 * Unconditionally removes every element with tag name `tag` under `e`.
 *
 * @param {Element} e - root element to clean
 * @param {string} tag - tag name to remove
 */
function cleanHTML5(e, tag) {
    var found = e.getElementsByTagName(tag);
    var remaining = found.length;

    // Last to first, so earlier indices stay valid while nodes are removed.
    while (remaining > 0) {
        remaining -= 1;
        var node = found[remaining];
        node.parentNode.removeChild(node);
    }
}
/**
 * Hides (display: none) every element with tag name `tag` under `e`; the
 * elements stay in the tree.
 *
 * @param {Element} e - root element
 * @param {string} tag - tag name to hide
 */
function cleanSelect(e, tag) {
    var found = e.getElementsByTagName(tag);
    var remaining = found.length;

    while (remaining > 0) {
        remaining -= 1;
        found[remaining].style.display = 'none';
    }
}
371     function getCharCount(e,s) {\r
372         s = s || ",";\r
373         return getInnerText(e).split(s).length-1;\r
374     }\r
/**
 * Scores an element by its class name and id: each of the two contributes
 * -25 when it matches regexps.negative and +25 when it matches
 * regexps.positive (both may apply). Non-string or empty values are ignored.
 *
 * @param {Element} e - element to score
 * @returns {number} weight between -50 and 50
 */
function getClassWeight(e) {
    var score = 0;
    var labels = [e.className, e.id];

    for (var k = 0; k < labels.length; k += 1) {
        var label = labels[k];

        if (typeof label !== 'string' || label === '') {
            continue;
        }
        if (label.search(regexps.negative) !== -1) {
            score -= 25;
        }
        if (label.search(regexps.positive) !== -1) {
            score += 25;
        }
    }

    return score;
}
/**
 * Tells whether the first character of `innerCharacter` belongs to one of
 * the CJK Unicode ranges listed below.
 *
 * Fixes over the original:
 *  - range bounds are inclusive; the strict comparisons used before excluded
 *    the first and last code point of every range, e.g. U+4E00 and U+AC00;
 *  - a leading surrogate pair is decoded, so the two supplementary-plane
 *    ranges can actually match (charCodeAt(0) alone never exceeds 0xFFFF).
 *
 * @param {?string} innerCharacter - string whose first character is tested
 * @returns {boolean} true when that character is in a CJK range
 */
function ChineseJapneseKorean(innerCharacter) {
    if (!innerCharacter || innerCharacter.length === 0) {
        return false;
    }

    var code = innerCharacter.charCodeAt(0);

    // Combine a high/low surrogate pair into one code point.
    if (code >= 0xD800 && code <= 0xDBFF && innerCharacter.length > 1) {
        var low = innerCharacter.charCodeAt(1);
        if (low >= 0xDC00 && low <= 0xDFFF) {
            code = (code - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
        }
    }

    // [first, last] code points, same numeric bounds as the original checks.
    var ranges = [
        [0x2E80, 0x2EFF],   // CJK Radicals Supplement
        [0x3040, 0x30FF],   // Hiragana (and the code points up to U+30FF)
        [0x31C0, 0x4DBF],
        [0x4E00, 0x9FFF],   // CJK Unified Ideographs
        [0xAC00, 0xD7AF],   // Hangul Syllables
        [0xF900, 0xFAFF],   // CJK Compatibility Ideographs
        [0xFE30, 0xFE4F],   // CJK Compatibility Forms
        [0x20000, 0x2A6DF],
        [0x2F800, 0x2FA1F]
    ];

    for (var r = 0; r < ranges.length; r += 1) {
        if (code >= ranges[r][0] && code <= ranges[r][1]) {
            return true;
        }
    }
    return false;
}
/**
 * Removes elements with tag name `tag` under `e` that look like non-article
 * content ("fishy" blocks).
 *
 * Per element, from last to first:
 *  1. If class weight + readability content score is negative, the element
 *     is removed, except for "div" and "article", which are always kept in
 *     this case.
 *  2. Otherwise, if its text has fewer than 10 commas, a list of heuristics
 *     (image/paragraph ratio, list items, inputs, text length, link density,
 *     embeds) decides whether it is removed. A removal decision is relaxed
 *     for text containing CJK characters, cancelled when the element holds an
 *     image of at least 200x150, and, for "div"/"article", cancelled when the
 *     current document contains one of a fixed set of marker classes/ids.
 *
 * Cleanups over the original (removal decisions are unchanged):
 *  - `checkWordOfTag() !== 'undefined'` compared against a string and was
 *    always true; the CJK scan is now a plain boolean helper;
 *  - the negative-score div/article branch only reset an already-false flag
 *    after querying the document; the queries are dropped;
 *  - the duplicated `var il` declarations and repeated whitelist lookups are
 *    folded into helpers.
 *
 * Relies on getClassWeight, getCharCount, getLinkDensity, getInnerText,
 * ChineseJapneseKorean and regexps from this file.
 *
 * @param {Element} e - root element to clean
 * @param {string} tag - tag name of the elements to examine
 */
function cleanConditionally(e, tag) {
    var tagsList = e.getElementsByTagName(tag);
    var isContainerTag = (tag === "div" || tag === "article");

    // True when the current document carries one of the site-specific markers
    // for which div/article removal is suppressed.
    function documentHasProtectedMarker() {
        var protectedClasses = [
            "whois_record",
            "sixteen columns hreview hentry hnews full",
            "list",
            "articleContent",
            "content",
            "view_cnt"
        ];
        for (var c = 0; c < protectedClasses.length; c += 1) {
            if (document.getElementsByClassName(protectedClasses[c]).length > 0) {
                return true;
            }
        }
        var description = document.getElementById("description");
        return description !== null && description.innerHTML.length > 0;
    }

    // True when any character of `text` is reported as CJK.
    function containsCJK(text) {
        for (var h = 0; h < text.length; h += 1) {
            if (ChineseJapneseKorean(text.charAt(h)) === true) {
                return true;
            }
        }
        return false;
    }

    // True when the element holds at least one image of 200x150 or larger.
    function hasLargeImage(node) {
        var images = node.getElementsByTagName("img");
        for (var im = 0; im < images.length; im += 1) {
            if ((images[im].height >= 150) && (images[im].width >= 200)) {
                return true;
            }
        }
        return false;
    }

    // Backwards, so removing a node does not shift the ones still to visit.
    for (var i = tagsList.length - 1; i >= 0; i -= 1) {
        var node = tagsList[i];
        var weight = getClassWeight(node);
        var contentScore = (typeof node.readability !== 'undefined') ? node.readability.contentScore : 0;

        if (weight + contentScore < 0) {
            // Negative-scoring divs and articles are deliberately kept.
            if (!isContainerTag) {
                node.parentNode.removeChild(node);
            }
            continue;
        }

        // Comma-rich blocks are treated as prose and left alone.
        if (!(getCharCount(node, ',') < 10)) {
            continue;
        }

        var p     = node.getElementsByTagName("p").length;
        var img   = node.getElementsByTagName("img").length;
        // The -100 offset is from the original: list items only count once
        // there are more than 100 of them.
        var li    = node.getElementsByTagName("li").length - 100;
        var input = node.getElementsByTagName("input").length;

        // Count embeds that are not recognised videos.
        var embedCount = 0;
        var embeds = node.getElementsByTagName("embed");
        for (var ei = 0; ei < embeds.length; ei += 1) {
            if (embeds[ei].src.search(regexps.videos) === -1) {
                embedCount += 1;
            }
        }

        var linkDensity   = getLinkDensity(node);
        var text          = getInnerText(node);
        var contentLength = text.length;

        var toRemove =
            (img > p) ||
            (li > p && tag !== "ul" && tag !== "ol") ||
            (input > Math.floor(p / 3)) ||
            (contentLength < 25 && (img > 2 || img === 0)) ||
            (weight < 25 && linkDensity > 0.2) ||
            (weight >= 25 && linkDensity > 0.5) ||
            ((embedCount === 1 && contentLength < 75) || embedCount > 1);

        // CJK text is only removed on the stricter link-density rule.
        if (toRemove && containsCJK(text)) {
            toRemove = (weight < 25 && linkDensity > 0.3);
        }

        if (toRemove && hasLargeImage(node)) {
            toRemove = false;
        }

        if (toRemove && isContainerTag && documentHasProtectedMarker()) {
            toRemove = false;
        }

        if (toRemove) {
            node.parentNode.removeChild(node);
        }
    }
}
/**
 * Collapses every run of <br> tags (with any whitespace or &nbsp; between
 * them) in `e` into a single "<br />".
 *
 * Rewriting innerHTML stays best-effort, but a failure is now logged instead
 * of being swallowed silently.
 *
 * @param {Element} e - element whose markup is rewritten
 */
function killBreaks(e) {
    try {
        e.innerHTML = e.innerHTML.replace(regexps.killBreaks, '<br />');
    } catch (eBreaks) {
        console.log("Reader :: reader.js :: WARN - killBreaks failed, ignoring: " + eBreaks);
    }
}
/**
 * Removes the inline `style` attribute from `e` and from every element
 * below it, except elements whose className is exactly
 * "readability-styled" (their descendants are still processed).
 *
 * The original carried an `if(!e)` guard placed after `e` had already been
 * defaulted and dereferenced, so it could never fire, and it removed each
 * child's style twice (once in the loop, once in the recursive call). Both
 * are dropped here.
 *
 * @param {Node} [e=document] - root node to clean
 */
function cleanStyles(e) {
    e = e || document;

    // The document node itself has no removeAttribute; only elements do.
    if (typeof e.removeAttribute === 'function' && e.className !== 'readability-styled') {
        e.removeAttribute('style');
    }

    for (var cur = e.firstChild; cur !== null; cur = cur.nextSibling) {
        // nodeType 1 is an element; text and comment nodes are skipped.
        if (cur.nodeType === 1) {
            cleanStyles(cur);
        }
    }
}
565 \r
/**
 * Strips the `href` attribute from every <a> under `e`, leaving the link
 * text in place.
 *
 * @param {Element} e - root element
 */
function cleanLinkHrefs(e) {
    var anchors = e.getElementsByTagName("a");
    var total = anchors.length;
    var pos = 0;

    while (pos < total) {
        anchors[pos].removeAttribute("href");
        pos += 1;
    }
}
573 \r
/**
 * Prepares extracted article content for display: strips inline styles,
 * removes forms, objects, videos, iframes, scripts, styles and inputs,
 * hides selects, unlinks anchors, drops junk headers, conditionally removes
 * tables/lists/divs, deletes empty paragraphs and finally removes <br> tags
 * that directly precede a <p>.
 *
 * The final innerHTML rewrite stays best-effort, but a failure is now
 * logged instead of being swallowed silently.
 *
 * @param {Element} articleContent - article root, modified in place
 */
function prepArticle(articleContent) {
    cleanStyles(articleContent);
    cleanConditionally(articleContent, "form");
    clean(articleContent, "object");
    cleanHTML5(articleContent, "video");
    cleanSelect(articleContent, "select");

    // A single h2 is dropped (presumably a duplicate of the title; the
    // visible code does not say why).
    if (articleContent.getElementsByTagName('h2').length === 1) {
        clean(articleContent, "h2");
    }

    // NOTE(review): "\n" is kept from the original; no element should carry
    // that tag name, so this entry looks like a no-op. Confirm before removing.
    var junkTags = ["iframe", "script", "style", "textarea", "input", "\n", "noscript"];
    for (var t = 0; t < junkTags.length; t += 1) {
        clean(articleContent, junkTags[t]);
    }

    cleanLinkHrefs(articleContent);
    cleanHeaders(articleContent);
    cleanConditionally(articleContent, "table");
    cleanConditionally(articleContent, "ul");
    cleanConditionally(articleContent, "div");

    // Remove paragraphs that hold neither text nor media.
    var articleParagraphs = articleContent.getElementsByTagName('p');
    for (var i = articleParagraphs.length - 1; i >= 0; i -= 1) {
        var paragraph   = articleParagraphs[i];
        var imgCount    = paragraph.getElementsByTagName('img').length;
        var embedCount  = paragraph.getElementsByTagName('embed').length;
        var objectCount = paragraph.getElementsByTagName('object').length;

        if (imgCount === 0 && embedCount === 0 && objectCount === 0 && getInnerText(paragraph, false) === '') {
            paragraph.parentNode.removeChild(paragraph);
        }
    }

    try {
        articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');
    } catch (e) {
        console.log("Reader :: reader.js :: WARN - removing breaks before paragraphs failed, ignoring: " + e);
    }
}
614 \r
/**
 * Computes the share of an element's text that sits inside links:
 * (total length of all <a> texts) / (length of the element's text).
 *
 * An element without any text yields 0. The original divided 0 by 0 here
 * and returned NaN, which then turned every score multiplied by
 * (1 - density) into NaN as well.
 *
 * @param {Element} e - element to measure
 * @returns {number} link density; 0 when the element has no text
 */
function getLinkDensity(e) {
    var textLength = getInnerText(e).length;
    if (textLength === 0) {
        return 0;
    }

    var links = e.getElementsByTagName("a");
    var linkLength = 0;
    for (var i = 0, il = links.length; i < il; i += 1) {
        linkLength += getInnerText(links[i]).length;
    }

    return linkLength / textLength;
}
/**
 * Attaches a fresh `readability` record to `node` and seeds its
 * contentScore from the node's tag name plus its class/id weight.
 *
 * @param {Element} node - element to initialise; gains a `readability`
 *        property of the form {contentScore: number}
 */
function initializeNode(node) {
    // Starting score per (upper-case) tag name; unlisted tags start at 0.
    var baseScores = {
        DIV: 5,
        ARTICLE: 25,
        PRE: 3, TD: 3, BLOCKQUOTE: 3,
        ADDRESS: -3, OL: -3, UL: -3, DL: -3, DD: -3, DT: -3, LI: -3, FORM: -3,
        H1: -5, H2: -5, H3: -5, H4: -5, H5: -5, H6: -5, TH: -5
    };

    node.readability = {"contentScore": 0};

    var tag = node.tagName;
    if (typeof tag === 'string' && Object.prototype.hasOwnProperty.call(baseScores, tag)) {
        node.readability.contentScore += baseScores[tag];
    }

    node.readability.contentScore += getClassWeight(node);
}
664 \r
/**
 * Returns the trimmed text of a node with <script> content excluded.
 *
 * The node is cloned first, so the original tree is never modified. Text
 * is read from `innerText` when navigator.appName reports Internet
 * Explorer and from `textContent` otherwise.
 *
 * @param {Node} e - node to read
 * @param {boolean} [normalizeSpaces=true] - collapse runs of two or more
 *        whitespace characters into a single space
 * @returns {string} the text, or "" when the node exposes neither
 *          textContent nor innerText
 */
function getInnerText(e, normalizeSpaces) {
    if (typeof(e.textContent) === "undefined" && typeof(e.innerText) === "undefined") {
        return "";
    }

    var collapse = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;

    var copy = e.cloneNode(true);
    // nodeType 3 is a text node, which has no descendants to clean.
    if (copy.nodeType != 3) {
        clean(copy, "script");
    }

    var isIE = navigator.appName === "Microsoft Internet Explorer";
    var raw = isIE ? copy.innerText : copy.textContent;
    var trimmed = raw.replace(regexps.trim, "");

    return collapse ? trimmed.replace(regexps.normalize, " ") : trimmed;
}
687 \r
/**
 * Extract the main article markup from a page.
 *
 * Works on a deep clone of the given root (or of document.body), so the
 * live DOM is never modified. The clone is stored in the file-level `page`
 * variable. Paragraph-like nodes are scored, the scores are accumulated on
 * their parents and grandparents, the best-scoring ancestor is chosen as the
 * "top candidate", and that candidate plus any sufficiently content-like
 * siblings are copied into a fresh container which is handed to
 * prepArticle().
 *
 * @param {Node} [argumentPage] Root to analyse; defaults to document.body.
 * @returns {string|undefined} innerHTML of the assembled article container,
 *     " " when the container ends up empty, or undefined when there is no
 *     page to analyse.
 */
function grabArticle(argumentPage) {
    page = argumentPage ? argumentPage : document.body;
    if (!page) {
        return;
    }
    page = page.cloneNode(true);

    // Pass 1: collect the nodes whose text will be scored.
    var allElements = page.getElementsByTagName('*');
    var nodesToScore = [];
    var node = null;
    for (var nodeIndex = 0; (node = allElements[nodeIndex]); nodeIndex += 1) {
        var unlikelyMatchString = node.className + node.id;
        // Nodes whose class/id match the unlikely-candidates pattern are not
        // scored themselves (their descendants are still visited).
        if (unlikelyMatchString !== "undefined" &&
                unlikelyMatchString.search(regexps.unlikelyCandidates) !== -1 &&
                node.tagName !== "BODY") {
            continue;
        }

        if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE") {
            nodesToScore.push(node);
        }

        if (node.tagName === "DIV") {
            if (node.innerHTML.search(regexps.divToPElements) === -1) {
                // No match for divToPElements in the markup: score the DIV itself.
                nodesToScore.push(node);
            } else {
                // Otherwise score only the DIV's direct text nodes.
                for (var i = 0, il = node.childNodes.length; i < il; i += 1) {
                    var childNode = node.childNodes[i];
                    if (childNode.nodeType === 3) {
                        nodesToScore.push(childNode);
                    }
                }
            }
        }
    }

    // Pass 2: accumulate content scores on parents and grandparents.
    var candidates = [];
    for (var pt = 0; pt < nodesToScore.length; pt += 1) {
        var parentNode      = nodesToScore[pt].parentNode;
        var grandParentNode = parentNode ? parentNode.parentNode : null;
        var innerText       = getInnerText(nodesToScore[pt]);

        if (!parentNode || typeof parentNode.tagName === 'undefined') {
            continue;
        }

        // Too short to be meaningful content.
        if (innerText.length < 25) {
            continue;
        }

        if (typeof parentNode.readability === 'undefined') {
            initializeNode(parentNode);
            candidates.push(parentNode);
        }

        if (grandParentNode &&
                typeof grandParentNode.readability === 'undefined' &&
                typeof grandParentNode.tagName !== 'undefined') {
            initializeNode(grandParentNode);
            candidates.push(grandParentNode);
        }

        // One base point, one point per comma-separated chunk, and up to
        // three points for every 100 characters of text.
        var contentScore = 1 +
            innerText.split(',').length +
            Math.min(Math.floor(innerText.length / 100), 3);

        parentNode.readability.contentScore += contentScore;

        // Only touch the grandparent when it was actually initialised above.
        if (grandParentNode && grandParentNode.readability) {
            grandParentNode.readability.contentScore += contentScore / 2;
        }
    }

    // Pass 3: scale every score by (1 - link density) and pick the best one.
    var topCandidate = null;
    for (var c = 0, cl = candidates.length; c < cl; c += 1) {
        var candidate = candidates[c];
        candidate.readability.contentScore =
            candidate.readability.contentScore * (1 - getLinkDensity(candidate));

        if (!topCandidate ||
                candidate.readability.contentScore > topCandidate.readability.contentScore) {
            topCandidate = candidate;
        }
    }

    // No usable candidate (or only BODY): wrap the whole page in a DIV and
    // use that wrapper as the candidate.
    if (topCandidate === null || topCandidate.tagName === "BODY") {
        topCandidate = document.createElement("DIV");
        topCandidate.innerHTML = page.innerHTML;
        page.innerHTML = "";
        page.appendChild(topCandidate);
        initializeNode(topCandidate);
    }

    // Pass 4: copy the top candidate and its content-like siblings.
    var articleContent = document.createElement("DIV");
    articleContent.id  = "readability-content";
    var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
    // The top candidate can be the detached clone root itself, in which case
    // it has no parent and therefore no siblings besides itself.
    var siblingNodes = topCandidate.parentNode ?
        topCandidate.parentNode.childNodes :
        [topCandidate];

    for (var s = 0, sl = siblingNodes.length; s < sl; s += 1) {
        var siblingNode = siblingNodes[s];
        if (!siblingNode) {
            continue;
        }

        var append = (siblingNode === topCandidate);

        // Siblings sharing the top candidate's (non-empty) class get a bonus.
        var contentBonus = 0;
        if (siblingNode.className === topCandidate.className && topCandidate.className !== "") {
            contentBonus += topCandidate.readability.contentScore * 0.2;
        }

        if (typeof siblingNode.readability !== 'undefined' &&
                (siblingNode.readability.contentScore + contentBonus) >= siblingScoreThreshold) {
            append = true;
        }

        if (siblingNode.nodeName === "P") {
            var linkDensity = getLinkDensity(siblingNode);
            var nodeContent = getInnerText(siblingNode);
            var nodeLength  = nodeContent.length;

            if (nodeLength > 80 && linkDensity < 0.25) {
                append = true;
            } else if (nodeLength < 80 && linkDensity === 0 &&
                    nodeContent.search(/\.( |$)/) !== -1) {
                // Short, link-free paragraph containing a '.' followed by a
                // space or the end of the text.
                append = true;
            }
        }

        if (!append) {
            continue;
        }

        var nodeToAppend = siblingNode;
        if (siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") {
            // Re-home anything that is not a DIV/P inside a DIV.
            nodeToAppend = document.createElement("DIV");
            try {
                nodeToAppend.id = siblingNode.id;
                nodeToAppend.innerHTML = siblingNode.innerHTML;
            } catch (er) {
                // Fall back to the sibling itself. The sibling list is not
                // modified (only clones are appended below), so the loop
                // indices must NOT be adjusted here.
                nodeToAppend = siblingNode;
            }
        }

        articleContent.appendChild(nodeToAppend.cloneNode(true));
    }

    prepArticle(articleContent);

    // Remove the scoring bookkeeping from every node initialised above.
    // Walking `candidates` (rather than re-deriving parents from
    // nodesToScore) is safe even after the fallback branch detached the
    // page's children, which leaves some scored nodes with a null parent.
    for (var k = 0; k < candidates.length; k += 1) {
        delete candidates[k].readability;
    }

    if (articleContent.innerHTML === '') {
        return " ";
    }

    return articleContent.innerHTML;
}
874 \r
/**
 * Derive a display title for the article from the document title.
 *
 * Strategy:
 *  - "Headline | Site" / "Headline - Site": keep the part before the last
 *    separator; if that leaves fewer than 3 space-separated words, keep the
 *    part after the first separator instead.
 *  - "Site: Headline": keep the part after the last colon; if that leaves
 *    fewer than 3 words, keep everything after the first colon.
 *  - Very long (> 150) or very short (< 15) titles: use the text of the
 *    page's <h1> when there is exactly one.
 * The result is trimmed with regexps.trim; if it has 4 words or fewer the
 * original document title is returned instead.
 *
 * @returns {string} The chosen title (possibly the empty string).
 */
function getArticleTitle() {
    var curTitle = "",
        origTitle = "";

    try {
        curTitle = origTitle = document.title;
        if (typeof curTitle !== "string") {
            curTitle = origTitle = getInnerText(document.getElementsByTagName('title')[0]);
        }
    }
    catch (e) {
        // Best effort: a missing/unreadable title is handled just below.
    }

    // If the lookup above failed part-way (or the fallback produced a
    // non-string), the variables may hold a non-string value; normalise so
    // the string operations below cannot throw.
    if (typeof curTitle !== "string") {
        curTitle = origTitle = "";
    }

    if (curTitle.match(/ [\|\-] /)) {
        curTitle = origTitle.replace(/(.*)[\|\-] .*/gi, '$1');

        if (curTitle.split(' ').length < 3) {
            curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi, '$1');
        }
    }
    else if (curTitle.indexOf(': ') !== -1) {
        curTitle = origTitle.replace(/.*:(.*)/gi, '$1');

        if (curTitle.split(' ').length < 3) {
            curTitle = origTitle.replace(/[^:]*[:](.*)/gi, '$1');
        }
    }
    else if (curTitle.length > 150 || curTitle.length < 15) {
        var hOnes = document.getElementsByTagName('h1');
        if (hOnes.length === 1) {
            curTitle = getInnerText(hOnes[0]);
        }
    }

    curTitle = curTitle.replace(regexps.trim, "");

    // A result of 4 words or fewer is discarded in favour of the original title.
    if (curTitle.split(' ').length <= 4) {
        curTitle = origTitle;
    }
    return curTitle;
}
919 \r
/**
 * Build the reader-mode HTML for the current document.
 *
 * Runs grabArticle(); when the extracted markup is longer than 1000
 * characters, returns a viewport <meta> tag, an <h1> with the article title
 * and the extracted markup, concatenated. Otherwise returns the literal
 * string "undefined".
 *
 * @returns {string} Reader HTML, or the string "undefined" when no
 *     sufficiently long article was found.
 */
function initReader() {
    var article_block = grabArticle();
    if (!article_block || article_block.length <= 1000) {
        return "undefined";
    }

    // The title is plain text; escape it so characters such as '&' or '<'
    // cannot break (or inject markup into) the generated heading.
    // NOTE(review): assumes getArticleTitle() yields text, not markup.
    var article_title = String(getArticleTitle())
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    var title = '<h1>' + article_title + '</h1>';
    var meta = '<meta name=\"viewport\" content=\"width=0, initial-scale=1.0, maximum-scale=2.0, minimum-scale=1.0, user-scalable=no, target-densitydpi=medium-dpi\">\n';
    return meta + title + article_block;
}
931 \r
932 initReader();\r
933 }\r
934 catch(e) {\r
935 console.log("Reader Error - Reader.js");\r
936 }\r
937 \r