Upstream version 5.34.104.0

[platform/framework/web/crosswalk.git] / src / third_party / readability / js / readability.js
diff --git a/src/third_party/readability/js/readability.js b/src/third_party/readability/js/readability.js

index 68a0286..130ea29 100644 (file)
--- a/src/third_party/readability/js/readability.js
+++ b/src/third_party/readability/js/readability.js
@@ -1,14 +1,20 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Local modifications to this file are described in the README.chromium
+// file.
  
  var dbg = (typeof console !== 'undefined') ? function(s) {
      console.log("Readability: " + s);
  } : function() {};
  
  /*
- * Readability. An Arc90 Lab Experiment. 
+ * Readability. An Arc90 Lab Experiment.
   * Website: http://lab.arc90.com/experiments/readability
   * Source:  http://code.google.com/p/arc90labs-readability
   *
- * "Readability" is a trademark of Arc90 Inc and may not be used without explicit permission. 
+ * "Readability" is a trademark of Arc90 Inc and may not be used without explicit permission.
   *
   * Copyright (c) 2010 Arc90 Inc
   * Readability is licensed under the Apache License, Version 2.0.
@@ -20,6 +26,7 @@ var readability = {
  
      distilledHTML: '',
      distilledArticleContent: null,
+    nextPageLink: '',
  
      version:                '1.7.1',
      iframeLoads:             0,
@@ -41,7 +48,7 @@ var readability = {
      maxPages:    30, /* The maximum number of pages to loop through before we call it quits and just show a link. */
      parsedPages: {}, /* The list of pages we've parsed in this call of readability, for autopaging. As a key store for easier searching. */
      pageETags:   {}, /* A list of the ETag headers of pages we've parsed, in case they happen to match, we'll know it's a duplicate. */
-    
+
      /**
       * All of the regular expressions in use within readability.
       * Defined up here so we don't instantiate them repeatedly in loops.
@@ -66,7 +73,7 @@ var readability = {
  
      /**
       * Runs readability.
-     * 
+     *
       * Workflow:
       *  1. Prep the document by removing script tags, css, etc.
       *  2. Build readability's DOM tree.
@@ -86,8 +93,11 @@ var readability = {
          readability.parsedPages[window.location.href.replace(/\/$/, '')] = true;
  
          /* Pull out any possible next page link first */
-        var nextPageLink = readability.findNextPageLink(document.body);
-        
+        readability.nextPageLink = readability.findNextPageLink(document.body);
+
+        /* The next page is processed from C++, so nextPageLink is set to null here. */
+        var nextPageLink = null;
+
          readability.prepDocument();
  
          /* Build readability's DOM tree */
@@ -152,12 +162,8 @@ var readability = {
  
          window.scrollTo(0, 0);
  
-        // TODO(bengr): Remove this assignment of null to nextPageLink when
-        //              the processing of the next page link is safe.
-        nextPageLink = null;
-
          if (nextPageLink) {
-            /** 
+            /**
               * Append any additional pages after a small timeout so that people
               * can start reading without having to wait for this to finish processing.
              **/
@@ -179,16 +185,16 @@ var readability = {
                  var windowHeight = window.innerHeight ? window.innerHeight : (document.documentElement.clientHeight ? document.documentElement.clientHeight : document.body.clientHeight);
  
                  if(readability.reversePageScroll) {
-                    readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10);                   
+                    readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10);
                  }
                  else {
-                    readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10);                   
+                    readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10);
                  }
-                
+
                  return false;
              }
          };
-        
+
          document.onkeyup = function(e) {
              var code = (window.event) ? event.keyCode : e.keyCode;
              if (code === 16) {
@@ -200,7 +206,7 @@ var readability = {
  
      /**
       * Run any post-process modifications to article content as necessary.
-     * 
+     *
       * @param Element
       * @return void
      **/
@@ -226,7 +232,7 @@ var readability = {
  
          for(var i=0, il = images.length; i < il; i+=1) {
              var image = images[i];
-            
+
              if(image.offsetWidth > imageWidthThreshold) {
                  image.className += " blockImage";
              }
@@ -242,7 +248,7 @@ var readability = {
          var articleTools = document.createElement("DIV");
  
          articleTools.id        = "readTools";
-        articleTools.innerHTML = 
+        articleTools.innerHTML =
              "<a href='#' onclick='return window.location.reload()' title='Reload original page' id='reload-page'>Reload Original Page</a>" +
              "<a href='#' onclick='javascript:window.print();' title='Print page' id='print-page'>Print Page</a>" +
              "<a href='#' onclick='readability.emailBox(); return false;' title='Email page' id='email-page'>Email Page</a>";
@@ -259,13 +265,13 @@ var readability = {
          function sanitizeText() {
              return text.replace(/@\w+/, "");
          }
-        
+
          function countMatches(match) {
              var matches = text.match(new RegExp(match, "g"));
-            return matches !== null ? matches.length : 0; 
+            return matches !== null ? matches.length : 0;
          }
-        
-        function isRTL() {            
+
+        function isRTL() {
              var count_heb =  countMatches("[\\u05B0-\\u05F4\\uFB1D-\\uFBF4]");
              var count_arb =  countMatches("[\\u060C-\\u06FE\\uFB50-\\uFEFC]");
  
@@ -289,15 +295,15 @@ var readability = {
          try {
              curTitle = origTitle = document.title;
              if(typeof curTitle !== "string") { /* If they had an element with id "title" in their HTML */
-                curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]); 
+                curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]);
              }
          }
          catch(e) {}
-        
+
          if(curTitle.match(/ [\|\-] /))
          {
              curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1');
-            
+
              if(curTitle.split(' ').length < 3) {
                  curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
              }
@@ -330,7 +336,7 @@ var readability = {
      /**
       * Prepare the HTML document for readability to scrape it.
       * This includes things like stripping javascript, CSS, and handling terrible markup.
-     * 
+     *
       * @return void
       **/
      prepDocument: function () {
@@ -342,7 +348,7 @@ var readability = {
          {
              var body = document.createElement("body");
              try {
-                document.body = body;       
+                document.body = body;
              }
              catch(e) {
                  document.documentElement.appendChild(body);
@@ -374,11 +380,11 @@ var readability = {
                      biggestFrameSize         = frameSize;
                      readability.biggestFrame = frames[frameIndex];
                  }
-                
+
                  if(canAccessFrame && frameSize > bestFrameSize)
                  {
                      readability.frameHack = true;
-    
+
                      bestFrame = frames[frameIndex];
                      bestFrameSize = frameSize;
                  }
@@ -390,7 +396,7 @@ var readability = {
                  readability.moveNodeInnards(bestFrame.contentWindow.document.body, newBody);
                  newBody.style.overflow = 'scroll';
                  document.body = newBody;
-                
+
                  var frameset = document.getElementsByTagName('frameset')[0];
                  if(frameset) {
                      frameset.parentNode.removeChild(frameset); }
@@ -455,7 +461,7 @@ var readability = {
              var imgCount    = articleParagraphs[i].getElementsByTagName('img').length;
              var embedCount  = articleParagraphs[i].getElementsByTagName('embed').length;
              var objectCount = articleParagraphs[i].getElementsByTagName('object').length;
-            
+
              if(imgCount === 0 && embedCount === 0 && objectCount === 0 && readability.getInnerText(articleParagraphs[i], false) === '') {
                  articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
              }
@@ -468,7 +474,7 @@ var readability = {
              dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " + e);
          }
      },
-    
+
      /**
       * Initialize a node with the readability object. Also checks the
       * className/id for special names to add to its score.
@@ -477,7 +483,7 @@ var readability = {
       * @return void
      **/
      initializeNode: function (node) {
-        node.readability = {"contentScore": 0};         
+        node.readability = {"contentScore": 0};
  
          switch(node.tagName) {
              case 'DIV':
@@ -489,7 +495,7 @@ var readability = {
              case 'BLOCKQUOTE':
                  node.readability.contentScore += 3;
                  break;
-                
+
              case 'ADDRESS':
              case 'OL':
              case 'UL':
@@ -511,10 +517,10 @@ var readability = {
                  node.readability.contentScore -= 5;
                  break;
          }
-       
+
          node.readability.contentScore += readability.getClassWeight(node);
      },
-    
+
      /***
       * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
       *               most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
@@ -525,7 +531,7 @@ var readability = {
      grabArticle: function (pageToClone) {
          var stripUnlikelyCandidates = readability.flagIsActive(readability.FLAG_STRIP_UNLIKELYS),
              isPaging = (page !== null) ? true: false;
-    
+
          var page = null;
          // Never work on the actual page.
          if (isPaging) {
@@ -533,7 +539,7 @@ var readability = {
          } else {
              page = pageToClone.cloneNode(true);
          }
-     
+
          var allElements = page.getElementsByTagName('*');
  
          /**
@@ -561,7 +567,7 @@ var readability = {
                      node.parentNode.removeChild(node);
                      nodeIndex-=1;
                      continue;
-                }               
+                }
              }
  
              if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE") {
@@ -598,7 +604,7 @@ var readability = {
                          }
                      }
                  }
-            } 
+            }
          }
  
          /**
@@ -640,15 +646,15 @@ var readability = {
  
              /* Add points for any commas within this paragraph */
              contentScore += innerText.split(',').length;
-            
+
              /* For every 100 characters in this paragraph, add another point. Up to 3 points. */
              contentScore += Math.min(Math.floor(innerText.length / 100), 3);
-           
+
              /* Add the score to the parent. The grandparent gets half. */
              parentNode.readability.contentScore += contentScore;
  
              if(grandParentNode) {
-                grandParentNode.readability.contentScore += contentScore/2;             
+                grandParentNode.readability.contentScore += contentScore/2;
              }
          }
  
@@ -725,12 +731,12 @@ var readability = {
              {
                  append = true;
              }
-            
+
              if(siblingNode.nodeName === "P") {
                  var linkDensity = readability.getLinkDensity(siblingNode);
                  var nodeContent = readability.getInnerText(siblingNode);
                  var nodeLength  = nodeContent.length;
-                
+
                  if(nodeLength > 80 && linkDensity < 0.25)
                  {
                      append = true;
@@ -747,7 +753,7 @@ var readability = {
                  var nodeToAppend = null;
                  if(siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") {
                      /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
-                    
+
                      dbg("Altering siblingNode of " + siblingNode.nodeName + ' to div.');
                      nodeToAppend = document.createElement("DIV");
                      try {
@@ -765,7 +771,7 @@ var readability = {
                      s-=1;
                      sl-=1;
                  }
-                
+
                  /* To ensure a node does not interfere with readability styles, remove its classnames */
                  nodeToAppend.className = "";
  
@@ -779,15 +785,15 @@ var readability = {
          **/
          readability.distilledArticleContent = articleContent.cloneNode(true);
          //readability.prepArticle(articleContent);
-      
+
          if (readability.curPageNum === 1) {
              var newNode = document.createElement('div');
              newNode.id = "readability-page-1";
              newNode.setAttribute("class", "page");
              readability.moveNodeInnards(articleContent, newNode);
              articleContent.appendChild(newNode);
-        } 
-      
+        }
+
          /**
           * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
           * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
@@ -813,7 +819,7 @@ var readability = {
  
          return articleContent;
      },
-    
+
      /**
       * Removes script tags from the document.
       *
@@ -828,12 +834,12 @@ var readability = {
                  scripts[i].nodeValue="";
                  scripts[i].removeAttribute('src');
                  if (scripts[i].parentNode) {
-                        scripts[i].parentNode.removeChild(scripts[i]);          
+                        scripts[i].parentNode.removeChild(scripts[i]);
                  }
              }
          }
      },
-    
+
      /**
       * Get the inner text of a node - cross browser compatibly.
       * This also strips out any excess whitespace to be found.
@@ -896,18 +902,18 @@ var readability = {
              if ( cur.nodeType === 1 ) {
                  // Remove style attribute(s) :
                  if(cur.className !== "readability-styled") {
-                    cur.removeAttribute("style");                   
+                    cur.removeAttribute("style");
                  }
                  readability.cleanStyles( cur );
              }
              cur = cur.nextSibling;
-        }           
+        }
      },
-    
+
      /**
       * Get the density of links as a percentage of the content
       * This is the amount of text that is inside a link divided by the total text in the node.
-     * 
+     *
       * @param Element
       * @return number (float)
      **/
@@ -918,11 +924,11 @@ var readability = {
          for(var i=0, il=links.length; i<il;i+=1)
          {
              linkLength += readability.getInnerText(links[i]).length;
-        }       
+        }
  
          return linkLength / textLength;
      },
-    
+
      /**
       * Find a cleaned up version of the current URL, to use for comparing links for possible next-pageyness.
       *
@@ -944,10 +950,10 @@ var readability = {
  
                  /* If the type isn't alpha-only, it's probably not actually a file extension. */
                  if(!possibleType.match(/[^a-zA-Z]/)) {
-                    segment = segment.split(".")[0];                    
+                    segment = segment.split(".")[0];
                  }
              }
-            
+
              /**
               * EW-CMS specific segment replacement. Ugly.
               * Example: http://www.ew.com/ew/article/0,,20313460_20369436,00.html
@@ -968,7 +974,7 @@ var readability = {
              if (i < 2 && segment.match(/^\d{1,2}$/)) {
                  del = true;
              }
-            
+
              /* If this is the first segment and it's just "index", remove it. */
              if(i === 0 && segment.toLowerCase() === "index") {
                  del = true;
@@ -992,7 +998,7 @@ var readability = {
  
      /**
       * Look for any paging links that may occur within the document.
-     * 
+     *
       * @param body
       * @return object (array)
      **/
@@ -1008,7 +1014,7 @@ var readability = {
           *
           * Also possible: levenshtein distance? longest common subsequence?
           *
-         * After we do that, assign each page a score, and 
+         * After we do that, assign each page a score, and
          **/
          for(var i = 0, il = allLinks.length; i < il; i+=1) {
              var link     = allLinks[i],
@@ -1018,12 +1024,12 @@ var readability = {
              if(linkHref === "" || linkHref === articleBaseUrl || linkHref === window.location.href || linkHref in readability.parsedPages) {
                  continue;
              }
-            
+
              /* If it's on a different domain, skip it. */
              if(window.location.host !== linkHref.split(/\/+/g)[1]) {
                  continue;
              }
-            
+
              var linkText = readability.getInnerText(link);
  
              /* If the linkText looks like it's not the next page, skip it. */
@@ -1036,9 +1042,9 @@ var readability = {
              if(!linkHrefLeftover.match(/\d/)) {
                  continue;
              }
-            
+
              if(!(linkHref in possiblePages)) {
-                possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref};             
+                possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref};
              } else {
                  possiblePages[linkHref].linkText += ' | ' + linkText;
              }
@@ -1060,7 +1066,7 @@ var readability = {
              if(linkData.match(/pag(e|ing|inat)/i)) {
                  linkObj.score += 25;
              }
-            if(linkData.match(/(first|last)/i)) { // -65 is enough to negate any bonuses gotten from a > or » in the text, 
+            if(linkData.match(/(first|last)/i)) { // -65 is enough to negate any bonuses gotten from a > or » in the text,
                  /* If we already matched on "next", last is probably fine. If we didn't, then it's bad. Penalize. */
                  if(!linkObj.linkText.match(readability.regexps.nextLink)) {
                      linkObj.score -= 65;
@@ -1087,10 +1093,10 @@ var readability = {
                      /* If this is just something like "footer", give it a negative. If it's something like "body-and-footer", leave it be. */
                      if(!parentNodeClassAndId.match(readability.regexps.positive)) {
                          linkObj.score -= 25;
-                        negativeNodeMatch = true;                       
+                        negativeNodeMatch = true;
                      }
                  }
-                
+
                  parentNode = parentNode.parentNode;
              }
  
@@ -1152,7 +1158,7 @@ var readability = {
  
              dbg('NEXT PAGE IS ' + nextHref);
              readability.parsedPages[nextHref] = true;
-            return nextHref;            
+            return nextHref;
          }
          else {
              return null;
@@ -1204,7 +1210,7 @@ var readability = {
          if (typeof options === 'undefined') { options = {}; }
  
          request.onreadystatechange = respondToReadyState;
-        
+
          request.open('get', url, true);
          request.setRequestHeader('Accept', 'text/html');
  
@@ -1239,7 +1245,7 @@ var readability = {
              articlePage.appendChild(linkDiv);
              return;
          }
-        
+
          /**
           * Now that we've built the article page DOM element, get the page content
           * asynchronously and load the cleaned content into the div we created for it.
@@ -1257,7 +1263,7 @@ var readability = {
                              return;
                          } else {
                              readability.pageETags[eTag] = 1;
-                        }                       
+                        }
                      }
  
                      // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away.
@@ -1308,7 +1314,7 @@ var readability = {
                              }
                          }
                      }
-                    
+
                      readability.removeScripts(content);
  
                      readability.moveNodeInnards(content, thisPage);
@@ -1330,9 +1336,9 @@ var readability = {
              });
          }(nextPageLink, articlePage));
      },
-    
+
      /**
-     * Get an elements class/id weight. Uses regular expressions to tell if this 
+     * Get an elements class/id weight. Uses regular expressions to tell if this
       * element looks good or bad.
       *
       * @param Element
@@ -1382,7 +1388,7 @@ var readability = {
          var allElements = e.getElementsByTagName('*');
          while (i < allElements.length) {
              readability.deleteExtraBreaks(allElements[i]);
-            i++; 
+            i++;
          }
      },
  
@@ -1397,7 +1403,7 @@ var readability = {
      clean: function (e, tag) {
          var targetList = e.getElementsByTagName( tag );
          var isEmbed    = (tag === 'object' || tag === 'embed');
-        
+
          for (var y=targetList.length-1; y >= 0; y-=1) {
              /* Allow youtube and vimeo videos through as people usually want to see those. */
              if(isEmbed) {
@@ -1405,7 +1411,7 @@ var readability = {
                  for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) {
                      attributeValues += targetList[y].attributes[i].value + '|';
                  }
-                
+
                  /* First, check the elements attributes to see if any of them contain youtube or vimeo */
                  if (attributeValues.search(readability.regexps.videos) !== -1) {
                      continue;
@@ -1415,13 +1421,13 @@ var readability = {
                  if (targetList[y].innerHTML.search(readability.regexps.videos) !== -1) {
                      continue;
                  }
-                
+
              }
  
              targetList[y].parentNode.removeChild(targetList[y]);
          }
      },
-    
+
      /**
       * Clean an element of all tags of type "tag" if they look fishy.
       * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
@@ -1446,7 +1452,7 @@ var readability = {
          for (var i=curTagsLength-1; i >= 0; i-=1) {
              var weight = readability.getClassWeight(tagsList[i]);
              var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0;
-            
+
              dbg("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")" + ((typeof tagsList[i].readability !== 'undefined') ? (" with score " + tagsList[i].readability.contentScore) : ''));
  
              if(weight+contentScore < 0)
@@ -1467,7 +1473,7 @@ var readability = {
                  var embeds     = tagsList[i].getElementsByTagName("embed");
                  for(var ei=0,il=embeds.length; ei < il; ei+=1) {
                      if (embeds[ei].src.search(readability.regexps.videos) === -1) {
-                      embedCount+=1; 
+                      embedCount+=1;
                      }
                  }
  
@@ -1480,7 +1486,7 @@ var readability = {
                  } else if(li > p && tag !== "ul" && tag !== "ol") {
                      toRemove = true;
                  } else if( input > Math.floor(p/3) ) {
-                    toRemove = true; 
+                    toRemove = true;
                  } else if(contentLength < 25 && (img === 0 || img > 2) ) {
                      toRemove = true;
                  } else if(weight < 25 && linkDensity > 0.2) {
@@ -1522,7 +1528,7 @@ var readability = {
      addFlag: function(flag) {
          readability.flags = readability.flags | flag;
      },
- 
+
      removeFlag: function(flag) {
          readability.flags = readability.flags & ~flag;
      },
@@ -1591,7 +1597,7 @@ var readability = {
          }
          return ret;
      },
-  
+
      // Replaces a pair of <BR> nodes (possibly separated by whitespace), with a
      // <P> node, and makes all next siblings of that pair children of <P>, up
      // until the next pair of <BR> nodes is reached.
@@ -1600,7 +1606,7 @@ var readability = {
          var second = readability.isMultipleBr(node, true);
          if (!second) {
              return;
-        } 
+        }
          // Make all next siblings of the second BR into children of a P.
          var p = document.createElement('p');
          var curr = second.nextSibling;
@@ -1613,7 +1619,7 @@ var readability = {
              curr = next;
          }
          var ret = curr;
-  
+
          // Remove all nodes between the first and second BR.
          curr = node.nextSibling;
          while (curr && curr != second) {
@@ -1625,10 +1631,10 @@ var readability = {
          second.parentNode.removeChild(second);
          // Replace the first BR with the P.
          node.parentNode.replaceChild(p, node);
-  
+
          return ret;
      },
-  
+
      // Returns true if the NodeList contains a double <BR>.
      hasDoubleBr: function(nodeList) {
          for (var i = 0; i < nodeList.length; nodeList++) {
@@ -1637,8 +1643,8 @@ var readability = {
              }
          }
          return false;
-    }, 
-    
+    },
+
      // Replaces double <BR> tags with <P> tags.
      replaceDoubleBrsWithPs: function(node) {
          var allElements = node.getElementsByTagName('BR');
@@ -1652,8 +1658,8 @@ var readability = {
              allElements = document.body.getElementsByTagName('BR');
          }
      },
-  
-  
+
+
      // Replaces a BR and the whitespace that follows it with a P.
      replaceBrWithP: function(node) {
          if (!readability.isBrNode(node)) {
@@ -1673,7 +1679,7 @@ var readability = {
          node.parentNode.replaceChild(p, node);
          return curr;
      },
-  
+
      // Replaces all <BR> tags with <P> tags. Makes all next siblings of a <BR> tag
      // children of the <P>.
      replaceBrsWithPs: function(node) {
@@ -1687,27 +1693,27 @@ var readability = {
              allElements = document.body.getElementsByTagName('BR');
          }
      },
-  
+
      // Replaces any tag with any other tag.
      replaceTagsWithTags: function(node, srcTag, destTag) {
          var allElements = node.getElementsByTagName(srcTag);
          for (var i = 0; i < allElements.length; i++) {
              var dest = document.createElement(destTag);
              readability.moveNodeInnards(allElements[i], dest);
-            node.replaceNode(dest, allElements[i]);
+            allElements[i].parentNode.replaceChild(dest, allElements[i]);
          }
      },
-  
+
      // Replaces all <noscript> tags with <p> tags.
      replaceNoscriptsWithPs: function(node) {
          readability.replaceTagsWithTags(node, 'noscript', 'p');
      },
-  
+
      // Replaces all <font> tags with <span> tags.
      replaceFontsWithSpans: function(node) {
          readability.replaceTagsWithTags(node, 'font', 'span');
      },
-  
+
      // Returns a list of image URLs in the distilled article.
      getImages : function() {
          var images = document.getElementsByTagName('img');
@@ -1719,10 +1725,15 @@ var readability = {
          }
          return result;
      },
-  
+
      // Returns the distilled article HTML from the page(s).
      getDistilledArticleHTML : function() {
          return readability.distilledHTML;
+    },
+
+    // Returns the link to the next page of this article, as found by findNextPageLink().
+    getNextPageLink : function() {
+        return readability.nextPageLink;
      }
  };
  
@@ -1730,12 +1741,15 @@ var readability = {
  // element is the article title, the second element is HTML containing the
  // long-form content, and remaining elements are URLs for images referenced by
  // that HTML. Each <img> tag in the HTML has an id field set to k - 2, which
-// corresponds to a URL listed at index k in the array returned. 
+// corresponds to a URL listed at index k in the array returned.
  (function () {
      readability.init();
-    var result = new Array(2);
+    var result = new Array(4);
      result[0] = readability.getArticleTitle();
      result[1] = readability.getDistilledArticleHTML();
+    result[2] = readability.getNextPageLink();
+    // TODO(shashishekhar): Add actual previous page link here.
+    result[3] = '';
      return result.concat(readability.getImages());
  }())