copy: handle mergeable extents across fiemap scans
author Pádraig Brady <P@draigBrady.com>
Tue, 5 Apr 2011 10:04:13 +0000 (11:04 +0100)
committer Pádraig Brady <P@draigBrady.com>
Wed, 6 Apr 2011 22:15:19 +0000 (23:15 +0100)
* src/extent-scan.h (extent_scan_free): Init the pointer to NULL,
and reset the count to 0, so that we can realloc the buffer.
* src/extent-scan.c (extent_scan_init): Likewise.
(extent_scan_read): Loop over multiple fiemap scans, so we handle
mergeable extents that span across fiemap scan boundaries.  Once
we have enough unique extents, return so as to minimize memory use.

src/extent-scan.c
src/extent-scan.h

index f8f8ca6..71a9404 100644 (file)
@@ -67,6 +67,7 @@ extent_scan_init (int src_fd, struct extent_scan *scan)
 {
   scan->fd = src_fd;
   scan->ei_count = 0;
+  scan->ext_info = NULL;
   scan->scan_start = 0;
   scan->initial_scan_failed = false;
   scan->hit_final_extent = false;
@@ -82,108 +83,125 @@ extent_scan_init (int src_fd, struct extent_scan *scan)
 extern bool
 extent_scan_read (struct extent_scan *scan)
 {
-  union { struct fiemap f; char c[4096]; } fiemap_buf;
-  struct fiemap *fiemap = &fiemap_buf.f;
-  struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
-  enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_extents };
-  verify (count != 0);
-
-  /* This is required at least to initialize fiemap->fm_start,
-     but also serves (in mid 2010) to appease valgrind, which
-     appears not to know the semantics of the FIEMAP ioctl. */
-  memset (&fiemap_buf, 0, sizeof fiemap_buf);
-
-  fiemap->fm_start = scan->scan_start;
-  fiemap->fm_flags = scan->fm_flags;
-  fiemap->fm_extent_count = count;
-  fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
-
-  /* Fall back to the standard copy if call ioctl(2) failed for the
-     the first time.  */
-  if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0)
-    {
-      if (scan->scan_start == 0)
-        scan->initial_scan_failed = true;
-      return false;
-    }
-
-  /* If 0 extents are returned, then more get_extent_table() are not needed.  */
-  if (fiemap->fm_mapped_extents == 0)
-    {
-      scan->hit_final_extent = true;
-      return false;
-    }
-
-  scan->ei_count = fiemap->fm_mapped_extents;
-  scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
-
-  unsigned int i, si = 0;
+  unsigned int si = 0;
   struct extent_info *last_ei IF_LINT ( = scan->ext_info);
 
-  for (i = 0; i < scan->ei_count; i++)
+  while (true)
     {
-      assert (fm_extents[i].fe_logical <= OFF_T_MAX - fm_extents[i].fe_length);
+      union { struct fiemap f; char c[4096]; } fiemap_buf;
+      struct fiemap *fiemap = &fiemap_buf.f;
+      struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
+      enum { count = (sizeof fiemap_buf - sizeof *fiemap)/sizeof *fm_extents };
+      verify (count > 1);
+
+      /* This is required at least to initialize fiemap->fm_start,
+         but also serves (in mid 2010) to appease valgrind, which
+         appears not to know the semantics of the FIEMAP ioctl. */
+      memset (&fiemap_buf, 0, sizeof fiemap_buf);
+
+      fiemap->fm_start = scan->scan_start;
+      fiemap->fm_flags = scan->fm_flags;
+      fiemap->fm_extent_count = count;
+      fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
+
+      /* Fall back to the standard copy if call ioctl(2) failed for the
+         first time.  */
+      if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0)
+        {
+          if (scan->scan_start == 0)
+            scan->initial_scan_failed = true;
+          return false;
+        }
 
-      if (si && last_ei->ext_flags ==
-          (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST)
-          && (last_ei->ext_logical + last_ei->ext_length
-              == fm_extents[i].fe_logical))
+      /* If 0 extents are returned, then no more scans are needed.  */
+      if (fiemap->fm_mapped_extents == 0)
         {
-          /* Merge previous with last.  */
-          last_ei->ext_length += fm_extents[i].fe_length;
-          /* Copy flags in case different.  */
-          last_ei->ext_flags = fm_extents[i].fe_flags;
+          scan->hit_final_extent = true;
+          return scan->scan_start != 0;
         }
-      else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical)
-               || (si && last_ei->ext_logical + last_ei->ext_length >
-                   fm_extents[i].fe_logical))
+
+      assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents);
+      scan->ei_count += fiemap->fm_mapped_extents;
+      scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count,
+                                  sizeof (struct extent_info));
+
+      unsigned int i = 0;
+      for (i = 0; i < fiemap->fm_mapped_extents; i++)
         {
-          /* BTRFS before 2.6.38 could return overlapping extents
-             for sparse files.  We adjust the returned extents
-             rather than failing, as otherwise it would be inefficient
-             to detect this on the initial scan.  */
-          uint64_t new_logical;
-          uint64_t length_adjust;
-          if (si == 0)
-            new_logical = scan->scan_start;
-          else
+          assert (fm_extents[i].fe_logical <=
+                  OFF_T_MAX - fm_extents[i].fe_length);
+
+          if (si && last_ei->ext_flags ==
+              (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST)
+              && (last_ei->ext_logical + last_ei->ext_length
+                  == fm_extents[i].fe_logical))
             {
-              /* We could return here if scan->scan_start == 0
-                 but don't so as to minimize special cases.  */
-              new_logical = last_ei->ext_logical + last_ei->ext_length;
+              /* Merge previous with last.  */
+              last_ei->ext_length += fm_extents[i].fe_length;
+              /* Copy flags in case different.  */
+              last_ei->ext_flags = fm_extents[i].fe_flags;
             }
-          length_adjust = new_logical - fm_extents[i].fe_logical;
-          /* If an extent is contained within the previous one, just fail.  */
-          if (length_adjust < fm_extents[i].fe_length)
+          else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical)
+                   || (si && last_ei->ext_logical + last_ei->ext_length >
+                       fm_extents[i].fe_logical))
             {
-              if (scan->scan_start == 0)
-                scan->initial_scan_failed = true;
-              return false;
+              /* BTRFS before 2.6.38 could return overlapping extents
+                 for sparse files.  We adjust the returned extents
+                 rather than failing, as otherwise it would be inefficient
+                 to detect this on the initial scan.  */
+              uint64_t new_logical;
+              uint64_t length_adjust;
+              if (si == 0)
+                new_logical = scan->scan_start;
+              else
+                {
+                  /* We could return here if scan->scan_start == 0
+                     but don't so as to minimize special cases.  */
+                  new_logical = last_ei->ext_logical + last_ei->ext_length;
+                }
+              length_adjust = new_logical - fm_extents[i].fe_logical;
+              /* If an extent is contained within the previous one, fail.  */
+              if (length_adjust < fm_extents[i].fe_length)
+                {
+                  if (scan->scan_start == 0)
+                    scan->initial_scan_failed = true;
+                  return false;
+                }
+              fm_extents[i].fe_logical = new_logical;
+              fm_extents[i].fe_length -= length_adjust;
+              /* Process the adjusted extent again.  */
+              i--;
+              continue;
+            }
+          else
+            {
+              last_ei = scan->ext_info + si;
+              last_ei->ext_logical = fm_extents[i].fe_logical;
+              last_ei->ext_length = fm_extents[i].fe_length;
+              last_ei->ext_flags = fm_extents[i].fe_flags;
+              si++;
             }
-          fm_extents[i].fe_logical = new_logical;
-          fm_extents[i].fe_length -= length_adjust;
-          /* Process the adjusted extent again.  */
-          i--;
-          continue;
-        }
-      else
-        {
-          last_ei = scan->ext_info + si;
-          last_ei->ext_logical = fm_extents[i].fe_logical;
-          last_ei->ext_length = fm_extents[i].fe_length;
-          last_ei->ext_flags = fm_extents[i].fe_flags;
-          si++;
         }
-    }
 
-  /* We don't bother reallocating.  We should though if we change
-     to looping through all extents, within this function.  */
-  scan->ei_count = si;
+      if (last_ei->ext_flags & FIEMAP_EXTENT_LAST)
+        scan->hit_final_extent = true;
+
+      /* If we have enough extents, discard the last as it might
+         be merged with one from the next scan.  */
+      if (si > count && !scan->hit_final_extent)
+        last_ei = scan->ext_info + --si - 1;
+
+      /* We don't bother reallocating any trailing slots.  */
+      scan->ei_count = si;
 
-  if (last_ei->ext_flags & FIEMAP_EXTENT_LAST)
-    scan->hit_final_extent = true;
-  else
-    scan->scan_start = last_ei->ext_logical + last_ei->ext_length;
+      if (scan->hit_final_extent)
+        break;
+      else
+        scan->scan_start = last_ei->ext_logical + last_ei->ext_length;
+
+      if (si >= count)
+        break;
+    }
 
   return true;
 }
index 8728515..5b4ded5 100644 (file)
@@ -66,6 +66,8 @@ static inline void
 extent_scan_free (struct extent_scan *scan)
 {
   free (scan->ext_info);
+  scan->ext_info = NULL;
+  scan->ei_count = 0;
 }
 
 #endif /* EXTENT_SCAN_H */