debuginfod: store only canonicalized sref pathnames in database
author Frank Ch. Eigler <fche@redhat.com>
Fri, 18 Sep 2020 17:03:01 +0000 (13:03 -0400)
committer Frank Ch. Eigler <fche@redhat.com>
Fri, 18 Sep 2020 17:13:02 +0000 (13:13 -0400)
Since PR25548, debuginfod answers /buildid/HEX/source/PATH queries
with both canonicalized and raw PATHs.  It canonicalizes incoming
paths, but until now it also stored the raw paths in the database.
This near-duplicate storage is not needed, since queries always find
the canonicalized version too, so stop storing the raw paths.  This
saves database space/time.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
debuginfod/ChangeLog
debuginfod/debuginfod.cxx

index a8e0ac5..8cb8996 100644 (file)
@@ -1,3 +1,8 @@
+2020-09-18  Frank Ch. Eigler <fche@redhat.com>
+
+       * debuginfod.cxx (scan_source_file, archive_classify): Store only
+       canonicalized file names in sdef & sref records in the database.
+
 2020-09-08  Mark Wielaard  <mark@klomp.org>
 
        * Makefile.am (BUILD_STATIC): Include libcurl_LIBS in libdebuginfod
index 5621030..140b778 100644 (file)
@@ -1505,6 +1505,8 @@ handle_buildid (MHD_Connection* conn,
                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
       pp->reset();
       pp->bind(1, buildid);
+      // NB: we don't store the non-canonicalized path names any more, but old databases
+      // might have them (and no canon ones), so we keep searching for both.
       pp->bind(2, suffix);
       pp->bind(3, canon_pathname(suffix));
     }
@@ -2254,41 +2256,27 @@ scan_source_file (const string& rps, const stat_t& st,
             .bind(1, srps)
             .step_ok_done();
 
-          // register the dwarfsrc name in the interning table too
+          // PR25548: store canonicalized dwarfsrc path
+          string dwarfsrc_canon = canon_pathname (dwarfsrc);
+          if (dwarfsrc_canon != dwarfsrc)
+            {
+              if (verbose > 3)
+                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+            }
+          
           ps_upsert_files
             .reset()
-            .bind(1, dwarfsrc)
+            .bind(1, dwarfsrc_canon)
             .step_ok_done();
 
           ps_upsert_s
             .reset()
             .bind(1, buildid)
-            .bind(2, dwarfsrc)
+            .bind(2, dwarfsrc_canon)
             .bind(3, srps)
             .bind(4, sfs.st_mtime)
             .step_ok_done();
 
-          // PR25548: also store canonicalized source path
-          string dwarfsrc_canon = canon_pathname (dwarfsrc);
-          if (dwarfsrc_canon != dwarfsrc)
-            {
-              if (verbose > 3)
-                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
-
-              ps_upsert_files
-                .reset()
-                .bind(1, dwarfsrc_canon)
-                .step_ok_done();
-
-              ps_upsert_s
-                .reset()
-                .bind(1, buildid)
-                .bind(2, dwarfsrc_canon)
-                .bind(3, srps)
-                .bind(4, sfs.st_mtime)
-                .step_ok_done();
-            }
-
           inc_metric("found_sourcerefs_total","source","files");
         }
     }
@@ -2439,37 +2427,26 @@ archive_classify (const string& rps, string& archive_extension,
                       continue;
                     }
 
+                  // PR25548: store canonicalized source path
+                  const string& dwarfsrc = s;
+                  string dwarfsrc_canon = canon_pathname (dwarfsrc);
+                  if (dwarfsrc_canon != dwarfsrc)
+                    {
+                      if (verbose > 3)
+                        obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+                    }
+                  
                   ps_upsert_files
                     .reset()
-                    .bind(1, s)
+                    .bind(1, dwarfsrc_canon)
                     .step_ok_done();
 
                   ps_upsert_sref
                     .reset()
                     .bind(1, buildid)
-                    .bind(2, s)
+                    .bind(2, dwarfsrc_canon)
                     .step_ok_done();
 
-                  // PR25548: also store canonicalized source path
-                  const string& dwarfsrc = s;
-                  string dwarfsrc_canon = canon_pathname (dwarfsrc);
-                  if (dwarfsrc_canon != dwarfsrc)
-                    {
-                      if (verbose > 3)
-                        obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
-
-                      ps_upsert_files
-                        .reset()
-                        .bind(1, dwarfsrc_canon)
-                        .step_ok_done();
-
-                      ps_upsert_sref
-                        .reset()
-                        .bind(1, buildid)
-                        .bind(2, dwarfsrc_canon)
-                        .step_ok_done();
-                    }
-
                   fts_sref ++;
                 }
             }