Brutal speed optimization - Get rid of regexps, use hand-crafted expressions with...
author Tomas Mlcoch <tmlcoch@redhat.com>
Tue, 17 Jan 2012 11:20:35 +0000 (12:20 +0100)
committer Tomas Mlcoch <tmlcoch@redhat.com>
Tue, 17 Jan 2012 11:20:35 +0000 (12:20 +0100)
misc.c
misc.h
parsehdr.c
xml_dump.c

diff --git a/misc.c b/misc.c
index b994465..77f77dd 100644 (file)
--- a/misc.c
+++ b/misc.c
@@ -123,55 +123,31 @@ struct VersionStruct string_to_version(const char *string, GStringChunk *chunk)
 
 
 
-struct PrimaryReStruct new_optimalized_primary_files_re()
+int is_primary(const char *filename)
 {
-    struct PrimaryReStruct res;
-    GRegexMatchFlags compile_flags = G_REGEX_OPTIMIZE|G_REGEX_MATCH_ANCHORED;
-    res.pri_re_1 = g_regex_new(".*bin/.*", compile_flags, 0, NULL);
-    res.pri_re_2 = g_regex_new("/etc/.*", compile_flags, 0, NULL);
-    res.pri_re_3 = g_regex_new("/usr/lib/sendmail$", compile_flags, 0, NULL);
-    return res;
-}
-
-void free_optimalized_primary_files_re(struct PrimaryReStruct in) {
-    g_regex_unref(in.pri_re_1);
-    g_regex_unref(in.pri_re_2);
-    g_regex_unref(in.pri_re_3);
-}
-
-int is_primary(const char *filename, struct PrimaryReStruct *user_re)
-{
-
-    GRegex *pri_re_1 = NULL;
-    GRegex *pri_re_2 = NULL;
-    GRegex *pri_re_3 = NULL;
-
-    if (!user_re) {
-        GRegexMatchFlags compile_flags = G_REGEX_MATCH_ANCHORED;
-        pri_re_1 = g_regex_new(".*bin/.*", compile_flags, 0, NULL);
-        pri_re_2 = g_regex_new("/etc/.*", compile_flags, 0, NULL);
-        pri_re_3 = g_regex_new("/usr/lib/sendmail$", compile_flags, 0, NULL);
-    } else {
-        pri_re_1 = user_re->pri_re_1;
-        pri_re_2 = user_re->pri_re_2;
-        pri_re_3 = user_re->pri_re_3;
+    if (!strncmp(filename, "/bin/", 5)) {
+        return 1;
     }
 
-    int ret = 0;
-    if (g_regex_match(pri_re_1, filename, 0, NULL)
-        || g_regex_match(pri_re_2, filename, 0, NULL)
-        || g_regex_match(pri_re_3, filename, 0, NULL))
-    {
-        ret = 1;
+    if (!strncmp(filename, "/sbin/", 6)) {
+        return 1;
     }
 
-    if (!user_re) {
-        g_regex_unref(pri_re_1);
-        g_regex_unref(pri_re_2);
-        g_regex_unref(pri_re_3);
+    if (!strncmp(filename, "/usr/", 5)) {
+        if (!strncmp(filename+5, "bin/", 4)) {
+            return 1;
+        }
+
+        if (!strncmp(filename+5, "sbin/", 5)) {
+            return 1;
+        }
+
+        if (!strcmp(filename+5, "lib/sendmail")) {
+            return 1;
+        }
     }
 
-    return ret;
+    return 0;
 }
 
 
diff --git a/misc.h b/misc.h
index 9dced4c..9f50ebb 100644 (file)
--- a/misc.h
+++ b/misc.h
@@ -18,14 +18,7 @@ struct VersionStruct {
  */
 struct VersionStruct string_to_version(const char *string, GStringChunk *chunk);
 
-struct PrimaryReStruct {
-    GRegex *pri_re_1;
-    GRegex *pri_re_2;
-    GRegex *pri_re_3;
-};
-struct PrimaryReStruct new_optimalized_primary_files_re();
-void free_optimalized_primary_files_re(struct PrimaryReStruct in);
-int is_primary(const char *filename, struct PrimaryReStruct *user_re);
+int is_primary(const char *filename);
 char *compute_file_checksum(const char *filename, ChecksumType type);
 
 struct HeaderRangeStruct {
diff --git a/parsehdr.c b/parsehdr.c
index a9f0ac4..48cd0a9 100644 (file)
--- a/parsehdr.c
+++ b/parsehdr.c
@@ -94,8 +94,6 @@ Package *parse_header(Header hdr, gint64 mtime, gint64 size, const char *checksu
             packagefile->name = rpmtdGetString(filenames);
             packagefile->path = rpmtdGetString(dirnames);
 
-            printf("%s | %s\n", packagefile->path, packagefile->name);
-
             // TODO:
             // na zaklade toho, ze se zmenila struktura package...
             // upravit hashovaci tabulku - OK
@@ -169,9 +167,6 @@ Package *parse_header(Header hdr, gint64 mtime, gint64 size, const char *checksu
     // Hashtable with already processed files from requires
     GHashTable *ap_hashtable = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, free);
 
-    // Get oprimalized regexps for primary filename matching
-    struct PrimaryReStruct re = new_optimalized_primary_files_re();
-
     int pcor_type;
     for (pcor_type=0; pcor_type <= REQUIRES; pcor_type++) {
         if (headerGet(hdr, file_tags[pcor_type], filenames, flags) &&
@@ -200,7 +195,7 @@ Package *parse_header(Header hdr, gint64 mtime, gint64 size, const char *checksu
 
                     // Skip package primary files
                     if (g_hash_table_lookup_extended(filenames_hashtable, filename, NULL, NULL)) {
-                        if (is_primary(filename, &re)) {
+                        if (is_primary(filename)) {
                             continue;
                         }
                     }
@@ -276,8 +271,6 @@ Package *parse_header(Header hdr, gint64 mtime, gint64 size, const char *checksu
     pkg->obsoletes = g_slist_reverse (pkg->obsoletes);
     pkg->requires  = g_slist_reverse (pkg->requires);
 
-    free_optimalized_primary_files_re(re);
-
     g_hash_table_remove_all(filenames_hashtable);
     g_hash_table_remove_all(provided_hashtable);
     g_hash_table_remove_all(ap_hashtable);
diff --git a/xml_dump.c b/xml_dump.c
index 2177e6e..2d913aa 100644 (file)
--- a/xml_dump.c
+++ b/xml_dump.c
@@ -76,20 +76,26 @@ dump_files(xmlTextWriterPtr writer, Package *package, int primary,
         return;
     }
 
-    struct PrimaryReStruct re;
-    if (primary) {
-        // Get optimalized regexps for primary filenames matching
-        re = new_optimalized_primary_files_re();
-    }
 
     GSList *element = NULL;
     for(element = package->files; element; element=element->next) {
         PackageFile *entry = (PackageFile*) element->data;
 
+        // File without name or path is suspicious => Skip it
+        if (!(entry->path)) {
+            continue;
+        }
+
+        if (!(entry->name)) {
+            continue;
+        }
+
         // String concatenation (path + basename)
         char fullname_buffer[NAMEBUFF_LEN];
         int path_len = strlen(entry->path);
         int name_len = strlen(entry->name);
+
+
         if ( (path_len + name_len) > (NAMEBUFF_LEN - 1) ) {
             printf("XML FILE DUMP - ERROR: Pathname + basename is too long: %s%s\n", entry->path, entry->name);
             if (path_len >= NAMEBUFF_LEN) {
@@ -102,11 +108,15 @@ dump_files(xmlTextWriterPtr writer, Package *package, int primary,
         fullname_buffer[path_len+name_len] = '\0';
 
 
-        if (primary && !is_primary(fullname_buffer, &re)) {
+        if (primary && !is_primary(fullname_buffer)) {
             continue;
         }
 
-
+/*
+        if (primary && !is_primary(entry->path, &re)) {
+            continue;
+        }
+*/
         // ***********************************
         // Element: file
         // ************************************
@@ -130,7 +140,8 @@ dump_files(xmlTextWriterPtr writer, Package *package, int primary,
         }
 
         // Write text (file path)
-        tmp = ConvertInput(fullname_buffer, handler);
+        //tmp = ConvertInput(fullname_buffer, handler);
+        tmp = ConvertInput(entry->name, handler);
         if (tmp) {
             xmlTextWriterWriteString(writer, BAD_CAST tmp);
             if (handler && tmp != NULL) xmlFree(tmp);
@@ -143,9 +154,5 @@ dump_files(xmlTextWriterPtr writer, Package *package, int primary,
             return;
         }
     }
-
-    if (primary) {
-        free_optimalized_primary_files_re(re);
-    }
 }