Separate report submission into its own thread
authorTim Pepper <timothy.c.pepper@linux.intel.com>
Wed, 3 Oct 2012 18:09:31 +0000 (11:09 -0700)
committerTim Pepper <timothy.c.pepper@linux.intel.com>
Wed, 3 Oct 2012 18:50:30 +0000 (11:50 -0700)
The primary motivation of this commit is separated submit.c's submission
queue handling into a separate thread which sleeps on a condition variable
that is asserted in the inotify and timer event loops.  Includes simpler
data structures/locking for the submission queue.

Associated refactoring includes:
- pull inotification out into its own file to keep corewatcher.c simpler
  (subsequent event threads should be similarly separated)
- add many stderr fprintf's to allow better tracking of where the code is
  during runtime when in non-daemon mode
- log both failed and successful server submission counts
- various bits of added error checking/handling
- better document internally the expectations on core file naming, how
  the file name strings are transformed and stored in the internal
  lists/arrays/hashtables, and an audit to insure oops->filename is
  consistently populated and used
- fixed a few possible memory leaks
- updated some variable and function names to be more self documenting,
  added inline documentation in places
- added design documentation covering the basic states, code that runs for
  transitions between states, global state data structures and the
  associated locks

Signed-off-by: Tim Pepper <timothy.c.pepper@linux.intel.com>
configure.ac
src/Makefile.am
src/coredump.c
src/corewatcher.c
src/corewatcher.h
src/inotification.c [new file with mode: 0644]
src/submit.c

index bfa03a6..96c1700 100644 (file)
@@ -1,5 +1,5 @@
 AC_PREREQ([2.68])
-AC_INIT([nitra-corewatcher],[0.9.4],[timothy.c.pepper@linux.intel.com])
+AC_INIT([nitra-corewatcher],[0.9.5],[timothy.c.pepper@linux.intel.com])
 AM_INIT_AUTOMAKE([foreign -Wall -Werror])
 AC_CONFIG_FILES([Makefile src/Makefile])
 AC_CONFIG_SRCDIR([src/corewatcher.c])
index 602695d..3a449e4 100644 (file)
@@ -11,6 +11,7 @@ corewatcher_SOURCES = \
        configfile.c \
        coredump.c \
        corewatcher.c \
+       inotification.c \
        find_file.c \
        submit.c
 
index 76d20de..2990cc7 100644 (file)
@@ -22,6 +22,7 @@
  * Authors:
  *     Arjan van de Ven <arjan@linux.intel.com>
  *     William Douglas <william.douglas@intel.com>
+ *     Tim Pepper <timothy.c.pepper@linux.intel.com>
  */
 
 #include <unistd.h>
@@ -47,14 +48,6 @@ int sig = 0;
 const char *core_folder = "/var/lib/corewatcher/";
 const char *processed_folder = "/var/lib/corewatcher/processed/";
 
-/*
- * the application must initialize the GMutex's
- * core_status.processing_mtx, core_status.queued_mtx,
- * processing_queue_mtx and gdb_mtx
- * before calling into this file's scan_corefolders()
- * (also since that calls submit_queue() there are dependencies
- *  there which need taken care of too)
- */
 /* Always pick up the processing_mtx and then the
    processing_queue_mtx, reverse for setting down */
 /* Always pick up the gdb_mtx and then the
@@ -330,6 +323,8 @@ static struct oops *extract_core(char *fullpath, char *appfile)
        int parsing_maps = 0;
        struct stat stat_buf;
 
+       fprintf(stderr, "+ extract_core() called for %s\n", fullpath);
+
        if (asprintf(&command, "LANG=C gdb --batch -f %s %s -x /etc/corewatcher/gdb.command 2> /dev/null", appfile, fullpath) == -1)
                return NULL;
 
@@ -358,6 +353,8 @@ static struct oops *extract_core(char *fullpath, char *appfile)
                h1 = strdup("Unknown");
 
        file = popen(command, "r");
+       if (!file)
+               fprintf(stderr, "+ gdb failed for %s\n", fullpath);
 
        while (file && !feof(file)) {
                size_t size = 0;
@@ -374,6 +371,9 @@ static struct oops *extract_core(char *fullpath, char *appfile)
                        parsing_maps = 1;
                        /*continue;*/
                }
+               if (strncmp(line, "No shared libraries loaded at this time.", 40) == 0) {
+                       break;
+               }
 
                if (!parsing_maps) { /* parsing backtrace */
                        c2 = c1;
@@ -414,6 +414,12 @@ fixup:                     /* gdb outputs some 0x1a's which break XML */
                pclose(file);
        free(command);
 
+       if (!h1)
+               h1 = strdup("Unknown");
+
+       if (!m1)
+               m1 = strdup("        Unknown");
+
        ret = asprintf(&text,
                       "%s"
                       "backtrace: |\n"
@@ -440,52 +446,49 @@ fixup:                    /* gdb outputs some 0x1a's which break XML */
 }
 
 /*
- * filename is of the form core_XXXX[.blah]
- * we need to get the pid out as we want
- * output of the form XXXX[.ext]
+ * input filename has the form: core_$APP_$TIMESTAMP[.$PID]
+ * output filename has form of: $APP_$TIMESTAMP[.ext]
  */
 char *get_core_filename(char *filename, char *ext)
 {
-       char *pid = NULL, *c = NULL, *s = NULL, *detail_filename = NULL;
+       char *name = NULL, *dotpid = NULL, *stamp = NULL, *detail_filename = NULL;
 
        if (!filename)
                return NULL;
 
-       if (!(s = strstr(filename, "_")))
+       if (!(stamp = strstr(filename, "_")))
                return NULL;
 
-       if (!(++s))
-               return NULL;
-       /* causes valgrind whining because we copy from middle of a string */
-       if (!(pid = strdup(s)))
+       if (!(++stamp))
                return NULL;
 
-       c = strstr(pid, ".");
+       if (!(name = strdup(stamp)))
+               return NULL;
 
-       if (c)
-               *c = '\0';
+       /* strip trailing .PID if present */
+       dotpid = strstr(name, ".");
+       if (dotpid)
+               *dotpid = '\0';
 
        if (ext) {
-               /* causes valgrind whining because we copy from middle of a string */
-               if ((asprintf(&detail_filename, "%s%s.%s", processed_folder, pid, ext)) == -1) {
-                       free(pid);
+               if ((asprintf(&detail_filename, "%s%s.%s", processed_folder, name, ext)) == -1) {
+                       free(name);
                        return NULL;
                }
        } else {
-               /* causes valgrind whining because we copy from middle of a string */
-               if ((asprintf(&detail_filename, "%s%s", processed_folder, pid)) == -1) {
-                       free(pid);
+               if ((asprintf(&detail_filename, "%s%s", processed_folder, name)) == -1) {
+                       free(name);
                        return NULL;
                }
        }
+       free(name);
 
-       free(pid);
        return detail_filename;
 }
 
 /*
  * Write the backtrace from the core file into a text
- * file named after the pid
+ * file named as $APP_$TIMESTAMP.txt
  */
 static void write_core_detail_file(char *filename, char *text)
 {
@@ -517,37 +520,49 @@ static void write_core_detail_file(char *filename, char *text)
  */
 static void remove_from_processing_queue(void)
 {
+       fprintf(stderr, "+ removing processing_queue head\n");
        free(processing_queue[pq_head]);
        processing_queue[pq_head++] = NULL;
 
-       if (pq_head == MAX_PROCESSING_OOPS)
+       if (pq_head == MAX_PROCESSING_OOPS) {
+               fprintf(stderr, "+ wrapping processing_queue head to 0 (bugs here?)\n");
                pq_head = 0;
+       }
 }
 
 /*
- * Removes file from processing_oops hash based on pid.
- * Extracts pid from the fullpath such that
- * /home/user/core.pid will be tranformed into just the pid.
+ * Removes file from processing_oops hash based on core name,
+ * extracting that core name from a fullpath such as
+ * "/${processed_folder}/core_$APP_$TIMESTAMP.$PID"
+ * in order to get just "$APP_$TIMESTAMP"
  *
  * Expects the lock on the given hash to be held.
  */
-void remove_pid_from_hash(char *fullpath, GHashTable *ht)
+void remove_name_from_hash(char *fullpath, GHashTable *ht)
 {
-       char *c1 = NULL, *c2 = NULL;
+       char *name = NULL, *corename = NULL, *shortname = NULL;
 
-       if (!(c1 = strip_directories(fullpath)))
+       if (!(name = strip_directories(fullpath)))
                return;
 
-       if (!(c2 = get_core_filename(c1, NULL))) {
-               free(c1);
+       if (!(corename = get_core_filename(name, NULL))) {
+               free(name);
                return;
        }
+       free(name);
 
-       free(c1);
+       if (!(shortname = strip_directories(corename))) {
+               free(corename);
+               return;
+       }
+       free(corename);
 
-       g_hash_table_remove(ht, c2);
+       if (g_hash_table_remove(ht, shortname))
+               fprintf(stderr, "+ core %s removed from processing_oops hash table\n", shortname);
+       else
+               fprintf(stderr, "+ core %s not in processing_oops hash table\n", shortname);
 
-       free(c2);
+       free(shortname);
 }
 
 /*
@@ -581,13 +596,12 @@ static struct oops *process_common(char *fullpath)
 
 /*
  * Processes .to-process core files.
- * Also creates the pid.txt file and adds
+ * Also creates the $APP_$TIMESTAMP.txt file and adds
  * the oops struct to the submit queue
  *
  * Picks up and sets down the gdb_mtx and
  * picks up and sets down the processing_queue_mtx.
  * (held at the same time in that order)
- * Also will pick up and sets down the queued_mtx.
  */
 static void *process_new(void __unused *vp)
 {
@@ -598,28 +612,40 @@ static void *process_new(void __unused *vp)
        g_mutex_lock(&gdb_mtx);
        g_mutex_lock(&processing_queue_mtx);
 
+       fprintf(stderr, "+ Entered process_new()\n");
+
        if (!(fullpath = processing_queue[pq_head])) {
-               /* something went quite wrong */
+               fprintf(stderr, "+ processing_queue corruption?\n");
                g_mutex_unlock(&processing_queue_mtx);
                g_mutex_unlock(&gdb_mtx);
                g_mutex_unlock(&core_status.processing_mtx);
                return NULL;
        }
 
-       if (!(corefn = strip_directories(fullpath)))
+       if (!(corefn = strip_directories(fullpath))) {
+               fprintf(stderr, "+ No corefile? (%s)\n", fullpath);
                goto clean_process_new;
+       }
 
-       if (!(procfn = replace_name(fullpath, ".to-process", ".processed")))
+       if (!(procfn = replace_name(fullpath, ".to-process", ".processed"))) {
+               fprintf(stderr, "+ Problems with filename manipulation for %s\n", corefn);
                goto clean_process_new;
+       }
 
-       if (!(oops = process_common(fullpath)))
+       if (!(oops = process_common(fullpath))) {
+               fprintf(stderr, "+ Problems processing %s\n", procfn);
                goto clean_process_new;
+       }
 
-       if (!(oops->detail_filename = get_core_filename(corefn, "txt")))
+       if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) {
+               fprintf(stderr, "+ Problems with filename manipulation for %s\n", procfn);
                goto clean_process_new;
+       }
 
-       if (rename(fullpath, procfn))
+       if (rename(fullpath, procfn)) {
+               fprintf(stderr, "+ Unable to move %s to %s\n", fullpath, procfn);
                goto clean_process_new;
+       }
 
        free(oops->filename);
        oops->filename = procfn;
@@ -632,20 +658,18 @@ static void *process_new(void __unused *vp)
 
        write_core_detail_file(corefn, oops->text);
 
-       g_mutex_lock(&core_status.queued_mtx);
        queue_backtrace(oops);
-       g_mutex_unlock(&core_status.queued_mtx);
 
-       /* don't need to free procfn because was set to oops->filename and that gets free'd */
+       fprintf(stderr, "+ Leaving process_new() with %s queued\n", oops->detail_filename);
+
+       /* mustn't free procfn because it was hung on oops->filename */
        free(corefn);
-       FREE_OOPS(oops);
        return NULL;
 
 clean_process_new:
-       remove_pid_from_hash(fullpath, core_status.processing_oops);
+       remove_name_from_hash(fullpath, core_status.processing_oops);
        remove_from_processing_queue();
        free(procfn);
-       procfn = NULL; /* don't know if oops->filename == procfn so be safe */
        free(corefn);
        FREE_OOPS(oops);
        g_mutex_unlock(&processing_queue_mtx);
@@ -660,7 +684,6 @@ clean_process_new:
  * Picks up and sets down the gdb_mtx.
  * Picks up and sets down the processing_queue_mtx.
  * (held at the same time in that order)
- * Also will pick up and sets down the queued_mtx.
  */
 static void *process_old(void __unused *vp)
 {
@@ -670,37 +693,45 @@ static void *process_old(void __unused *vp)
        g_mutex_lock(&gdb_mtx);
        g_mutex_lock(&processing_queue_mtx);
 
+       fprintf(stderr, "+ Entered process_old()\n");
+
        if (!(fullpath = processing_queue[pq_head])) {
-               /* something went quite wrong */
+               fprintf(stderr, "+ processing_queue corruption?\n");
                g_mutex_unlock(&processing_queue_mtx);
                g_mutex_unlock(&gdb_mtx);
                return NULL;
        }
+       fprintf(stderr, "+ Reprocessing %s\n", fullpath);
 
-       if (!(corefn = strip_directories(fullpath)))
+       if (!(corefn = strip_directories(fullpath))) {
+               fprintf(stderr, "+ No corefile? (%s)\n", fullpath);
                goto clean_process_old;
+       }
 
-       if (!(oops = process_common(fullpath)))
+       if (!(oops = process_common(fullpath))) {
+               fprintf(stderr, "+ Problems processing %s\n", corefn);
                goto clean_process_old;
+       }
 
-       if (!(oops->detail_filename = get_core_filename(corefn, "txt")))
+       if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) {
+               fprintf(stderr, "+ Problems with filename manipulation for %s\n", corefn);
                goto clean_process_old;
+       }
 
        remove_from_processing_queue();
 
        g_mutex_unlock(&processing_queue_mtx);
        g_mutex_unlock(&gdb_mtx);
 
-       g_mutex_lock(&core_status.queued_mtx);
        queue_backtrace(oops);
-       g_mutex_unlock(&core_status.queued_mtx);
+
+       fprintf(stderr, "+ Leaving process_old() with %s queued\n", oops->detail_filename);
 
        free(corefn);
-       FREE_OOPS(oops);
        return NULL;
 
 clean_process_old:
-       remove_pid_from_hash(fullpath, core_status.processing_oops);
+       remove_name_from_hash(fullpath, core_status.processing_oops);
        remove_from_processing_queue();
        free(corefn);
        FREE_OOPS(oops);
@@ -710,7 +741,7 @@ clean_process_old:
 }
 
 /*
- * Adds corefile (based on pid) to the processing_oops
+ * Adds corefile (based on name) to the processing_oops
  * hash table if it is not already there, then
  * tries to add to the processing_queue.
  *
@@ -739,6 +770,7 @@ static int add_to_processing(char *fullpath)
        g_mutex_lock(&core_status.processing_mtx);
        if (g_hash_table_lookup(core_status.processing_oops, c2)) {
                g_mutex_unlock(&core_status.processing_mtx);
+               fprintf(stderr, "+ ...name %s already in processing_oops hash table\n", c2);
                goto clean_add_to_processing;
        }
 
@@ -746,6 +778,7 @@ static int add_to_processing(char *fullpath)
        if (processing_queue[pq_tail]) {
                g_mutex_unlock(&processing_queue_mtx);
                g_mutex_unlock(&core_status.processing_mtx);
+               fprintf(stderr, "+ ...processing_queue full\n");
                goto clean_add_to_processing;
        }
 
@@ -811,7 +844,7 @@ static void scan_core_folder(void __unused *unused)
        dir = opendir(core_folder);
        if (!dir)
                return;
-       fprintf(stderr, "+ scanning %s...\n", core_folder);
+       fprintf(stderr, "+ Begin scanning %s...\n", core_folder);
        while(1) {
                free(fullpath);
                fullpath = NULL;
@@ -824,7 +857,7 @@ static void scan_core_folder(void __unused *unused)
                if (strncmp(entry->d_name, "core_", 5))
                        continue;
 
-               /* matched core_#### where #### is the pid of the process */
+               /* matched core_#### */
                r = asprintf(&fullpath, "%s%s", core_folder, entry->d_name);
                if (r == -1) {
                        fullpath = NULL;
@@ -848,6 +881,7 @@ static void scan_core_folder(void __unused *unused)
                }
        }
        closedir(dir);
+       fprintf(stderr, "+ End scanning %s...\n", core_folder);
 }
 
 static void scan_processed_folder(void __unused *unused)
@@ -861,7 +895,7 @@ static void scan_processed_folder(void __unused *unused)
        dir = opendir(processed_folder);
        if (!dir)
                return;
-       fprintf(stderr, "+ scanning %s...\n", processed_folder);
+       fprintf(stderr, "+ Begin scanning %s...\n", processed_folder);
        while(1) {
                free(fullpath);
                fullpath = NULL;
@@ -889,6 +923,7 @@ static void scan_processed_folder(void __unused *unused)
                        reprocess_corefile(fullpath);
        }
        closedir(dir);
+       fprintf(stderr, "+ End scanning %s...\n", processed_folder);
 }
 
 int scan_corefolders(void __unused *unused)
@@ -896,7 +931,5 @@ int scan_corefolders(void __unused *unused)
        scan_core_folder(NULL);
        scan_processed_folder(NULL);
 
-       submit_queue();
-
        return 1;
 }
index d7b9fa8..aed3de0 100644 (file)
@@ -22,6 +22,7 @@
  * Authors:
  *     Arjan van de Ven <arjan@linux.intel.com>
  *     William Douglas <william.douglas@intel.com>
+ *     Tim Pepper <timothy.c.pepper@linux.intel.com>
  */
 
 #include <unistd.h>
 
 #include "corewatcher.h"
 
-/*
- * rather than malloc() on each inotify event, preallocate a decent chunk
- * of memory so multiple events can be read in one go, trading a little
- * extra memory for less runtime overhead if/when multiple crashes happen
- * in short order.
- */
-#include <sys/inotify.h>
-#define BUF_LEN 2048
-
 static struct option opts[] = {
        { "nodaemon", 0, NULL, 'n' },
        { "debug",    0, NULL, 'd' },
@@ -81,89 +73,6 @@ static void usage(const char *name)
        fprintf(stderr, "  -h, --help      Display this help message\n");
 }
 
-gboolean inotify_source_prepare(__unused GSource *source, gint *timeout_)
-{
-       *timeout_ = -1;
-       fprintf(stderr, "+ inotification prepare\n");
-       return FALSE;
-}
-
-gboolean inotify_source_check(__unused GSource *source)
-{
-       int fd, wd;
-       char buffer[BUF_LEN];
-       size_t len;
-
-       fprintf(stderr, "+ inotification check\n");
-       /* inotification of crashes */
-       fd = inotify_init();
-       if (fd < 0) {
-               fprintf(stderr, "corewatcher inotify init failed.. exiting\n");
-               return EXIT_FAILURE;
-       }
-       wd = inotify_add_watch(fd, core_folder, IN_CLOSE_WRITE);
-       if (wd < 0) {
-               fprintf(stderr, "corewatcher inotify add failed.. exiting\n");
-               return EXIT_FAILURE;
-       }
-       fprintf(stderr, "+ awaiting inotification...\n");
-       len = read(fd, buffer, BUF_LEN);
-       if (len <=0 ) {
-               fprintf(stderr, "corewatcher inotify read failed.. exiting\n");
-               return FALSE;
-       }
-       fprintf(stderr, "+ inotification received!\n");
-       /* for now simply ignore the actual crash files we've been notified of
-        * and let our callback be dispatched to go look for any/all crash
-        * files */
-
-       /* slight delay to minimize storms of notifications (the inotify
-        * read() can return a batch of notifications*/
-       sleep(5);
-       return TRUE;
-}
-
-gboolean inotify_source_dispatch(__unused GSource *source,
-                                 GSourceFunc callback, gpointer user_data)
-{
-       fprintf(stderr, "+ inotify dispatch\n");
-       if(callback(user_data)) {
-               fprintf(stderr, "+ inotify dispatch success\n");
-               return TRUE;
-       } else {
-               //should not happen as our callback always returns 1
-               fprintf(stderr, "+ inotify dispatch failed.\n");
-               return FALSE;
-       }
-}
-
-void *inotify_loop(void __unused *unused)
-{
-       /* inotification of crashes */
-       GMainLoop *loop;
-       GMainContext *context;
-       GSource *source;
-       GSourceFuncs InotifySourceFuncs = {
-               inotify_source_prepare,
-               inotify_source_check,
-               inotify_source_dispatch,
-               NULL,
-               NULL,
-               NULL,
-       };
-
-       context = g_main_context_new();
-       loop = g_main_loop_new(context, FALSE);
-       loop = g_main_loop_ref(loop);
-       source = g_source_new(&InotifySourceFuncs, sizeof(GSource));
-       g_source_attach(source, context);
-       g_source_set_callback(source, scan_corefolders, NULL, NULL);
-       g_main_loop_run(loop);
-       g_main_loop_unref(loop);
-
-       return NULL;
-}
-
 int main(int argc, char**argv)
 {
        GMainLoop *loop;
@@ -172,11 +81,11 @@ int main(int argc, char**argv)
        int j = 0;
        DIR *dir = NULL;
        GThread *inotify_thread = NULL;
+       GThread *submit_thread = NULL;
 
        g_thread_init (NULL);
 
        core_status.processing_oops = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
-       core_status.queued_oops = g_hash_table_new(g_str_hash, g_str_equal);
 
 /*
  * Signal the kernel that we're not timing critical
@@ -249,14 +158,11 @@ int main(int argc, char**argv)
         * We ignore this advice, since 99.99% of the time this program
         * will not use http at all, but the curl code does consume
         * memory.
-        */
-
-/*
        curl_global_init(CURL_GLOBAL_ALL);
-*/
+        */
 
        if (godaemon && daemon(0, 0)) {
-               fprintf(stderr, "corewatcher failed to daemonize.. exiting \n");
+               fprintf(stderr, "corewatcher failed to daemonize..exiting\n");
                return EXIT_FAILURE;
        }
        sched_yield();
@@ -264,8 +170,11 @@ int main(int argc, char**argv)
        loop = g_main_loop_new(NULL, FALSE);
        loop = g_main_loop_ref(loop);
 
-       if (!debug)
-               sleep(20);
+       submit_thread = g_thread_new("corewatcher submit", submit_loop, NULL);
+       if (submit_thread == NULL) {
+               fprintf(stderr, "+ Unable to start submit thread...exiting\n");
+               return EXIT_FAILURE;
+       }
 
        scan_corefolders(NULL);
 
@@ -303,8 +212,9 @@ out:
        for (j = 0; j < url_count; j++)
                free(submit_url[j]);
 
+       g_mutex_lock(&core_status.processing_mtx);
        g_hash_table_destroy(core_status.processing_oops);
-       g_hash_table_destroy(core_status.queued_oops);
+       g_mutex_unlock(&core_status.processing_mtx);
 
        return EXIT_SUCCESS;
 }
index 0e28f68..d8e1c10 100644 (file)
@@ -21,6 +21,7 @@
  * Authors:
  *     Arjan van de Ven <arjan@linux.intel.com>
  *     William Douglas <william.douglas@intel.com>
+ *     Tim Pepper <timothy.c.pepper@linux.intel.com>
  */
 
 
 #define __INCLUDE_GUARD_KERNELOOPS_H_
 
 /* borrowed from the kernel */
-#define barrier() __asm__ __volatile__("": : :"memory")
 #define __unused  __attribute__ ((__unused__))
 
 #define MAX_PROCESSING_OOPS 10
-#define MAX_URLS 9
+#define MAX_URLS 2
 
 #define FREE_OOPS(oops)                                        \
        do {                                            \
@@ -59,25 +59,23 @@ struct oops {
    queued_mtx -> processing_mtx -> gdb_mtx ->processing_queue_mtx */
 struct core_status {
        GHashTable *processing_oops;
-       GHashTable *queued_oops;
        GMutex processing_mtx;
-       GMutex queued_mtx;
 };
 
+/* inotification.c */
+extern void *inotify_loop(void * unused);
+
 /* submit.c */
-extern GMutex queued_bt_mtx;
 extern void queue_backtrace(struct oops *oops);
-extern void submit_queue(void);
 extern char *replace_name(char *filename, char *replace, char *new);
+extern void *submit_loop(void * unused);
 
 /* coredump.c */
-extern GMutex processing_queue_mtx;
-extern GMutex gdb_mtx;
 extern int move_core(char *fullpath, char *ext);
 extern int scan_corefolders(void * unused);
 extern char *strip_directories(char *fullpath);
 extern char *get_core_filename(char *filename, char *ext);
-extern void remove_pid_from_hash(char *fullpath, GHashTable *ht);
+extern void remove_name_from_hash(char *fullpath, GHashTable *ht);
 extern int uid;
 extern int sig;
 extern const char *core_folder;
diff --git a/src/inotification.c b/src/inotification.c
new file mode 100644 (file)
index 0000000..7d788c0
--- /dev/null
@@ -0,0 +1,132 @@
+#define _GNU_SOURCE
+/*
+ * Copyright 2007, Intel Corporation
+ *
+ * This file is part of corewatcher.org
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ *
+ * Authors:
+ *     Arjan van de Ven <arjan@linux.intel.com>
+ *     William Douglas <william.douglas@intel.com>
+ *     Tim Pepper <timothy.c.pepper@linux.intel.com>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <glib.h>
+
+#include "corewatcher.h"
+
+/*
+ * rather than malloc() on each inotify event, preallocate a decent chunk
+ * of memory so multiple events can be read in one go, trading a little
+ * extra memory for less runtime overhead if/when multiple crashes happen
+ * in short order.
+ */
+#include <sys/inotify.h>
+#define BUF_LEN 2048
+
+gboolean inotify_source_prepare(__unused GSource *source, gint *timeout_)
+{
+       *timeout_ = 0;
+       fprintf(stderr, "+ inotification prepare\n");
+       return FALSE;
+}
+
+gboolean inotify_source_check(__unused GSource *source)
+{
+       int fd, wd;
+       char buffer[BUF_LEN];
+       size_t len;
+
+       fprintf(stderr, "+ inotification check\n");
+       /* inotification of crashes */
+       fd = inotify_init();
+       if (fd < 0) {
+               fprintf(stderr, "corewatcher inotify init failed...exiting\n");
+               return EXIT_FAILURE;
+       }
+       wd = inotify_add_watch(fd, core_folder, IN_CLOSE_WRITE);
+       if (wd < 0) {
+               fprintf(stderr, "corewatcher inotify add failed...exiting\n");
+               return EXIT_FAILURE;
+       }
+       fprintf(stderr, "+ awaiting inotification...\n");
+       len = read(fd, buffer, BUF_LEN);
+       if (len <=0 ) {
+               fprintf(stderr, "corewatcher inotify read failed...exiting\n");
+               return FALSE;
+       }
+       fprintf(stderr, "+ inotification received!\n");
+       /* for now simply ignore the actual crash files we've been notified of
+        * and let our callback be dispatched to go look for any/all crash
+        * files */
+
+       /* slight delay to minimize storms of notifications (the inotify
+        * read() can return a batch of notifications*/
+       sleep(5);
+       return TRUE;
+}
+
+gboolean inotify_source_dispatch(__unused GSource *source,
+                                 GSourceFunc callback, gpointer user_data)
+{
+       fprintf(stderr, "+ inotify dispatch\n");
+       if(callback(user_data)) {
+               fprintf(stderr, "+ inotify dispatch success\n");
+               return TRUE;
+       } else {
+               //should not happen as our callback always returns 1
+               fprintf(stderr, "+ inotify dispatch failed\n");
+               return FALSE;
+       }
+}
+
+void *inotify_loop(void __unused *unused)
+{
+       /* inotification of crashes */
+       GMainLoop *loop;
+       GMainContext *context;
+       GSource *source;
+       GSourceFuncs InotifySourceFuncs = {
+               inotify_source_prepare,
+               inotify_source_check,
+               inotify_source_dispatch,
+               NULL,
+               NULL,
+               NULL,
+       };
+
+       context = g_main_context_new();
+       loop = g_main_loop_new(context, FALSE);
+       loop = g_main_loop_ref(loop);
+       source = g_source_new(&InotifySourceFuncs, sizeof(GSource));
+       g_source_attach(source, context);
+       g_source_set_callback(source, scan_corefolders, NULL, NULL);
+
+       fprintf(stderr, "+ inotify loop starting\n");
+       g_main_loop_run(loop);
+       fprintf(stderr, "+ inotify loop finished\n");
+
+       g_main_loop_unref(loop);
+
+       return NULL;
+}
+
index a3c9d0d..dca9f11 100644 (file)
@@ -22,6 +22,7 @@
  * Authors:
  *     Arjan van de Ven <arjan@linux.intel.com>
  *     William Douglas <william.douglas@intel.com>
+ *     Tim Pepper <timothy.c.pepper@linux.intel.com>
  */
 
 #include <unistd.h>
 
 #include "corewatcher.h"
 
-/*
- * the application must initialize the GMutex queued_bt_mtx
- * before calling into this file's functions
- */
 /* Always pick up the queued_mtx and then the
    queued_bt_mtx, reverse for setting down */
-GMutex queued_bt_mtx;
-static struct oops *queued_backtraces = NULL;
-static char result_url[4096];
+GMutex bt_mtx;
+GCond bt_work;
+GHashTable *bt_hash;
+static struct oops *bt_list = NULL;
 
 /*
- * Creates a duplicate of oops and adds it to
- * the submit queue if the oops isn't already
- * there.
- *
- * Expects the queued_mtx to be held
- * Picks up and sets down the queued_bt_mtx.
+ * Adds an oops to the work queue if the oops
+ * isn't already there.
  */
 void queue_backtrace(struct oops *oops)
 {
-       struct oops *new = NULL;
-
        if (!oops || !oops->filename)
                return;
 
-       /* first, check if we haven't already submitted the oops */
+       g_mutex_lock(&bt_mtx);
 
-       if (g_hash_table_lookup(core_status.queued_oops, oops->filename))
+       /* if this is already on bt_list / bt_hash, free and done */
+       if (g_hash_table_lookup(bt_hash, oops->filename)) {
+               FREE_OOPS(oops);
+               g_mutex_unlock(&bt_mtx);
                return;
+       }
 
-       new = malloc(sizeof(struct oops));
-       if (!new)
-               return;
-       g_mutex_lock(&queued_bt_mtx);
-       new->next = queued_backtraces;
-       if (oops->application)
-               new->application = strdup(oops->application);
-       else
-               new->application = NULL;
-       if (oops->text)
-               new->text = strdup(oops->text);
-       else
-               new->text = NULL;
-       new->filename = strdup(oops->filename);
-       if (oops->detail_filename)
-               new->detail_filename = strdup(oops->detail_filename);
-       else
-               new->detail_filename = NULL;
-       queued_backtraces = new;
-       g_mutex_unlock(&queued_bt_mtx);
-       g_hash_table_insert(core_status.queued_oops, new->filename, new->filename);
+       /* otherwise add to bt_list / bt_hash, signal work */
+       oops->next = bt_list;
+       bt_list = oops;
+       g_hash_table_insert(bt_hash, oops->filename, oops->filename);
+       g_cond_signal(&bt_work);
+       g_mutex_unlock(&bt_mtx);
 }
 
 /*
@@ -95,19 +76,15 @@ void queue_backtrace(struct oops *oops)
  * be submitted.
  *
  * Picks up and sets down the processing_mtx.
- * Picks up and sets down the queued_bt_mtx.
- * Expects the queued_mtx to be held.
+ * Picks up and sets down the bt_mtx.
  */
 static void print_queue(void)
 {
-       struct oops *oops = NULL, *next = NULL, *queue = NULL;
+       struct oops *oops = NULL, *next = NULL;
        int count = 0;
 
-       g_mutex_lock(&queued_bt_mtx);
-       queue = queued_backtraces;
-       queued_backtraces = NULL;
-       barrier();
-       oops = queue;
+       g_mutex_lock(&bt_mtx);
+       oops = bt_list;
        while (oops) {
                fprintf(stderr, "+ Submit text is:\n---[start of oops]---\n%s\n---[end of oops]---\n", oops->text);
                next = oops->next;
@@ -115,44 +92,41 @@ static void print_queue(void)
                oops = next;
                count++;
        }
-       g_mutex_unlock(&queued_bt_mtx);
+       g_hash_table_remove_all(bt_hash);
+       g_mutex_unlock(&bt_mtx);
+
        g_mutex_lock(&core_status.processing_mtx);
        g_hash_table_remove_all(core_status.processing_oops);
        g_mutex_unlock(&core_status.processing_mtx);
-
-       g_hash_table_remove_all(core_status.queued_oops);
 }
 
-static void write_logfile(int count, char *wsubmit_url)
+static size_t writefunction(void *ptr, size_t size, size_t nmemb, void __attribute((unused)) *stream)
 {
-       openlog("corewatcher", 0, LOG_KERN);
-       syslog(LOG_WARNING, "Submitted %i coredump signatures to %s", count, wsubmit_url);
-       closelog();
-}
+       char *httppost_ret = NULL;
+       char *errstr1 = NULL;
+       char *errstr2 = NULL;
+       int ret = 0;
 
-static size_t writefunction( void *ptr, size_t size, size_t nmemb, void __attribute((unused)) *stream)
-{
-       char *c = NULL, *c1 = NULL, *c2 = NULL;
-       c = malloc(size * nmemb + 1);
-       if (!c)
+       httppost_ret = malloc(size * nmemb + 1);
+       if (!httppost_ret)
                return -1;
 
-       memset(c, 0, size * nmemb + 1);
-       memcpy(c, ptr, size * nmemb);
-       printf("received %s \n", c);
-       c1 = strstr(c, "201 ");
-       if (c1) {
-               c1 += 4;
-               if (c1 >= c + strlen(c)) {
-                       free(c);
-                       return -1;
-               }
-               c2 = strchr(c1, '\n');
-               if (c2) *c2 = 0;
-               strncpy(result_url, c1, 4095);
-       }
-       free(c);
-       return size * nmemb;
+       memset(httppost_ret, 0, size * nmemb + 1);
+       memcpy(httppost_ret, ptr, size * nmemb);
+
+       fprintf(stderr, "+ received:\n");
+       fprintf(stderr, "%s", httppost_ret);
+       fprintf(stderr, "\n\n");
+
+       errstr1 = strstr(httppost_ret, "the server encountered an error");
+       errstr2 = strstr(httppost_ret, "ScannerError at /crash_submit/");
+       if (errstr1 || errstr2)
+               ret = -1;
+       else
+               ret = size * nmemb;
+
+       free(httppost_ret);
+       return ret;
 }
 
 /*
@@ -195,143 +169,143 @@ char *replace_name(char *filename, char *replace, char *new)
 }
 
 /*
- * Attempts to send the oops queue to the submission url wsubmit_url,
- * will use proxy if configured.
+ * Worker thread for submitting backtraces
  *
+ * Picks up and sets down the bt_mtx.
  * Picks up and sets down the processing_mtx.
- * Expects queued_mtx to be held.
  */
-static void submit_queue_with_url(struct oops *queue, char *wsubmit_url, char *proxy)
+void *submit_loop(void __unused *unused)
 {
-       int result = 0;
+       int i = 0, n = 0, sentcount, failcount;
        struct oops *oops = NULL;
-       CURL *handle = NULL;
-       int count = 0;
-
-       handle = curl_easy_init();
-       curl_easy_setopt(handle, CURLOPT_URL, wsubmit_url);
-       if (proxy)
-               curl_easy_setopt(handle, CURLOPT_PROXY, proxy);
-
-       oops = queue;
-       while (oops) {
-               struct curl_httppost *post = NULL;
-               struct curl_httppost *last = NULL;
-
-               /* set up the POST data */
-               curl_formadd(&post, &last,
-                       CURLFORM_COPYNAME, "crash",
-                       CURLFORM_COPYCONTENTS, oops->text, CURLFORM_END);
-
-               curl_easy_setopt(handle, CURLOPT_HTTPPOST, post);
-               curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writefunction);
-               result = curl_easy_perform(handle);
-               curl_formfree(post);
-
-               if (!result) {
-                       char *nf = NULL;
-                       nf = replace_name(oops->filename, ".processed", ".submitted");
-                       rename(oops->filename, nf);
-                       g_mutex_lock(&core_status.processing_mtx);
-                       remove_pid_from_hash(oops->filename, core_status.processing_oops);
-                       g_mutex_unlock(&core_status.processing_mtx);
-                       free(nf);
-
-                       g_hash_table_remove(core_status.queued_oops, oops->filename);
-                       count++;
-               } else {
-                       g_hash_table_remove(core_status.queued_oops, oops->filename);
-                       queue_backtrace(oops);
-               }
-               oops = oops->next;
-       }
-
-       curl_easy_cleanup(handle);
-
-       if (count && !testmode)
-               write_logfile(count, wsubmit_url);
-}
-
-/*
- * Entry function for submitting oops data.
- *
- * Picks up and sets down the queued_mtx.
- * Picks up and sets down the queued_bt_mtx.
- */
-void submit_queue(void)
-{
-       int i = 0, n = 0, submit = 0;
-       struct oops *queue = NULL, *oops = NULL, *next = NULL;
-       CURL *handle = NULL;
+       struct oops *work_list = NULL;
+       struct oops *requeue_list = NULL;
+       char *newfilename = NULL;
        pxProxyFactory *pf = NULL;
        char **proxies = NULL;
-       char *proxy = NULL;
-
-       g_mutex_lock(&core_status.queued_mtx);
+       int result = 0;
+       CURL *handle = NULL;
+       struct curl_httppost *post = NULL;
+       struct curl_httppost *last = NULL;
 
-       if (!g_hash_table_size(core_status.queued_oops)) {
-               g_mutex_unlock(&core_status.queued_mtx);
-               return;
-       }
+       fprintf(stderr, "+ Begin submit_loop()\n");
 
-       memset(result_url, 0, 4096);
+       bt_hash = g_hash_table_new(g_str_hash, g_str_equal);
 
        if (testmode) {
+               fprintf(stderr, "+ The queue contains:\n");
                print_queue();
-               g_mutex_unlock(&core_status.queued_mtx);
-               return;
+               fprintf(stderr, "+ Leaving submit_loop(), testmode\n");
+               return NULL;
        }
 
-       g_mutex_lock(&queued_bt_mtx);
-       queue = queued_backtraces;
-       queued_backtraces = NULL;
-       barrier();
-       g_mutex_unlock(&queued_bt_mtx);
-
-       pf = px_proxy_factory_new();
-       handle = curl_easy_init();
-       curl_easy_setopt(handle, CURLOPT_NOBODY, 1);
-       curl_easy_setopt(handle, CURLOPT_TIMEOUT, 5);
-
-       for (i = 0; i < url_count; i++) {
-               curl_easy_setopt(handle, CURLOPT_URL, submit_url[i]);
-               if (pf)
-                       proxies = px_proxy_factory_get_proxies(pf, submit_url[i]);
-               if (proxies) {
-                       proxy = proxies[0];
-                       curl_easy_setopt(handle, CURLOPT_PROXY, proxy);
-               } else {
-                       proxy = NULL;
+       while (1) {
+               g_mutex_lock(&bt_mtx);
+               while (!bt_list) {
+                       if (requeue_list) {
+                               bt_list = requeue_list;
+                               requeue_list = NULL;
+                               fprintf(stderr, "+ submit_loop() requeued old work, awaiting new work\n");
+                       } else {
+                               fprintf(stderr, "+ submit_loop() queue empty, awaiting new work\n");
+                       }
+                       g_cond_wait(&bt_work, &bt_mtx);
                }
-               if (!curl_easy_perform(handle)) {
-                       submit_queue_with_url(queue, submit_url[i], proxy);
-                       submit = 1;
+               fprintf(stderr, "+ submit_loop() checking for work\n");
+               /* pull out current work and release the mutex */
+               work_list = bt_list;
+               bt_list = NULL;
+               g_mutex_unlock(&bt_mtx);
+
+               /* net init */
+               handle = curl_easy_init();
+               pf = px_proxy_factory_new();
+               curl_easy_setopt(handle, CURLOPT_NOBODY, 1);
+               curl_easy_setopt(handle, CURLOPT_TIMEOUT, 5);
+
+               sentcount = 0;
+               failcount = 0;
+
+               /* try to find a good url/proxy combo */
+               for (i = 0; i < url_count; i++) {
+                       curl_easy_setopt(handle, CURLOPT_URL, submit_url[i]);
+
+                       /* use a proxy if one is present */
+                       if (pf) {
+                               proxies = px_proxy_factory_get_proxies(pf, submit_url[i]);
+                               if (proxies)
+                                       curl_easy_setopt(handle, CURLOPT_PROXY, proxies[0]);
+                       }
+
+                       /* check the connection before POSTing form */
+                       result = curl_easy_perform(handle);
+                       if (result)
+                               continue;
+
+                       /* have a good url/proxy now...send reports there */
+                       while (work_list) {
+                               oops = work_list;
+                               work_list = oops->next;
+                               oops->next = NULL;
+
+                               fprintf(stderr, "+ attempting to POST %s\n", oops->detail_filename);
+
+                               /* set up the POST data */
+                               curl_formadd(&post, &last,
+                                       CURLFORM_COPYNAME, "crash",
+                                       CURLFORM_COPYCONTENTS, oops->text, CURLFORM_END);
+                               curl_easy_setopt(handle, CURLOPT_HTTPPOST, post);
+                               curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writefunction);
+                               result = curl_easy_perform(handle);
+                               curl_formfree(post);
+
+                               if (!result) {
+                                       fprintf(stderr, "+ successfully sent %s\n", oops->detail_filename);
+                                       sentcount++;
+
+                                       newfilename = replace_name(oops->filename, ".processed", ".submitted");
+                                       rename(oops->filename, newfilename);
+                                       free(newfilename);
+
+                                       g_mutex_lock(&core_status.processing_mtx);
+                                       remove_name_from_hash(oops->filename, core_status.processing_oops);
+                                       g_mutex_unlock(&core_status.processing_mtx);
+
+                                       g_mutex_lock(&bt_mtx);
+                                       g_hash_table_remove(bt_hash, oops->filename);
+                                       g_mutex_unlock(&bt_mtx);
+                                       FREE_OOPS(oops);
+                               } else {
+                                       fprintf(stderr, "+ requeuing %s\n", oops->detail_filename);
+                                       failcount++;
+
+                                       oops->next = requeue_list;
+                                       requeue_list = oops;
+                               }
+                       }
+
                        for (n = 0; proxies[n]; n++)
                                free(proxies[n]);
                        free(proxies);
-                       break;
-               }
-               for (n = 0; proxies[n]; n++)
-                       free(proxies[n]);
-               free(proxies);
-       }
-
-       px_proxy_factory_free(pf);
 
-       if (submit) {
-               oops = queue;
-               while (oops) {
-                       next = oops->next;
-                       FREE_OOPS(oops);
-                       oops = next;
+                       openlog("corewatcher", 0, LOG_KERN);
+                       if (sentcount)
+                               syslog(LOG_WARNING, "Successful sent %d coredump signatures to %s", sentcount, submit_url[i]);
+                       if (failcount)
+                               syslog(LOG_WARNING, "Failed to send %d coredump signatures to %s", failcount, submit_url[i]);
+                       closelog();
                }
-       } else {
-               g_mutex_lock(&queued_bt_mtx);
-               queued_backtraces = queue;
-               g_mutex_unlock(&queued_bt_mtx);
+
+               px_proxy_factory_free(pf);
+               curl_easy_cleanup(handle);
        }
 
-       curl_easy_cleanup(handle);
+       fprintf(stderr, "+ End submit_loop()\n");
+
+       /* curl docs say this is not thread safe...but we never get here*/
        curl_global_cleanup();
-       g_mutex_unlock(&core_status.queued_mtx);
+
+       g_hash_table_destroy(bt_hash);
+
+       return NULL;
 }