Aho-Corasick performance improvements 25/159525/6
authorSaulo Moraes <s.moraes@samsung.com>
Thu, 9 Nov 2017 10:45:42 +0000 (08:45 -0200)
committerSaulo Aldighieri Moraes <s.moraes@samsung.com>
Tue, 14 Nov 2017 16:53:42 +0000 (14:53 -0200)
Change-Id: I85b9eb0f56af4d73b48ba25e3e4a67400e6f16aa
Signed-off-by: Saulo Aldighieri Moraes <s.moraes@samsung.com>
CONTRIBUTORS [new file with mode: 0644]
ahocorasick/node.c
ahocorasick/node.h
ahocorasick/types.h
server/inc/DlpRuleChangeNotification.h
server/inc/PrivacyGuardDb.h
server/src/DlpRuleChangeNotification.cpp
server/src/PrivacyGuardDb.cpp
test/tc-ahocorasick.c [new file with mode: 0644]

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644 (file)
index 0000000..97d7c58
--- /dev/null
@@ -0,0 +1,17 @@
+# This file contains a list of people who've made non-trivial contributions
+# to the Privacy Setting project. People who commit code to the project are
+# encouraged to add their names here. Please keep the list sorted by first
+# names.
+
+Bartlomiej Grzelewski <b.grzelewski@samsung.com>
+Eduardo Iyoda <emi@cesar.org.br>
+Fabio Urquiza <flus@cesar.org.br>
+Gabriel Finch <gf@cesar.org.br>
+Jooseong Lee <jooseong.lee@samsung.com>
+Jungkon Kim <jungkon.kim@samsung.com>
+Kim Kidong <kd0228.kim@samsung.com>
+Kyoungyong Lee <k.y.lee@samsung.com>
+MyungJoo Ham <myungjoo.ham@samsung.com>
+Saulo Moraes <s.moraes@samsung.com>
+Seonil Kim <seonil47.kim@samsung.com>
+Yunjin Lee <yunjin-.lee@samsung.com>
index 1b2dd9ea2ca046eda4466ad84be025feb3a8e882..65522981daacb895f40ed5066e8c3e92728f39d0 100644 (file)
 /**
    Create a new node using the given rule id and text string
  */
-static ac_node *new_node(void *user_data, const char *text)
+static ac_node *new_node_strlen(void *user_data, const char *text, size_t textlen)
 {
        ac_node *node = (ac_node *)malloc(sizeof(ac_node));
        memset(node, 0, sizeof(ac_node));
-       node->branch = strdup(text);
+       if (textlen > 0) {
+               node->branch = (char *)malloc(textlen);
+               memcpy(node->branch, text, textlen);
+               node->brlen = (int)textlen;
+       }
        node->user_data = user_data;
-       node->is_match = TRUE;
+       node->checked = FALSE;
+       node->terminal = TRUE;
        return node;
 }
 
+static ac_node *new_node(void *user_data, const char *text)
+{
+       return new_node_strlen(user_data, text, strlen(text));
+}
+
 /**
    Append child (leaf) node to parent at position idx
  */
@@ -72,48 +82,37 @@ static void add_child(ac_node *parent, int idx, ac_node *child)
 /**
        Compare string s1 with string s2
        return value:
-        0                              => s1 before s2
-        -1                     => s1 after s2
-        other values   => Number of initial letters in common
+       0                               => s1 before s2
+       -1                              => s1 after s2
+       other values    => Number of initial letters in common
+#ifdef CASE_INSENSITIVE
+       case conversion (tolower) should be done before calling compare()
+#endif
 */
-static int compare(const char *s1, const char *s2)
+static int compare(const char *s1, size_t s1len, const char *s2, size_t s2len)
 {
        int i;
-       size_t commonlen, s2len;
-
-       if (strlen(s1) == 0)
-               return 0;
+       size_t commonlen;
 
-       if (strlen(s2) == 0)
-               return -1;
-
-#ifdef CASE_INSENSITIVE
-       if (tolower(s1[0]) < tolower(s2[0]))
+       if (s1len == 0)
                return 0;
 
-       if (tolower(s2[0]) < tolower(s1[0]))
+       if (s2len == 0)
                return -1;
 
-#else
        if (s1[0] < s2[0])
                return 0;
 
        if (s2[0] < s1[0])
                return -1;
 
-#endif
-       commonlen = strlen(s1);
-       if ((s2len = strlen(s2)) < commonlen)
+       commonlen = s1len;
+       if (s2len < commonlen)
                commonlen = s2len;
 
        for (i = 1; i < commonlen; i++) {
-#ifdef CASE_INSENSITIVE
-               if (tolower(s1[i]) != tolower(s2[i]))
-                       return i;
-#else
                if (s1[i] != s2[i])
                        return i;
-#endif
        }
        return commonlen;
 }
@@ -125,7 +124,7 @@ static ac_match *create_match(void *user_data, int pos, int size)
 {
        ac_match *match = (ac_match *)malloc(sizeof(ac_match));
 
-       if(match) {
+       if (match) {
                match->user_data = user_data;
                match->position = pos;
                match->size = size;
@@ -174,7 +173,7 @@ static void add_sposn(ac_node *node, int offs)
        node->nsposns++;
        node->sposns = (int *)malloc(node->nsposns * sizeof(int));
 
-       if(node->sposns) {
+       if (node->sposns) {
                node->sposns[0] = offs;
 
                for (i = 1; i < node->nsposns; i++)
@@ -207,9 +206,9 @@ static void clean_bad_sposns(ac_node *node)
        ancestors_decrement_sposns(node, nbad);
        node->nsposns -= nbad;
 
-       if (node->nsposns == 0) {
+       if (node->nsposns == 0)
                node->sposns = NULL;
-       else {
+       else {
                node->sposns = (int *)malloc(node->nsposns * sizeof(int));
 
                for (i = 0; i < node->nsposns; i++) {
@@ -227,23 +226,47 @@ static void clean_bad_sposns(ac_node *node)
 */
 static int get_totlen(ac_node *node)
 {
-       size_t len = strlen(node->branch);
-       while (node->stem != NULL) {
-               node = node->stem;
-               len += strlen(node->branch);
-       }
-       return (int)len;
+       int len = node->brlen;
+       if (node->stem != NULL)
+               len += get_totlen(node->stem);
+       return len;
+}
+
+#ifdef CASE_INSENSITIVE
+/**
+   convert string to all lower case. Result should be freed after use.
+*/
+static char *string_tolower(const char *string, size_t len)
+{
+       int i;
+       char *tl_string = (char *)malloc(len + 1);
+       if (!tl_string) return NULL;
+       for (i = 0; i < len; i++)
+               tl_string[i] = (char)tolower(string[i]);
+       tl_string[len] = 0;
+       return tl_string;
 }
+#endif
 
 void add_node(ac_instance *ac, const char *string, void *user_data)
 {
-       char *ptr = (char *)string;
+       char *ptr, *tmpptr;
+       size_t stlen = strlen(string);
        ac_node *node = ac->root, *xnode;
-       int finished = 0;
+       boolean finished = FALSE, override_finished;
        int res;
        int i = 0;
 
+#ifdef CASE_INSENSITIVE
+       char *xstring = string_tolower(string, stlen);
+       if (xstring == NULL) return;
+       ptr = xstring;
+#else
+       ptr = (char *)string;
+#endif
+
        while (!finished) {
+               override_finished = FALSE;
                if (node != NULL && node->leaves != NULL) {
                        /* starting with the root node, we look at each leaf in turn */
                        for (i = 0; (xnode = node->leaves[i]) != NULL; i++) {
@@ -254,7 +277,7 @@ void add_node(ac_instance *ac, const char *string, void *user_data)
                                 *  - if common is l.t branch, common part becomes node, and branch and node become children
                                 * c) xnode->branch > text, insert node before xnode */
 
-                               res = compare(xnode->branch, ptr);
+                               res = compare(xnode->branch, xnode->brlen, ptr, stlen);
 
                                if (res == 0)
                                        /* xnode before new node */
@@ -268,26 +291,34 @@ void add_node(ac_instance *ac, const char *string, void *user_data)
                                } else {
                                        ac_node *child1;
                                        /* res letters were in common */
-                                       if (res == strlen(xnode->branch)) {
+                                       node = xnode; // consider only this child node
+                                       if (res == node->brlen) {
                                                /* all letters in common, so now we check the leaves */
-                                               node = xnode;
                                                ptr += res;
-                                               if (strlen(ptr) == 0) {
+                                               stlen = strlen(ptr);
+                                               if (stlen == 0) {
                                                        /* duplicate string */
                                                        node->user_data = user_data;
-                                                       node->is_match = TRUE;
+                                                       node->terminal = TRUE;
+#ifdef CASE_INSENSITIVE
+                                                       free(xstring);
+#endif
                                                        return;
                                                }
                                                /* leave for() loop and continue in while (!finished) */
-                                               i = 0;
+                                               if (node->leaves != NULL) {
+                                                       // if it also has child leaves, override setting of finished
+                                                       override_finished = TRUE;
+                                               } else i = 0; // otherwise add us as the first child
                                                break;
                                        }
                                        /* the common part was l.t. all
                                         * the common part becomes the new node
                                         * we add both this node with all its children and the new node as children */
-                                       child1 = new_node(xnode->user_data, (const char *)(xnode->branch + res));
+                                       child1 = new_node_strlen(xnode->user_data, (const char *)(xnode->branch + res), xnode->brlen - res);
                                        child1->leaves = xnode->leaves;
                                        child1->stem = xnode;
+                                       child1->terminal = xnode->terminal;
                                        if (child1->leaves != NULL) {
                                                for (i = 0; (node = child1->leaves[i]) != NULL; i++) {
                                                        /* adjust stems for each of these */
@@ -295,44 +326,52 @@ void add_node(ac_instance *ac, const char *string, void *user_data)
                                                }
                                        }
                                        /* remove xnodes leaves and shorten its text */
-                                       memset(xnode->branch + res, 0, 1);
+                                       tmpptr = (char *)malloc(res);
+                                       memcpy(tmpptr, xnode->branch, res);
+                                       free(xnode->branch);
+                                       xnode->branch = tmpptr;
+                                       xnode->brlen = res;
 
-                                       if (strlen(ptr) > res) {
+                                       if (stlen > res) {
                                                ac_node *child2;
                                                /* remainder of new string is added as child2 */
-                                               xnode->is_match = FALSE;
+                                               xnode->terminal = FALSE;
                                                xnode->leaves = (ac_node **)malloc(3 * sizeof(ac_node *));
                                                if (xnode->leaves != NULL) {
-                                                 child2 = new_node(user_data, (const char *)(ptr + res));
-                                                 ac->added++;
-                                                 child2->stem = xnode;
-
-                                                 res = compare(child1->branch, child2->branch);
-                                                 if (res == 0) {
-                                                       /* child1 before child2 */
-                                                       xnode->leaves[0] = child1;
-                                                       xnode->leaves[1] = child2;
-                                                 } else {
-                                                       /* child1 before child2 */
-                                                       xnode->leaves[0] = child2;
-                                                       xnode->leaves[1] = child1;
-                                                 }
-                                                 xnode->leaves[2] = NULL;
+                                                       child2 = new_node(user_data, (const char *)(ptr + res));
+                                                       ac->added++;
+                                                       child2->stem = xnode;
+
+                                                       res = compare(child1->branch, child1->brlen, child2->branch, child2->brlen);
+                                                       if (res == 0) {
+                                                               /* child1 before child2 */
+                                                               xnode->leaves[0] = child1;
+                                                               xnode->leaves[1] = child2;
+                                                       } else {
+                                                               /* child2 before child1 */
+                                                               xnode->leaves[0] = child2;
+                                                               xnode->leaves[1] = child1;
+                                                       }
+                                                       xnode->leaves[2] = NULL;
                                                }
                                        } else {
                                                /* the common part consumed all of new string, so we only have 1 child to add */
+                                               xnode->terminal = TRUE;
                                                xnode->leaves = (ac_node **)malloc(2 * sizeof(ac_node *));
                                                if (xnode->leaves != NULL) {
-                                                 xnode->user_data = user_data;
-                                                 xnode->leaves[0] = child1;
-                                                 xnode->leaves[1] = NULL;
+                                                       xnode->user_data = user_data;
+                                                       xnode->leaves[0] = child1;
+                                                       xnode->leaves[1] = NULL;
                                                }
                                        }
                                }
+#ifdef CASE_INSENSITIVE
+                               free(xstring);
+#endif
                                return;
                        }
                }
-               finished = 1;
+               if (!override_finished) finished = TRUE;
        }
 
        /* we checked all children and none were after ptr */
@@ -341,107 +380,109 @@ void add_node(ac_instance *ac, const char *string, void *user_data)
        if (ac->root == NULL) {
                /* new root - make a root node and add us as a leaf */
                node = ac->root = new_node(NULL, "");
-               ac->root->is_match = FALSE;
+               ac->root->terminal = FALSE;
        }
        add_child(node, i, xnode);
+#ifdef CASE_INSENSITIVE
+       free(xstring);
+#endif
 }
 
 /**
  * @callgraph
  */
-ac_match *parse_char(char ch, int offs, ac_node *node)
+static ac_match *parse_char_with_case(unsigned char ch, int offs, ac_node *node)
 {
        ac_match *acm = NULL;
        ac_node *xnode;
-       size_t brlen;
        int child_sposns = 0;
+       int sposn, totlen;
        int i, j;
 
-       if (node->checked)
-               return NULL;
+       if (!node || node->checked) return NULL;
 
-       brlen = strlen(node->branch);
-       if (brlen > 0) {
-#ifdef CASE_INSENSITIVE
-               if (node->stem->stem == NULL && tolower(node->branch[0]) == tolower(ch)) {
-                       /* leaf of root node, and first char matched, start a new sposn */
-                       add_sposn(node, offs);
+       if (node->leaves != NULL && (node->brlen == 0 || node->child_sposns != 0)) {
+               /* starting from the root, we look at each unchecked child in turn and check (recursively) for matches
+                * (skip if there are no child_sposns, to avoid parsing the whole tree) */
+               for (i = 0; (xnode = node->leaves[i]) != NULL && !xnode->checked; i++) {
+                       if (node->brlen == 0 || xnode->nsposns > 0 || xnode->child_sposns > 0) {
+                               acm = parse_char_with_case(ch, offs, xnode);
+                               if (acm != NULL)
+                                       return acm;
+                       }
                }
-#else
-               if (node->stem->stem == NULL && node->branch[0] == ch) {
+       }
+
+       /* after checking the child nodes, we look at the node itself */
+
+       if (node->brlen > 0) {
+               /* the cast is just to prevent SVACE warnings, as long as both types are the same it does not matter */
+               if (node->stem->stem == NULL && (unsigned char)node->branch[0] == ch) {
                        /* leaf of root node, and first char matched, start a new sposn */
                        add_sposn(node, offs);
                }
-#endif
+
                if (node->nsposns > 0) {
                        /* first check the ongoing matches in sposns */
                        for (i = 0; i < node->nsposns; i++) {
-                               if (node->sposns[i] > offs)
-                                       continue;
-
-#ifdef CASE_INSENSITIVE
-                               if (tolower(node->branch[offs - node->sposns[i]]) != tolower(ch)) {
-                                       /* mark for removal */
+                               sposn = node->sposns[i];
+                               /* the cast is just to prevent SVACE warnings, as long as both types are the same it does not matter */
+                               if ((unsigned char)node->branch[offs - sposn] != ch) {
+                                       /* mismatch, mark for removal */
                                        node->sposns[i] = -1;
-                               } else {
-#else
-                               if (node->branch[offs - node->sposns[i]] != ch) {
-                                       /* mark for removal */
-                                       node->sposns[i] = -1;
-                               } else {
-#endif
-                                       /* continuing match */
-                                       if (offs - node->sposns[i] == brlen - 1) {
-                                               /* the entirety of this branch was matched */
-                                               if (node->leaves == NULL || node->is_match) {
-                                                       /* we got a match ! */
-                                                       int totlen = get_totlen(node);
-                                                       acm = create_match(node->user_data, node->sposns[i] - totlen + brlen, totlen);
+                               } else if (offs - sposn == node->brlen - 1) {
+                                       /* the entirety of this branch was matched */
+                                       if (node->terminal) {
+                                               /* we got a match ! */
+                                               totlen = get_totlen(node);
+                                               if (acm == NULL) {
+                                                       // since the values of node->sposns[] are unique, acm should always be NULL
+                                                       // the check is added to prevent SVACE warnings
+                                                       acm = create_match(node->user_data, sposn - totlen + node->brlen, totlen);
                                                }
-                                               if (node->leaves != NULL) {
-                                                       /* otherwise place possible matches at next char for all children */
-                                                       for (j = 0; (xnode = node->leaves[j]) != NULL; j++) {
-                                                               add_sposn(xnode, offs + 1);
-                                                               child_sposns++;
-                                                       }
+                                       }
+                                       if (node->leaves != NULL) {
+                                               /* place possible matches at next char for all children */
+                                               for (j = 0; (xnode = node->leaves[j]) != NULL; j++) {
+                                                       add_sposn(xnode, offs + 1);
+                                                       child_sposns++;
                                                }
-                                               /* mark for removal */
-                                               node->sposns[i] = -1;
+                                               /* increase child sposn count for node and all ancestors */
+                                               ancestors_increment_sposns(node, child_sposns);
                                        }
+                                       /* mark for removal */
+                                       node->sposns[i] = -1;
                                }
                        }
-               }
-
-               /* after checking all sposns we clean up bad matches */
-               clean_bad_sposns(node);
-       }
-
 
-       if (acm == NULL) {
-               if (node->leaves != NULL && (brlen == 0 || node->child_sposns != 0)) {
-                       /* starting from the root, we look at each leaf in turn and check for matches
-                        * (skip if there are no child_sposns, to avoid parsing the whole tree) */
-                       for (i = 0; (xnode = node->leaves[i]) != NULL; i++) {
-                               acm = parse_char(ch, offs, xnode);
-                               if (acm != NULL)
-                                       return acm;
-                       }
+                       /* after checking all sposns we clean up bad matches */
+                       clean_bad_sposns(node);
                }
-       }
 
-       /* increase child sposn count for node and all ancestors */
-       if (child_sposns != 0)
-               ancestors_increment_sposns(node, child_sposns);
+               /* mark this node as checked for this position */
+               node->checked = TRUE;
+       }
 
-       /* mark this node as checked for this position */
-       node->checked = 1;
        return acm;
 }
 
+/**
+ * @callgraph
+ */
+ac_match *parse_char(unsigned char ch, int offs, ac_node *node)
+{
+#ifdef CASE_INSENSITIVE
+       return parse_char_with_case(tolower(ch), offs, node);
+#else
+       return parse_char_with_case(ch, offs, node);
+#endif
+}
+
 void reset_checks(ac_node *node)
 {
        int i;
-       node->checked = 0;
+       if (!node) return;
+       node->checked = FALSE;
        if (node->leaves != NULL) {
                ac_node *xnode;
 
@@ -468,21 +509,48 @@ void tree_free(ac_node *node)
        free(node);
 }
 
-void dump_tree(ac_node *node)
+void dump_tree(ac_node *node, FILE *stream)
 {
        int i;
        ac_node *xnode;
-       printf("%s (%p)", node->branch, node->user_data);
+       int otabbing, xtabbing;
+       static int tabbing;
+
+       if (!node) return;
+
+       if (node->branch == NULL || node->stem->brlen == 0)
+               tabbing = 0;
+
+       for (i = 0; i < tabbing; i++)
+               fprintf(stream, " ");
+
+       otabbing = tabbing;
+
+       if (node->brlen > 0) {
+               char *word = malloc(node->brlen + 1);
+               memcpy(word, node->branch, node->brlen);
+               memset(word + node->brlen, 0, 1);
+               fprintf(stream, "%s (%p)", word, node->user_data);
+               free(word);
+               tabbing += node->brlen + 10;
+               if (node->terminal) fprintf(stream, " |");
+       }
 
        if (node->leaves == NULL) {
-               printf("\n");
+               fprintf(stream, "\n");
                return;
        }
 
-       printf(" ");
+       fprintf(stream, "->\n");
+
+       xtabbing = tabbing;
+
+       for (i = 0; (xnode = node->leaves[i]) != NULL; i++) {
+               dump_tree(xnode, stream);
+               tabbing = xtabbing;
+       }
 
-       for (i = 0; (xnode = node->leaves[i]) != NULL; i++)
-               dump_tree(xnode);
+       tabbing = otabbing;
 }
 
 void clear_sposns(ac_node *node)
index 6ca983b82b99791f9d037d7c1ab68fe23c49c20e..5a646b424715808a4e3d45708566325ac39ae6d4 100644 (file)
@@ -22,6 +22,8 @@
 #ifndef _AHOCORASICK_NODE_H_
 #define _AHOCORASICK_NODE_H_
 
+#include <stdio.h>
+
 #include "types.h"
 
 #ifdef __cplusplus
@@ -39,7 +41,7 @@ extern "C" {
 void add_node(ac_instance *ac, const char *string, void *user_data);
 
 /**
- * @fn                                 ac_match *parse_char(char ch, int offs, ac_node *node)
+ * @fn                                 ac_match *parse_char(unsigned char ch, int offs, ac_node *node)
  * @brief                              Parse char at offs in the source text, using the tree rooted at node
  * @param[in] ch        The character at offs in the source text
  * @param[in] offs      The offset of the character in the source text
@@ -47,7 +49,7 @@ void add_node(ac_instance *ac, const char *string, void *user_data);
  * @return                             The next match at position offs, or NULL if no more matches.
  * @callgraph
  */
-ac_match *parse_char(char ch, int offs, ac_node *node);
+ac_match *parse_char(unsigned char ch, int offs, ac_node *node);
 
 /**
  * @fn                                 void clear_sposns(ac_node *node)
@@ -78,9 +80,10 @@ void reset_checks(ac_node *node);
  * @fn                                 void dump_tree(ac_node *node)
  * @brief                              This is a test function which prints out the current tree of string fragments.
  * @param[in] node      The root of the tree to be traversed.
+ * @param[in] stream    The stream to print the output to, e.g. stdout, stderr.
  * @callgraph
  */
-void dump_tree(ac_node *node);
+void dump_tree(ac_node *node, FILE *stream);
 
 #ifdef __cplusplus
 }
index de40ad3fd2636f2ffcb19784f9cf9f807c3211bb..21ac34d22bd1dc5f17eb0cf532c0cbe66b3f0fc8 100644 (file)
@@ -33,7 +33,7 @@ extern "C" {
 #endif
 
 typedef struct _ac_node ac_node;
-typedef int boolean;
+typedef unsigned char boolean;
 
 #ifndef TRUE
 #define TRUE 1
@@ -42,14 +42,15 @@ typedef int boolean;
 
 struct _ac_node {
        void *user_data;        ///< the user_data which was added with the string
-       boolean is_match;       ///< is this node the terminal of some match
+       boolean terminal;       ///< is this node the terminal of some match
        ac_node *stem;          ///< node which this is a leaf of (or NULL)
-       char *branch;           ///< differential text following stem
+       char *branch;           ///< differential text following stem (not NUL terminated)
+       int brlen;              ///< length of *branch* (bytes)
        ac_node **leaves;       ///< various bits of text which can be appended to stem | branch
        int *sposns;            ///< used during matching - possible start positions of this branch
        int nsposns;            ///< number of sposns
        int child_sposns;       ///< number of sposns of descendents
-       int checked;            ///< if the node has been checked for the current position
+       boolean checked;        ///< if the node + children has been checked for the current position
 };
 
 typedef struct {
@@ -64,7 +65,7 @@ typedef struct {
        int offs;                       ///< how far we have parsed
        ac_match *match;        ///< latest match
        ac_node *root;          ///< pointer to the string matching tree
-       int added;                      ///< how many nodes added
+       int added;                      ///< how many strings added
 } ac_instance;
 
 #ifdef __cplusplus
index 1e37b922f6451ad7bcc276b839d9bc63b455fcb3..028c6b486a03f1de49d9c5c07c20b079b0421151 100644 (file)
@@ -42,16 +42,17 @@ public:
         * @brief add a new client to list of clients to inform about rule changes
         * @param[in] pId                       The process id of the client to be added
         * @param[in] userId            The user id of the client to be added
+        * @param[out] appId            Caller-provided buffer (LEN_APPLICATION_ID bytes) that receives the application id
         *
         * @return the result of operation (ERRORCODE : success, ....)
         * @since Tizen 3.0
         */
-       int addDlpClient(int pId, int userId, char **appId);
+       int addDlpClient(int pId, int userId, char *appId);
 
        /**
         * @brief get the application info for the pid
         * @param[in] pId                       The process id of the client look of info
-        * @param[out] pId                      The privacy_semaphore_app_info_s struct related to pid
+        * @param[out] app_info         The privacy_semaphore_app_info_s struct related to pid
         *
         * @return the result of operation (ERRORCODE : success, ....)
         * @since Tizen 3.0
@@ -68,4 +69,3 @@ public:
 };
 
 #endif // _DLP_RULECHANGENOTIFICATION_H_
-
index 647ac76326ef1ed99a89b66dc74ae3ee7584396e..9cfd39727170190fad87463806db27be5978d489 100755 (executable)
@@ -329,4 +329,3 @@ public:
 
 
 #endif // _PRIVACY_GUARD_DB_H_
-
index 4dcfe50cfc7568d86b00e6e853dd363734f6654a..3dc63a8b4363a453973e9da03b432e55d928dde9 100644 (file)
@@ -44,7 +44,7 @@ DlpRuleChangeNotification::getInstance(void)
 }
 
 int
-DlpRuleChangeNotification::addDlpClient(int pId, int userId, char **appId)
+DlpRuleChangeNotification::addDlpClient(int pId, int userId, char *appId)
 {
        int res = PRIV_GUARD_ERROR_SUCCESS;
        bool first_request_by_pid = true;
@@ -56,7 +56,7 @@ DlpRuleChangeNotification::addDlpClient(int pId, int userId, char **appId)
                if (it->pid == pId) {
                        // Process found, no need to add it to sem list
                        first_request_by_pid = false;
-                       *appId = it->app_info.application_id;
+                       strncpy(appId, it->app_info.application_id, LEN_APPLICATION_ID);
                } else {
                        // Just checking if the process still exists, no signal is sent
                        if (kill(it->pid, 0) == -1 && errno == ESRCH) {
@@ -79,7 +79,6 @@ DlpRuleChangeNotification::addDlpClient(int pId, int userId, char **appId)
 
                // load application ID
                ps.app_info.application_id[0] = 0;
-               *appId = ps.app_info.application_id;
                int ret = aul_app_get_appid_bypid(pId, ps.app_info.application_id, LEN_APPLICATION_ID);
                if (ret == 0) {
                        // load application info
@@ -115,6 +114,7 @@ DlpRuleChangeNotification::addDlpClient(int pId, int userId, char **appId)
                        }
                }
 
+               strncpy(appId, ps.app_info.application_id, LEN_APPLICATION_ID);
                ps.sem = sem_open(sem_name, 0);
                if (ps.sem == SEM_FAILED) {
                        PG_LOGE("errro opening sem: %s  - %s", sem_name, strerror_r(errno, buf, sizeof(buf)));
@@ -165,4 +165,3 @@ DlpRuleChangeNotification::notifyDlpClients()
 
        return res;
 }
-
index 919edf0743beb1498042b254219060d8c86e7360..3a60fc55be97674d57f1e2d94ff2ae9b04493670 100644 (file)
@@ -2362,11 +2362,11 @@ PrivacyGuardDb::PgGetDlpCurrentRules(const int userId, const int pId, std::list<
 {
        int res = -1;
        char query[256];
-       char *appId;
+       char appId[LEN_APPLICATION_ID] = {0};
        dlp_profile_s profile;
 
        if(DlpRuleChangeNotification::getInstance() != NULL)
-               DlpRuleChangeNotification::getInstance()->addDlpClient(pId, userId, &appId);
+               DlpRuleChangeNotification::getInstance()->addDlpClient(pId, userId, appId);
        else
                return PRIV_GUARD_ERROR_DB_ERROR;
 
diff --git a/test/tc-ahocorasick.c b/test/tc-ahocorasick.c
new file mode 100644 (file)
index 0000000..010c0b1
--- /dev/null
@@ -0,0 +1,252 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <privacy_guard_client.h>
+#include <privacy_guard_client_internal.h>
+
+#include "privacy_guard_dlp.h"
+#include "../ahocorasick/ahocorasick.h"
+#include "../ahocorasick/node.h"
+
+#define BRIGHTNESS             0
+#define RED                    31
+#define GREEN                  32
+#define YELLOW                 33
+#define BG_BLACK               40
+
+static int fail_cnt = 0;
+static int success_cnt = 0;
+
+/* scenario settings */
+
+/** Test aho-corasick algorithm.
+ * Instructions: uncomment exactly ONE of the scenarios below. Set the number of target words (NWORDS).
+ * Adjust the other settings as required.
+ */
+
+/* only uncomment ONE of these at a time */
+#define SCENARIO_1 // pull words from text, parse same text
+//#define SCENARIO_2 // pull words from text, parse different text
+//#define SCENARIO_3 // pull words from text, parse random text
+//#define SCENARIO_4 // create random words, parse random text
+
+#define NWORDS 1000 // target number of words to search for
+
+#define WORD_SRCFILE "/tmp/smatch.txt"
+#define TEXT_SRCFILE "/tmp/smatch2.txt"
+
+#define SMATCH_BUFSIZE 65536 // text packet buffer size
+
+//#define SHOW_ADDED // uncomment to show strings as they are added
+#define DUMP_TREE  // uncomment to dump the tree after adding strings
+//#define SHOW_MATCHES // uncomment to show details of each match
+
+///////////////////////////////////////////////////////////////////////////////
+// test utilities (aho-corasick)
+///////////////////////////////////////////////////////////////////////////////
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <fcntl.h>
+
+/**
+ * Small deterministic pseudo-random generator (linear congruential step)
+ * used to pick words and fill buffers. The sequence is the same on every run
+ * because the state always starts at 0.
+ *
+ * The state is unsigned so that the multiply/add wraps around; with a signed
+ * state the same expression overflows, which is undefined behaviour in C.
+ *
+ * @param[in] max  exclusive upper bound of the result
+ * @return a value in [0, max); 0 when max is not positive (the state is left
+ *         untouched in that case)
+ */
+static int fastrand(int max)
+{
+       static unsigned int next = 0;
+
+       // a zero bound would divide by zero below
+       if (max <= 0)
+               return 0;
+
+       next = next * 1103515245u + 12345u;
+       return (int)((next / 65536u) % (unsigned int)max);
+}
+
+/** Print the escape sequence ESC[BRIGHTNESS;REDm so following output is red. */
+static void __change_color_to_red(void)
+{
+       const char esc = 0x1B;
+
+       printf("%c[%d;%dm", esc, BRIGHTNESS, RED);
+}
+
+/** Print the escape sequence ESC[BRIGHTNESS;GREENm so following output is green. */
+static void __change_color_to_green(void)
+{
+       const char esc = 0x1B;
+
+       printf("%c[%d;%dm", esc, BRIGHTNESS, GREEN);
+}
+
+/** Print the escape sequence ESC[BRIGHTNESS;YELLOWm so following output is yellow. */
+static void __change_color_to_yellow(void)
+{
+       const char esc = 0x1B;
+
+       printf("%c[%d;%dm", esc, BRIGHTNESS, YELLOW);
+}
+
+/** Print the escape sequence ESC[0m, undoing the colour set by the helpers above. */
+static void __change_color_to_origin(void)
+{
+       const char esc = 0x1B;
+       const int reset = 0;
+
+       printf("%c[%dm", esc, reset);
+}
+
+/**
+ * Print a yellow banner announcing the test that is about to run.
+ *
+ * @param[in] function_name  text shown, tab-indented, between two separator lines
+ */
+static void __start_test(const char *function_name)
+{
+       __change_color_to_yellow();
+       // one call, three adjacent literals: separator, name line, separator
+       printf("================================================================================\n"
+              "\t%s\n"
+              "================================================================================\n",
+              function_name);
+       __change_color_to_origin();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// test verification utility
+///////////////////////////////////////////////////////////////////////////////
+
+// Test String Match (ahocorasick)
+/**
+ * Benchmark the Aho-Corasick matcher: add NWORDS words to a new instance,
+ * scan a text buffer with it, and print the elapsed times and the number of
+ * matches found.
+ *
+ * The origin of the words and of the scanned text is selected at compile time
+ * through SCENARIO_1..SCENARIO_4; SHOW_ADDED, DUMP_TREE and SHOW_MATCHES turn
+ * on extra output.
+ *
+ * The function prints a message and returns early when a source file cannot
+ * be opened or read, when the word source contains no space-delimited word
+ * to pick from, or when a word cannot be allocated. Each early return closes
+ * the file descriptor and frees the matcher instance it holds at that point.
+ *
+ * NOTE(review): in the file-based scenarios, candidates shorter than 5
+ * characters are skipped and retried, so a source text without any such word
+ * makes the selection loop spin forever.
+ */
+static void __test_string_match()
+{
+       __start_test(__FUNCTION__);
+       unsigned int i = 0;
+       unsigned long stime;
+
+       printf("Search match: ");
+
+       int awl = 0; // summed length of the added words, averaged after the loop
+       char buf[SMATCH_BUFSIZE];
+
+#ifndef SCENARIO_4
+       // load text from file
+       int fd = open(WORD_SRCFILE, O_RDONLY);
+       if (fd < 0) {
+               printf("Could not open " WORD_SRCFILE ". It should be a text file for me to find source words.\n");
+               return;
+       }
+       ssize_t tsize = read(fd, buf, SMATCH_BUFSIZE - 1);
+       // the descriptor is no longer needed, whether or not the read worked
+       close(fd);
+       if (tsize <= 0) {
+               printf("Could not read from " WORD_SRCFILE "\n");
+               return;
+       }
+       buf[tsize] = '\0';
+
+       // count words in text
+       int nwords = 1;
+
+       for (i = 0; i < (size_t)tsize; i++) {
+               if (buf[i] == ' ') nwords++;
+       }
+
+       // word selection below scans up to a space; without one it would
+       // ask fastrand() for a value below 0 and run past the text
+       if (nwords < 2) {
+               printf("Not enough words in " WORD_SRCFILE "\n");
+               return;
+       }
+#endif
+
+       int res = 0, nres;
+       ac_instance *ac = ahocorasick_init();
+       struct timeval tv;
+
+       gettimeofday(&tv, NULL);
+       // unsigned arithmetic: wraps instead of overflowing where long is 32 bits
+       stime = (unsigned long)tv.tv_sec * 1000000UL + (unsigned long)tv.tv_usec;
+
+       for (i = 1; i <= NWORDS; i++) {
+#ifdef SCENARIO_4
+               int j;
+               char *word = malloc(8);
+               if (word != NULL) {
+                       for (j = 0; j < 7; j++) word[j] = (char)('a' + fastrand(20));
+                       word[7] = 0;
+               }
+#else
+               // get a random number between 1 and nwords
+               int start = -1, j;
+               int idxword = fastrand(nwords - 1) + 1;
+
+               // walk to the idxword-th word; start marks its first character
+               // and j ends one past the space that terminates it
+               for (j = 0; idxword > 0; j++) {
+                       if (idxword == 1 && start == -1) start = j;
+                       if (buf[j] == ' ') idxword--;
+               }
+               if (j - start < 6) {
+                       // word shorter than 5 characters: retry this slot
+                       i--;
+                       continue;
+               }
+
+               char *word = strndup(buf + start, j - start - 1);
+#endif
+               if (word == NULL) {
+                       printf("Out of memory while creating word %u\n", i);
+                       ahocorasick_free(ac);
+                       return;
+               }
+
+               awl += (int)strlen(word);
+               // the word index doubles as the opaque user data of the entry
+               nres = ahocorasick_add_string(ac, word, (void *)(uintptr_t)i);
+#ifdef SHOW_ADDED
+               printf("Added: .%s. %zu\n", word, strlen(word));
+#endif
+               res = nres;
+               free(word);
+       }
+
+       // i is NWORDS + 1 once the loop is done, so divide by the word count itself
+       awl = awl / NWORDS;
+
+       gettimeofday(&tv, NULL);
+       stime = (unsigned long)tv.tv_sec * 1000000UL + (unsigned long)tv.tv_usec - stime;
+       printf("Build tree took %.2f msec for %d words (%d unique), average word len was %d\n", stime / 1000., NWORDS, res, awl);
+       stime = (unsigned long)tv.tv_sec * 1000000UL + (unsigned long)tv.tv_usec;
+
+#ifdef SCENARIO_2
+       // load alternate text
+       fd = open(TEXT_SRCFILE, O_RDONLY);
+       if (fd < 0) {
+               printf("Could not open " TEXT_SRCFILE ". It should be a related text file for me to search for matches.\n");
+               ahocorasick_free(ac);
+               return;
+       }
+       tsize = read(fd, buf, SMATCH_BUFSIZE - 1);
+       close(fd);
+       if (tsize <= 0) {
+               printf("Could not read from " TEXT_SRCFILE "\n");
+               ahocorasick_free(ac);
+               return;
+       }
+       buf[tsize] = '\0';
+#endif
+
+#if defined SCENARIO_3 || defined SCENARIO_4
+       for (i = 0; i < 32768; i++)
+               buf[i] = (char)fastrand(100) + 30;
+       buf[32768] = 0;
+
+#endif
+       const char *text = buf;
+       ac_match *match;
+
+#ifdef DUMP_TREE
+       dump_tree(ac->root, stdout);
+#endif
+
+       ahocorasick_set_text(ac, text, strlen(text), 0);
+
+       // count matches until the matcher reports none left
+       res = 0;
+       do {
+               match = ahocorasick_find_next(ac);
+
+               if (match != NULL) {
+#ifdef SHOW_MATCHES
+                       for (i = match->position; i < match->position + match->size; i++) printf("%c", text[i]);
+                       printf("\n");
+                       printf("got match for %p at %d len %d\n", match->user_data, match->position, match->size);
+#endif
+                       res++;
+               }
+       } while (match != NULL);
+
+       gettimeofday(&tv, NULL);
+       stime = (unsigned long)tv.tv_sec * 1000000UL + (unsigned long)tv.tv_usec - stime;
+       printf("Parsing took %.2f msec\n", stime / 1000.);
+
+       printf("Got a total of %d matches.\n", res);
+
+       ahocorasick_free(ac);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Main
+//////////////////////////////////////////////////////////////////////////
+/**
+ * Entry point: print a start banner, run the Aho-Corasick string-match test,
+ * then print the success and failure counters.
+ *
+ * @return always 0
+ */
+int main()
+{
+       // opening banner, in green
+       __change_color_to_green();
+       printf("DLP Test Start\n");
+       __change_color_to_origin();
+
+       // Test String Match (ahocorasick)
+       __test_string_match();
+
+       // summary: completion and success count in green, failure count in red
+       __change_color_to_green();
+       printf("Test Complete\n"
+              "success : %d, ", success_cnt);
+       __change_color_to_red();
+       printf("fail : %d\n", fail_cnt);
+       __change_color_to_origin();
+
+       return 0;
+}