Fix the incorrect processing in the lemmatize function. 07/189207/4
author Xie Ligang <ligang0.xie@samsung.com>
Fri, 14 Sep 2018 07:34:24 +0000 (15:34 +0800)
committer Xie Ligang <ligang0.xie@samsung.com>
Mon, 17 Sep 2018 00:25:51 +0000 (08:25 +0800)
Change-Id: Ie68124a1de70301e919911ef2ce20f16c9d25a77
Signed-off-by: Xie Ligang <ligang0.xie@samsung.com>
service/inc/service.h
service/src/service.c

index 8c92915..51cc5db 100755 (executable)
@@ -21,12 +21,13 @@ void nltk_finalize();
 PyObject* nltk_word_tokenize(char* sentence);
 PyObject* nltk_pos_tag(char* sentence);
 PyObject* nltk_ne_chunk(char* sentence);
-PyObject* nltk_lemmatize(char* sentence);
+PyObject* nltk_lemmatize(char* tag, char* token); /* forwards (token, tag) to nltk_make_args_from_strings */
 PyObject* nltk_language_detect(char* sentence);
 PyObject* nltk_get_module(char* name);
 PyObject* nltk_get_function_handle(PyObject* module , char * func_name);
 PyObject* nltk_make_args_from_pyobject(PyObject* pyobj);
 PyObject* nltk_make_args_from_string(char* info);
+PyObject* nltk_make_args_from_strings(char* info, char* tag); /* builds a 2-item tuple: item 0 from info, item 1 from tag */
 PyObject* nltk_call_function_with_args(PyObject* func, PyObject* args);
 int nltk_get_size_from_list(PyObject* list);
 int nltk_get_size_from_tuple(PyObject* tuple);
index 3405a90..7d5aeba 100755 (executable)
@@ -321,28 +321,107 @@ static int __message_send(rpc_port_stub_message_context_h context,
         }
         case NLTK_CMD_LEMMATIZE:
         {
-            PyObject* lm_result = NULL;
-            lm_result = nltk_lemmatize(info);
-            char *lem_buf[1] = {NULL,};
-            lem_buf[0] = (char*)malloc(BUF_LEN_128*sizeof(char));
-            if(lem_buf[0]!=NULL)
+            PyObject* pt_result = NULL;
+            PyObject* pt_elm_tuple = NULL;
+            pt_result = nltk_pos_tag(info);
+            len = nltk_get_size_from_list(pt_result);
+            char *tag[BUF_LEN_128] = {NULL,};
+            char *token[BUF_LEN_128] = {NULL,};
+            for(int i = 0 ;i < len ;i++)
             {
-                memset(lem_buf[0], 0, BUF_LEN_128);
-                tmp_str = nltk_get_string_from_element(lm_result);
-                if(tmp_str!=NULL)
+                token[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                tag[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                pt_elm_tuple = nltk_get_element_from_list_by_index(pt_result, i);
+                if(tag[i]!=NULL)
                 {
-                    strncpy(lem_buf[0], tmp_str, BUF_LEN_128-1);
-                    free(tmp_str);
+                    memset(tag[i], 0, BUF_LEN_128);
+                    tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 0));
+                    if(tmp_str!=NULL)
+                    {
+                        strncpy(tag[i], tmp_str, BUF_LEN_128-1);
+                    }
+                }
+                else
+                {
+                    PERR("malloc failed");
+                }
+                if(token[i]!=NULL)
+                {
+                    memset(token[i], 0, BUF_LEN_128);
+                    tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 1));
+                    if(tmp_str!=NULL)
+                    {
+                        if (!strncmp(tmp_str,"NN", 2)) {
+                            tmp_str = "n";
+                        }
+                        else if (!strncmp(tmp_str, "VB", 2)){
+                            tmp_str = "v";
+                        }
+                        else if (!strncmp(tmp_str, "JJ", 2)){
+                            tmp_str = "a";
+                        }
+                        else if (!strncmp(tmp_str, "R", 1)){
+                            tmp_str = "r";
+                        }
+                        else{
+                            tmp_str = "e";
+                        }
+                        strncpy(token[i], tmp_str, BUF_LEN_128-1);
+                    }
+                }
+                else
+                {
+                    PERR("malloc failed");
                 }
-                bundle_add_str(reply, "command", "lemmatize");
-                bundle_add_str_array(reply, "return_token", (const char **)lem_buf, 1);
-                free(lem_buf[0]);
             }
-            else
+
+            char *lem_buf[BUF_LEN_128] = {NULL,};
+            for(int i = 0 ;i < len ;i++)
             {
-                PERR("malloc failed");
+                PyObject* lm_result = NULL;
+                if (strcmp(token[i], "e")) {
+                    lm_result = nltk_lemmatize(token[i], tag[i]);
+                    lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                    if(lem_buf[i]!=NULL)
+                    {
+                        memset(lem_buf[i], 0, BUF_LEN_128);
+                        tmp_str = nltk_get_string_from_element(lm_result);
+                        if(tmp_str!=NULL)
+                        {
+                            strncpy(lem_buf[i], tmp_str, BUF_LEN_128-1);
+                        }
+                    }
+                    else
+                    {
+                        PERR("malloc failed");
+                    }
+                    if (lm_result != NULL)
+                        Py_DECREF(lm_result);
+                }
+                else
+                {
+                    lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                    if(lem_buf[i]!=NULL)
+                    {
+                        strncpy(lem_buf[i], tag[i], BUF_LEN_128-1);
+                    }
+                    else
+                    {
+                        PERR("malloc failed");
+                    }
+                }
+            }
+
+            bundle_add_str(reply, "command", "lemmatize");
+            bundle_add_str_array(reply, "return_tag", lem_buf, len);
+            bundle_add_str_array(reply, "return_token", tag, len);
+            for(int j = 0 ;j < len ;j++)
+            {
+                free(tag[j]);
+                free(token[j]);
+                free(lem_buf[j]);
             }
-            Py_DECREF(lm_result);
+            free(tmp_str);
             PINFO("lemmatize process done");
             break;
         }
@@ -547,14 +626,14 @@ PyObject* nltk_ne_chunk(char* sentence)
     return result;
 }
 
-PyObject* nltk_lemmatize(char* sentence)
+PyObject* nltk_lemmatize(char* tag, char* token)
 {
     PyObject* args = NULL;
     PyObject* wn_func = NULL;
     PyObject* func = NULL;
     PyObject* wn_result = NULL;
     PyObject* result = NULL;
-    args = nltk_make_args_from_string(sentence);
+    args = nltk_make_args_from_strings(token, tag);
     wn_func = nltk_get_function_handle(globe_lemm,"WordNetLemmatizer");
     wn_result = nltk_call_function_with_args(wn_func, NULL);
     func = nltk_get_function_handle(wn_result, "lemmatize");
@@ -696,6 +775,17 @@ PyObject* nltk_make_args_from_pyobject(PyObject* pyobj)
     return pArgs;
 }
 
+PyObject* nltk_make_args_from_strings(char* info, char* tag) // Packs info and tag into a new 2-item Python tuple and returns it.
+{
+    PRET_VM(!info, NULL, "Input parameter [info] is NULL!"); // only info goes through this check; tag does not
+    PyObject *pArgs;
+    // Create a 2-slot args tuple and fill it in order: slot 0 from info, slot 1 from tag.
+    pArgs = PyTuple_New(2); // NOTE(review): the result is used below without a NULL check
+    PyTuple_SetItem(pArgs, 0, PyString_FromString(info));
+    PyTuple_SetItem(pArgs, 1, PyString_FromString(tag)); // NOTE(review): tag is passed unchecked - confirm callers never pass NULL
+    return pArgs;
+}
+
 PyObject* nltk_call_function_with_args(PyObject* func, PyObject* args)
 {
     PRET_VM(!func, NULL, "Input parameter [func] is NULL!");