From: Xie Ligang
Date: Fri, 14 Sep 2018 07:34:24 +0000 (+0800)
Subject: Fix the wrong process of lemmatize function.
X-Git-Tag: submit/tizen/20180917.233449~2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a17304600edbf7d296ee75288ba11cbf0fa35dac;p=platform%2Fcore%2Fuifw%2Fnlp.git

Fix the wrong process of lemmatize function.

Change-Id: Ie68124a1de70301e919911ef2ce20f16c9d25a77
Signed-off-by: Xie Ligang
---

diff --git a/service/inc/service.h b/service/inc/service.h
index 8c92915..51cc5db 100755
--- a/service/inc/service.h
+++ b/service/inc/service.h
@@ -21,12 +21,13 @@ void nltk_finalize();
 PyObject* nltk_word_tokenize(char* sentence);
 PyObject* nltk_pos_tag(char* sentence);
 PyObject* nltk_ne_chunk(char* sentence);
-PyObject* nltk_lemmatize(char* sentence);
+PyObject* nltk_lemmatize(char* tag, char* token);
 PyObject* nltk_language_detect(char* sentence);
 PyObject* nltk_get_module(char* name);
 PyObject* nltk_get_function_handle(PyObject* module , char * func_name);
 PyObject* nltk_make_args_from_pyobject(PyObject* pyobj);
 PyObject* nltk_make_args_from_string(char* info);
+PyObject* nltk_make_args_from_strings(char* info, char* tag);
 PyObject* nltk_call_function_with_args(PyObject* func, PyObject* args);
 int nltk_get_size_from_list(PyObject* list);
 int nltk_get_size_from_tuple(PyObject* tuple);
diff --git a/service/src/service.c b/service/src/service.c
index 3405a90..7d5aeba 100755
--- a/service/src/service.c
+++ b/service/src/service.c
@@ -321,28 +321,107 @@ static int __message_send(rpc_port_stub_message_context_h context,
         }
         case NLTK_CMD_LEMMATIZE:
         {
-            PyObject* lm_result = NULL;
-            lm_result = nltk_lemmatize(info);
-            char *lem_buf[1] = {NULL,};
-            lem_buf[0] = (char*)malloc(BUF_LEN_128*sizeof(char));
-            if(lem_buf[0]!=NULL)
+            PyObject* pt_result = NULL;
+            PyObject* pt_elm_tuple = NULL;
+            pt_result = nltk_pos_tag(info);
+            len = nltk_get_size_from_list(pt_result);
+            char *tag[BUF_LEN_128] = {NULL,};
+            char *token[BUF_LEN_128] = {NULL,};
+            for(int i = 0 ;i < len ;i++)
             {
-                memset(lem_buf[0], 0, BUF_LEN_128);
-                tmp_str = nltk_get_string_from_element(lm_result);
-                if(tmp_str!=NULL)
+                token[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                tag[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                pt_elm_tuple = nltk_get_element_from_list_by_index(pt_result, i);
+                if(tag[i]!=NULL)
                 {
-                    strncpy(lem_buf[0], tmp_str, BUF_LEN_128-1);
-                    free(tmp_str);
+                    memset(tag[i], 0, BUF_LEN_128);
+                    tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 0));
+                    if(tmp_str!=NULL)
+                    {
+                        strncpy(tag[i], tmp_str, BUF_LEN_128-1);
+                    }
+                }
+                else
+                {
+                    PERR("malloc failed");
+                }
+                if(token[i]!=NULL)
+                {
+                    memset(token[i], 0, BUF_LEN_128);
+                    tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 1));
+                    if(tmp_str!=NULL)
+                    {
+                        if (!strncmp(tmp_str,"NN", 2)) {
+                            tmp_str = "n";
+                        }
+                        else if (!strncmp(tmp_str, "VB", 2)){
+                            tmp_str = "v";
+                        }
+                        else if (!strncmp(tmp_str, "JJ", 2)){
+                            tmp_str = "a";
+                        }
+                        else if (!strncmp(tmp_str, "R", 1)){
+                            tmp_str = "r";
+                        }
+                        else{
+                            tmp_str = "e";
+                        }
+                        strncpy(token[i], tmp_str, BUF_LEN_128-1);
+                    }
+                }
+                else
+                {
+                    PERR("malloc failed");
                 }
-                bundle_add_str(reply, "command", "lemmatize");
-                bundle_add_str_array(reply, "return_token", (const char **)lem_buf, 1);
-                free(lem_buf[0]);
             }
-            else
+
+            char *lem_buf[BUF_LEN_128] = {NULL,};
+            for(int i = 0 ;i < len ;i++)
             {
-                PERR("malloc failed");
+                PyObject* lm_result = NULL;
+                if (strcmp(token[i], "e")) {
+                    lm_result = nltk_lemmatize(token[i], tag[i]);
+                    lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                    if(lem_buf[i]!=NULL)
+                    {
+                        memset(lem_buf[i], 0, BUF_LEN_128);
+                        tmp_str = nltk_get_string_from_element(lm_result);
+                        if(tmp_str!=NULL)
+                        {
+                            strncpy(lem_buf[i], tmp_str, BUF_LEN_128-1);
+                        }
+                    }
+                    else
+                    {
+                        PERR("malloc failed");
+                    }
+                    if (lm_result != NULL)
+                        Py_DECREF(lm_result);
+                }
+                else
+                {
+                    lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+                    if(lem_buf[i]!=NULL)
+                    {
+                        strncpy(lem_buf[i], tag[i], BUF_LEN_128-1);
+                    }
+                    else
+                    {
+                        PERR("malloc failed");
+                    }
+                }
+            }
+
+            bundle_add_str(reply, "command", "lemmatize");
+            bundle_add_str_array(reply, "return_tag", lem_buf, len);
+            bundle_add_str_array(reply, "return_token", tag, len);
+            for(int j = 0 ;j < len ;j++)
+            {
+                free(tag[j]);
+                free(token[j]);
+                free(lem_buf[j]);
             }
-            Py_DECREF(lm_result);
+            free(tmp_str);
             PINFO("lemmatize process done");
             break;
         }
@@ -547,14 +626,14 @@ PyObject* nltk_ne_chunk(char* sentence)
     return result;
 }
 
-PyObject* nltk_lemmatize(char* sentence)
+PyObject* nltk_lemmatize(char* tag, char* token)
 {
     PyObject* args = NULL;
     PyObject* wn_func = NULL;
    PyObject* func = NULL;
     PyObject* wn_result = NULL;
     PyObject* result = NULL;
-    args = nltk_make_args_from_string(sentence);
+    args = nltk_make_args_from_strings(token, tag);
     wn_func = nltk_get_function_handle(globe_lemm,"WordNetLemmatizer");
     wn_result = nltk_call_function_with_args(wn_func, NULL);
     func = nltk_get_function_handle(wn_result, "lemmatize");
@@ -696,6 +775,17 @@ PyObject* nltk_make_args_from_pyobject(PyObject* pyobj)
     return pArgs;
 }
 
+PyObject* nltk_make_args_from_strings(char* info, char* tag)
+{
+    PRET_VM(!info, NULL, "Input parameter [info] is NULL!");
+    PyObject *pArgs;
+    //create args tuple struct to fill the arg one by one ,here , only create one string with 1
+    pArgs = PyTuple_New(2);
+    PyTuple_SetItem(pArgs, 0, PyString_FromString(info));
+    PyTuple_SetItem(pArgs, 1, PyString_FromString(tag));
+    return pArgs;
+}
+
 PyObject* nltk_call_function_with_args(PyObject* func, PyObject* args)
 {
     PRET_VM(!func, NULL, "Input parameter [func] is NULL!");
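
With this change the lemmatize command no longer passes the whole sentence to nltk_lemmatize(). The handler first runs nltk_pos_tag() on the input, walks the resulting (word, tag) tuples, maps each Penn Treebank tag to a WordNet part-of-speech letter (NN* to "n", VB* to "v", JJ* to "a", R* to "r", anything else to "e", for which the word is passed through unchanged), and then calls the new two-argument nltk_lemmatize(). Note that in the handler the tag[] array ends up holding the words and token[] the pos letters, so the tuple built by nltk_make_args_from_strings() is (word, pos), which is the order WordNetLemmatizer.lemmatize() expects.

Below is a minimal standalone sketch, not part of the commit, of that call sequence written directly against the Python 2 C API (PyString_*, as in service.c). The module path "nltk.stem" and the file name lemmatize_demo.c are assumptions made for the example; the service itself resolves WordNetLemmatizer through its globe_lemm module handle, and nltk with the wordnet corpus must be installed.

/*
 * lemmatize_demo.c: standalone sketch of the call sequence the patched
 * nltk_lemmatize() performs, i.e. WordNetLemmatizer().lemmatize(token, pos).
 * Error handling is minimal; this is an illustration, not service code.
 */
#include <Python.h>
#include <stdio.h>

int main(void)
{
    Py_Initialize();

    /* assumption: WordNetLemmatizer is reachable via the nltk.stem module */
    PyObject *module = PyImport_ImportModule("nltk.stem");
    if (module == NULL) {
        PyErr_Print();
        Py_Finalize();
        return 1;
    }

    /* create a WordNetLemmatizer() instance, then take its lemmatize method */
    PyObject *wn_class = PyObject_GetAttrString(module, "WordNetLemmatizer");
    PyObject *wn_obj = PyObject_CallObject(wn_class, NULL);
    PyObject *func = PyObject_GetAttrString(wn_obj, "lemmatize");

    /* two-string argument tuple, as nltk_make_args_from_strings() builds:
     * item 0 is the word, item 1 is the WordNet pos letter */
    PyObject *args = PyTuple_New(2);
    PyTuple_SetItem(args, 0, PyString_FromString("running"));
    PyTuple_SetItem(args, 1, PyString_FromString("v"));

    PyObject *result = PyObject_CallObject(func, args);
    if (result != NULL)
        printf("lemma: %s\n", PyString_AsString(result)); /* prints "run" */
    else
        PyErr_Print();

    Py_XDECREF(result);
    Py_DECREF(args);
    Py_DECREF(func);
    Py_DECREF(wn_obj);
    Py_DECREF(wn_class);
    Py_DECREF(module);
    Py_Finalize();
    return 0;
}

The sketch can be built with, e.g., gcc lemmatize_demo.c $(python2.7-config --cflags --ldflags); it prints "lemma: run", which mirrors what the patched service now returns for a verb token instead of the unlemmatized word.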