}
case NLTK_CMD_LEMMATIZE:
{
- PyObject* lm_result = NULL;
- lm_result = nltk_lemmatize(info);
- char *lem_buf[1] = {NULL,};
- lem_buf[0] = (char*)malloc(BUF_LEN_128*sizeof(char));
- if(lem_buf[0]!=NULL)
+ PyObject* pt_result = NULL;
+ PyObject* pt_elm_tuple = NULL;
+ pt_result = nltk_pos_tag(info);
+ len = nltk_get_size_from_list(pt_result);
+ char *tag[BUF_LEN_128] = {NULL,};
+ char *token[BUF_LEN_128] = {NULL,};
+ for(int i = 0 ;i < len ;i++)
{
- memset(lem_buf[0], 0, BUF_LEN_128);
- tmp_str = nltk_get_string_from_element(lm_result);
- if(tmp_str!=NULL)
+ token[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+ tag[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+ pt_elm_tuple = nltk_get_element_from_list_by_index(pt_result, i);
+ if(tag[i]!=NULL)
{
- strncpy(lem_buf[0], tmp_str, BUF_LEN_128-1);
- free(tmp_str);
+ memset(tag[i], 0, BUF_LEN_128);
+ tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 0));
+ if(tmp_str!=NULL)
+ {
+ strncpy(tag[i], tmp_str, BUF_LEN_128-1);
+ }
+ }
+ else
+ {
+ PERR("malloc failed");
+ }
+ if(token[i]!=NULL)
+ {
+ memset(token[i], 0, BUF_LEN_128);
+ tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 1));
+ if(tmp_str!=NULL)
+ {
+ if (!strncmp(tmp_str,"NN", 2)) {
+ tmp_str = "n";
+ }
+ else if (!strncmp(tmp_str, "VB", 2)){
+ tmp_str = "v";
+ }
+ else if (!strncmp(tmp_str, "JJ", 2)){
+ tmp_str = "a";
+ }
+ else if (!strncmp(tmp_str, "R", 1)){
+ tmp_str = "r";
+ }
+ else{
+ tmp_str = "e";
+ }
+ strncpy(token[i], tmp_str, BUF_LEN_128-1);
+ }
+ }
+ else
+ {
+ PERR("malloc failed");
}
- bundle_add_str(reply, "command", "lemmatize");
- bundle_add_str_array(reply, "return_token", (const char **)lem_buf, 1);
- free(lem_buf[0]);
}
- else
+
+ char *lem_buf[BUF_LEN_128] = {NULL,};
+ for(int i = 0 ;i < len ;i++)
{
- PERR("malloc failed");
+ PyObject* lm_result = NULL;
+ if (strcmp(token[i], "e")) {
+ lm_result = nltk_lemmatize(token[i], tag[i]);
+ lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+ if(lem_buf[i]!=NULL)
+ {
+ memset(lem_buf[i], 0, BUF_LEN_128);
+ tmp_str = nltk_get_string_from_element(lm_result);
+ if(tmp_str!=NULL)
+ {
+ strncpy(lem_buf[i], tmp_str, BUF_LEN_128-1);
+ }
+ }
+ else
+ {
+ PERR("malloc failed");
+ }
+ if (lm_result != NULL)
+ Py_DECREF(lm_result);
+ }
+ else
+ {
+ lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
+ if(lem_buf[i]!=NULL)
+ {
+ strncpy(lem_buf[i], tag[i], BUF_LEN_128-1);
+ }
+ else
+ {
+ PERR("malloc failed");
+ }
+ }
+ }
+
+ bundle_add_str(reply, "command", "lemmatize");
+ bundle_add_str_array(reply, "return_tag", lem_buf, len);
+ bundle_add_str_array(reply, "return_token", tag, len);
+ for(int j = 0 ;j < len ;j++)
+ {
+ free(tag[j]);
+ free(token[j]);
+ free(lem_buf[j]);
}
- Py_DECREF(lm_result);
+ free(tmp_str);
PINFO("lemmatize process done");
break;
}
return result;
}
-PyObject* nltk_lemmatize(char* sentence)
+PyObject* nltk_lemmatize(char* tag, char* token)
{
PyObject* args = NULL;
PyObject* wn_func = NULL;
PyObject* func = NULL;
PyObject* wn_result = NULL;
PyObject* result = NULL;
- args = nltk_make_args_from_string(sentence);
+ args = nltk_make_args_from_strings(token, tag);
wn_func = nltk_get_function_handle(globe_lemm,"WordNetLemmatizer");
wn_result = nltk_call_function_with_args(wn_func, NULL);
func = nltk_get_function_handle(wn_result, "lemmatize");
return pArgs;
}
+/**
+ * Build a two-element Python argument tuple from two C strings.
+ *
+ * @param info  NUL-terminated string stored at tuple index 0; must not be NULL.
+ * @param tag   NUL-terminated string stored at tuple index 1; must not be NULL.
+ * @return A new tuple (info, tag) on success, or NULL when an input is NULL
+ *         or a Python object could not be created.
+ */
+PyObject* nltk_make_args_from_strings(char* info, char* tag)
+{
+    PRET_VM(!info, NULL, "Input parameter [info] is NULL!");
+    /* PyString_FromString() dereferences its argument, so reject NULL here too. */
+    PRET_VM(!tag, NULL, "Input parameter [tag] is NULL!");
+
+    PyObject *pArgs = PyTuple_New(2);
+    if (pArgs == NULL)
+    {
+        PERR("PyTuple_New failed");
+        return NULL;
+    }
+
+    PyObject *pInfo = PyString_FromString(info);
+    PyObject *pTag = PyString_FromString(tag);
+    if (pInfo == NULL || pTag == NULL)
+    {
+        /* Nothing has been handed to the tuple yet, so release each piece here. */
+        PERR("PyString_FromString failed");
+        Py_XDECREF(pInfo);
+        Py_XDECREF(pTag);
+        Py_DECREF(pArgs);
+        return NULL;
+    }
+
+    /* PyTuple_SetItem() steals the item references; no DECREF afterwards. */
+    PyTuple_SetItem(pArgs, 0, pInfo);
+    PyTuple_SetItem(pArgs, 1, pTag);
+    return pArgs;
+}
+
PyObject* nltk_call_function_with_args(PyObject* func, PyObject* args)
{
PRET_VM(!func, NULL, "Input parameter [func] is NULL!");