{
cmd = NLTK_CMD_NECHUNK;
}
- else if (!strcmp(message, "lemmatize"))
- {
- cmd = NLTK_CMD_LEMMATIZE;
- }
else if (!strcmp(message, "langdetect"))
{
cmd = NLTK_CMD_LANGDETECT;
PINFO("ne_chunk process done");
break;
}
- case NLTK_CMD_LEMMATIZE:
- {
- PyObject* pt_result = NULL;
- PyObject* pt_elm_tuple = NULL;
- pt_result = nltk_pos_tag(info);
- len = nltk_get_size_from_list(pt_result);
- char *tag[BUF_LEN_128] = {NULL,};
- char *token[BUF_LEN_128] = {NULL,};
- for(int i = 0 ;i < len ;i++)
- {
- token[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
- tag[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
- pt_elm_tuple = nltk_get_element_from_list_by_index(pt_result, i);
- if(tag[i]!=NULL)
- {
- memset(tag[i], 0, BUF_LEN_128);
- tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 0));
- if(tmp_str!=NULL)
- {
- strncpy(tag[i], tmp_str, BUF_LEN_128-1);
- }
- }
- else
- {
- PERR("malloc failed");
- }
- if(token[i]!=NULL)
- {
- memset(token[i], 0, BUF_LEN_128);
- tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 1));
- if(tmp_str!=NULL)
- {
- if (!strncmp(tmp_str,"NN", 2)) {
- tmp_str = "n";
- }
- else if (!strncmp(tmp_str, "VB", 2)){
- tmp_str = "v";
- }
- else if (!strncmp(tmp_str, "JJ", 2)){
- tmp_str = "a";
- }
- else if (!strncmp(tmp_str, "R", 1)){
- tmp_str = "r";
- }
- else{
- tmp_str = "e";
- }
- strncpy(token[i], tmp_str, BUF_LEN_128-1);
- }
- }
- else
- {
- PERR("malloc failed");
- }
- }
-
- char *lem_buf[BUF_LEN_128] = {NULL,};
- for(int i = 0 ;i < len ;i++)
- {
- PyObject* lm_result = NULL;
- if (strcmp(token[i], "e")) {
- lm_result = nltk_lemmatize(token[i], tag[i]);
- lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
- if(lem_buf[i]!=NULL)
- {
- memset(lem_buf[i], 0, BUF_LEN_128);
- tmp_str = nltk_get_string_from_element(lm_result);
- if(tmp_str!=NULL)
- {
- strncpy(lem_buf[i], tmp_str, BUF_LEN_128-1);
- }
- }
- else
- {
- PERR("malloc failed");
- }
- if (lm_result != NULL)
- Py_DECREF(lm_result);
- }
- else
- {
- lem_buf[i] = (char*)malloc(BUF_LEN_128*sizeof(char));
- if(lem_buf[i]!=NULL)
- {
- strncpy(lem_buf[i], tag[i], BUF_LEN_128-1);
- }
- else
- {
- PERR("malloc failed");
- }
- }
- }
-
- bundle_add_str(reply, "command", "lemmatize");
- bundle_add_str_array(reply, "return_tag", (const char **)lem_buf, len);
- bundle_add_str_array(reply, "return_token", (const char **)tag, len);
- for(int j = 0 ;j < len ;j++)
- {
- free(tag[j]);
- free(token[j]);
- free(lem_buf[j]);
- }
- free(tmp_str);
- PINFO("lemmatize process done");
- break;
- }
case NLTK_CMD_LANGDETECT:
{
PyObject* ld_result = NULL;
return result;
}
-PyObject* nltk_lemmatize(char* tag, char* token)
-{
- PyObject* args = NULL;
- PyObject* wn_func = NULL;
- PyObject* func = NULL;
- PyObject* wn_result = NULL;
- PyObject* result = NULL;
- args = nltk_make_args_from_strings(token, tag);
- wn_func = nltk_get_function_handle(globe_lemm,"WordNetLemmatizer");
- wn_result = nltk_call_function_with_args(wn_func, NULL);
- func = nltk_get_function_handle(wn_result, "lemmatize");
- result = nltk_call_function_with_args(func, args);
- Py_DECREF(args);
- Py_DECREF(wn_func);
- Py_DECREF(func);
- Py_DECREF(wn_result);
- return result;
-}
-
PyObject* nltk_language_detect(char* sentence)
{
PyObject* args = NULL;