From: Jihoon Kim Date: Tue, 20 Oct 2020 12:33:29 +0000 (+0900) Subject: Add NLP unittests X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Ftizen_5.5;p=platform%2Fcore%2Fuifw%2Fnlp.git Add NLP unittests root@localhost:~# /usr/bin/nlp_unittests [==========] Running 4 tests from 1 test case. [----------] Global test environment set-up. [----------] 4 tests from NlpServiceTest [ RUN ] NlpServiceTest.utc_nlp_service_tokenize_p token: 'I' token: 'am' token: 'a' token: 'boy' word_tokenize process done [ OK ] NlpServiceTest.utc_nlp_service_tokenize_p (5015 ms) [ RUN ] NlpServiceTest.utc_nlp_service_language_detect_p Detected language: en [ OK ] NlpServiceTest.utc_nlp_service_language_detect_p (79 ms) [ RUN ] NlpServiceTest.utc_nlp_service_pos_tag_p tag : NNP tag : NNP [ OK ] NlpServiceTest.utc_nlp_service_pos_tag_p (2066 ms) [ RUN ] NlpServiceTest.utc_nlp_service_ne_chunk_p tag: PRP token: We tag: VBD token: saw tag: DT token: the tag: JJ token: yellow tag: NN token: dog ne_chunk process done [ OK ] NlpServiceTest.utc_nlp_service_ne_chunk_p (2138 ms) [----------] 4 tests from NlpServiceTest (9299 ms total) [----------] Global test environment tear-down [==========] 4 tests from 1 test case ran. (9300 ms total) [ PASSED ] 4 tests. 
Change-Id: I8f82875e4ea0c9351a8335976af4967a7daf1673 Signed-off-by: Jihoon Kim --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 01aa6e0..7f49e8d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,14 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6) ADD_SUBDIRECTORY(service) -ADD_SUBDIRECTORY(nlp_resource_data) \ No newline at end of file +ADD_SUBDIRECTORY(nlp_resource_data) + +## Test +IF(NOT DEFINED MINIMUM_BUILD) +ENABLE_TESTING() +SET(UTC_NLP nlp-unittests) +ADD_TEST(NAME ${UTC_NLP} COMMAND ${UTC_NLP} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tests) + +ADD_SUBDIRECTORY(tests) +ENDIF(NOT DEFINED MINIMUM_BUILD) diff --git a/packaging/nlp.spec b/packaging/nlp.spec index c933cd5..1009a42 100755 --- a/packaging/nlp.spec +++ b/packaging/nlp.spec @@ -15,6 +15,7 @@ BuildRequires: pkgconfig(python) BuildRequires: pkgconfig(glib-2.0) BuildRequires: pkgconfig(rpc-port) BuildRequires: pkgconfig(ecore) +BuildRequires: pkgconfig(gmock) Requires: python-numpy Requires: python-xml Requires: python-six @@ -33,6 +34,14 @@ Group: Graphics & UI Framework/Input %description data-en NLP Data files for English +%package unittests +Summary: NLP tests +Group: Development/Libraries +Requires: %{name} = %{version}-%{release} + +%description unittests +GTest for NLP + %prep %setup -q @@ -75,3 +84,6 @@ tpk-backend -d %{name} --preload --force-remove %defattr(-,root,root,-) %license LICENSE /usr/local/lib/nltk_data/* + +%files unittests +%{_bindir}/* diff --git a/service/CMakeLists.txt b/service/CMakeLists.txt index 0aae714..8a473b7 100755 --- a/service/CMakeLists.txt +++ b/service/CMakeLists.txt @@ -7,6 +7,7 @@ INCLUDE(FindPkgConfig) SET(SERVICE_SRC src/service.c src/message.c + src/main.c ) #include diff --git a/service/inc/service.h b/service/inc/service.h index 5ede478..e30e035 100755 --- a/service/inc/service.h +++ b/service/inc/service.h @@ -2,10 +2,16 @@ #define __service_H__ #include +#include +#include #define BUF_LEN_128 128 #define BUF_LEN_256 256 +#ifdef __cplusplus +extern "C" 
{ +#endif + typedef enum { NLTK_CMD_NONE = -1, NLTK_CMD_TOKENIZE, @@ -13,18 +19,20 @@ typedef enum { NLTK_CMD_NECHUNK, NLTK_CMD_LANGDETECT, NLTK_CMD_UNKNOWN -}NLTK_CMDS; +} NLTK_CMDS; void nltk_initialize(); void nltk_finalize(); -PyObject* nltk_word_tokenize(char* sentence); -PyObject* nltk_pos_tag(char* sentence); -PyObject* nltk_ne_chunk(char* sentence); -PyObject* nltk_language_detect(char* sentence); -PyObject* nltk_get_module(char* name); +void nltk_load(); + +PyObject* nltk_word_tokenize(const char* sentence); +PyObject* nltk_pos_tag(const char* sentence); +PyObject* nltk_ne_chunk(const char* sentence); +PyObject* nltk_language_detect(const char* sentence); +PyObject* nltk_get_module(const char* name); PyObject* nltk_get_function_handle(PyObject* module , char * func_name); PyObject* nltk_make_args_from_pyobject(PyObject* pyobj); -PyObject* nltk_make_args_from_string(char* info); +PyObject* nltk_make_args_from_string(const char* info); PyObject* nltk_make_args_from_strings(char* info, char* tag); PyObject* nltk_call_function_with_args(PyObject* func, PyObject* args); int nltk_get_size_from_list(PyObject* list); @@ -32,8 +40,18 @@ int nltk_get_size_from_tuple(PyObject* tuple); PyObject* nltk_get_element_from_tuple_by_index(PyObject* tuple, int index); PyObject* nltk_get_element_from_list_by_index(PyObject* list, int index); char* nltk_get_string_from_element(PyObject* elm); -PyObject* globe_nltk; -PyObject* globe_lemm; -PyObject* globe_lang; -int globe_id = 0; + +bool service_app_create(void *data); +void service_app_terminate(void *data); +void service_app_control(app_control_h app_control, void *data); + +void service_app_lang_changed(app_event_info_h event_info, void *user_data); +void service_app_region_changed(app_event_info_h event_info, void *user_data); +void service_app_low_battery(app_event_info_h event_info, void *user_data); +void service_app_low_memory(app_event_info_h event_info, void *user_data); + +#ifdef __cplusplus +} +#endif + #endif /* 
__service_H__ */ diff --git a/service/src/main.c b/service/src/main.c new file mode 100755 index 0000000..9094d87 --- /dev/null +++ b/service/src/main.c @@ -0,0 +1,21 @@ +#include + +#undef _POSIX_C_SOURCE +#include "service.h" + +int main(int argc, char* argv[]) +{ + char ad[50] = {0,}; + service_app_lifecycle_callback_s event_callback; + app_event_handler_h handlers[5] = {NULL, }; + + event_callback.create = service_app_create; + event_callback.terminate = service_app_terminate; + event_callback.app_control = service_app_control; + service_app_add_event_handler(&handlers[APP_EVENT_LOW_BATTERY], APP_EVENT_LOW_BATTERY, service_app_low_battery, &ad); + service_app_add_event_handler(&handlers[APP_EVENT_LOW_MEMORY], APP_EVENT_LOW_MEMORY, service_app_low_memory, &ad); + service_app_add_event_handler(&handlers[APP_EVENT_LANGUAGE_CHANGED], APP_EVENT_LANGUAGE_CHANGED, service_app_lang_changed, &ad); + service_app_add_event_handler(&handlers[APP_EVENT_REGION_FORMAT_CHANGED], APP_EVENT_REGION_FORMAT_CHANGED, service_app_region_changed, &ad); + + return service_app_main(argc, argv, &event_callback, ad); +} diff --git a/service/src/service.c b/service/src/service.c index 8e05ca8..871ca17 100755 --- a/service/src/service.c +++ b/service/src/service.c @@ -3,10 +3,16 @@ #include #include #include + +#undef _POSIX_C_SOURCE #include "service.h" #include "nlp_log.h" #include "message.h" +static PyObject* globe_nltk = NULL; +static PyObject* globe_lemm = NULL; +static PyObject* globe_lang = NULL; + int sec = 180.0; static Ecore_Timer *service_close_timer = NULL; @@ -180,11 +186,11 @@ static int __message_send(rpc_port_stub_message_context_h context, for(int i = 0 ;i < len ;i++) { tokens[i] = (char*)malloc(BUF_LEN_128*sizeof(char)); - if(tokens[i]!=NULL) + if (tokens[i] != NULL) { memset(tokens[i], 0, BUF_LEN_128); tmp_str = nltk_get_string_from_element(nltk_get_element_from_list_by_index(wt_lists, i)); - if(tmp_str!=NULL) + if (tmp_str != NULL) { strncpy(tokens[i], tmp_str, 
BUF_LEN_128-1); free(tmp_str); @@ -218,11 +224,11 @@ static int __message_send(rpc_port_stub_message_context_h context, token[i] = (char*)malloc(BUF_LEN_128*sizeof(char)); tag[i] = (char*)malloc(BUF_LEN_128*sizeof(char)); pt_elm_tuple = nltk_get_element_from_list_by_index(pt_result, i); - if(tag[i]!=NULL) + if (tag[i] != NULL) { memset(tag[i], 0, BUF_LEN_128); tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 1)); - if(tmp_str!=NULL) + if (tmp_str != NULL) { strncpy(tag[i], tmp_str, BUF_LEN_128-1); free(tmp_str); @@ -232,11 +238,11 @@ static int __message_send(rpc_port_stub_message_context_h context, { PERR("malloc failed"); } - if(token[i]!=NULL) + if (token[i] != NULL) { memset(token[i], 0, BUF_LEN_128); tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 0)); - if(tmp_str!=NULL) + if (tmp_str != NULL) { strncpy(token[i], tmp_str, BUF_LEN_128-1); free(tmp_str); @@ -273,11 +279,11 @@ static int __message_send(rpc_port_stub_message_context_h context, s_token[i] = (char*)malloc(BUF_LEN_128*sizeof(char)); s_tag[i] = (char*)malloc(BUF_LEN_128*sizeof(char)); ne_elm_tuple = nltk_get_element_from_list_by_index(ne_result, i); - if(s_tag[i]!=NULL) + if (s_tag[i] != NULL) { memset(s_tag[i], 0, BUF_LEN_128); tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(ne_elm_tuple, 1)); - if(tmp_str!=NULL) + if (tmp_str != NULL) { strncpy(s_tag[i], tmp_str, BUF_LEN_128-1); free(tmp_str); @@ -287,11 +293,11 @@ static int __message_send(rpc_port_stub_message_context_h context, { PERR("malloc failed"); } - if(s_token[i]!=NULL) + if (s_token[i] != NULL) { memset(s_token[i], 0, BUF_LEN_128); tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(ne_elm_tuple, 0)); - if(tmp_str!=NULL) + if (tmp_str != NULL) { strncpy(s_token[i], tmp_str, BUF_LEN_128-1); free(tmp_str); @@ -321,11 +327,11 @@ static int __message_send(rpc_port_stub_message_context_h context, ld_result = 
nltk_language_detect(info); char *lang_buf[1] = {NULL,}; lang_buf[0] = (char*)malloc(BUF_LEN_128*sizeof(char)); - if(lang_buf[0]!=NULL) + if (lang_buf[0] != NULL) { memset(lang_buf[0], 0, BUF_LEN_128); tmp_str = nltk_get_string_from_element(ld_result); - if(tmp_str!=NULL) + if (tmp_str != NULL) { strncpy(lang_buf[0], tmp_str, BUF_LEN_128-1); free(tmp_str); @@ -367,16 +373,32 @@ static void __message_unregister(rpc_port_stub_message_context_h context, __destroy_client(client); } - -bool service_app_create(void *data) +void nltk_load() { - nltk_initialize(); globe_nltk = nltk_get_module("nltk"); - PINFO("nltk library loaded success: "); + if (globe_nltk) + PINFO("nltk library loaded success: "); + else + PERR("Failed to get nltk module"); + globe_lemm = nltk_get_module("nltk.stem"); - PINFO("nltk stem library loaded success: "); + if (globe_lemm) + PINFO("nltk stem library loaded success: "); + else + PERR("Failed to get nltk.stem module"); + globe_lang = nltk_get_module("langdetect"); - PINFO("langdetect library loaded success: "); + if (globe_lang) + PINFO("langdetect library loaded success: "); + else + PERR("Failed to get langdetect module"); +} + +bool service_app_create(void *data) +{ + nltk_initialize(); + nltk_load(); + start_timer(); int ret; @@ -411,48 +433,31 @@ void service_app_control(app_control_h app_control, void *data) return; } -static void +void service_app_lang_changed(app_event_info_h event_info, void *user_data) { /*APP_EVENT_LANGUAGE_CHANGED*/ return; } -static void +void service_app_region_changed(app_event_info_h event_info, void *user_data) { /*APP_EVENT_REGION_FORMAT_CHANGED*/ } -static void +void service_app_low_battery(app_event_info_h event_info, void *user_data) { /*APP_EVENT_LOW_BATTERY*/ } -static void +void service_app_low_memory(app_event_info_h event_info, void *user_data) { /*APP_EVENT_LOW_MEMORY*/ } -int main(int argc, char* argv[]) -{ - char ad[50] = {0,}; - service_app_lifecycle_callback_s event_callback; - app_event_handler_h 
handlers[5] = {NULL, }; - - event_callback.create = service_app_create; - event_callback.terminate = service_app_terminate; - event_callback.app_control = service_app_control; - service_app_add_event_handler(&handlers[APP_EVENT_LOW_BATTERY], APP_EVENT_LOW_BATTERY, service_app_low_battery, &ad); - service_app_add_event_handler(&handlers[APP_EVENT_LOW_MEMORY], APP_EVENT_LOW_MEMORY, service_app_low_memory, &ad); - service_app_add_event_handler(&handlers[APP_EVENT_LANGUAGE_CHANGED], APP_EVENT_LANGUAGE_CHANGED, service_app_lang_changed, &ad); - service_app_add_event_handler(&handlers[APP_EVENT_REGION_FORMAT_CHANGED], APP_EVENT_REGION_FORMAT_CHANGED, service_app_region_changed, &ad); - - return service_app_main(argc, argv, &event_callback, ad); -} - void nltk_initialize() { PENTER(); @@ -465,7 +470,7 @@ void nltk_finalize() Py_Finalize(); } -PyObject* nltk_word_tokenize(char* sentence) +PyObject* nltk_word_tokenize(const char* sentence) { PyObject* args = NULL; PyObject* func = NULL; @@ -474,11 +479,12 @@ PyObject* nltk_word_tokenize(char* sentence) func = nltk_get_function_handle(globe_nltk, "word_tokenize"); lists = nltk_call_function_with_args(func, args); Py_DECREF(args); - Py_DECREF(func); + if (func) + Py_DECREF(func); return lists; } -PyObject* nltk_pos_tag(char* sentence) +PyObject* nltk_pos_tag(const char* sentence) { PyObject* args = NULL; PyObject* func = NULL; @@ -488,13 +494,20 @@ PyObject* nltk_pos_tag(char* sentence) func = nltk_get_function_handle(globe_nltk, "pos_tag"); args = nltk_make_args_from_pyobject(wt_result); result = nltk_call_function_with_args(func, args); - Py_DECREF(args); - Py_DECREF(func); - Py_DECREF(wt_result); + + if (args) + Py_DECREF(args); + + if (func) + Py_DECREF(func); + + if (wt_result) + Py_DECREF(wt_result); + return result; } -PyObject* nltk_ne_chunk(char* sentence) +PyObject* nltk_ne_chunk(const char* sentence) { PyObject* args = NULL; PyObject* pt_result = NULL; @@ -508,15 +521,26 @@ PyObject* nltk_ne_chunk(char* sentence) 
tmp_result = nltk_call_function_with_args(func, args); lv_func = nltk_get_function_handle(tmp_result, "leaves"); result = nltk_call_function_with_args(lv_func, NULL); - Py_DECREF(args); - Py_DECREF(func); - Py_DECREF(pt_result); - Py_DECREF(tmp_result); - Py_DECREF(lv_func); + + if (args) + Py_DECREF(args); + + if (func) + Py_DECREF(func); + + if (pt_result) + Py_DECREF(pt_result); + + if (tmp_result) + Py_DECREF(tmp_result); + + if (lv_func) + Py_DECREF(lv_func); + return result; } -PyObject* nltk_language_detect(char* sentence) +PyObject* nltk_language_detect(const char* sentence) { PyObject* args = NULL; PyObject* func = NULL; @@ -525,11 +549,13 @@ PyObject* nltk_language_detect(char* sentence) func = nltk_get_function_handle(globe_lang,"detect"); result = nltk_call_function_with_args(func, args); Py_DECREF(args); - Py_DECREF(func); + if (func) + Py_DECREF(func); + return result; } -PyObject* nltk_get_module(char* name) +PyObject* nltk_get_module(const char* name) { PRET_VM(!name, NULL, "Input parameter [name] is NULL!"); return PyImport_ImportModuleNoBlock(name); @@ -608,7 +634,7 @@ char* nltk_get_string_from_element(PyObject* elm) { PRET_VM(!elm, NULL, "Input parameter [elm] is NULL!"); char* ch = (char*) malloc(BUF_LEN_256); - if(ch == NULL) + if (ch == NULL) { PERR("malloc failed"); return ch; @@ -625,7 +651,7 @@ PyObject* nltk_get_function_handle(PyObject* module, char * func_name) return PyObject_GetAttrString(module, func_name); } -PyObject* nltk_make_args_from_string(char* info) +PyObject* nltk_make_args_from_string(const char* info) { PRET_VM(!info, NULL, "Input parameter [info] is NULL!"); PyObject *pArgs; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..b6a0f13 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,46 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +PROJECT(gtest-nlp) + +# Find Packages +INCLUDE(FindPkgConfig) +pkg_check_modules(pkgs REQUIRED + capi-base-common + capi-appfw-service-application + bundle 
+ python + glib-2.0 + rpc-port + ecore + gmock +) + +FOREACH(flag ${pkgs_CFLAGS}) + SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}") +ENDFOREACH(flag) + +SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} -fvisibility=hidden -Wall -Werror") +SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} -fPIE") +SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} -Werror") +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}") + +SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} -std=c++11") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS}") + +SET(SOURCES "") + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../include ${CMAKE_CURRENT_SOURCE_DIR}/../service/inc) + +AUX_SOURCE_DIRECTORY(src SOURCES) + +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/../service/src NLP_SERVICE_SOURCES) +list(REMOVE_ITEM NLP_SERVICE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../service/src/main.c") + +ADD_DEFINITIONS("-DFULLVER=\"${FULLVER}\"") + +ADD_EXECUTABLE(${UTC_NLP} + ${NLP_SERVICE_SOURCES} + ${SOURCES} + ) +TARGET_LINK_LIBRARIES(${UTC_NLP} ${GTEST_LIBRARIES} ${pkgs_LDFLAGS} ${EXTRA_LDFLAGS}) + +INSTALL(TARGETS ${UTC_NLP} DESTINATION /usr/bin) diff --git a/tests/src/main.cpp b/tests/src/main.cpp new file mode 100644 index 0000000..a37d671 --- /dev/null +++ b/tests/src/main.cpp @@ -0,0 +1,7 @@ +#include +#include + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/src/nlp_service_unittests.cpp b/tests/src/nlp_service_unittests.cpp new file mode 100644 index 0000000..71377db --- /dev/null +++ b/tests/src/nlp_service_unittests.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#undef _POSIX_C_SOURCE +#undef _XOPEN_SOURCE +#include "../../service/inc/service.h" + +static int g_init = false; + +using namespace std; + +namespace { + +class NlpServiceTest : public testing::Test { + public: + virtual void SetUp() { + if (!g_init) { + nltk_initialize(); + nltk_load(); + + g_init = true; + } + } + virtual void TearDown() { + } +}; + +TEST_F(NlpServiceTest, utc_nlp_service_tokenize_p) +{ + // tokenize + PyObject* wt_lists = NULL; + char *tmp_str = NULL; + vector tokens; + wt_lists = nltk_word_tokenize("I am a boy"); + ASSERT_NE(wt_lists, nullptr); + + unsigned int len = nltk_get_size_from_list(wt_lists); + for(unsigned int i = 0; i < len; i++) + { + tmp_str = nltk_get_string_from_element(nltk_get_element_from_list_by_index(wt_lists, i)); + string result = string(tmp_str); + tokens.push_back(result); + cout << "token: '" << tmp_str << "'" << endl; + } + + Py_DECREF(wt_lists); + + ASSERT_EQ(len, 4); + + EXPECT_EQ(tokens[0] == string("I"), true); + EXPECT_EQ(tokens[1] == string("am"), true); + EXPECT_EQ(tokens[2] == string("a"), true); + EXPECT_EQ(tokens[3] == string("boy"), true); + + cout << "word_tokenize process done" << endl; +} + +TEST_F(NlpServiceTest, utc_nlp_service_language_detect_p) +{ + PyObject* ld_result = NULL; + ld_result = nltk_language_detect("War doesn't show who's right, just who's left."); + ASSERT_NE(ld_result, nullptr); + + char *tmp_str = nltk_get_string_from_element(ld_result); + string detected_language = string(tmp_str); + + 
Py_DECREF(ld_result); + + cout << "Detected language: " << detected_language.c_str() << endl; + + EXPECT_EQ(detected_language == string("en"), true); +} + +TEST_F(NlpServiceTest, utc_nlp_service_pos_tag_p) +{ + // POS tag + PyObject* pt_result = NULL; + PyObject* pt_elm_tuple = NULL; + vector tags, tokens; + char *tmp_str = NULL; + pt_result = nltk_pos_tag("Hello World"); + ASSERT_NE(pt_result, nullptr); + + unsigned int len = nltk_get_size_from_list(pt_result); + + for(unsigned int i = 0; i < len; i++) + { + pt_elm_tuple = nltk_get_element_from_list_by_index(pt_result, i); + + tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 1)); + tags.push_back(string(tmp_str)); + + cout << "tag : " << tmp_str << endl; + + tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(pt_elm_tuple, 0)); + tokens.push_back(string(tmp_str)); + + Py_DECREF(pt_elm_tuple); + } + + ASSERT_EQ(len, 2); + + EXPECT_EQ(tags[0] == string("NNP"), true); + EXPECT_EQ(tags[1] == string("NNP"), true); +} + +TEST_F(NlpServiceTest, utc_nlp_service_ne_chunk_p) +{ + // NE Chunk + PyObject* ne_result = NULL; + PyObject* ne_elm_tuple = NULL; + ne_result = nltk_ne_chunk("We saw the yellow dog"); + ASSERT_NE(ne_result, nullptr); + + unsigned int len = nltk_get_size_from_list(ne_result); + char *tmp_str; + vector tags; + vector tokens; + + for(unsigned int i = 0; i < len; i++) + { + ne_elm_tuple = nltk_get_element_from_list_by_index(ne_result, i); + + tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(ne_elm_tuple, 0)); + tokens.push_back(string(tmp_str)); + cout << "token: " << tmp_str << endl; + + tmp_str = nltk_get_string_from_element(nltk_get_element_from_tuple_by_index(ne_elm_tuple, 1)); + tags.push_back(string(tmp_str)); + cout << "tag: " << tmp_str << endl; + + Py_DECREF(ne_elm_tuple); + } + + Py_DECREF(ne_result); + + ASSERT_EQ(len, 5); + + EXPECT_EQ(tags[0] == string("PRP"), true); // We -> PRP + EXPECT_EQ(tags[1] == 
string("VBD"), true); // saw -> VBD + EXPECT_EQ(tags[2] == string("DT"), true); // the -> DT + EXPECT_EQ(tags[3] == string("JJ"), true); // yellow -> JJ + EXPECT_EQ(tags[4] == string("NN"), true); // dog -> NN + + cout << "ne_chunk process done" << endl; +} + +} // namespace