From b00cf7fffdb7f022ae721faa8e0eb10c4bde284c Mon Sep 17 00:00:00 2001 From: Bernhard Urban Date: Thu, 9 May 2019 17:31:24 +0200 Subject: [PATCH] [metadata] use handle stack in mono_runtime_object_init_handle (mono/mono#14399) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fixes this crash on watchOS with llvmonly: ``` (lldb) bt 35 * thread %1, name = 'tid_303', queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x4d55545c) * frame %0: 0x01ab81de mscorlibtests`sgen_vtable_get_descriptor(vtable=0x4d555458) at sgen-client-mono.h:29:33 frame %1: 0x01a9e278 mscorlibtests`major_copy_or_mark_object_canonical at sgen-marksweep-drain-gray-stack.h:158:10 frame %2: 0x01a9debc mscorlibtests`major_copy_or_mark_object_canonical(ptr=0x182f260c, queue=0x01dd4894) at sgen-marksweep.c:1363 frame %3: 0x019d31f2 mscorlibtests`mono_handle_stack_scan(stack=0x17e200b0, func=(mscorlibtests`major_copy_or_mark_object_canonical + 1 at sgen-marksweep.c:1362), gc_data=0x01dd4894, precise=1, check=1) at handle.c:340:5 frame %4: 0x01a45680 mscorlibtests`sgen_client_scan_thread_data(start_nursery=0x00000000, end_nursery=0xffffffff, precise=1, ctx=ScanCopyContext @ 0x01dd4580) at sgen-mono.c:2254:5 frame %5: 0x01a93e80 mscorlibtests`job_scan_thread_data(worker_data_untyped=0x00000000, job=0x0511c004) at sgen-gc.c:1416:2 frame %6: 0x01adef0a mscorlibtests`sgen_workers_enqueue_job(generation=1, job=0x0511c004, enqueue=0) at sgen-workers.c:184:3 frame %7: 0x01a9334a mscorlibtests`enqueue_scan_from_roots_jobs(gc_thread_gray_queue=0x01dd4894, heap_start=0x00000000, heap_end="", ops=0x01c6aaac, enqueue=0) at sgen-gc.c:1661:2 frame %8: 0x01a94d16 mscorlibtests`major_copy_or_mark_from_roots(gc_thread_gray_queue=0x01dd4894, old_next_pin_slot=0x01dd4864, mode=COPY_OR_MARK_FROM_ROOTS_SERIAL, object_ops_nopar=0x01c6aaac, object_ops_par=0x00000000) at sgen-gc.c:2070:2 frame %9: 0x01a95658 
mscorlibtests`major_start_collection(gc_thread_gray_queue=0x01dd4894, reason="LOS overflow", concurrent=0, old_next_pin_slot=0x01dd4864) at sgen-gc.c:2189:2 frame %10: 0x01a92ccc mscorlibtests`major_do_collection(reason="LOS overflow", is_overflow=0, forced=1) at sgen-gc.c:2362:2 frame %11: 0x01a8e9ce mscorlibtests`sgen_perform_collection_inner(requested_size=21616, generation_to_collect=1, reason="LOS overflow", forced_serial=1, stw=1) at sgen-gc.c:2563:14 frame %12: 0x01a8e76c mscorlibtests`sgen_perform_collection(requested_size=21616, generation_to_collect=1, reason="LOS overflow", forced_serial=1, stw=1) at sgen-gc.c:2640:2 frame %13: 0x01a8e73e mscorlibtests`sgen_ensure_free_space(size=21616, generation=1) at sgen-gc.c:2514:2 frame %14: 0x01a99184 mscorlibtests`sgen_los_alloc_large_inner(vtable=0x185749d0, size=21616) at sgen-los.c:380:2 frame %15: 0x01a81c0a mscorlibtests`sgen_alloc_obj_nolock(vtable=0x185749d0, size=21616) at sgen-alloc.c:172:16 frame %16: 0x01a44940 mscorlibtests`mono_gc_alloc_vector(vtable=0x185749d0, size=21616, max_length=900) at sgen-mono.c:1317:20 frame %17: 0x00a5b408 mscorlibtests`aot_wrapper_icall_mono_gc_alloc_vector + 102 frame %18: 0x00a56818 mscorlibtests`mscorlib_wrapper_alloc_object_AllocVector_intptr_intptr + 324 frame %19: 0x01092b26 mscorlibtests`mscorlibtests1_MonoTests_System_DecimalTest2__ctor + 7286 frame %20: 0x00a78912 mscorlibtests`aot_wrapper_gsharedvt_out_sig_pinvoke_void_this_ + 28 frame %21: 0x00a4a4d2 mscorlibtests`mscorlib_wrapper_runtime_invoke_object_runtime_invoke_sig_void_intptr_intptr_object_intptr_intptr_intptr + 312 ``` The hint was that the crash happened in `mono_handle_stack_scan ()`, so I inspected the handle stack there. After enabling `MONO_HANDLE_TRACK_OWNER` and `MONO_HANDLE_TRACK_SP` in `handle.h`, I got this: ``` warning: Handle 0x17886a0c (object = 0x16e73fd0) (allocated from "../../../../../mono/metadata/object.c:3182") is leaking. 
frame %1: 0x019833ba mscorlibtests`mono_handle_new(obj=0x01f194a0, info=0x17882600, owner="../../../../../mono/metadata/gc.c:1337") at handle.c:183:2 frame %2: 0x019823c0 mscorlibtests`mono_gc_alloc_handle_string(vtable=0x17888760, size=24, len=5) at gc.c:1337:9 frame %3: 0x019de896 mscorlibtests`mono_string_new_size_handle(domain=0x16e73fd0, len=5, error=0x01dc804c) at object.c:6643:6 frame %4: 0x019d6fc4 mscorlibtests`mono_string_new_size_checked(domain=0x16e73fd0, length=5, error=0x01dc804c) at object.c:6655:2 frame %5: 0x019de64e mscorlibtests`mono_string_new_utf16_checked(domain=0x16e73fd0, text=0x1842ab10, len=5, error=0x01dc804c) at object.c:6536:6 frame %6: 0x019debc2 mscorlibtests`mono_string_new_checked(domain=0x16e73fd0, text="ar-SA", error=0x01dc804c) at object.c:6778:7 frame %7: 0x019b06a8 mscorlibtests`construct_culture(this_obj=0x01f19428, ci=0x01b2d878, error=0x01dc804c) at locales.c:357:2 frame %8: 0x019b0e3a mscorlibtests`ves_icall_System_Globalization_CultureInfo_internal_get_cultures(neutral='\0', specific='\x01', installed='\0') at locales.c:714:9 frame %9: 0x007061f0 mscorlibtests`aot_wrapper___System_dot_Globalization_System_dot_Globalization_dot_CultureInfo__internal_get_cultures_pinvoke_cl2a_System_2eGlobalization_2eCultureInfo_5b_5d__cl4_bool_cl4_bool_cl4_bool_cl2a_System_2eGlobalization_2eCultureInfo_5b_5d__cl4_bool_cl4_bool_cl4_bool_ + 100 frame %10: 0x00704fb2 mscorlibtests`mscorlib_System_Globalization_CultureInfo_GetCultures_System_Globalization_CultureTypes + 192 frame %11: 0x0100f422 mscorlibtests`mscorlibtests1_MonoTests_System_DateTimeTest_Parse_Bug53023b + 192 frame %12: 0x00a28702 mscorlibtests`aot_wrapper_gsharedvt_out_sig_pinvoke_void_this_ + 28 frame %13: 0x009fa2c2 mscorlibtests`mscorlib_wrapper_runtime_invoke_object_runtime_invoke_sig_void_intptr_intptr_object_intptr_intptr_intptr + 312 frame %14: 0x019278f2 mscorlibtests`mono_llvmonly_runtime_invoke(method=0x17953820, info=0x1842a9a0, obj=0x053b7c40, params=0x00000000, 
exc=0x01dc8550, error=0x01dc88a4) at mini-runtime.c:3000:2 ``` @lambdageek suggested this fix after I showed him this trace. As far as I understand, we do _not_ know the root cause yet. The handle leak happens in this trace: ``` (lldb) mbt * thread %1 * frame %0: 0x0176192c mscorlibtests`do_debug_me_please_kkthx at object.c:128:2 frame %1: 0x01761a62 mscorlibtests`mono_runtime_object_init_handle(this_obj=MonoObjectHandle @ 0x01b53fbc, error=0x01b5404c) at object.c:146:3 frame %2: 0x0176191e mscorlibtests`mono_runtime_object_init_checked(this_obj_raw=0x01fde880, error=0x01b5404c) at object.c:176:2 frame %3: 0x0173ce32 mscorlibtests`ves_icall_System_Globalization_CultureInfo_internal_get_cultures(neutral='\0', specific='\x01', installed='\0') at locales.c:712:4 frame %4: 0x004931d8 mscorlibtests`aot_wrapper___System_dot_Globalization_System_dot_Globalization_dot_CultureInfo__internal_get_cultures_pinvoke_cl2a_System_2eGlobalization_2eCultureInfo_5b_5d__cl4_bool_cl4_bool_cl4_bool_cl2a_System_2eGlobalization_2eCultureInfo_5b_5d__cl4_bool_cl4_bool_cl4_bool_ + 100 frame %5: 0x00491f9a mscorlibtests`mscorlib_System_Globalization_CultureInfo_GetCultures_System_Globalization_CultureTypes + 192 frame %6: 0x00d9b8f4 mscorlibtests`mscorlibtests1_MonoTests_System_DateTimeTest_Parse_Bug53023b + 192 frame %7: 0x007b56ea mscorlibtests`aot_wrapper_gsharedvt_out_sig_pinvoke_void_this_ + 28 ``` `mono_runtime_object_init_checked ()` should already take care of the handle stack 😕 Commit migrated from https://github.com/mono/mono/commit/a093cbddbab1fc9dd6b1fde89e623ad8384e6392 --- src/mono/mono/metadata/object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c index a605107..48fd1f5 100644 --- a/src/mono/mono/metadata/object.c +++ b/src/mono/mono/metadata/object.c @@ -127,6 +127,7 @@ mono_runtime_object_init_handle (MonoObjectHandle this_obj, MonoError *error) { MONO_REQ_GC_UNSAFE_MODE; + 
HANDLE_FUNCTION_ENTER (); error_init (error); MonoClass * const klass = MONO_HANDLE_GETVAL (this_obj, vtable)->klass; @@ -143,7 +144,7 @@ mono_runtime_object_init_handle (MonoObjectHandle this_obj, MonoError *error) mono_runtime_invoke_handle_void (method, this_obj, NULL, error); } - return is_ok (error); + HANDLE_FUNCTION_RETURN_VAL (is_ok (error)); } /** -- 2.7.4