Hi Chapel team, my program frequently gets halted without giving any "Chapel-specific" error. I tried to get more information by setting GASNET_BACKTRACE, but in the backtrace I only see the name of one of my functions (the one in bold). I make sure to use sync variables for shared variables to avoid data races, and aside from the parallelism across locales I do not have any parallelism within each locale. Each locale runs some sequential steps, sends its results to locale 0 if the lock lets it, reads some results in the same way, and continues. Any help in reading these messages would be highly appreciated.
Caught a fatal signal (proc 0): SIGSEGV(11)
NOTICE: We recommend linking the debug version of GASNet to assist you in resolving this application issue.
[0] Invoking GDB for backtrace...
[0] /cvmfs/soft.computecanada.ca/gentoo/2020/usr/bin/gdb -nx -batch -x /tmp/gasnet_VNz9F9 '/project/6006050/masgari/RunningDDS/CompiledProgram4_real' 75566
[0] [New LWP 75603]
[0] [New LWP 75604]
[0] [New LWP 75605]
[0] [New LWP 75606]
[0] [New LWP 75607]
[0] [Thread debugging using libthread_db enabled]
[0] Using host libthread_db library "/cvmfs/soft.computecanada.ca/gentoo/2020/lib64/libthread_db.so.1".
[0] __lll_lock_wait (futex=futex@entry=0x7ffef56e5a60, private=0) at lowlevellock.c:52
[0] Id Target Id Frame
[0] * 1 Thread 0x7f77bdc1c8c0 (LWP 75566) "CompiledProgram" __lll_lock_wait (futex=futex@entry=0x7ffef56e5a60, private=0) at lowlevellock.c:52
[0] 2 Thread 0x7f4fd65ff700 (LWP 75603) "CompiledProgram" pthread_spin_lock () at ../sysdeps/x86_64/nptl/pthread_spin_lock.S:31
[0] 3 Thread 0x7f4fd5dfe700 (LWP 75604) "CompiledProgram" futex_abstimed_wait_cancelable (private=0, abstime=0x7f4fd5dfdc70, clockid=, expected=0, futex_word=0x7f4fd7eae1bc) at ../sysdeps/unix/sysv/linux/futex-internal.h:208
[0] 4 Thread 0x7f4fd55fd700 (LWP 75605) "CompiledProgram" futex_abstimed_wait_cancelable (private=0, abstime=0x7f4fd55fcc70, clockid=, expected=0, futex_word=0x7f4fd7eae2bc) at ../sysdeps/unix/sysv/linux/futex-internal.h:208
[0] 5 Thread 0x7f4fd4dfc700 (LWP 75606) "CompiledProgram" 0x00007f77bde177de in __GI___waitpid (pid=88648, stat_loc=stat_loc@entry=0x7f4fdd7f2b88, options=options@entry=0) at ../sysdeps/unix/sysv/linux/waitpid.c:30
[0] 6 Thread 0x7f4fcffff700 (LWP 75607) "CompiledProgram" 0x00007f77bde314d7 in sched_yield () at ../sysdeps/unix/syscall-template.S:78
[0]
[0] Thread 6 (Thread 0x7f4fcffff700 (LWP 75607)):
[0] #0 0x00007f77bde314d7 in sched_yield () at ../sysdeps/unix/syscall-template.S:78
[0] #1 0x00000000008e3185 in polling ()
[0] #2 0x00000000008e106d in comm_task_wrapper ()
[0] #3 0x00007f77be3abf27 in start_thread (arg=) at pthread_create.c:479
[0] #4 0x00007f77bde4987f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
[0]
[0] Thread 5 (Thread 0x7f4fd4dfc700 (LWP 75606)):
[0] #0 0x00007f77bde177de in __GI___waitpid (pid=88648, stat_loc=stat_loc@entry=0x7f4fdd7f2b88, options=options@entry=0) at ../sysdeps/unix/sysv/linux/waitpid.c:30
[0] #1 0x00007f77bdd95777 in do_system (line=) at ../sysdeps/posix/system.c:167
[0] #2 0x000000000091f22a in gasneti_bt_gdb ()
[0] #3 0x00000000009236e8 in gasneti_print_backtrace ()
[0] #4 0x0000000000410b09 in gasneti_defaultSignalHandler ()
[0] #5 <signal handler called>
[0] #6 __memmove_avx_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:238
[0] #7 0x000000000091dca0 in gasnetc_ofi_am_send_long ()
[0] #8 0x0000000000915595 in gasnetc_AMReplyLongM ()
[0] #9 0x00000000008e2c83 in AM_reply_put ()
[0] #10 0x000000000091bb87 in gasnetc_ofi_poll ()
[0] #11 0x0000000000910c09 in gasnetc_AMPoll ()
[0] #12 0x00000000008e35e2 in execute_on_common ()
[0] #13 0x00000000008e728d in chpl_comm_execute_on ()
[0] #14 0x00000000005143c7 in chpl_executeOn ()
[0] #15 0x0000000000471861 in release_chpl22 ()
[0] #16 0x0000000000474de6 in doClear_chpl5 ()
[0] #17 0x0000000000475b5d in deinit_chpl70 ()
[0] #18 0x000000000047669f in chpl__autoDestroy42 ()
[0] #19 0x00000000005b40a9 in on_fn_chpl236 ()
[0] #20 0x000000000059ae00 in _fireAllDestructors_chpl12 ()
[0] #21 0x00000000005a1d1f in on_fn_chpl237 ()
[0] #22 0x000000000059b337 in _fireAllDestructors_chpl2 ()
[0] #23 0x000000000062663a in FillMasterTablesSingleObjective_chpl ()
[0] #24 0x000000000067d460 in on_fn_chpl283 ()
[0] #25 0x000000000067d8ae in wrapon_fn_chpl283 ()
[0] #26 0x00000000008e2db6 in fork_wrapper ()
[0] #27 0x00000000008e15ff in chapel_wrapper ()
[0] #28 0x00000000009cb7c2 in qthread_wrapper ()
[0] #29 0x0000000000000000 in ?? ()
[0]
[0] Thread 4 (Thread 0x7f4fd55fd700 (LWP 75605)):
[0] #0 futex_abstimed_wait_cancelable (private=0, abstime=0x7f4fd55fcc70, clockid=, expected=0, futex_word=0x7f4fd7eae2bc) at ../sysdeps/unix/sysv/linux/futex-internal.h:208
[0] #1 __pthread_cond_wait_common (abstime=0x7f4fd55fcc70, clockid=, mutex=0x7f4fd7eae2c0, cond=0x7f4fd7eae290) at pthread_cond_wait.c:520
[0] #2 __pthread_cond_timedwait (cond=0x7f4fd7eae290, mutex=0x7f4fd7eae2c0, abstime=0x7f4fd55fcc70) at pthread_cond_wait.c:656
[0] #3 0x00000000009cfc06 in qt_scheduler_get_thread ()
[0] #4 0x00000000009cd367 in qthread_master ()
[0] #5 0x00007f77be3abf27 in start_thread (arg=) at pthread_create.c:479
[0] #6 0x00007f77bde4987f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
[0]
[0] Thread 3 (Thread 0x7f4fd5dfe700 (LWP 75604)):
[0] #0 futex_abstimed_wait_cancelable (private=0, abstime=0x7f4fd5dfdc70, clockid=, expected=0, futex_word=0x7f4fd7eae1bc) at ../sysdeps/unix/sysv/linux/futex-internal.h:208
[0] #1 __pthread_cond_wait_common (abstime=0x7f4fd5dfdc70, clockid=, mutex=0x7f4fd7eae1c0, cond=0x7f4fd7eae190) at pthread_cond_wait.c:520
[0] #2 __pthread_cond_timedwait (cond=0x7f4fd7eae190, mutex=0x7f4fd7eae1c0, abstime=0x7f4fd5dfdc70) at pthread_cond_wait.c:656
[0] #3 0x00000000009cfc06 in qt_scheduler_get_thread ()
[0] #4 0x00000000009cd367 in qthread_master ()
[0] #5 0x00007f77be3abf27 in start_thread (arg=) at pthread_create.c:479
[0] #6 0x00007f77bde4987f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
[0]
[0] Thread 2 (Thread 0x7f4fd65ff700 (LWP 75603)):
[0] #0 pthread_spin_lock () at ../sysdeps/x86_64/nptl/pthread_spin_lock.S:31
[0] #1 0x00007f77be42e2ae in ofi_cq_progress () from /cvmfs/soft.computecanada.ca/easybuild/software/2020/avx2/Core/libfabric/1.12.1/lib64/libfabric.so.1
[0] #2 0x00007f77be42d76e in ofi_cq_readfrom () from /cvmfs/soft.computecanada.ca/easybuild/software/2020/avx2/Core/libfabric/1.12.1/lib64/libfabric.so.1
[0] #3 0x000000000091afd1 in gasnetc_ofi_tx_poll ()
[0] #4 0x000000000091b0d8 in gasnetc_ofi_poll ()
[0] #5 0x0000000000910c09 in gasnetc_AMPoll ()
[0] #6 0x00000000008e35e2 in execute_on_common ()
[0] #7 0x00000000008e728d in chpl_comm_execute_on ()
[0] #8 0x00000000005143c7 in chpl_executeOn ()
[0] #9 0x0000000000471861 in release_chpl22 ()
[0] #10 0x0000000000474de6 in doClear_chpl5 ()
[0] #11 0x0000000000475b5d in deinit_chpl70 ()
[0] #12 0x000000000047669f in chpl__autoDestroy42 ()
[0] #13 0x00000000005b40a9 in on_fn_chpl236 ()
[0] #14 0x000000000059ae00 in _fireAllDestructors_chpl12 ()
[0] #15 0x00000000005a1d1f in on_fn_chpl237 ()
[0] #16 0x000000000059b337 in _fireAllDestructors_chpl2 ()
[0] #17 0x000000000062663a in FillMasterTablesSingleObjective_chpl ()
[0] #18 0x000000000067d460 in on_fn_chpl283 ()
[0] #19 0x000000000067d8ae in wrapon_fn_chpl283 ()
[0] #20 0x00000000008e2db6 in fork_wrapper ()
[0] #21 0x00000000008e15ff in chapel_wrapper ()
[0] #22 0x00000000009cb7c2 in qthread_wrapper ()
[0] #23 0x0000000000000000 in ?? ()
[0]
[0] Thread 1 (Thread 0x7f77bdc1c8c0 (LWP 75566)):
[0] #0 __lll_lock_wait (futex=futex@entry=0x7ffef56e5a60, private=0) at lowlevellock.c:52
[0] #1 0x00007f77be3ae883 in __GI___pthread_mutex_lock (mutex=0x7ffef56e5a60) at ../nptl/pthread_mutex_lock.c:80
[0] #2 0x00000000009c8f52 in qthread_readFF ()
[0] #3 0x00000000008e2018 in chpl_task_callMain ()
[0] #4 0x0000000000415db5 in main ()
[0] [Inferior 1 (process 75566) detached]
Spawner: read() returned 0 (EOF)
*** Caught a signal (proc 6): SIGTERM(15)
*** Caught a signal (proc 8): SIGTERM(15)
*** Caught a signal (proc 4): SIGTERM(15)
*** Caught a signal (proc 2): SIGTERM(15)
*** Caught a signal (proc 7): SIGTERM(15)
*** Caught a signal (proc 3): SIGTERM(15)
*** Caught a signal (proc 1): SIGTERM(15)
*** Caught a signal (proc 5): SIGTERM(15)