Fire! Coredumped...

Dear friends,

A core dump occurred intermittently in a multi-threaded program, as shown below:

-- lwp# 1 / thread# 1 --

fd90b258 _lock_try_adaptive (0, fd91c000, 10018, 8, 0, 0)

fe4912bc bool cmgCall::sendEventIn(unsigned long,unsigned char*,unsigned short) (1f3f010, 2, 213c9bb, 8, 10018, 6e1b50) + 3c

fe50d060 bool cmgSs7Adapter::inputNetMsg(unsigned char*) (fe50db20, 213c9a0, fe5f677c, 8, 14d53f0, fe5b85c4) + 160

fe4a6c40 void cmgCallMgr::forwardNetEvent(XEEvent*) (28b010, 162c970, 100a, 100b, fe5526eb, 0) + 300

fe1f4a7c int engIOS_EvtHdlr::handleEvent(XEEvent*) (2571f0, 162c970, 0, bd07c, 5, 2) + 55c

fe96bac4 int XEEvtDispatcher::run() (1b9010, dc8650, 1b9124, 1b9078, 1, 2b32) + 224

fe991ce8 int XEProcShell::run() (1b69a0, 800, 9a8, fea06630, 1bb610, 1) + 2a8

00026238 main(3, ffbefb54, ffbefb64, 1617d0, 0, 0) + 1d8

00025308 _start(0, 0, 0, 0, 0, 0) + 108

-- lwp# 2 / thread# 2 --

fcf9ed7c _signotifywait (fd91c000, 0, fec47a8c, 1000, fec34124, fec47fe8) + 8

fd901c2c thr_yield (0, 0, 0, 0, 0, 0) + 8c

-- lwp# 3 / thread# 3 --

fcf9f42c _lwp_sema_wait (fcb0de60, fd91c000, 0, fcb0dd98, ffffffe0, 0) + c

fd8f93a4 _swtch(fcb0dd98, fcb0dd98, fd91c000, 5, 1000, 1) + 424

fd8fd9b8 _reap_wait (fd920980, fd91c000, 0, 18, 0, 18) + 38

fd8fd710 _reaper (fd91ce00, fd922708, fd920980, fd91cdd8, 1, fe400000) + 38

fd90b01c _thread_start (0, 0, 0, 0, 0, 0) + 40

-- lwp# 26 --

fd909200 privatelwp_cond_wait (fe153d98, fd91cd6c, fd91c000, 3, fd91c000, 1) + 8

fd8fa358 _lwp_start (fe153d98, 0, 4000, ffbeea04, 0, 0) + 18

fd901c2c thr_yield (0, 0, 0, 0, 0, 0) + 8c

-- lwp# 27 / thread# 25 --

fcf9f42c _lwp_sema_wait (f5109e60, fd91c000, 0, f5109d98, fe552627, 0) + c

fd8f90d8 _swtch(f5109d98, 0, fd91c000, 5, 1000, 0) + 158

fd8f81ac cond_wait (f5109d98, 0, 0, fd91c000, 0, 0) + 11c

fd8f8070 pthread_cond_wait (1ec658, 1ec638, f5109ab0, 0, 0, 0) + 8

fe8559b8 int ACE_Condition_Thread_Mutex::wait(const ACE_Time_Value*) (1ec658, 0, 0, febe1d60, 0, 0) + 4c

feb9d0b0 int ACE_Message_Queue<ACE_MT_SYNCH>::wait_not_empty_cond(ACE_Guard<ACE_Thread_Mutex>&,ACE_Time_Value*) (0, f5109b90, 0, 0, 19fdc, 0) + 30

feb9ba0c int ACE_Message_Queue<ACE_MT_SYNCH>::dequeue_head(ACE_Message_Block*&,ACE_Time_Value*) (1ec610, f5109bfc, 0, 1, 2, 2) + ac

fe478b2c int XEQueue<cmgCall>::dequeue_head(cmgCall*&) (1ec610, f5109ccc, 518, 2, fe49b500, fe5b85c4) + c

fe4a3ff8 void*cmgCallMgr::enterCPTLoop(void*) (fe49cda0, fe5c6540, 1, c44010, fe552627, ffffffff) + d8

fe85d988 void*ACE_Thread_Adapter::invoke() (c19430, fd91d658, fe85d918, 1, fd91c000, 0) + 70

fd90b01c _thread_start (c19430, 0, 0, 0, 0, 0) + 40

-- lwp# 24 --

fcf9c920 _door_return (10, fd91d658, fd91d670, 3, fd91c000, 1) + 10

fd8fa358 _lwp_start (fddf5d98, 0, 6000, fdff5b9c, 0, 0) + 18

fd901c2c thr_yield (0, 0, 0, 0, 0, 0) + 8c

-- lwp# 28 --

fd909200 privatelwp_cond_wait (fdb23d98, fd91cd6c, fd91c000, 3, fd91c000, 1) + 8

fd8fa358 _lwp_start (fdb23d98, 0, 4000, ffbeea04, 0, 10003) + 18

fd901c2c thr_yield (0, 0, 0, 0, 0, 0) + 8c

-- lwp# 29 / thread# 26 --

fcf9f42c _lwp_sema_wait (fc603e60, fd91c000, 0, fc603d98, fe552627, 0) + c

fd8f90d8 _swtch(fc603d98, 0, fd91c000, 5, 1000, 0) + 158

fd8f81ac cond_wait (fc603d98, 0, 0, fd91c000, 0, 0) + 11c

fd8f8070 pthread_cond_wait (1ec9d8, 1ec9b8, fc603ab0, 26a14, 192400, 0) + 8

fe8559b8 int ACE_Condition_Thread_Mutex::wait(const ACE_Time_Value*) (1ec9d8, 0, 0, febe1d60, 19fdc, 0) + 4c

feb9d0b0 int ACE_Message_Queue<ACE_MT_SYNCH>::wait_not_empty_cond(ACE_Guard<ACE_Thread_Mutex>&,ACE_Time_Value*) (0, fc603b90, 0, 0, 19fdc, 0) + 30

feb9ba0c int ACE_Message_Queue<ACE_MT_SYNCH>::dequeue_head(ACE_Message_Block*&,ACE_Time_Value*) (1ec990, fc603bfc, 0, 1, 1, 2) + ac

fe478b2c int XEQueue<cmgCall>::dequeue_head(cmgCall*&) (1ec990, fc603ccc, 518, 2, fe49b500, fe5b85c4) + c

fe4a3ff8 void*cmgCallMgr::enterCPTLoop(void*) (1a0df00, fe5c6540, 1, c47010, fe552627, ffffffff) + d8

fe85d988 void*ACE_Thread_Adapter::invoke() (c19460, fd91d658, fe85d918, 1, fd91c000, 0) + 70

fd90b01c _thread_start (c19460, 0, 0, 0, 0, 0) + 40

Please help investigate it!

BR,

Eric

[4473 byte] By [wliangya] at [2007-11-26 22:04:51]
# 1

Well, which thread actually caused the core dump? Try inspecting the core file with dbx, the Sun Studio debugger. You can start dbx like this:

$ dbx - core

dbx should report the signal that caused the program to dump core, as well as the lwp/thread in which it occurred. If you don't have dbx handy, you can download it from http://developers.sun.com/sunstudio/downloads/express.jsp -- it's part of Sun Studio.

MaximKartasheva at 2007-7-10 10:48:30 > top of Java-index,Development Tools,Solaris and Linux Development Tools...
# 2

Please find it below:

t@1 (l@1) terminated by signal SEGV (access to address exceeded protections)

0xfd90b258: _lock_try_adaptive: ldstub[%o0 + 12], %o1

(dbx) where

current thread: t@1

=>[1] _lock_try_adaptive(0x10018, 0x2089c, 0x64d530, 0x85e18, 0xffbef338, 0x20bd288), at 0xfd90b258

[2] _ti_pthread_mutex_lock(0x0, 0xfd91c000, 0x10018, 0x3, 0x0, 0x0), at 0xfd8fb798

[3] cmgCall::sendEventIn(0x224e010, 0x2, 0x209144b, 0x3, 0x10018, 0x6ef910), at 0xfe4912bc

[4] cmgSs7Adapter::inputNetMsg(0xfe50db20, 0x2091430, 0xfe5f677c, 0x3, 0x1a7d5e0, 0xfe5b85c4), at 0xfe50d060

[5] cmgCallMgr::forwardNetEvent(0x28b010, 0xaf6f10, 0x100a, 0x100b, 0xfe5526eb, 0x0), at 0xfe4a6c40

[6] engIOS_EvtHdlr::handleEvent(0x25e1f0, 0xaf6f10, 0x0, 0xbd07c, 0x5, 0x2), at 0xfe1f4a7c

[7] XEEvtDispatcher::run(0x1b9010, 0xdce170, 0x1b9124, 0x1b9078, 0x1, 0x710381), at 0xfe96bac4

[8] XEProcShell::run(0x1b69a0, 0x800, 0x9a8, 0xfea06630, 0x1bb610, 0x1), at 0xfe991ce8

[9] main(0x3, 0xffbefb54, 0xffbefb64, 0x1617d0, 0x0, 0x0), at 0x26238

(dbx) regs

current thread: t@1

current frame: [1]

g0-g1 0x00000000 0x00000000 0x00000000 0x0001b000

g2-g3 0x00000000 0x00000001 0x00000000 0xfe8ead08

g4-g5 0x00000000 0x00000074 0x00000000 0xffbef32c

g6-g7 0x00000000 0x00000000 0x00000000 0x00193778

o0-o1 0x00000000 0x00010018 0x00000000 0x0002089c

o2-o3 0x00000000 0x0064d530 0x00000000 0x00085e18

o4-o5 0x00000000 0xffbef338 0x00000000 0x020bd288

o6-o7 0x00000000 0xffbef318 0x00000000 0xfd8fb798

l0-l3 0x00000000 0xfcfc35e4 0x00000009 0x00000000

l4-l7 0x0064d530 0xfe8db0e8 0x0064d530 0xfe5b85c4

i0-i3 0x00000000 0xfd91c000 0x00010018 0x00000003

i4-i7 0x00000000 0x00000000 0xffbef378 0xfe4912bc

y 0x000f7c26

psr 0xfe401001

pc 0xfd90b258:_lock_try_adaptive ldstub [%o0 + 12], %o1

npc 0xfd90b25c:_lock_try_adaptive+0x4 tst %o1

(dbx) lwps

o>l@1 signal SIGSEGV in _lock_try_adaptive()

l@2 LWP suspended in _libc_sigaction()

l@3 LWP suspended in lwp_cond_wait()

l@27 LWP suspended in lwp_cond_wait()

l@28 LWP suspended in privatelwp_cond_wait()

l@29 LWP suspended in writeValue<unsigned short>()

l@24 LWP suspended in __door_call()

(dbx)

wliangya at 2007-7-10 10:48:30 > top of Java-index,Development Tools,Solaris and Linux Development Tools...
# 3

I noticed that your program dumps core when it tries to access the unmapped address 0x10018 + 12; the same value appears in cmgCall::sendEventIn's parameter list. This does not necessarily mean that one of its parameters has this value (there's no debug info to say for sure), but it might be that you're passing a wrong or uninitialized pointer to cmgCall::sendEventIn(). You can get more information from the core if you can reproduce the same problem with a program compiled with debugging information (the -g option).

MaximKartasheva at 2007-7-10 10:48:30 > top of Java-index,Development Tools,Solaris and Linux Development Tools...