{5} Accepted, Active Tickets by Owner (Full Description) (1 match)
List tickets accepted, group by ticket owner. This report demonstrates the use of full-row display.
dsteffen@… (1 match)
| Ticket | Summary | Component | Milestone | Type | Created | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Description | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #35 | invalid code generated by GCC 4.5.1 for _dispatch_queue_push_list() | defect | 09/08/11 | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
When building libdispatch r197 using GCC 4.5.1 on Solaris (X86_64) we found that sometimes one of the worker threads starts spinning in tight loop in _dispatch_queue_concurrent_drain_one() function. src/queue.c struct dispatch_object_s *
_dispatch_queue_concurrent_drain_one(dispatch_queue_t dq)
{
struct dispatch_object_s *head, *next, *const mediator = (void *)~0ul;
// The mediator value acts both as a "lock" and a signal
head = dispatch_atomic_xchg(&dq->dq_items_head, mediator);
if (slowpath(head == NULL)) {
// The first xchg on the tail will tell the enqueueing thread that it
// is safe to blindly write out to the head pointer. A cmpxchg honors
// the algorithm.
dispatch_atomic_cmpxchg(&dq->dq_items_head, mediator, NULL);
_dispatch_debug("no work on global work queue");
return NULL;
}
if (slowpath(head == mediator)) {
// This thread lost the race for ownership of the queue.
//
// The ratio of work to libdispatch overhead must be bad. This
// scenario implies that there are too many threads in the pool.
// Create a new pending thread and then exit this thread.
// The kernel will grant a new thread when the load subsides.
_dispatch_debug("Contention on queue: %p", dq);
_dispatch_queue_wakeup_global(dq);
#if DISPATCH_PERF_MON
dispatch_atomic_inc(&_dispatch_bad_ratio);
#endif
return NULL;
}
// Restore the head pointer to a sane value before returning.
// If 'next' is NULL, then this item _might_ be the last item.
next = fastpath(head->do_next);
if (slowpath(!next)) {
dq->dq_items_head = NULL;
if (dispatch_atomic_cmpxchg(&dq->dq_items_tail, head, NULL)) {
// both head and tail are NULL now
goto out;
}
// There must be a next item now. This thread won't wait long.
while (!(next = head->do_next)) { // <-------------------------- SBN: spins here forever
_dispatch_hardware_pause();
}
}
dq->dq_items_head = next;
_dispatch_queue_wakeup_global(dq);
out:
return head;
}
This happens only in optimized build and under high load: 2K-16K events dispatched using dispatch_async_f() function. As it turned out the problem was in too aggressive reordering performed by GCC optimizer in _dispatch_queue_push_list() function which puts new event into lock-free queue. This function is inlined in dispatch_async_f() C source: src/queue_internal.h __attribute__((always_inline))
static inline void
_dispatch_queue_push_list(dispatch_queue_t dq, dispatch_object_t _head, dispatch_object_t _tail)
{
struct dispatch_object_s *prev, *head = _head._do, *tail = _tail._do;
tail->do_next = NULL; // <-------------------------- SBN: (1)
prev = fastpath(dispatch_atomic_xchg(&dq->dq_items_tail, tail)); // <-------------------------- SBN: (2)
if (prev) {
// if we crash here with a value less than 0x1000, then we are at a known bug in client code
// for example, see _dispatch_queue_dispose or _dispatch_atfork_child
prev->do_next = head;
} else {
_dispatch_queue_push_list_slow(dq, head);
}
}
and src/queue.c DISPATCH_NOINLINE
void
dispatch_async_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func)
{
dispatch_continuation_t dc = fastpath(_dispatch_continuation_alloc_cacheonly());
// unlike dispatch_sync_f(), we do NOT need to check the queue width,
// the "drain" function will do this test
if (!dc) {
return _dispatch_async_f_slow(dq, ctxt, func);
}
dc->do_vtable = (void *)DISPATCH_OBJ_ASYNC_BIT;
dc->dc_func = func;
dc->dc_ctxt = ctxt;
_dispatch_queue_push(dq, dc);
}
Disasm of dispatch_async_f() function generated by GCC 4.5.1: 0000000000007300 <dispatch_async_f>:
7300: 55 push %rbp
7301: 48 89 e5 mov %rsp,%rbp
7304: 48 89 5d e8 mov %rbx,0xffffffffffffffe8(%rbp)
7308: 4c 89 65 f0 mov %r12,0xfffffffffffffff0(%rbp)
730c: 48 89 fb mov %rdi,%rbx
730f: 4c 89 6d f8 mov %r13,0xfffffffffffffff8(%rbp)
7313: 48 83 ec 20 sub $0x20,%rsp
7317: 49 89 f4 mov %rsi,%r12
731a: 49 89 d5 mov %rdx,%r13
731d: e8 1e f2 ff ff callq 6540 <_dispatch_continuation_alloc_cacheonly>
7322: 48 85 c0 test %rax,%rax
7325: 74 35 je 735c <dispatch_async_f+0x5c>
7327: 48 89 c2 mov %rax,%rdx
732a: 48 c7 00 01 00 00 00 movq $0x1,(%rax)
7331: 4c 89 68 10 mov %r13,0x10(%rax)
7335: 48 87 53 40 xchg %rdx,0x40(%rbx) // <-------------------------- SBN: (2)
7339: 48 85 d2 test %rdx,%rdx
733c: 4c 89 60 18 mov %r12,0x18(%rax)
7340: 48 c7 40 08 00 00 00 movq $0x0,0x8(%rax) // <-------------------------- SBN: (1)
7347: 00
7348: 74 2d je 7377 <dispatch_async_f+0x77>
734a: 48 89 42 08 mov %rax,0x8(%rdx)
734e: 48 8b 5d e8 mov 0xffffffffffffffe8(%rbp),%rbx
7352: 4c 8b 65 f0 mov 0xfffffffffffffff0(%rbp),%r12
7356: 4c 8b 6d f8 mov 0xfffffffffffffff8(%rbp),%r13
735a: c9 leaveq
735b: c3 retq
735c: 4c 89 ea mov %r13,%rdx
735f: 4c 89 e6 mov %r12,%rsi
7362: 48 89 df mov %rbx,%rdi
7365: 4c 8b 65 f0 mov 0xfffffffffffffff0(%rbp),%r12
7369: 48 8b 5d e8 mov 0xffffffffffffffe8(%rbp),%rbx
736d: 4c 8b 6d f8 mov 0xfffffffffffffff8(%rbp),%r13
7371: c9 leaveq
7372: e9 19 ff ff ff jmpq 7290 <_dispatch_async_f_slow>
7377: 48 89 df mov %rbx,%rdi
737a: 4c 8b 65 f0 mov 0xfffffffffffffff0(%rbp),%r12
737e: 48 8b 5d e8 mov 0xffffffffffffffe8(%rbp),%rbx
7382: 4c 8b 6d f8 mov 0xfffffffffffffff8(%rbp),%r13
7386: c9 leaveq
7387: 48 89 c6 mov %rax,%rsi
738a: e9 51 e2 ff ff jmpq 55e0 <_dispatch_queue_push_list_slow@plt>
Looks like 2 stores (marked as SBN: (1)/(2) in listings above) were reordered by optimizer. (Also note that initialization of dispatch_continuation_t fields (dc_func and dc_ctxt) were also reordered wrt inserting element to queue). To workaround I added Compiler memory barrier between initialization of do_next and inserting to queue: __attribute__((always_inline))
static inline void
_dispatch_queue_push_list(dispatch_queue_t dq, dispatch_object_t _head, dispatch_object_t _tail)
{
struct dispatch_object_s *prev, *head = _head._do, *tail = _tail._do;
tail->do_next = NULL;
__asm__ __volatile__ ("" ::: "memory"); // <-------------------------- SBN: compiler memory barrier
prev = fastpath(dispatch_atomic_xchg(&dq->dq_items_tail, tail));
if (prev) {
// if we crash here with a value less than 0x1000, then we are at a known bug in client code
// for example, see _dispatch_queue_dispose or _dispatch_atfork_child
prev->do_next = head;
} else {
_dispatch_queue_push_list_slow(dq, head);
}
}
After that GCC performs all stores in expected order and I was unable to reproduce the problem anymore. |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

