Ticket #35 (accepted defect)
invalid code generated by GCC 4.5.1 for _dispatch_queue_push_list()
| Reported by: | sbn@… | Owned by: | dsteffen@… |
|---|---|---|---|
| Priority: | major | Milestone: | |
| Version: | Keywords: | ||
| Cc: | sbn@…, jocke@…, german@…, mark@… |
Description
When building libdispatch r197 using GCC 4.5.1 on Solaris (x86_64), we found that one of the worker threads sometimes starts spinning in a tight loop in the _dispatch_queue_concurrent_drain_one() function.
src/queue.c
// Takes one item off the head of dq's item list.
// Returns the removed item, or NULL if the list was empty or if the head
// already held the mediator value (another thread owns the head).
struct dispatch_object_s *
_dispatch_queue_concurrent_drain_one(dispatch_queue_t dq)
{
struct dispatch_object_s *head, *next, *const mediator = (void *)~0ul;
// The mediator value acts both as a "lock" and a signal
head = dispatch_atomic_xchg(&dq->dq_items_head, mediator);
if (slowpath(head == NULL)) {
// The first xchg on the tail will tell the enqueueing thread that it
// is safe to blindly write out to the head pointer. A cmpxchg honors
// the algorithm.
dispatch_atomic_cmpxchg(&dq->dq_items_head, mediator, NULL);
_dispatch_debug("no work on global work queue");
return NULL;
}
if (slowpath(head == mediator)) {
// This thread lost the race for ownership of the queue.
//
// The ratio of work to libdispatch overhead must be bad. This
// scenario implies that there are too many threads in the pool.
// Create a new pending thread and then exit this thread.
// The kernel will grant a new thread when the load subsides.
_dispatch_debug("Contention on queue: %p", dq);
_dispatch_queue_wakeup_global(dq);
#if DISPATCH_PERF_MON
dispatch_atomic_inc(&_dispatch_bad_ratio);
#endif
return NULL;
}
// Restore the head pointer to a sane value before returning.
// If 'next' is NULL, then this item _might_ be the last item.
next = fastpath(head->do_next);
if (slowpath(!next)) {
// Replace the mediator in the head with NULL first; the cmpxchg on the
// tail below then decides whether 'head' really was the last item.
dq->dq_items_head = NULL;
if (dispatch_atomic_cmpxchg(&dq->dq_items_tail, head, NULL)) {
// both head and tail are NULL now
goto out;
}
// There must be a next item now. This thread won't wait long.
// The cmpxchg failed, so dq_items_tail no longer points at 'head'.
// NOTE: this loop only terminates once a non-NULL head->do_next is
// observed, i.e. the store linking the successor must become visible
// and must not be overwritten with NULL afterwards.
while (!(next = head->do_next)) { // <-------------------------- SBN: spins here forever
_dispatch_hardware_pause();
}
}
// Install the successor as the new head of the list.
dq->dq_items_head = next;
// NOTE(review): presumably wakes another worker so it can continue
// draining - confirm against _dispatch_queue_wakeup_global().
_dispatch_queue_wakeup_global(dq);
out:
return head;
}
This happens only in optimized builds and under high load: 2K-16K events dispatched using the dispatch_async_f() function.
As it turned out, the problem was overly aggressive reordering performed by the GCC optimizer in the _dispatch_queue_push_list() function, which puts a new event into the lock-free queue.
This function is inlined in dispatch_async_f()
C source:
src/queue_internal.h
// Appends the object list running from _head to _tail to dq's item list.
// The list is NULL-terminated, installed as the new tail with an atomic
// exchange, and then linked after the previous tail. If there was no
// previous tail, _dispatch_queue_push_list_slow() is called with the head.
__attribute__((always_inline))
static inline void
_dispatch_queue_push_list(dispatch_queue_t dq, dispatch_object_t _head, dispatch_object_t _tail)
{
struct dispatch_object_s *prev, *head = _head._do, *tail = _tail._do;
// Terminate the list before it is published through dq_items_tail.
// Store (1) has to be performed before the exchange (2) makes 'tail'
// reachable by other threads.
tail->do_next = NULL; // <-------------------------- SBN: (1)
prev = fastpath(dispatch_atomic_xchg(&dq->dq_items_tail, tail)); // <-------------------------- SBN: (2)
if (prev) {
// if we crash here with a value less than 0x1000, then we are at a known bug in client code
// for example, see _dispatch_queue_dispose or _dispatch_atfork_child
prev->do_next = head;
} else {
// There was no previous tail; hand the head to the slow path.
_dispatch_queue_push_list_slow(dq, head);
}
}
and
src/queue.c
// Enqueues a continuation carrying func and ctxt onto dq.
// If _dispatch_continuation_alloc_cacheonly() returns NULL, the request is
// forwarded to _dispatch_async_f_slow() instead.
DISPATCH_NOINLINE
void
dispatch_async_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func)
{
dispatch_continuation_t dc = fastpath(_dispatch_continuation_alloc_cacheonly());
// unlike dispatch_sync_f(), we do NOT need to check the queue width,
// the "drain" function will do this test
if (!dc) {
return _dispatch_async_f_slow(dq, ctxt, func);
}
// The vtable slot holds the DISPATCH_OBJ_ASYNC_BIT constant, not a pointer.
// NOTE(review): presumably this tags the object as a plain async
// continuation - confirm against the drain code.
dc->do_vtable = (void *)DISPATCH_OBJ_ASYNC_BIT;
dc->dc_func = func;
dc->dc_ctxt = ctxt;
// All continuation fields are written before the push onto dq.
_dispatch_queue_push(dq, dc);
}
Disasm of dispatch_async_f() function generated by GCC 4.5.1:
0000000000007300 <dispatch_async_f>:
7300: 55 push %rbp
7301: 48 89 e5 mov %rsp,%rbp
7304: 48 89 5d e8 mov %rbx,0xffffffffffffffe8(%rbp)
7308: 4c 89 65 f0 mov %r12,0xfffffffffffffff0(%rbp)
730c: 48 89 fb mov %rdi,%rbx
730f: 4c 89 6d f8 mov %r13,0xfffffffffffffff8(%rbp)
7313: 48 83 ec 20 sub $0x20,%rsp
7317: 49 89 f4 mov %rsi,%r12
731a: 49 89 d5 mov %rdx,%r13
731d: e8 1e f2 ff ff callq 6540 <_dispatch_continuation_alloc_cacheonly>
7322: 48 85 c0 test %rax,%rax
7325: 74 35 je 735c <dispatch_async_f+0x5c>
7327: 48 89 c2 mov %rax,%rdx
732a: 48 c7 00 01 00 00 00 movq $0x1,(%rax)
7331: 4c 89 68 10 mov %r13,0x10(%rax)
7335: 48 87 53 40 xchg %rdx,0x40(%rbx) // <-------------------------- SBN: (2)
7339: 48 85 d2 test %rdx,%rdx
733c: 4c 89 60 18 mov %r12,0x18(%rax)
7340: 48 c7 40 08 00 00 00 movq $0x0,0x8(%rax) // <-------------------------- SBN: (1)
7347: 00
7348: 74 2d je 7377 <dispatch_async_f+0x77>
734a: 48 89 42 08 mov %rax,0x8(%rdx)
734e: 48 8b 5d e8 mov 0xffffffffffffffe8(%rbp),%rbx
7352: 4c 8b 65 f0 mov 0xfffffffffffffff0(%rbp),%r12
7356: 4c 8b 6d f8 mov 0xfffffffffffffff8(%rbp),%r13
735a: c9 leaveq
735b: c3 retq
735c: 4c 89 ea mov %r13,%rdx
735f: 4c 89 e6 mov %r12,%rsi
7362: 48 89 df mov %rbx,%rdi
7365: 4c 8b 65 f0 mov 0xfffffffffffffff0(%rbp),%r12
7369: 48 8b 5d e8 mov 0xffffffffffffffe8(%rbp),%rbx
736d: 4c 8b 6d f8 mov 0xfffffffffffffff8(%rbp),%r13
7371: c9 leaveq
7372: e9 19 ff ff ff jmpq 7290 <_dispatch_async_f_slow>
7377: 48 89 df mov %rbx,%rdi
737a: 4c 8b 65 f0 mov 0xfffffffffffffff0(%rbp),%r12
737e: 48 8b 5d e8 mov 0xffffffffffffffe8(%rbp),%rbx
7382: 4c 8b 6d f8 mov 0xfffffffffffffff8(%rbp),%r13
7386: c9 leaveq
7387: 48 89 c6 mov %rax,%rsi
738a: e9 51 e2 ff ff jmpq 55e0 <_dispatch_queue_push_list_slow@plt>
It looks like the two stores (marked as SBN: (1)/(2) in the listings above) were reordered by the optimizer.
(Also note that the initialization of the dispatch_continuation_t fields (dc_func and dc_ctxt) was also reordered with respect to inserting the element into the queue.)
As a workaround, I added a compiler memory barrier between the initialization of do_next and the insertion into the queue:
// Appends the object list running from _head to _tail to dq's item list.
// The list is NULL-terminated, installed as the new tail with an atomic
// exchange, and then linked after the previous tail. If there was no
// previous tail, _dispatch_queue_push_list_slow() is called with the head.
__attribute__((always_inline))
static inline void
_dispatch_queue_push_list(dispatch_queue_t dq, dispatch_object_t _head, dispatch_object_t _tail)
{
struct dispatch_object_s *prev, *head = _head._do, *tail = _tail._do;
// Terminate the list before it is published through dq_items_tail.
tail->do_next = NULL;
// Compiler-only barrier: an empty asm statement with a "memory" clobber.
// It emits no instruction but stops the compiler from moving the do_next
// store above past the exchange below.
__asm__ __volatile__ ("" ::: "memory"); // <-------------------------- SBN: compiler memory barrier
prev = fastpath(dispatch_atomic_xchg(&dq->dq_items_tail, tail));
if (prev) {
// if we crash here with a value less than 0x1000, then we are at a known bug in client code
// for example, see _dispatch_queue_dispose or _dispatch_atfork_child
prev->do_next = head;
} else {
// There was no previous tail; hand the head to the slow path.
_dispatch_queue_push_list_slow(dq, head);
}
}
After that, GCC performs all stores in the expected order, and I was no longer able to reproduce the problem.
Attachments
Change History
comment:3 Changed 22 months ago by sbn@…
Disassembly of dispatch_async_f after the fix:
0000000000006d50 <dispatch_async_f>:
6d50: 55 push %rbp
6d51: 48 89 e5 mov %rsp,%rbp
6d54: 48 89 5d d8 mov %rbx,0xffffffffffffffd8(%rbp)
6d58: 4c 89 65 e0 mov %r12,0xffffffffffffffe0(%rbp)
6d5c: 48 89 fb mov %rdi,%rbx
6d5f: 4c 89 6d e8 mov %r13,0xffffffffffffffe8(%rbp)
6d63: 4c 89 75 f0 mov %r14,0xfffffffffffffff0(%rbp)
6d67: 49 89 f4 mov %rsi,%r12
6d6a: 4c 89 7d f8 mov %r15,0xfffffffffffffff8(%rbp)
6d6e: 48 83 ec 30 sub $0x30,%rsp
6d72: 4c 8b 2d cf 3b 01 00 mov 80847(%rip),%r13 # 1a948 <_GLOBAL_OFFSET_TABLE_+0x168>
6d79: 49 89 d6 mov %rdx,%r14
6d7c: 41 8b 7d 00 mov 0x0(%r13),%edi
6d80: e8 a3 e2 ff ff callq 5028 <pthread_getspecific@plt>
6d85: 48 85 c0 test %rax,%rax
6d88: 49 89 c7 mov %rax,%r15
6d8b: 74 4a je 6dd7 <dispatch_async_f+0x87>
6d8d: 48 8b 70 08 mov 0x8(%rax),%rsi
6d91: 41 8b 7d 00 mov 0x0(%r13),%edi
6d95: e8 b6 f0 ff ff callq 5e50 <_dispatch_thread_setspecific>
6d9a: 49 c7 07 01 00 00 00 movq $0x1,(%r15)
6da1: 4d 89 77 10 mov %r14,0x10(%r15)
6da5: 4d 89 67 18 mov %r12,0x18(%r15)
6da9: 49 c7 47 08 00 00 00 movq $0x0,0x8(%r15) // <-------------------------------- SBN: (1) tail->do_next = NULL
6db0: 00
6db1: 4c 89 f8 mov %r15,%rax
6db4: 48 87 43 40 xchg %rax,0x40(%rbx) // <-------------------------------- SBN: (2) dispatch_atomic_xchg(&dq->dq_items_tail, tail)
6db8: 48 85 c0 test %rax,%rax
6dbb: 74 3d je 6dfa <dispatch_async_f+0xaa>
6dbd: 4c 89 78 08 mov %r15,0x8(%rax)
6dc1: 48 8b 5d d8 mov 0xffffffffffffffd8(%rbp),%rbx
6dc5: 4c 8b 65 e0 mov 0xffffffffffffffe0(%rbp),%r12
6dc9: 4c 8b 6d e8 mov 0xffffffffffffffe8(%rbp),%r13
6dcd: 4c 8b 75 f0 mov 0xfffffffffffffff0(%rbp),%r14
6dd1: 4c 8b 7d f8 mov 0xfffffffffffffff8(%rbp),%r15
6dd5: c9 leaveq
6dd6: c3 retq
6dd7: 4c 89 f2 mov %r14,%rdx
6dda: 4c 89 e6 mov %r12,%rsi
6ddd: 48 89 df mov %rbx,%rdi
6de0: 4c 8b 65 e0 mov 0xffffffffffffffe0(%rbp),%r12
6de4: 48 8b 5d d8 mov 0xffffffffffffffd8(%rbp),%rbx
6de8: 4c 8b 6d e8 mov 0xffffffffffffffe8(%rbp),%r13
6dec: 4c 8b 75 f0 mov 0xfffffffffffffff0(%rbp),%r14
6df0: 4c 8b 7d f8 mov 0xfffffffffffffff8(%rbp),%r15
6df4: c9 leaveq
6df5: e9 e6 fe ff ff jmpq 6ce0 <_dispatch_async_f_slow>
6dfa: 4c 89 fe mov %r15,%rsi
6dfd: 48 89 df mov %rbx,%rdi
6e00: 4c 8b 65 e0 mov 0xffffffffffffffe0(%rbp),%r12
6e04: 48 8b 5d d8 mov 0xffffffffffffffd8(%rbp),%rbx
6e08: 4c 8b 6d e8 mov 0xffffffffffffffe8(%rbp),%r13
6e0c: 4c 8b 75 f0 mov 0xfffffffffffffff0(%rbp),%r14
6e10: 4c 8b 7d f8 mov 0xfffffffffffffff8(%rbp),%r15
6e14: c9 leaveq
6e15: e9 4e e2 ff ff jmpq 5068 <_dispatch_queue_push_list_slow@plt>
6e1a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
As one can see, all stores are now performed in the correct order.
comment:5 follow-up: ↓ 6 Changed 22 months ago by dsteffen@…
- Status changed from new to accepted
- Owner set to dsteffen@…
GCC has apparently changed the __sync intrinsics to no longer be compiler barriers (nonsensical IMO since they are defined to generate memory barrier instructions).
The cleanest fix for the Lion branch is to change the default barrier defines in atomic.h
diff --git i/src/shims/atomic.h w/src/shims/atomic.h
index fbc1171..5dfe71b 100644
--- i/src/shims/atomic.h
+++ w/src/shims/atomic.h
@@ -42,11 +42,14 @@
// see comment in dispatch_once.c
#define dispatch_atomic_maximally_synchronizing_barrier() \
_dispatch_atomic_barrier()
-// assume atomic builtins provide barriers
-#define dispatch_atomic_barrier()
-#define dispatch_atomic_acquire_barrier()
-#define dispatch_atomic_release_barrier()
-#define dispatch_atomic_store_barrier()
+// assume atomic builtins provide memory barriers, but ensure compiler does not
+// reorder across them (workaround bugs in recent GCC)
+// http://libdispatch.macosforge.org/trac/ticket/35
+#define dispatch_atomic_barrier() \
+ __asm__ __volatile__("" : : : "memory")
+#define dispatch_atomic_acquire_barrier() dispatch_atomic_barrier()
+#define dispatch_atomic_release_barrier() dispatch_atomic_barrier()
+#define dispatch_atomic_store_barrier() dispatch_atomic_barrier()
#define _dispatch_hardware_pause() asm("")
#define _dispatch_debugger() asm("trap")
comment:6 in reply to: ↑ 5 ; follow-up: ↓ 8 Changed 22 months ago by mark@…
Replying to dsteffen@…:
+#define dispatch_atomic_barrier() \
+ asm volatile("" : : : "memory")
This assembly instruction won't work on ARM or PPC, according to a similar function in the Haskell source code[1] as reproduced below:
294 /*
295 * We need to tell both the compiler AND the CPU about the barriers.
296 * It's no good preventing the CPU from reordering the operations if
297 * the compiler has already done so - hence the "memory" restriction
298 * on each of the barriers below.
299 */
300 EXTERN_INLINE void
301 write_barrier(void) {
302 #if i386_HOST_ARCH || x86_64_HOST_ARCH
303 asm volatile ("" : : : "memory");
304 #elif powerpc_HOST_ARCH
305 asm volatile ("lwsync" : : : "memory");
306 #elif sparc_HOST_ARCH
307 /* Sparc in TSO mode does not require store/store barriers. */
308 asm volatile ("" : : : "memory");
309 #elif arm_HOST_ARCH && defined(arm_HOST_ARCH_PRE_ARMv7)
310 asm volatile ("" : : : "memory");
311 #elif arm_HOST_ARCH && !defined(arm_HOST_ARCH_PRE_ARMv7)
312 asm volatile ("dmb st" : : : "memory");
313 #elif !defined(WITHSMP)
314 return;
315 #else
316 #error memory barriers unimplemented on this architecture
317 #endif
318 }
[1] Source:
http://hackage.haskell.org/trac/ghc/browser/includes/stg/SMP.h
comment:8 in reply to: ↑ 6 Changed 22 months ago by dsteffen@…
Replying to mark@…:
Replying to dsteffen@…:
+#define dispatch_atomic_barrier() \
+ asm volatile("" : : : "memory")
This assembly instruction won't work on ARM or PPC, according to a similar function in the Haskell source code[1] as reproduced below:
No, this would double up the generated barrier instructions; the __sync builtins are defined to already include these on architectures where necessary (the libdispatch ARM port is not open source, but it does do the right thing here).
comment:9 follow-up: ↓ 10 Changed 21 months ago by bonzini@…
Please attach a preprocessed testcase (a *.i file obtained from gcc with the --save-temps option) and the output of adding -### to the gcc invocation.
comment:10 in reply to: ↑ 9 Changed 21 months ago by sbn@…
Replying to bonzini@…:
Please attach a preprocessed testcase (a *.i file obtained from gcc with the --save-temps option) and the output of adding -### to the gcc invocation.
Output of GCC invoked with -###:
$ /opt/csw/gcc4/bin/gcc -DHAVE_CONFIG_H -I. -I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src -I../config -I.. -I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src -DSOLARIS -DSOLARIS64 -DNDEBUG -D_POSIX_PTHREAD_SEMANTICS -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -Wall -I./../../../../perf.x86_64-sun-solaris/include64/kqueue -D_REENTRANT -D_LARGEFILE64_SOURCE -static-libgcc -pipe -std=gnu99 -O2 -threads -m64 -ffast-math -Wstrict-prototypes -Wmissing-prototypes -Wall -Wpointer-arith -Wwrite-strings -Wno-long-long -pedantic -Wno-missing-braces -static-libgcc -DSOLARIS -DSOLARIS64 -DNDEBUG -D_POSIX_PTHREAD_SEMANTICS -L/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/platform/lib64 -DSOLARIS -DSOLARIS64 -DNDEBUG -D_POSIX_PTHREAD_SEMANTICS -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -MT libdispatch_la-queue.lo -MD -MP -MF .deps/libdispatch_la-queue.Tpo -c /tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src/queue.c -### Using built-in specs. 
COLLECT_GCC=/opt/csw/gcc4/bin/gcc COLLECT_LTO_WRAPPER=/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/lto-wrapper Target: i386-pc-solaris2.10 Configured with: ../gcc-4.5.1/configure --host=i386-pc-solaris2.10 --build=i386-pc-solaris2.10 --with-gnu-as --with-as=/opt/csw/bin/gas --without-gnu-ld --with-ld=/usr/ccs/bin/ld --with-cpu-32=i386 --with-cpu-64=opteron --with-arch-32=i386 --with-arch-64=opteron --enable-stage1-languages=c --enable-nls --with-libiconv-prefix=/opt/csw --enable-threads=posix --prefix=/opt/csw/gcc4 --with-local-prefix=/opt/csw --enable-shared --enable-multilib --with-pkgversion='Blastwave.org Inc. Mon Aug 23 11:16:32 GMT 2010' --with-bugurl=http://www.blastwave.org/support --enable-languages=c,c++,objc,fortran --enable-bootstrap Thread model: posix gcc version 4.5.1 (Blastwave.org Inc. Mon Aug 23 11:16:32 GMT 2010) COLLECT_GCC_OPTIONS='-DHAVE_CONFIG_H' '-I.' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src' '-I../config' '-I..' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I./../../../../perf.x86_64-sun-solaris/include64/kqueue' '-D_REENTRANT' '-D_LARGEFILE64_SOURCE' '-static-libgcc' '-pipe' '-std=gnu99' '-O2' '-threads' '-m64' '-ffast-math' '-Wstrict-prototypes' '-Wmissing-prototypes' '-Wall' '-Wpointer-arith' '-Wwrite-strings' '-Wno-long-long' '-pedantic' '-Wno-missing-braces' '-static-libgcc' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-L/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/platform/lib64' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' 
'-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-MT' 'libdispatch_la-queue.lo' '-MD' '-MP' '-MF' '.deps/libdispatch_la-queue.Tpo' '-c' '-mtune=opteron' '-march=opteron' "/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/cc1" "-quiet" "-I." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src" "-I../config" "-I.." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I./../../../../perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-imultilib" "amd64" "-MD" "queue.d" "-MF" ".deps/libdispatch_la-queue.Tpo" "-MP" "-MT" "libdispatch_la-queue.lo" "-D_REENTRANT" "-D_SOLARIS_THREADS" "-DHAVE_CONFIG_H" "-DSOLARIS" "-DSOLARIS64" "-DNDEBUG" "-D_POSIX_PTHREAD_SEMANTICS" "-D_REENTRANT" "-D_LARGEFILE64_SOURCE" "-DSOLARIS" "-DSOLARIS64" "-DNDEBUG" "-D_POSIX_PTHREAD_SEMANTICS" "-DSOLARIS" "-DSOLARIS64" "-DNDEBUG" "-D_POSIX_PTHREAD_SEMANTICS" "/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src/queue.c" "-quiet" "-dumpbase" "queue.c" "-m64" "-mtune=opteron" "-march=opteron" "-auxbase" "queue" "-O2" "-Wstrict-prototypes" "-Wmissing-prototypes" "-Wall" "-Wpointer-arith" "-Wwrite-strings" "-Wno-long-long" "-pedantic" "-Wno-missing-braces" "-std=gnu99" "-ffast-math" "-o" "-" | "/opt/csw/bin/gas" "-I." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src" "-I../config" "-I.." 
"-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I./../../../../perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-Qy" "--64" "-s" "-o" "queue.o" "-" COMPILER_PATH=/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/:/usr/ccs/bin/ LIBRARY_PATH=/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/amd64/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/../../../amd64/:/lib/amd64/:/usr/lib/amd64/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/../../../:/lib/:/usr/lib/ COLLECT_GCC_OPTIONS='-DHAVE_CONFIG_H' '-I.' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src' '-I../config' '-I..' 
'-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I./../../../../perf.x86_64-sun-solaris/include64/kqueue' '-D_REENTRANT' '-D_LARGEFILE64_SOURCE' '-static-libgcc' '-pipe' '-std=gnu99' '-O2' '-threads' '-m64' '-ffast-math' '-Wstrict-prototypes' '-Wmissing-prototypes' '-Wall' '-Wpointer-arith' '-Wwrite-strings' '-Wno-long-long' '-pedantic' '-Wno-missing-braces' '-static-libgcc' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-L/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/platform/lib64' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-MT' 'libdispatch_la-queue.lo' '-MD' '-MP' '-MF' '.deps/libdispatch_la-queue.Tpo' '-c' '-mtune=opteron' '-march=opteron'
I couldn't find a way to attach a file here, so here is a link: queue.i
comment:11 Changed 21 months ago by bonzini@…
The problem is an invalid asm that is only triggered in 64-bit mode. It is fine in Lion branch, though adding an asm("":::"memory") in front of sync_lock_test_and_set doesn't hurt and future-proofs the code.


Cc Me!