#ifndef BOOST_DETAIL_ATOMIC_GCC_ARMV6P_HPP
#define BOOST_DETAIL_ATOMIC_GCC_ARMV6P_HPP

//  Distributed under the Boost Software License, Version 1.0.
//  (See accompanying file LICENSE_1_0.txt or copy at
//  http://www.boost.org/LICENSE_1_0.txt)
//
//  Copyright (c) 2009 Helge Bahmann
//  Copyright (c) 2009 Phil Endecott
//  ARM Code by Phil Endecott, based on other architectures.

#include <boost/memory_order.hpp>
#include <boost/atomic/detail/base.hpp>
#include <boost/atomic/detail/builder.hpp>

// From the ARM Architecture Reference Manual for architecture v6:
//
// LDREX{<cond>} <Rd>, [<Rn>]
// <Rd> Specifies the destination register for the memory word addressed by <Rn>
// <Rn> Specifies the register containing the address.
//
// STREX{<cond>} <Rd>, <Rm>, [<Rn>]
// <Rd> Specifies the destination register for the returned status value.
//      0  if the operation updates memory
//      1  if the operation fails to update memory
// <Rm> Specifies the register containing the word to be stored to memory.
// <Rn> Specifies the register containing the address.
// Rd must not be the same register as Rm or Rn.
//
// ARM v7 is like ARM v6 plus:
// There are half-word and byte versions of the LDREX and STREX instructions,
// LDREXH, LDREXB, STREXH and STREXB.
// There are also double-word versions, LDREXD and STREXD.
// (Actually it looks like these are available from version 6k onwards.)
// FIXME these are not yet used; should be mostly a matter of copy-and-paste.
// I think you can supply an immediate offset to the address.
//
// A memory barrier is effected using a "co-processor 15" instruction,
// though a separate assembler mnemonic is available for it in v7.
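//
// For reference, the canonical load-exclusive/store-exclusive update loop that
// the fetch_* operations below are built on looks roughly like this (an
// illustrative sketch only, not code emitted verbatim by this header):
//
//   1: ldrex   r0, [r2]        // load the current value, marking exclusive access
//      add     r1, r0, #1      // compute the updated value (here: increment)
//      strex   r3, r1, [r2]    // try to store; r3 = 0 on success, 1 on failure
//      teq     r3, #0          // was the exclusive reservation still intact?
//      bne     1b              // if not, another agent intervened; retry from the load
//
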
namespace boost {
namespace detail {
namespace atomic {

// "Thumb 1" is a subset of the ARM instruction set that uses a 16-bit encoding. It
|
|
// doesn't include all instructions and in particular it doesn't include the co-processor
|
|
// instruction used for the memory barrier or the load-locked/store-conditional
|
|
// instructions. So, if we're compiling in "Thumb 1" mode, we need to wrap all of our
|
|
// asm blocks with code to temporarily change to ARM mode.
|
|
//
|
|
// You can only change between ARM and Thumb modes when branching using the bx instruction.
|
|
// bx takes an address specified in a register. The least significant bit of the address
|
|
// indicates the mode, so 1 is added to indicate that the destination code is Thumb.
|
|
// A temporary register is needed for the address and is passed as an argument to these
|
|
// macros. It must be one of the "low" registers accessible to Thumb code, specified
|
|
// usng the "l" attribute in the asm statement.
|
|
//
|
|
// Architecture v7 introduces "Thumb 2", which does include (almost?) all of the ARM
|
|
// instruction set. So in v7 we don't need to change to ARM mode; we can write "universal
|
|
// assembler" which will assemble to Thumb 2 or ARM code as appropriate. The only thing
|
|
// we need to do to make this "universal" assembler mode work is to insert "IT" instructions
|
|
// to annotate the conditional instructions. These are ignored in other modes (e.g. v6),
|
|
// so they can always be present.
|
|
|
|
#if defined(__thumb__) && !defined(__ARM_ARCH_7A__)
// FIXME also other v7 variants.
#define BOOST_ATOMIC_ARM_ASM_START(TMPREG) "adr " #TMPREG ", 1f\n" "bx " #TMPREG "\n" ".arm\n" ".align 4\n" "1: "
#define BOOST_ATOMIC_ARM_ASM_END(TMPREG)   "adr " #TMPREG ", 1f + 1\n" "bx " #TMPREG "\n" ".thumb\n" ".align 2\n" "1: "

#else
// The tmpreg is wasted in this case, which is non-optimal.
#define BOOST_ATOMIC_ARM_ASM_START(TMPREG)
#define BOOST_ATOMIC_ARM_ASM_END(TMPREG)
#endif
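
// As an illustration of what the Thumb-1 wrapping does (a sketch of the
// expansion, using r3 as the temporary register; not generated verbatim from
// this header):
//
//      adr     r3, 1f          // r3 = address of the ARM block below
//      bx      r3              // branch there; bit 0 clear selects ARM mode
//      .arm
//      .align  4
//   1: mcr     p15, 0, r0, c7, c10, 5   // ...the wrapped instruction(s)...
//      adr     r3, 1f + 1      // address of the Thumb continuation; +1 sets the Thumb bit
//      bx      r3              // branch back, switching to Thumb mode
//      .thumb
//      .align  2
//   1:
//
// In ARM or Thumb-2 builds both macros expand to nothing and only the wrapped
// instructions remain.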

#if defined(__ARM_ARCH_7A__)
// FIXME ditto.
#define BOOST_ATOMIC_ARM_DMB "dmb\n"
#else
#define BOOST_ATOMIC_ARM_DMB "mcr\tp15, 0, r0, c7, c10, 5\n"
#endif

// There is also a "Data Synchronisation Barrier" DSB; this exists in v6 as another co-processor
// instruction like the above.

static inline void fence_before(memory_order order)
{
	// FIXME I don't understand enough about barriers to know what this should do.
	switch(order) {
		case memory_order_release:
		case memory_order_acq_rel:
		case memory_order_seq_cst:
			int brtmp;
			__asm__ __volatile__ (
				BOOST_ATOMIC_ARM_ASM_START(%0)
				BOOST_ATOMIC_ARM_DMB
				BOOST_ATOMIC_ARM_ASM_END(%0)
				: "=&l" (brtmp) :: "memory"
			);
		default:;
	}
}

static inline void fence_after(memory_order order)
{
	// FIXME I don't understand enough about barriers to know what this should do.
	switch(order) {
		case memory_order_acquire:
		case memory_order_acq_rel:
		case memory_order_seq_cst:
			int brtmp;
			__asm__ __volatile__ (
				BOOST_ATOMIC_ARM_ASM_START(%0)
				BOOST_ATOMIC_ARM_DMB
				BOOST_ATOMIC_ARM_ASM_END(%0)
				: "=&l" (brtmp) :: "memory"
			);
		case memory_order_consume:
			__asm__ __volatile__ ("" ::: "memory");
		default:;
	}
}

#undef BOOST_ATOMIC_ARM_DMB
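
// The operations below bracket each access to the atomic word with these two
// fences; conceptually (an illustrative summary, not a separate code path):
//
//   fence_before(order);   // dmb for release/acq_rel/seq_cst, otherwise nothing
//   ... plain load/store or an ldrex/strex sequence on the 32-bit word ...
//   fence_after(order);    // dmb for acquire/acq_rel/seq_cst, otherwise nothing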

template<typename T>
class atomic_arm_4 {
public:
	typedef T integral_type;
	explicit atomic_arm_4(T v) : i(v) {}
	atomic_arm_4() {}
	T load(memory_order order=memory_order_seq_cst) const volatile
	{
		T v=const_cast<volatile const T &>(i);
		fence_after(order);
		return v;
	}
	void store(T v, memory_order order=memory_order_seq_cst) volatile
	{
		fence_before(order);
		const_cast<volatile T &>(i)=v;
	}
	bool compare_exchange_weak(
		T &expected,
		T desired,
		memory_order success_order,
		memory_order failure_order) volatile
	{
		fence_before(success_order);
		int success;
		int tmp;
		__asm__ __volatile__(
			BOOST_ATOMIC_ARM_ASM_START(%2)
			"mov %1, #0\n"           // success = 0
			"ldrex %0, [%3]\n"       // expected' = *(&i)
			"teq %0, %4\n"           // flags = expected'==expected
			"ittt eq\n"
			"strexeq %2, %5, [%3]\n" // if (flags.equal) *(&i) = desired, tmp = !OK
			"teqeq %2, #0\n"         // if (flags.equal) flags = tmp==0
			"moveq %1, #1\n"         // if (flags.equal) success = 1
			BOOST_ATOMIC_ARM_ASM_END(%2)
			: "=&r" (expected),  // %0
			  "=&r" (success),   // %1
			  "=&l" (tmp)        // %2
			: "r" (&i),          // %3
			  "r" (expected),    // %4
			  "r" ((int)desired) // %5
			: "cc"
		);
		if (success) fence_after(success_order);
		else fence_after(failure_order);
		return success;
	}
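
	// A weak compare-exchange may fail spuriously: the strex is broken by any
	// intervening exclusive access (or an interrupt), so callers are expected
	// to retry in a loop. An illustrative sketch of such a caller, assuming a
	// variable "a" exposing this interface (not something defined in this
	// header):
	//
	//   T old = a.load(memory_order_relaxed);
	//   while (!a.compare_exchange_weak(old, old + 1,
	//              memory_order_seq_cst, memory_order_relaxed)) {
	//       // "old" has been reloaded with the current value; just retry
	//   }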

	bool is_lock_free(void) const volatile {return true;}
protected:
	inline T fetch_add_var(T c, memory_order order) volatile
	{
		fence_before(order);
		T original, tmp;
		int tmp2;
		__asm__ __volatile__(
			BOOST_ATOMIC_ARM_ASM_START(%2)
			"1: ldrex %0, [%3]\n"  // original = *(&i)
			"add %1, %0, %4\n"     // tmp = original + c
			"strex %2, %1, [%3]\n" // *(&i) = tmp; tmp2 = !OK
			"teq %2, #0\n"         // flags = tmp2==0
			"it ne\n"
			"bne 1b\n"             // if (!flags.equal) goto 1
			BOOST_ATOMIC_ARM_ASM_END(%2)
			: "=&r" (original), // %0
			  "=&r" (tmp),      // %1
			  "=&l" (tmp2)      // %2
			: "r" (&i),         // %3
			  "r" (c)           // %4
			: "cc"
		);
		fence_after(order);
		return original;
	}
	inline T fetch_inc(memory_order order) volatile
	{
		fence_before(order);
		T original, tmp;
		int tmp2;
		__asm__ __volatile__(
			BOOST_ATOMIC_ARM_ASM_START(%2)
			"1: ldrex %0, [%3]\n"  // original = *(&i)
			"add %1, %0, #1\n"     // tmp = original + 1
			"strex %2, %1, [%3]\n" // *(&i) = tmp; tmp2 = !OK
			"teq %2, #0\n"         // flags = tmp2==0
			"it ne\n"
			"bne 1b\n"             // if (!flags.equal) goto 1
			BOOST_ATOMIC_ARM_ASM_END(%2)
			: "=&r" (original), // %0
			  "=&r" (tmp),      // %1
			  "=&l" (tmp2)      // %2
			: "r" (&i)          // %3
			: "cc"
		);
		fence_after(order);
		return original;
	}
	inline T fetch_dec(memory_order order) volatile
	{
		fence_before(order);
		T original, tmp;
		int tmp2;
		__asm__ __volatile__(
			BOOST_ATOMIC_ARM_ASM_START(%2)
			"1: ldrex %0, [%3]\n"  // original = *(&i)
			"sub %1, %0, #1\n"     // tmp = original - 1
			"strex %2, %1, [%3]\n" // *(&i) = tmp; tmp2 = !OK
			"teq %2, #0\n"         // flags = tmp2==0
			"it ne\n"
			"bne 1b\n"             // if (!flags.equal) goto 1
			BOOST_ATOMIC_ARM_ASM_END(%2)
			: "=&r" (original), // %0
			  "=&r" (tmp),      // %1
			  "=&l" (tmp2)      // %2
			: "r" (&i)          // %3
			: "cc"
		);
		fence_after(order);
		return original;
	}
private:
	T i;
};

// #ifdef _ARM_ARCH_7
// FIXME TODO can add native byte and halfword version here
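// For what it's worth, a native byte version (only possible from v6k/v7
// onwards) would follow exactly the same shape; an unverified sketch, not part
// of this header:
//
//   1: ldrexb  %0, [%3]        // original = *(address of the byte)
//      add     %1, %0, %4      // tmp = original + c
//      strexb  %2, %1, [%3]    // store if still exclusive; %2 = !OK
//      teq     %2, #0
//      bne     1b
//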

template<typename T>
class platform_atomic_integral<T, 4> : public build_atomic_from_typical<build_exchange<atomic_arm_4<T> > > {
public:
	typedef build_atomic_from_typical<build_exchange<atomic_arm_4<T> > > super;
	explicit platform_atomic_integral(T v) : super(v) {}
	platform_atomic_integral(void) {}
};

template<typename T>
class platform_atomic_integral<T, 1>: public build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> {
public:
	typedef build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> super;

	explicit platform_atomic_integral(T v) : super(v) {}
	platform_atomic_integral(void) {}
};

template<typename T>
class platform_atomic_integral<T, 2>: public build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> {
public:
	typedef build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> super;

	explicit platform_atomic_integral(T v) : super(v) {}
	platform_atomic_integral(void) {}
};
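
// Plain v6 has no 1- or 2-byte exclusive accesses, so the 1- and 2-byte
// specializations above rely on build_atomic_from_larger_type, which (as I
// understand it) performs the operation on the aligned 32-bit word containing
// the value, using the 4-byte primitive above.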

typedef build_exchange<atomic_arm_4<void *> > platform_atomic_address;

}
}
}

#undef BOOST_ATOMIC_ARM_ASM_START
#undef BOOST_ATOMIC_ARM_ASM_END

#endif