aboutsummaryrefslogtreecommitdiffstats
path: root/libc/private/bionic_atomic_arm.h
blob: 3bb639e0e595913a71c43c1262eea252d04146fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef BIONIC_ATOMIC_ARM_H
#define BIONIC_ATOMIC_ARM_H

#include <machine/cpu-features.h>

/* Some of the hardware instructions used below are not available in Thumb-1
 * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
 * problem, we're going to use the same technique as libatomic_ops,
 * which is to temporarily switch to ARM, do the operation, then switch
 * back to Thumb-1.
 *
 * This results in two 'bx' jumps, just like a normal function call, but
 * everything is kept inlined, which avoids loading or computing the
 * function's address and prevents a little I-cache thrashing too.
 *
 * However, it is highly recommended to avoid compiling any C library source
 * file that uses these functions in Thumb-1 mode.
 *
 * Define three helper macros to implement this:
 */
#if defined(__thumb__) && !defined(__thumb2__)
/* Prefix for an asm body compiled in Thumb-1 mode: branch-and-exchange to
 * local label 5 and tell the assembler that ARM code follows. The target
 * address has bit 0 clear, so 'bx' enters ARM state. Uses r3 as scratch. */
#  define  __ATOMIC_SWITCH_TO_ARM \
            "adr r3, 5f\n" \
            "bx  r3\n" \
            ".align\n" \
            ".arm\n" \
        "5:\n"
/* Suffix for an asm body compiled in Thumb-1 mode: return to Thumb state.
 *
 * - The leading \n is intentional: callers write their last instruction
 *   without a trailing newline.
 * - The target is '6f + 1' because 'bx' only switches to Thumb state when
 *   bit 0 of the target address is set; a plain '6f' would keep the core
 *   in ARM state while it starts executing Thumb code.
 * - ".thumb" must end with \n, otherwise it is pasted together with the
 *   following label into the bogus token ".thumb6:".
 */
#  define __ATOMIC_SWITCH_TO_THUMB \
            "\n" \
            "adr r3, 6f + 1\n" \
            "bx  r3\n" \
            ".thumb\n" \
        "6:\n"

/* Registers clobbered by the two switch sequences above. The macro is always
 * written directly in front of another clobber string (e.g.
 * __ATOMIC_CLOBBERS "cc"), so it must carry its own trailing comma; without
 * it the adjacent literals would concatenate into the invalid name "r3cc". */
#  define __ATOMIC_CLOBBERS   "r3",

/* Warn the user that ARM mode should really be preferred! */
#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!

#else
/* ARM or Thumb-2 build: the instructions are directly available, so no mode
 * switch is needed and the helpers expand to nothing. */
#  define  __ATOMIC_SWITCH_TO_ARM   /* nothing */
#  define  __ATOMIC_SWITCH_TO_THUMB /* nothing */
#  define  __ATOMIC_CLOBBERS        /* nothing */
#endif


/* Define a full memory barrier. A hardware barrier is only needed if we
 * build the platform for a multi-core device. For the record, using a 'dmb'
 * instruction on a Nexus One device can take up to 180 ns even though
 * it is completely unnecessary on this device.
 *
 * NOTE: This is where the platform and NDK atomic headers are
 *        going to diverge. With the NDK, we don't know if the generated
 *        code is going to run on a single or multi-core device, so we
 *        need to be cautious.
 *
 *        I.e. on single-core devices, the helper immediately returns,
 *        on multi-core devices, it uses "dmb" or any other means to
 *        perform a full-memory barrier.
 *
 * There are three cases to consider for the platform:
 *
 *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
 *    - multi-core ARMv6         => use the coprocessor
 *    - single core ARMv6+       => do not use any hardware barrier
 */
#if defined(ANDROID_SMP) && ANDROID_SMP == 1

/* Sanity check, multi-core is only supported starting from ARMv6 */
#  if __ARM_ARCH__ < 6
#    error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
#  endif

#  ifdef __ARM_HAVE_DMB
/* Full memory barrier for targets that advertise __ARM_HAVE_DMB (ARMv7-A):
 * the dedicated 'dmb' instruction is emitted directly. */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    /* ARMv7-A code is always built in ARM or Thumb-2 mode, so there is no
     * need to wrap the instruction in __ATOMIC_SWITCH_TO_ARM. */
    __asm__ __volatile__ (
        "dmb"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
#  else /* !__ARM_HAVE_DMB */
/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
 * which requires the use of a general-purpose register, which is slightly
 * less efficient.
 */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    __asm__ __volatile__ (
        __SWITCH_TO_ARM
        "mcr p15, 0, %0, c7, c10, 5"
        __SWITCH_TO_THUMB
        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
}
#  endif /* !__ARM_HAVE_DMB */
#else /* !ANDROID_SMP */
/* Barrier used when ANDROID_SMP is not set to 1: no hardware barrier
 * instruction is emitted, only a compiler-level one. */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    /* Empty asm body; the "memory" clobber alone is what keeps the compiler
     * from moving memory accesses across this point. */
    __asm__ __volatile__ (
        ""
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
#endif /* !ANDROID_SMP */

/* All of the atomic primitives below are implemented with the LDREX/STREX
 * instruction pair, so refuse to build for targets that do not advertise
 * them. */
#ifndef __ARM_HAVE_LDREX_STREX
#error Only ARM devices which have LDREX / STREX are supported
#endif

/* Atomic compare-and-swap, with no explicit memory barrier.
 *
 * Stores new_value into *ptr only if *ptr currently holds old_value.
 *
 * Returns 0 when the value read from *ptr matched old_value (the swap was
 * performed) and 1 when it did not. Note that this is the opposite of the
 * convention typically used on other platforms.
 */
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    int32_t observed;
    int32_t store_failed;

    for (;;) {
        /* store_failed is cleared before the comparison, so it can only be
         * non-zero if the conditional exclusive store ran and reported a
         * failure. */
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %[seen], [%[addr]]\n"
            "mov %[fail], #0\n"
            "teq %[seen], %[expect]\n"
#ifdef __thumb2__
            "it eq\n"
#endif
            "strexeq %[fail], %[desired], [%[addr]]"
            __ATOMIC_SWITCH_TO_THUMB
            : [seen] "=&r" (observed),
              [fail] "=&r" (store_failed),
              [mem] "+m" (*ptr)
            : [addr] "r" (ptr),
              [expect] "Ir" (old_value),
              [desired] "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");

        /* Retry only when the exclusive store itself failed. */
        if (__builtin_expect(store_failed == 0, 1)) {
            break;
        }
    }

    return observed != old_value;
}

/* Atomic exchange, with no explicit memory barrier.
 *
 * Stores new_value into *ptr and returns the value that was read from *ptr
 * by the load paired with the successful store.
 */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t previous;
    int32_t store_failed;

    for (;;) {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %[old], [%[addr]]\n"
            "strex %[fail], %[desired], [%[addr]]"
            __ATOMIC_SWITCH_TO_THUMB
            : [old] "=&r" (previous),
              [fail] "=&r" (store_failed),
              [mem] "+m" (*ptr)
            : [addr] "r" (ptr),
              [desired] "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");

        /* Loop again whenever the exclusive store reports failure. */
        if (__builtin_expect(store_failed == 0, 1)) {
            break;
        }
    }

    return previous;
}

/* Atomic increment, with no explicit memory barrier.
 *
 * Adds 1 to *ptr and returns the value *ptr held before the increment.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t previous;
    int32_t updated;
    int32_t store_failed;

    for (;;) {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %[old], [%[addr]]\n"
            "add %[next], %[old], #1\n"
            "strex %[fail], %[next], [%[addr]]"
            __ATOMIC_SWITCH_TO_THUMB
            : [old] "=&r" (previous),
              [next] "=&r" (updated),
              [fail] "=&r" (store_failed),
              [mem] "+m" (*ptr)
            : [addr] "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");

        /* Loop again whenever the exclusive store reports failure. */
        if (__builtin_expect(store_failed == 0, 1)) {
            break;
        }
    }

    return previous;
}

/* Atomic decrement, with no explicit memory barrier.
 *
 * Subtracts 1 from *ptr and returns the value *ptr held before the
 * decrement.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t previous;
    int32_t updated;
    int32_t store_failed;

    for (;;) {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %[old], [%[addr]]\n"
            "sub %[next], %[old], #1\n"
            "strex %[fail], %[next], [%[addr]]"
            __ATOMIC_SWITCH_TO_THUMB
            : [old] "=&r" (previous),
              [next] "=&r" (updated),
              [fail] "=&r" (store_failed),
              [mem] "+m" (*ptr)
            : [addr] "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");

        /* Loop again whenever the exclusive store reports failure. */
        if (__builtin_expect(store_failed == 0, 1)) {
            break;
        }
    }

    return previous;
}

#endif /* BIONIC_ATOMIC_ARM_H */