vm/AtomicCache.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Mutex-free cache for key1+key2=value.
 */
#ifndef DALVIK_ATOMICCACHE_H_
#define DALVIK_ATOMICCACHE_H_

/*
 * If set to "1", gather some stats on our caching success rate.
 */
#define CALC_CACHE_STATS 0


/*
 * One entry in the cache.  We store two keys (e.g. the classes that are
 * arguments to "instanceof") and one result (e.g. a boolean value).
 *
 * Must be exactly 16 bytes.
 */
struct AtomicCacheEntry {
    u4          key1;
    u4          key2;
    u4          value;
    volatile u4 version;    /* version and lock flag */
};

/*
 * One cache.
 *
 * Thought: we might be able to save a few cycles by storing the cache
 * struct and "entries" separately, avoiding an indirection.  (We already
 * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.)
 */
struct AtomicCache {
    AtomicCacheEntry*   entries;        /* array of entries */
    int         numEntries;             /* #of entries, must be power of 2 */

    void*       entryAlloc;             /* memory allocated for entries */

    /* cache stats; note we don't guarantee atomic increments for these */
    int         trivial;                /* cache access not required */
    int         fail;                   /* contention failure */
    int         hits;                   /* found entry in cache */
    int         misses;                 /* entry was for other keys */
    int         fills;                  /* entry was empty */
};

/*
 * Do a cache lookup.  We need to be able to read and write entries
 * atomically.  There are a couple of ways to do this:
 *  (1) Have a global lock.  A mutex is too heavy, so instead we would use
 *      an atomic flag.  If the flag is set, we could sit and spin,
 *      but if we're a high-priority thread that may cause a lockup.
 *      Better to just ignore the cache and do the full computation.
 *  (2) Have a "version" that gets incremented atomically when a write
 *      begins and again when it completes.  Compare the version before
 *      and after doing reads.  So long as we have memory barriers in the
 *      right place the compiler and CPU will do the right thing, allowing
 *      us to skip atomic ops in the common read case.  The table updates
 *      are expensive, requiring two writes with barriers.  We also need
 *      some sort of lock to ensure that nobody else tries to start an
 *      update while we're in the middle of one.
 *
 * We expect a 95+% hit rate for the things we use this for, so #2 is
 * much better than #1.
 *
 * _cache is an AtomicCache*
 * _cacheSize is _cache->cacheSize (can save a cycle avoiding the lookup)
 * _key1, _key2 are the keys
 *
 * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value.  This
 * will be invoked when we need to compute the value.
 *
 * Returns the value.
 */
#if CALC_CACHE_STATS > 0
# define CACHE_XARG(_value) ,_value
#else
# define CACHE_XARG(_value)
#endif
#define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({            \
    AtomicCacheEntry* pEntry;                                               \
    int hash;                                                               \
    u4 firstVersion, secondVersion;                                         \
    u4 value;                                                               \
                                                                            \
    /* simple hash function */                                              \
    hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1);           \
    pEntry = (_cache)->entries + hash;                                      \
                                                                            \
    firstVersion = android_atomic_acquire_load((int32_t*)&pEntry->version); \
                                                                            \
    if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) {       \
        /*                                                                  \
         * The fields match.  Get the value, then read the version a        \
         * second time to verify that we didn't catch a partial update.     \
         * We're also hosed if "firstVersion" was odd, indicating that      \
         * an update was in progress before we got here (unlikely).         \
         */                                                                 \
        value = android_atomic_acquire_load((int32_t*) &pEntry->value);     \
        secondVersion = pEntry->version;                                    \
                                                                            \
        if ((firstVersion & 0x01) != 0 || firstVersion != secondVersion) {  \
            /*                                                              \
             * We clashed with another thread.  Instead of sitting and      \
             * spinning, which might not complete if we're a high priority  \
             * thread, just do the regular computation.                     \
             */                                                             \
            if (CALC_CACHE_STATS)                                           \
                (_cache)->fail++;                                           \
            value = (u4) ATOMIC_CACHE_CALC;                                 \
        } else {                                                            \
            /* all good */                                                  \
            if (CALC_CACHE_STATS)                                           \
                (_cache)->hits++;                                           \
        }                                                                   \
    } else {                                                                \
        /*                                                                  \
         * Compute the result and update the cache.  We really want this    \
         * to happen in a different method -- it makes the ARM frame        \
         * setup for this method simpler, which gives us a ~10% speed       \
         * boost.                                                           \
         */                                                                 \
        value = (u4) ATOMIC_CACHE_CALC;                                     \
        if (value == 0 && ATOMIC_CACHE_NULL_ALLOWED) { \
            dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \
                        firstVersion CACHE_XARG(_cache) ); \
        } \
    }                                                                       \
    value;                                                                  \
})

/*
 * Allocate a cache.
 */
AtomicCache* dvmAllocAtomicCache(int numEntries);

/*
 * Free a cache.
 */
void dvmFreeAtomicCache(AtomicCache* cache);

/*
 * Update a cache entry.
 *
 * Making the last argument optional, instead of merely unused, saves us
 * a few percent in the ATOMIC_CACHE_LOOKUP time.
 */
void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry,
    u4 firstVersion
#if CALC_CACHE_STATS > 0
    , AtomicCache* pCache
#endif
    );

/*
 * Debugging.
 */
void dvmDumpAtomicCacheStats(const AtomicCache* pCache);

#endif  // DALVIK_ATOMICCACHE_H_