summaryrefslogtreecommitdiffstats
path: root/vm/arch/arm/CallEABI.S
blob: 9717e3b3de507dbcb95aa4c25e85aa8f84d9d93f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * JNI method invocation.  This is used to call a C/C++ JNI method.  The
 * argument list has to be pushed onto the native stack according to
 * local calling conventions.
 *
 * This version supports the "new" ARM EABI.
 */

#include <machine/cpu-features.h>

#ifdef __ARM_EABI__

/*
 * DBG prefixes instructions that should only be assembled in debug builds.
 * With EXTENDED_EABI_DEBUG defined it expands to nothing, so the instruction
 * that follows it is assembled normally.  Otherwise it expands to "@", which
 * turns the rest of the line into an assembler comment.
 */
#ifdef EXTENDED_EABI_DEBUG
# define DBG
#else
# define DBG @
#endif


/*
Function prototype:

void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
    const u4* argv, const char* signature, void* func, JValue* pReturn) 

The method we are calling has the form:

  return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
    -or-
  return_type func(JNIEnv* pEnv, Object* this, ...)

We receive a collection of 32-bit values which correspond to arguments from
the interpreter (e.g. float occupies one, double occupies two).  It's up to
us to convert these into local calling conventions.
*/

/*
ARM EABI notes:

r0-r3 hold first 4 args to a method
r9 is given special treatment in some situations, but not for us
r10 (sl) seems to be generally available
r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
r12 (ip) is scratch -- not preserved across method calls
r13 (sp) should be managed carefully in case a signal arrives
r14 (lr) must be preserved
r15 (pc) can be tinkered with directly

r0 holds returns of <= 4 bytes
r0-r1 hold returns of 8 bytes, low word in r0

Callee must save/restore r4+ (except r12) if it modifies them.

Stack is "full descending".  Only the arguments that don't fit in the first 4
registers are placed on the stack.  "sp" points at the first stacked argument
(i.e. the 5th arg).

VFP: single-precision results in s0, double-precision results in d0.

In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
64-bit quantities (long long, double) must be 64-bit aligned.  This means
we have to scan the method signature, identify arguments that must be
padded, and fix them up appropriately.
*/

    @ Place dvmPlatformInvoke in the code section, word-align it (.align 2
    @ is a power-of-two alignment in ARM gas, i.e. 4 bytes), export the
    @ symbol and mark it as a function.
    .text
    .align  2
    .global dvmPlatformInvoke
    .type   dvmPlatformInvoke, %function

/*
 * On entry:
 *   r0  JNIEnv (can be left alone)
 *   r1  clazz (NULL for virtual method calls, non-NULL for static)
 *   r2  arg info
 *   r3  argc (number of 32-bit values in argv)
 *   [sp]     argv
 *   [sp,#4]  short signature
 *   [sp,#8]  func
 *   [sp,#12] pReturn
 *
 * For a virtual method call, the "this" reference is in argv[0].
 *
 * argInfo (32-bit int) layout:
 *   SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
 *
 *   S - if set, do things the hard way (scan the signature)
 *   R - return type enumeration, really only important for hardware FP
 *   L - number of double-words of storage required on stack (0-15
 *       double-words, i.e. 0-30 32-bit words)
 *   F - pad flag -- if set, write a pad word to the stack
 *
 * With this arrangement we can efficiently push up to 24 words of arguments
 * onto the stack.  Anything requiring more than that -- which should happen
 * rarely to never -- can do the slow signature scan.
 *
 * (We could pack the Fs more efficiently -- we know we never push two pads
 * in a row, and the first word can never be a pad -- but there's really
 * no need for it.)
 *
 * TODO: could reduce register-saving overhead for "fast" case, since we
 * don't use a couple of registers.  Another thought is to rearrange the
 * arguments such that r0/r1 get passed in on the stack, allowing us to
 * use r0/r1 freely here and then load them with a single ldm.  Might be
 * faster than saving/restoring other registers so that we can leave r0/r1
 * undisturbed.
 *
 * NOTE: if the called function has more than 4 words of arguments, gdb
 * will not be able to unwind the stack past this method.  The only way
 * around this is to convince gdb to respect an explicit frame pointer.
 */
dvmPlatformInvoke:
    .fnstart
    @ Save regs.  Same style as gcc with "-fomit-frame-pointer" -- we don't
    @ disturb "fp" in case somebody else wants it.  Copy "sp" to r4 and use
    @ that to access local vars.
    @
    @ On entry to a function, "sp" must be 64-bit aligned.  This means
    @ we have to adjust sp manually if we push an odd number of regs here
    @ (both here and when exiting).  Easier to just push an even number
    @ of registers.
    @
    @ The original sp is pushed (via ip) along with the callee-saved
    @ registers, so the epilogue can restore sp and return with a single
    @ load-multiple no matter how much argument space was carved out below.
    mov     ip, sp                      @ ip<- original stack pointer
    .save {r4, r5, r6, r7, r8, r9, ip, lr}
    stmfd   sp!, {r4, r5, r6, r7, r8, r9, ip, lr}

    mov     r4, ip                      @ r4<- original stack pointer

    @ Ensure 64-bit alignment.  EABI guarantees sp is aligned on entry, make
    @ sure we're aligned properly now.
DBG tst     sp, #4                      @ 64-bit aligned?
DBG bne     dvmAbort

    cmp     r1, #0                      @ Is this a static method?
    ldr     r9, [r4]                    @ r9<- argv

    @ Not static: set r1 to *argv++ ("this"), and set argc--.
    @
    @ Note the "this" pointer is not included in the method signature.
    @ (ldr does not alter the flags, so the "eq" conditions below still
    @ reflect the cmp of r1 against zero.)
    ldreq   r1, [r9], #4
    subeq   r3, r3, #1

    @ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
    @ teq against zero sets N from bit 31 of r2, which is the S flag.
    teq     r2, #0
    bmi     .Lno_arg_info

    /*
     * "Fast" path.
     *
     * Make room on the stack for the arguments and copy them over,
     * inserting pad words when appropriate.
     *
     * Currently:
     *   r0  don't touch
     *   r1  don't touch
     *   r2  arg info
     *   r3  argc
     *   r4  original stack pointer
     *   r5-r8 (available)
     *   r9  argv
     */
.Lhave_arg_info:
    @ Expand the stack by the specified amount.  We want to extract the
    @ count of double-words from r2, multiply it by 8, and subtract that
    @ from the stack pointer.
    and     ip, r2, #0x0f000000         @ ip<- double-words required
    mov     r5, r2, lsr #28             @ r5<- return type
    sub     sp, sp, ip, lsr #21         @ shift right 24, then left 3
    mov     r8, sp                      @ r8<- sp  (arg copy dest)

    @ Stick argv in r7 and advance it past the argv values that will be
    @ held in r2-r3.  It's possible r3 will hold a pad, so check the
    @ bit in r2.  We do this by ignoring the first bit (which would
    @ indicate a pad in r2) and shifting the second into the carry flag.
    @ If the carry is set, r3 will hold a pad, so we adjust argv less.
    @
    @ (This is harmless if argc==0)
    mov     r7, r9
    movs    r2, r2, lsr #2
    addcc   r7, r7, #8                  @ skip past 2 words, for r2 and r3
    subcc   r3, r3, #2
    addcs   r7, r7, #4                  @ skip past 1 word, for r2
    subcs   r3, r3, #1

.Lfast_copy_loop:
    @ if (--argc < 0) goto invoke
    subs    r3, r3, #1
    bmi     .Lcopy_done                 @ NOTE: expects original argv in r9

.Lfast_copy_loop2:
    @ Get pad flag into carry bit.  If it's set, we don't pull a value
    @ out of argv.
    movs    r2, r2, lsr #1

    ldrcc   ip, [r7], #4                @ ip = *r7++ (pull from argv)
    strcc   ip, [r8], #4                @ *r8++ = ip (write to stack)
    bcc     .Lfast_copy_loop

    @ Carry set: this stack slot is a pad word.  In non-debug builds the
    @ two DBG lines below are commented out and the slot is left unwritten.
DBG movcs   ip, #-3                     @ DEBUG DEBUG - make pad word obvious
DBG strcs   ip, [r8]                    @ DEBUG DEBUG
    add     r8, r8, #4                  @ if pad, just advance r8 without store
    b       .Lfast_copy_loop2           @ don't adjust argc after writing pad



.Lcopy_done:
    /*
     * Reached from both the fast path and the slow path.
     *
     * Currently:
     *  r0-r3  args (JNIEnv*, thisOrClass, arg0, arg1)
     *  r4  original saved sp
     *  r5  return type (enum DalvikJniReturnType)
     *  r9  original argv
     *
     * The stack copy is complete.  Grab the first two words off of argv
     * and tuck them into r2/r3.  If the first arg is 32-bit and the second
     * arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
     * but harmless.
     *
     * If there are 0 or 1 arg words in argv, we will be loading uninitialized
     * data into the registers, but since nothing tries to use it it's also
     * harmless (assuming argv[0] and argv[1] point to valid memory, which
     * is a reasonable assumption for Dalvik's interpreted stacks).
     *
     */
    ldmia   r9, {r2-r3}                 @ r2/r3<- argv[0]/argv[1]

    @ call the method
    ldr     ip, [r4, #8]                @ func
#ifdef __ARM_HAVE_BLX
    blx     ip
#else
    mov     lr, pc                      @ pc reads as this insn + 8: return addr
    bx      ip
#endif

    @ We're back, result is in r0 or (for long/double) r0-r1.
    @
    @ In theory, we need to use the "return type" arg to figure out what
    @ we have and how to return it.  However, unless we have an FPU,
    @ all we need to do is copy r0-r1 into the JValue union.
    @
    @ Thought: could redefine DalvikJniReturnType such that single-word
    @ and double-word values occupy different ranges; simple comparison
    @ allows us to choose between str and stm.  Probably not worthwhile.
    @
    cmp     r5, #0                      @ DALVIK_JNI_RETURN_VOID?
    ldrne   ip, [r4, #12]               @ pReturn
    stmneia ip, {r0-r1}                 @ pReturn->j <- r0/r1

    @ Restore the registers we saved and return (restores lr into pc, and
    @ the initial stack pointer into sp).  r4 still holds the original sp,
    @ and the eight saved words sit immediately below it, so "ldmdb r4"
    @ reads exactly what the prologue's stmfd wrote.
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, pc}
#else
    ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, lr}
    bx      lr
#endif
    .fnend



    /*
     * "Slow" path.
     *
     * Taken when the S bit of argInfo is set, i.e. there are no usable
     * padding flags.  Walk through the argument list, counting up the number
     * of 32-bit words required to contain it (including alignment pads).
     * Then walk through it a second time, copying values out to the stack.
     * (We could pre-compute the size to save ourselves a trip, but we'd have
     * to store that somewhere -- this is sufficiently unlikely that it's not
     * worthwhile.)
     *
     * Try not to make any assumptions about the number of args -- I think
     * the class file format allows up to 64K words (need to verify that).
     *
     * NOTE(review): both passes treat every character of the signature as
     * an argument.  If callers pass a shorty whose first character is the
     * return type, that character would have to be skipped in both passes
     * -- confirm against the caller.
     *
     * On entry:
     *   r0  don't touch
     *   r1  don't touch
     *   r2  arg info (only the return type is used; then reused as a counter)
     *   r3  argc
     *   r4  original stack pointer
     *   r5-r8 (available)
     *   r9  argv
     *
     * Exits by branching to .Lcopy_done with r5 = return type and r9 = argv.
     */
.Lno_arg_info:
    @ The top nibble of argInfo is SRRR.  S is always set on this path, so
    @ it must be masked off; otherwise r5 is never zero and the
    @ DALVIK_JNI_RETURN_VOID check at .Lcopy_done could never succeed.
    mov     r5, r2, lsr #28             @ r5<- SRRR
    and     r5, r5, #7                  @ r5<- return type (RRR only)
    ldr     r6, [r4, #4]                @ r6<- short signature
    mov     r2, #0                      @ r2<- word count, init to zero

    @ Pass 1: count words.  Word 0 corresponds to register r2, which is an
    @ even (64-bit aligned) slot, so "even count" == "64-bit aligned".
.Lcount_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #0                      @ end?
    beq     .Lcount_done                @ all done, bail
    add     r2, r2, #1                  @ count++
    cmp     ip, #'D'                    @ look for 'D' or 'J', which are 64-bit
    cmpne   ip, #'J'
    bne     .Lcount_loop

    @ 64-bit value, insert padding if we're not aligned
    tst     r2, #1                      @ count odd after the initial incr?
    addne   r2, r2, #1                  @ odd: started aligned, add 2nd word
    addeq   r2, r2, #2                  @ even: prev incr was the pad, add 2
    b       .Lcount_loop
.Lcount_done:

    @ We have the padded-out word count in r2.  We subtract 2 from it
    @ because we don't push the first two arg words on the stack (they're
    @ destined for r2/r3).  Pushing them on and popping them off would be
    @ simpler but slower.
    subs    r2, r2, #2                  @ subtract 2 (for contents of r2/r3)
    movmis  r2, #0                      @ if negative, peg at zero, set Z-flag
    beq     .Lcopy_done                 @ zero args, skip stack copy

DBG tst     sp, #7                      @ DEBUG - make sure sp is aligned now
DBG bne     dvmAbort                    @ DEBUG

    @ Set up to copy from r7 to r8.  We copy from the second arg to the
    @ last arg, which means reading and writing to ascending addresses.
    @ sp was 64-bit aligned; after subtracting a multiple of 4 it may only
    @ be 32-bit aligned, so clear bit 2 to round down to a 64-bit boundary.
    sub     sp, sp, r2, asl #2          @ sp<- sp - r2*4
    bic     sp, sp, #4                  @ subtract another 4 if needed
    mov     r7, r9                      @ r7<- argv
    mov     r8, sp                      @ r8<- sp

    @ We need to copy words from [r7] to [r8].  We walk forward through
    @ the signature again, "copying" pad words when appropriate, storing
    @ upward into the stack.
    ldr     r6, [r4, #4]                @ r6<- signature
    add     r7, r7, #8                  @ r7<- r7+8 (assume argv 0/1 in r2/r3)

    @ Eat first arg or two, for the stuff that goes into r2/r3.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lstack_copy_loop           @ 64-bit arg fills r2+r3

    @ First arg was 32-bit, check the next.  If it is 64-bit, r3 holds a
    @ pad rather than argv[1], so only one argv word was consumed by the
    @ registers and the 64-bit value itself must go on the stack.  The
    @ comparison must be against the character in ip, not the signature
    @ pointer in r6.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    subeq   r7, r7, #4                  @ r7<- r7-4 (take it back - pad word)
    beq     .Lstack_copy_loop2          @ start with char we already have

    @ Two 32-bit args, fall through and start with next arg

.Lstack_copy_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
.Lstack_copy_loop2:
    cmp     ip, #0                      @ end of shorty?
    beq     .Lcopy_done                 @ yes

    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lcopy64

    @ Copy a 32-bit value.  [r8] is initially at the end of the stack.  We
    @ use "full descending" stacks, so we store into [r8] and incr as we
    @ move toward the end of the arg list.
.Lcopy32:
    ldr     ip, [r7], #4
    str     ip, [r8], #4
    b       .Lstack_copy_loop

.Lcopy64:
    @ Copy a 64-bit value.  If necessary, leave a hole in the stack to
    @ ensure alignment.  We know the [r8] output area is 64-bit aligned,
    @ so we can just mask the address.  r2 is free to use as scratch here
    @ because .Lcopy_done reloads r2/r3 from argv.
    add     r8, r8, #7          @ r8<- (r8+7) & ~7
    ldr     ip, [r7], #4
    bic     r8, r8, #7
    ldr     r2, [r7], #4
    str     ip, [r8], #4
    str     r2, [r8], #4
    b       .Lstack_copy_loop



#if 0

/*
 * Debug-only helpers, currently compiled out by the "#if 0" above.
 *
 * Spit out a "we were here", preserving all registers.  (The attempt
 * to save ip won't work, but we need to save an even number of
 * registers for EABI 64-bit stack alignment.)
 *
 * "SQUEAK n" emits a routine labelled common_squeak<n> that calls
 * printf("<%d>", n) and returns to its caller.
 */
     .macro SQUEAK num
common_squeak\num:
    stmfd   sp!, {r0, r1, r2, r3, ip, lr}   @ 6 regs: keeps sp 64-bit aligned
    ldr     r0, strSqueak               @ r0<- address of format string
    mov     r1, #\num                   @ r1<- marker number to print
    bl      printf
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmfd   sp!, {r0, r1, r2, r3, ip, pc}   @ restore and return in one go
#else
    ldmfd   sp!, {r0, r1, r2, r3, ip, lr}
    bx      lr
#endif
    .endm

    @ Instantiate common_squeak0 .. common_squeak5.
    SQUEAK  0
    SQUEAK  1
    SQUEAK  2
    SQUEAK  3
    SQUEAK  4
    SQUEAK  5

    @ Literal word holding the address of the format string, loaded
    @ PC-relative by the macro above.
strSqueak:
    .word   .LstrSqueak
.LstrSqueak:
    .asciz  "<%d>"

    .align  2

#endif

#endif /*__ARM_EABI__*/