diff options
Diffstat (limited to 'tools/testing/selftests/powerpc/math/fpu_asm.S')
-rw-r--r-- | tools/testing/selftests/powerpc/math/fpu_asm.S | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S new file mode 100644 index 000000000000..f3711d80e709 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -0,0 +1,198 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../basic_asm.h" + +#define PUSH_FPU(pos) \ + stfd f14,pos(sp); \ + stfd f15,pos+8(sp); \ + stfd f16,pos+16(sp); \ + stfd f17,pos+24(sp); \ + stfd f18,pos+32(sp); \ + stfd f19,pos+40(sp); \ + stfd f20,pos+48(sp); \ + stfd f21,pos+56(sp); \ + stfd f22,pos+64(sp); \ + stfd f23,pos+72(sp); \ + stfd f24,pos+80(sp); \ + stfd f25,pos+88(sp); \ + stfd f26,pos+96(sp); \ + stfd f27,pos+104(sp); \ + stfd f28,pos+112(sp); \ + stfd f29,pos+120(sp); \ + stfd f30,pos+128(sp); \ + stfd f31,pos+136(sp); + +#define POP_FPU(pos) \ + lfd f14,pos(sp); \ + lfd f15,pos+8(sp); \ + lfd f16,pos+16(sp); \ + lfd f17,pos+24(sp); \ + lfd f18,pos+32(sp); \ + lfd f19,pos+40(sp); \ + lfd f20,pos+48(sp); \ + lfd f21,pos+56(sp); \ + lfd f22,pos+64(sp); \ + lfd f23,pos+72(sp); \ + lfd f24,pos+80(sp); \ + lfd f25,pos+88(sp); \ + lfd f26,pos+96(sp); \ + lfd f27,pos+104(sp); \ + lfd f28,pos+112(sp); \ + lfd f29,pos+120(sp); \ + lfd f30,pos+128(sp); \ + lfd f31,pos+136(sp); + +# Careful calling this, it will 'clobber' fpu (by design) +# Don't call this from C +FUNC_START(load_fpu) + lfd f14,0(r3) + lfd f15,8(r3) + lfd f16,16(r3) + lfd f17,24(r3) + lfd f18,32(r3) + lfd f19,40(r3) + lfd f20,48(r3) + lfd f21,56(r3) + lfd f22,64(r3) + lfd f23,72(r3) + lfd f24,80(r3) + lfd f25,88(r3) + lfd f26,96(r3) + lfd f27,104(r3) + lfd f28,112(r3) + lfd f29,120(r3) + lfd f30,128(r3) + lfd f31,136(r3) + blr +FUNC_END(load_fpu) + +FUNC_START(check_fpu) + mr r4,r3 + li r3,1 # assume a bad result + lfd f0,0(r4) + fcmpu cr1,f0,f14 + bne cr1,1f + lfd f0,8(r4) + fcmpu cr1,f0,f15 + bne cr1,1f + lfd f0,16(r4) + fcmpu cr1,f0,f16 + bne cr1,1f + lfd f0,24(r4) + fcmpu cr1,f0,f17 + bne cr1,1f + lfd f0,32(r4) + fcmpu cr1,f0,f18 + bne cr1,1f + lfd f0,40(r4) + fcmpu cr1,f0,f19 + bne cr1,1f + lfd f0,48(r4) + fcmpu cr1,f0,f20 + bne cr1,1f + lfd f0,56(r4) + fcmpu cr1,f0,f21 + bne cr1,1f + lfd f0,64(r4) + fcmpu cr1,f0,f22 + bne cr1,1f + lfd f0,72(r4) + fcmpu cr1,f0,f23 + bne cr1,1f + lfd f0,80(r4) + fcmpu cr1,f0,f24 + bne cr1,1f + lfd f0,88(r4) + fcmpu cr1,f0,f25 + bne cr1,1f + lfd f0,96(r4) + fcmpu cr1,f0,f26 + bne cr1,1f + lfd f0,104(r4) + fcmpu cr1,f0,f27 + bne cr1,1f + lfd f0,112(r4) + fcmpu cr1,f0,f28 + bne cr1,1f + lfd f0,120(r4) + fcmpu cr1,f0,f29 + bne cr1,1f + lfd f0,128(r4) + fcmpu cr1,f0,f30 + bne cr1,1f + lfd f0,136(r4) + fcmpu cr1,f0,f31 + bne cr1,1f + li r3,0 # Success!!! +1: blr + +FUNC_START(test_fpu) + # r3 holds pointer to where to put the result of fork + # r4 holds pointer to the pid + # f14-f31 are non volatiles + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray + std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid + PUSH_FPU(STACK_FRAME_LOCAL(2,0)) + + bl load_fpu + nop + li r0,__NR_fork + sc + + # pass the result of the fork to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + + POP_FPU(STACK_FRAME_LOCAL(2,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(test_fpu) + +# int preempt_fpu(double *darray, int *threads_running, int *running) +# On starting will (atomically) decrement not_ready as a signal that the FPU +# has been loaded with darray. Will proceed to check the validity of the FPU +# registers while running is not zero. +FUNC_START(preempt_fpu) + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # double *darray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + PUSH_FPU(STACK_FRAME_LOCAL(3,0)) + + bl load_fpu + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_FPU(STACK_FRAME_LOCAL(3,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(preempt_fpu) |