/*
 * scrypt core — hand-optimized ARM (32-bit) assembly, preprocessed .S file.
 * Salsa20/8 mixing function and the scrypt_core(X, V) scratchpad loops.
 */
// ECOin - Copyright (c) - 2014/2022 - GPLv3 - epsylon@riseup.net (https://03c8.net)
#if defined(OPTIMIZED_SALSA) && defined(__arm__) && defined(__APCS_32__)

/* Mark the stack non-executable on Linux/ELF so the linker does not
 * force an executable stack for the whole program. */
#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif

/* __ARM_ARCH_5E_OR_6__: ARMv5E/ARMv6 cores — these have ldrd/strd and
 * pld, and the file selects the register-pair variant of the salsa core
 * for them below. */
#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
	defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \
	defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
	defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6T2__) || \
	defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__)
#define __ARM_ARCH_5E_OR_6__
#endif

/* __ARM_ARCH_5E_OR_6_OR_7__: any core where pld prefetch is available;
 * used to guard the pld instructions in scrypt_core below. */
#if defined(__ARM_ARCH_5E_OR_6__) || defined(__ARM_ARCH_7__) || \
	defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
	defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
#define __ARM_ARCH_5E_OR_6_OR_7__
#endif
#ifdef __ARM_ARCH_5E_OR_6__

/*
 * scrypt_shuffle(): permute the two 16-word halves of the 32-word state
 * at [r0] and [r0 + 64] into the word order expected by this file's
 * salsa8 core (and back — the permutation is applied once on entry and
 * once on exit of scrypt_core).  Clobbers r2-r12, lr.
 * NOTE(review): exact permutation inferred from the store offsets below;
 * confirm against the portable C scrypt_shuffle if one exists.
 */
#define scrypt_shuffle() \
	add lr, r0, #9*4; \
	ldmia r0, {r2-r7}; \
	ldmia lr, {r2, r8-r12, lr}; \
	str r3, [r0, #5*4]; \
	str r5, [r0, #15*4]; \
	str r6, [r0, #12*4]; \
	str r7, [r0, #1*4]; \
	ldr r5, [r0, #7*4]; \
	str r2, [r0, #13*4]; \
	str r8, [r0, #2*4]; \
	strd r4, [r0, #10*4]; \
	str r9, [r0, #7*4]; \
	str r10, [r0, #4*4]; \
	str r11, [r0, #9*4]; \
	str lr, [r0, #3*4]; \
	add r2, r0, #64+0*4; \
	add lr, r0, #64+9*4; \
	ldmia r2, {r2-r7}; \
	ldmia lr, {r2, r8-r12, lr}; \
	str r3, [r0, #64+5*4]; \
	str r5, [r0, #64+15*4]; \
	str r6, [r0, #64+12*4]; \
	str r7, [r0, #64+1*4]; \
	ldr r5, [r0, #64+7*4]; \
	str r2, [r0, #64+13*4]; \
	str r8, [r0, #64+2*4]; \
	strd r4, [r0, #64+10*4]; \
	str r9, [r0, #64+7*4]; \
	str r10, [r0, #64+4*4]; \
	str r11, [r0, #64+9*4]; \
	str lr, [r0, #64+3*4];
/*
 * salsa8_core_doubleround_body() — ARMv5E/6 variant.
 * One Salsa20 double round (column round + row round) over the 16-word
 * state; registers r0-r12, lr hold 14 state words, the remaining words
 * are spilled at [sp, #2*4], [sp, #6*4], [sp, #10*4], [sp, #14*4] and
 * exchanged via ldrd/strd pairs.  Rotations are expressed as
 * eor dst, dst, src, ror #(32 - k) for the Salsa20 rotate amounts
 * k = 7, 9, 13, 18 (hence ror #25, #23, #19, #14).
 */
#define salsa8_core_doubleround_body() \
	add r6, r2, r6; \
	add r7, r3, r7; \
	eor r10, r10, r6, ror #25; \
	add r6, r0, r4; \
	eor r11, r11, r7, ror #25; \
	add r7, r1, r5; \
	strd r10, [sp, #14*4]; \
	eor r12, r12, r6, ror #25; \
	eor lr, lr, r7, ror #25; \
	ldrd r6, [sp, #10*4]; \
	add r2, r10, r2; \
	add r3, r11, r3; \
	eor r6, r6, r2, ror #23; \
	add r2, r12, r0; \
	eor r7, r7, r3, ror #23; \
	add r3, lr, r1; \
	strd r6, [sp, #10*4]; \
	eor r8, r8, r2, ror #23; \
	eor r9, r9, r3, ror #23; \
	ldrd r2, [sp, #6*4]; \
	add r10, r6, r10; \
	add r11, r7, r11; \
	eor r2, r2, r10, ror #19; \
	add r10, r8, r12; \
	eor r3, r3, r11, ror #19; \
	add r11, r9, lr; \
	eor r4, r4, r10, ror #19; \
	eor r5, r5, r11, ror #19; \
	ldrd r10, [sp, #2*4]; \
	add r6, r2, r6; \
	add r7, r3, r7; \
	eor r10, r10, r6, ror #14; \
	add r6, r4, r8; \
	eor r11, r11, r7, ror #14; \
	add r7, r5, r9; \
	eor r0, r0, r6, ror #14; \
	eor r1, r1, r7, ror #14; \
	ldrd r6, [sp, #14*4]; \
	strd r2, [sp, #6*4]; \
	strd r10, [sp, #2*4]; \
	add r6, r11, r6; \
	add r7, r0, r7; \
	eor r4, r4, r6, ror #25; \
	add r6, r1, r12; \
	eor r5, r5, r7, ror #25; \
	add r7, r10, lr; \
	eor r2, r2, r6, ror #25; \
	eor r3, r3, r7, ror #25; \
	strd r2, [sp, #6*4]; \
	add r10, r3, r10; \
	ldrd r6, [sp, #10*4]; \
	add r11, r4, r11; \
	eor r8, r8, r10, ror #23; \
	add r10, r5, r0; \
	eor r9, r9, r11, ror #23; \
	add r11, r2, r1; \
	eor r6, r6, r10, ror #23; \
	eor r7, r7, r11, ror #23; \
	strd r6, [sp, #10*4]; \
	add r2, r7, r2; \
	ldrd r10, [sp, #14*4]; \
	add r3, r8, r3; \
	eor r12, r12, r2, ror #19; \
	add r2, r9, r4; \
	eor lr, lr, r3, ror #19; \
	add r3, r6, r5; \
	eor r10, r10, r2, ror #19; \
	eor r11, r11, r3, ror #19; \
	ldrd r2, [sp, #2*4]; \
	add r6, r11, r6; \
	add r7, r12, r7; \
	eor r0, r0, r6, ror #14; \
	add r6, lr, r8; \
	eor r1, r1, r7, ror #14; \
	add r7, r10, r9; \
	eor r2, r2, r6, ror #14; \
	eor r3, r3, r7, ror #14;
/*
 * salsa8_core() — ARMv5E/6 variant.
 * Salsa20/8: four double rounds over the 16-word block at [sp].
 * Loads the state into r0-r12, lr (+ two spilled pairs), runs the
 * double-round body four times re-synchronizing the spill slots in
 * between, then writes the whole state back to [sp].
 * Clobbers r0-r12, lr; uses [sp, #0..15*4] as the state area.
 */
#define salsa8_core() \
	ldmia sp, {r0-r12, lr}; \
	ldrd r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	ldrd r6, [sp, #6*4]; \
	strd r2, [sp, #2*4]; \
	strd r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	ldrd r6, [sp, #6*4]; \
	strd r2, [sp, #2*4]; \
	strd r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	ldrd r6, [sp, #6*4]; \
	strd r2, [sp, #2*4]; \
	strd r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	stmia sp, {r0-r5}; \
	strd r8, [sp, #8*4]; \
	str r12, [sp, #12*4]; \
	str lr, [sp, #13*4]; \
	strd r10, [sp, #14*4];

#else
/* Generic-ARM build: no shuffle is needed, the state is processed in
 * natural order, so scrypt_shuffle() is a no-op. */
#define scrypt_shuffle()

/*
 * salsa8_core_doubleround_body() — generic ARM variant (no ldrd/strd).
 * One Salsa20 double round; eight state words live in r0-r7, the other
 * eight are kept in [sp, #8*4 .. #15*4] and cycled through r8-r12, lr
 * with single-word ldr/str.  Rotate amounts 7/9/13/18 appear as
 * ror #25/#23/#19/#14 on the eor destination.
 */
#define salsa8_core_doubleround_body() \
	ldr r8, [sp, #8*4]; \
	add r11, r11, r10; \
	ldr lr, [sp, #13*4]; \
	add r12, r12, r3; \
	eor r2, r2, r11, ror #23; \
	add r11, r4, r0; \
	eor r7, r7, r12, ror #23; \
	add r12, r9, r5; \
	str r9, [sp, #9*4]; \
	eor r8, r8, r11, ror #23; \
	str r10, [sp, #14*4]; \
	eor lr, lr, r12, ror #23; \
	ldr r11, [sp, #11*4]; \
	add r9, lr, r9; \
	ldr r12, [sp, #12*4]; \
	add r10, r2, r10; \
	eor r1, r1, r9, ror #19; \
	add r9, r7, r3; \
	eor r6, r6, r10, ror #19; \
	add r10, r8, r4; \
	str r8, [sp, #8*4]; \
	eor r11, r11, r9, ror #19; \
	str lr, [sp, #13*4]; \
	eor r12, r12, r10, ror #19; \
	ldr r9, [sp, #10*4]; \
	add r8, r12, r8; \
	ldr r10, [sp, #15*4]; \
	add lr, r1, lr; \
	eor r0, r0, r8, ror #14; \
	add r8, r6, r2; \
	eor r5, r5, lr, ror #14; \
	add lr, r11, r7; \
	eor r9, r9, r8, ror #14; \
	ldr r8, [sp, #9*4]; \
	eor r10, r10, lr, ror #14; \
	ldr lr, [sp, #14*4]; \
	add r8, r9, r8; \
	str r9, [sp, #10*4]; \
	add lr, r10, lr; \
	str r10, [sp, #15*4]; \
	eor r11, r11, r8, ror #25; \
	add r8, r0, r3; \
	eor r12, r12, lr, ror #25; \
	add lr, r5, r4; \
	eor r1, r1, r8, ror #25; \
	ldr r8, [sp, #8*4]; \
	eor r6, r6, lr, ror #25; \
	add r9, r11, r9; \
	ldr lr, [sp, #13*4]; \
	add r10, r12, r10; \
	eor r8, r8, r9, ror #23; \
	add r9, r1, r0; \
	eor lr, lr, r10, ror #23; \
	add r10, r6, r5; \
	str r11, [sp, #11*4]; \
	eor r2, r2, r9, ror #23; \
	str r12, [sp, #12*4]; \
	eor r7, r7, r10, ror #23; \
	ldr r9, [sp, #9*4]; \
	add r11, r8, r11; \
	ldr r10, [sp, #14*4]; \
	add r12, lr, r12; \
	eor r9, r9, r11, ror #19; \
	add r11, r2, r1; \
	eor r10, r10, r12, ror #19; \
	add r12, r7, r6; \
	str r8, [sp, #8*4]; \
	eor r3, r3, r11, ror #19; \
	str lr, [sp, #13*4]; \
	eor r4, r4, r12, ror #19;
/*
 * salsa8_core() — generic ARM variant.
 * Salsa20/8 over the 16-word block at [sp]: the prologue performs the
 * first quarter-round step, then salsa8_core_doubleround_body() is run
 * four times with glue code that finishes one double round and starts
 * the next; the epilogue completes the last round and stores the state
 * back.  Clobbers r0-r12, lr; state lives in r0-r7 + [sp, #8*4..15*4].
 */
#define salsa8_core() \
	ldmia sp, {r0-r7}; \
	ldr r12, [sp, #15*4]; \
	ldr r8, [sp, #11*4]; \
	ldr lr, [sp, #12*4]; \
	ldr r9, [sp, #9*4]; \
	add r8, r8, r12; \
	ldr r11, [sp, #10*4]; \
	add lr, lr, r0; \
	eor r3, r3, r8, ror #25; \
	add r8, r5, r1; \
	ldr r10, [sp, #14*4]; \
	eor r4, r4, lr, ror #25; \
	add lr, r11, r6; \
	eor r9, r9, r8, ror #25; \
	eor r10, r10, lr, ror #25; \
	salsa8_core_doubleround_body(); \
	ldr r11, [sp, #10*4]; \
	add r8, r9, r8; \
	ldr r12, [sp, #15*4]; \
	add lr, r10, lr; \
	eor r11, r11, r8, ror #14; \
	add r8, r3, r2; \
	eor r12, r12, lr, ror #14; \
	add lr, r4, r7; \
	eor r0, r0, r8, ror #14; \
	ldr r8, [sp, #11*4]; \
	eor r5, r5, lr, ror #14; \
	ldr lr, [sp, #12*4]; \
	add r8, r8, r12; \
	str r11, [sp, #10*4]; \
	add lr, lr, r0; \
	str r12, [sp, #15*4]; \
	eor r3, r3, r8, ror #25; \
	add r8, r5, r1; \
	eor r4, r4, lr, ror #25; \
	add lr, r11, r6; \
	str r9, [sp, #9*4]; \
	eor r9, r9, r8, ror #25; \
	str r10, [sp, #14*4]; \
	eor r10, r10, lr, ror #25; \
	salsa8_core_doubleround_body(); \
	ldr r11, [sp, #10*4]; \
	add r8, r9, r8; \
	ldr r12, [sp, #15*4]; \
	add lr, r10, lr; \
	eor r11, r11, r8, ror #14; \
	add r8, r3, r2; \
	eor r12, r12, lr, ror #14; \
	add lr, r4, r7; \
	eor r0, r0, r8, ror #14; \
	ldr r8, [sp, #11*4]; \
	eor r5, r5, lr, ror #14; \
	ldr lr, [sp, #12*4]; \
	add r8, r8, r12; \
	str r11, [sp, #10*4]; \
	add lr, lr, r0; \
	str r12, [sp, #15*4]; \
	eor r3, r3, r8, ror #25; \
	add r8, r5, r1; \
	eor r4, r4, lr, ror #25; \
	add lr, r11, r6; \
	str r9, [sp, #9*4]; \
	eor r9, r9, r8, ror #25; \
	str r10, [sp, #14*4]; \
	eor r10, r10, lr, ror #25; \
	salsa8_core_doubleround_body(); \
	ldr r11, [sp, #10*4]; \
	add r8, r9, r8; \
	ldr r12, [sp, #15*4]; \
	add lr, r10, lr; \
	eor r11, r11, r8, ror #14; \
	add r8, r3, r2; \
	eor r12, r12, lr, ror #14; \
	add lr, r4, r7; \
	eor r0, r0, r8, ror #14; \
	ldr r8, [sp, #11*4]; \
	eor r5, r5, lr, ror #14; \
	ldr lr, [sp, #12*4]; \
	add r8, r8, r12; \
	str r11, [sp, #10*4]; \
	add lr, lr, r0; \
	str r12, [sp, #15*4]; \
	eor r3, r3, r8, ror #25; \
	add r8, r5, r1; \
	eor r4, r4, lr, ror #25; \
	add lr, r11, r6; \
	str r9, [sp, #9*4]; \
	eor r9, r9, r8, ror #25; \
	str r10, [sp, #14*4]; \
	eor r10, r10, lr, ror #25; \
	salsa8_core_doubleround_body(); \
	ldr r11, [sp, #10*4]; \
	add r8, r9, r8; \
	ldr r12, [sp, #15*4]; \
	add lr, r10, lr; \
	str r9, [sp, #9*4]; \
	eor r11, r11, r8, ror #14; \
	eor r12, r12, lr, ror #14; \
	add r8, r3, r2; \
	str r10, [sp, #14*4]; \
	add lr, r4, r7; \
	str r11, [sp, #10*4]; \
	eor r0, r0, r8, ror #14; \
	str r12, [sp, #15*4]; \
	eor r5, r5, lr, ror #14; \
	stmia sp, {r0-r7};

#endif
/*
 * scrypt_core_macro1a_x4(): loop-1 step over 4 words.
 * Reads 4 words of X from [r0] and 4 from [lr]!, saves both quads into
 * the scratchpad via [r1]! and [r3]!, then stores X ^= X2 back through
 * [r0]! and into the salsa work area via [r12]!.
 * Clobbers r4-r11; post-increments r0, r1, r3, r12, lr.
 */
#define scrypt_core_macro1a_x4() \
	ldmia r0, {r4-r7}; \
	ldmia lr!, {r8-r11}; \
	stmia r1!, {r4-r7}; \
	stmia r3!, {r8-r11}; \
	eor r4, r4, r8; \
	eor r5, r5, r9; \
	eor r6, r6, r10; \
	eor r7, r7, r11; \
	stmia r0!, {r4-r7}; \
	stmia r12!, {r4-r7};
/*
 * scrypt_core_macro1b_x4(): loop-2 step over 4 words.
 * Combines the scratchpad quad at [r3]! with [r2] (V[j] ^= ...), writes
 * it back via [r2]!, then folds the result plus the quad at [r1]! into
 * X at [r0]! and mirrors it into the salsa work area via [r12]!.
 * Clobbers r4-r11; post-increments r0, r1, r2, r3, r12.
 */
#define scrypt_core_macro1b_x4() \
	ldmia r3!, {r8-r11}; \
	ldmia r2, {r4-r7}; \
	eor r8, r8, r4; \
	eor r9, r9, r5; \
	eor r10, r10, r6; \
	eor r11, r11, r7; \
	ldmia r0, {r4-r7}; \
	stmia r2!, {r8-r11}; \
	eor r4, r4, r8; \
	eor r5, r5, r9; \
	eor r6, r6, r10; \
	eor r7, r7, r11; \
	ldmia r1!, {r8-r11}; \
	eor r4, r4, r8; \
	eor r5, r5, r9; \
	eor r6, r6, r10; \
	eor r7, r7, r11; \
	stmia r0!, {r4-r7}; \
	stmia r12!, {r4-r7};
/*
 * scrypt_core_macro2_x4(): post-salsa feedback over 4 words.
 * X[i] += salsa_out[i] (quad at [r12] added to quad at [r0], stored via
 * [r0]!), then the sum is XORed with the quad at [r2] and written via
 * both [r2]! and [r12]!, seeding the next salsa8 input.
 * Clobbers r4-r11; post-increments r0, r2, r12.
 */
#define scrypt_core_macro2_x4() \
	ldmia r12, {r4-r7}; \
	ldmia r0, {r8-r11}; \
	add r4, r4, r8; \
	add r5, r5, r9; \
	add r6, r6, r10; \
	add r7, r7, r11; \
	stmia r0!, {r4-r7}; \
	ldmia r2, {r8-r11}; \
	eor r4, r4, r8; \
	eor r5, r5, r9; \
	eor r6, r6, r10; \
	eor r7, r7, r11; \
	stmia r2!, {r4-r7}; \
	stmia r12!, {r4-r7};
/*
 * scrypt_core_macro3_x4(): final feedback over 4 words.
 * Quad at [r1]! is added into the quad at [r0] and stored via [r0]!.
 * Clobbers r4-r11; post-increments r0, r1.
 */
#define scrypt_core_macro3_x4() \
	ldmia r1!, {r4-r7}; \
	ldmia r0, {r8-r11}; \
	add r4, r4, r8; \
	add r5, r5, r9; \
	add r6, r6, r10; \
	add r7, r7, r11; \
	stmia r0!, {r4-r7};
/*
 * scrypt_core_macro3_x6(): same final feedback, six words at a time.
 * Six words at [r1]! are added into the six at [r0], stored via [r0]!.
 * Clobbers r2-r12, lr; post-increments r0, r1.
 */
#define scrypt_core_macro3_x6() \
	ldmia r1!, {r2-r7}; \
	ldmia r0, {r8-r12, lr}; \
	add r2, r2, r8; \
	add r3, r3, r9; \
	add r4, r4, r10; \
	add r5, r5, r11; \
	add r6, r6, r12; \
	add r7, r7, lr; \
	stmia r0!, {r2-r7};
- .text
- .code 32
- .align 2
- .globl scrypt_core
- .globl _scrypt_core
- #ifdef __ELF__
- .type scrypt_core, %function
- #endif
/*
 * scrypt_core(X, V)
 * In:   r0 = X, 32 x uint32 state (two salsa blocks)
 *       r1 = V, scratchpad of 1024 * 32 uint32 (128 KiB) — N = 1024 is
 *            hard-coded via the #1024*32*4 stride below
 * ABI:  APCS-32; callee-saved r4-r11 preserved via stmfd/ldmfd
 * Stack: realigned to 64 bytes; [sp, #0..15*4] salsa work area,
 *        [sp, #16*4] saved X ptr, [sp, #17*4] V/j ptr, [sp, #18*4] loop
 *        bound/counter, [sp, #19*4] next V[j] ptr, [sp, #20*4] saved sp.
 * NOTE(review): signature inferred from register usage; confirm against
 * the C prototype at the call site.
 */
scrypt_core:
_scrypt_core:
	stmfd sp!, {r4-r11, lr}
	mov r12, sp             @ remember caller sp before realignment
	sub sp, sp, #21*4
	bic sp, sp, #63         @ 64-byte align the work area
	str r12, [sp, #20*4]

	scrypt_shuffle()

	str r0, [sp, #16*4]
	add r12, r1, #1024*32*4 @ end of scratchpad = loop-1 bound
	str r12, [sp, #18*4]

	@ Loop 1: fill V[0..N-1] while mixing X with salsa20/8.
scrypt_core_loop1:
	add lr, r0, #16*4
	add r3, r1, #16*4
	mov r12, sp
	scrypt_core_macro1a_x4()
	scrypt_core_macro1a_x4()
	scrypt_core_macro1a_x4()
	scrypt_core_macro1a_x4()
	str r1, [sp, #17*4]

	salsa8_core()

	ldr r0, [sp, #16*4]
	mov r12, sp
	add r2, r0, #16*4
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()

	salsa8_core()

	ldr r0, [sp, #16*4]
	mov r1, sp
	add r0, r0, #16*4
	scrypt_core_macro3_x6()
	scrypt_core_macro3_x6()
	ldr r3, [sp, #17*4]
	ldr r12, [sp, #18*4]
	scrypt_core_macro3_x4()

	add r1, r3, #16*4
	sub r0, r0, #32*4
	cmp r1, r12
	bne scrypt_core_loop1

	@ Loop 2: N iterations of X ^= V[j], j = X[16] mod 1024.
	ldr r4, [r0, #16*4]
	sub r1, r1, #1024*32*4  @ back to V[0]
	str r1, [sp, #17*4]
	mov r4, r4, lsl #32-10  @ keep low 10 bits of X[16] (j mod 1024)
	mov r12, #1024          @ iteration counter
	add r1, r1, r4, lsr #32-10-7 @ r1 = V + j*32*4
scrypt_core_loop2:
	add r2, r0, #16*4
	add r3, r1, #16*4
	str r12, [sp, #18*4]
	mov r12, sp
#ifdef __ARM_ARCH_5E_OR_6_OR_7__
	pld [r1, #24*4]         @ prefetch the rest of V[j]
	pld [r1, #8*4]
#endif
	scrypt_core_macro1b_x4()
	scrypt_core_macro1b_x4()
	scrypt_core_macro1b_x4()
	scrypt_core_macro1b_x4()

	salsa8_core()

	ldr r0, [sp, #16*4]
	mov r12, sp
	add r2, r0, #16*4
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()

	salsa8_core()

	ldr r0, [sp, #16*4]
	mov r1, sp
	ldr r3, [sp, #17*4]
	add r0, r0, #16*4
	scrypt_core_macro3_x4()
	@ r4 now holds the fresh X[16]; derive the next index j early so the
	@ prefetches below can start before the remaining adds finish.
	mov r4, r4, lsl #32-10
	add r3, r3, r4, lsr #32-10-7
	str r3, [sp, #19*4]
#ifdef __ARM_ARCH_5E_OR_6_OR_7__
	pld [r3, #16*4]
	pld [r3]
#endif
	scrypt_core_macro3_x6()
	scrypt_core_macro3_x6()

	ldr r12, [sp, #18*4]
	sub r0, r0, #32*4
	ldr r1, [sp, #19*4]
	subs r12, r12, #1
	bne scrypt_core_loop2

	scrypt_shuffle()

	ldr sp, [sp, #20*4]     @ restore caller sp
#ifdef __thumb__
	ldmfd sp!, {r4-r11, lr}
	bx lr                   @ interworking return
#else
	ldmfd sp!, {r4-r11, pc}
#endif

#endif /* OPTIMIZED_SALSA && __arm__ && __APCS_32__ */