// scrypt-arm.S (12 KB)


// ECOin - Copyright (c) - 2014/2022 - GPLv3 - epsylon@riseup.net (https://03c8.net)
  2. #if defined(OPTIMIZED_SALSA) && defined(__arm__) && defined(__APCS_32__)
  /* Emit an empty .note.GNU-stack section on Linux/ELF (the conventional
   * way of telling the linker this object needs no executable stack). */
#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif

/*
 * __ARM_ARCH_5E_OR_6__ : set for ARMv5E, ARMv5TE, ARMv5TEJ and all listed
 * ARMv6 profiles. Later in this file it selects the ldrd/strd based
 * macro variants.
 */
#if    defined(__ARM_ARCH_5E__)   \
    || defined(__ARM_ARCH_5TE__)  \
    || defined(__ARM_ARCH_5TEJ__) \
    || defined(__ARM_ARCH_6__)    \
    || defined(__ARM_ARCH_6J__)   \
    || defined(__ARM_ARCH_6K__)   \
    || defined(__ARM_ARCH_6M__)   \
    || defined(__ARM_ARCH_6T2__)  \
    || defined(__ARM_ARCH_6Z__)   \
    || defined(__ARM_ARCH_6ZK__)
#define __ARM_ARCH_5E_OR_6__
#endif

/*
 * __ARM_ARCH_5E_OR_6_OR_7__ : the set above plus the listed ARMv7
 * profiles. Later in this file it guards the pld prefetch hints.
 */
#if    defined(__ARM_ARCH_5E_OR_6__) \
    || defined(__ARM_ARCH_7__)       \
    || defined(__ARM_ARCH_7A__)      \
    || defined(__ARM_ARCH_7R__)      \
    || defined(__ARM_ARCH_7M__)      \
    || defined(__ARM_ARCH_7EM__)
#define __ARM_ARCH_5E_OR_6_OR_7__
#endif
  18. #ifdef __ARM_ARCH_5E_OR_6__
  /*
   * scrypt_shuffle() - ARMv5E/v6 variant: permute both 16-word halves of
   * the 32-word block addressed by r0.
   *
   * In:    r0 = address of the block (two consecutive 64-byte halves)
   * Out:   block rewritten in place; r0 unchanged
   * Clobb: r2-r12, lr
   *
   * Within each half the word pairs (1,5) (2,10) (3,15) (4,12) (7,11)
   * (9,13) are exchanged; words 0, 6, 8 and 14 are left where they are.
   * The mapping is its own inverse, so running the macro a second time
   * restores the original word order.
   *
   * NOTE(review): the strd stores presumably need r0 to be 8-byte
   * aligned on cores without unaligned doubleword access - confirm.
   *
   * The last body line deliberately has no continuation backslash, so
   * the definition ends with it and cannot absorb the following line.
   */
#define scrypt_shuffle() \
	/* ---- first half: words 0..15 at r0 ---- */ \
	add	lr, r0, #9*4;               /* lr = &w[9]                        */ \
	ldmia	r0, {r2-r7};                /* r2..r7 = w[0..5]                  */ \
	ldmia	lr, {r2, r8-r12, lr};       /* r2 = w[9], r8..r12 = w[10..14], lr = w[15] */ \
	str	r3, [r0, #5*4];             /* w[5]  = old w[1]                  */ \
	str	r5, [r0, #15*4];            /* w[15] = old w[3]                  */ \
	str	r6, [r0, #12*4];            /* w[12] = old w[4]                  */ \
	str	r7, [r0, #1*4];             /* w[1]  = old w[5]                  */ \
	ldr	r5, [r0, #7*4];             /* r5 = old w[7], paired with r4 below */ \
	str	r2, [r0, #13*4];            /* w[13] = old w[9]                  */ \
	str	r8, [r0, #2*4];             /* w[2]  = old w[10]                 */ \
	strd	r4, [r0, #10*4];            /* w[10] = old w[2], w[11] = old w[7] */ \
	str	r9, [r0, #7*4];             /* w[7]  = old w[11]                 */ \
	str	r10, [r0, #4*4];            /* w[4]  = old w[12]                 */ \
	str	r11, [r0, #9*4];            /* w[9]  = old w[13]                 */ \
	str	lr, [r0, #3*4];             /* w[3]  = old w[15]                 */ \
	/* ---- second half: same exchange, 64 bytes further on ---- */ \
	add	r2, r0, #64+0*4; \
	add	lr, r0, #64+9*4; \
	ldmia	r2, {r2-r7}; \
	ldmia	lr, {r2, r8-r12, lr}; \
	str	r3, [r0, #64+5*4]; \
	str	r5, [r0, #64+15*4]; \
	str	r6, [r0, #64+12*4]; \
	str	r7, [r0, #64+1*4]; \
	ldr	r5, [r0, #64+7*4]; \
	str	r2, [r0, #64+13*4]; \
	str	r8, [r0, #64+2*4]; \
	strd	r4, [r0, #64+10*4]; \
	str	r9, [r0, #64+7*4]; \
	str	r10, [r0, #64+4*4]; \
	str	r11, [r0, #64+9*4]; \
	str	lr, [r0, #64+3*4]
  /*
   * salsa8_core_doubleround_body() - ARMv5E/v6 variant (ldrd/strd based).
   *
   * Two consecutive passes of add / rotate / xor steps. Each pass applies
   * the rotations ror #25, #23, #19, #14 in that order, i.e. left
   * rotations by 7, 9, 13 and 18 bits.
   *
   * Registers: all of r0-r12 and lr are in use, either holding state
   * words or the temporary sums that feed the rotated xor.
   * Memory:    the doubleword slots at sp+2*4, sp+6*4, sp+10*4 and
   *            sp+14*4 are read and rewritten as register pairs.
   * Clobb:     r0-r12, lr and those four stack slots.
   *
   * The instruction order interleaves independent chains and must be
   * kept exactly as written.
   * NOTE(review): ldrd/strd presumably require sp to be 8-byte aligned
   * here - confirm for the targeted cores.
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it.
   */
#define salsa8_core_doubleround_body() \
	/* pass 1, ror #25 */ \
	add	r6, r2, r6; \
	add	r7, r3, r7; \
	eor	r10, r10, r6, ror #25; \
	add	r6, r0, r4; \
	eor	r11, r11, r7, ror #25; \
	add	r7, r1, r5; \
	strd	r10, [sp, #14*4]; \
	eor	r12, r12, r6, ror #25; \
	eor	lr, lr, r7, ror #25; \
	/* pass 1, ror #23 */ \
	ldrd	r6, [sp, #10*4]; \
	add	r2, r10, r2; \
	add	r3, r11, r3; \
	eor	r6, r6, r2, ror #23; \
	add	r2, r12, r0; \
	eor	r7, r7, r3, ror #23; \
	add	r3, lr, r1; \
	strd	r6, [sp, #10*4]; \
	eor	r8, r8, r2, ror #23; \
	eor	r9, r9, r3, ror #23; \
	/* pass 1, ror #19 */ \
	ldrd	r2, [sp, #6*4]; \
	add	r10, r6, r10; \
	add	r11, r7, r11; \
	eor	r2, r2, r10, ror #19; \
	add	r10, r8, r12; \
	eor	r3, r3, r11, ror #19; \
	add	r11, r9, lr; \
	eor	r4, r4, r10, ror #19; \
	eor	r5, r5, r11, ror #19; \
	/* pass 1, ror #14, then spill/reload pairs for pass 2 */ \
	ldrd	r10, [sp, #2*4]; \
	add	r6, r2, r6; \
	add	r7, r3, r7; \
	eor	r10, r10, r6, ror #14; \
	add	r6, r4, r8; \
	eor	r11, r11, r7, ror #14; \
	add	r7, r5, r9; \
	eor	r0, r0, r6, ror #14; \
	eor	r1, r1, r7, ror #14; \
	ldrd	r6, [sp, #14*4]; \
	strd	r2, [sp, #6*4]; \
	strd	r10, [sp, #2*4]; \
	/* pass 2, ror #25 */ \
	add	r6, r11, r6; \
	add	r7, r0, r7; \
	eor	r4, r4, r6, ror #25; \
	add	r6, r1, r12; \
	eor	r5, r5, r7, ror #25; \
	add	r7, r10, lr; \
	eor	r2, r2, r6, ror #25; \
	eor	r3, r3, r7, ror #25; \
	strd	r2, [sp, #6*4]; \
	/* pass 2, ror #23 */ \
	add	r10, r3, r10; \
	ldrd	r6, [sp, #10*4]; \
	add	r11, r4, r11; \
	eor	r8, r8, r10, ror #23; \
	add	r10, r5, r0; \
	eor	r9, r9, r11, ror #23; \
	add	r11, r2, r1; \
	eor	r6, r6, r10, ror #23; \
	eor	r7, r7, r11, ror #23; \
	strd	r6, [sp, #10*4]; \
	/* pass 2, ror #19 */ \
	add	r2, r7, r2; \
	ldrd	r10, [sp, #14*4]; \
	add	r3, r8, r3; \
	eor	r12, r12, r2, ror #19; \
	add	r2, r9, r4; \
	eor	lr, lr, r3, ror #19; \
	add	r3, r6, r5; \
	eor	r10, r10, r2, ror #19; \
	eor	r11, r11, r3, ror #19; \
	/* pass 2, ror #14 */ \
	ldrd	r2, [sp, #2*4]; \
	add	r6, r11, r6; \
	add	r7, r12, r7; \
	eor	r0, r0, r6, ror #14; \
	add	r6, lr, r8; \
	eor	r1, r1, r7, ror #14; \
	add	r7, r10, r9; \
	eor	r2, r2, r6, ror #14; \
	eor	r3, r3, r7, ror #14
  /*
   * salsa8_core() - ARMv5E/v6 variant.
   *
   * Runs salsa8_core_doubleround_body() four times over the 16-word
   * block stored at sp and leaves the result in the same 16 words.
   *
   * In:    sp = address of the 16-word block
   * Out:   block at sp updated in place
   * Clobb: r0-r12, lr
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it and the following directive stays separate.
   */
#define salsa8_core() \
	ldmia	sp, {r0-r12, lr};           /* r0..r12 = words 0..12, lr = word 13 */ \
	ldrd	r10, [sp, #14*4];           /* r10, r11 now hold words 14, 15      */ \
	salsa8_core_doubleround_body(); \
	/* shuffle register pairs / stack slots into the layout the body expects on entry */ \
	ldrd	r6, [sp, #6*4]; \
	strd	r2, [sp, #2*4]; \
	strd	r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	ldrd	r6, [sp, #6*4]; \
	strd	r2, [sp, #2*4]; \
	strd	r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	ldrd	r6, [sp, #6*4]; \
	strd	r2, [sp, #2*4]; \
	strd	r10, [sp, #14*4]; \
	salsa8_core_doubleround_body(); \
	/* write back the words still held in registers; words 6, 7, 10, 11 */ \
	/* were already stored by the body                                  */ \
	stmia	sp, {r0-r5};                /* words 0..5   */ \
	strd	r8, [sp, #8*4];             /* words 8, 9   */ \
	str	r12, [sp, #12*4];           /* word 12      */ \
	str	lr, [sp, #13*4];            /* word 13      */ \
	strd	r10, [sp, #14*4]            /* words 14, 15 */
  150. #else
  /*
   * scrypt_shuffle() - variant for architectures outside the ARMv5E/v6
   * group: expands to nothing, so the block at r0 is left untouched.
   * There is intentionally no continuation backslash: with an empty body
   * it would pull the next line into this definition.
   */
#define scrypt_shuffle()
  /*
   * salsa8_core_doubleround_body() - variant for architectures outside
   * the ARMv5E/v6 group; uses single-word ldr/str only.
   *
   * Covers the ror #23, #19 and #14 steps of one pass followed by the
   * ror #25, #23 and #19 steps of the next pass. The ror #25 step that
   * precedes it and the ror #14 step that follows it are issued by
   * salsa8_core() around each invocation.
   *
   * Registers: r0-r7 keep state words for the whole body; r8-r12 and lr
   *            alternate between state words and temporary sums.
   * Memory:    words at sp+8*4 .. sp+15*4 are loaded and stored as
   *            spill slots.
   * Clobb:     r0-r12, lr and those stack words.
   *
   * Loads and stores are interleaved with the arithmetic; the order
   * must be kept exactly as written.
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it.
   */
#define salsa8_core_doubleround_body() \
	/* ror #23 step */ \
	ldr	r8, [sp, #8*4]; \
	add	r11, r11, r10; \
	ldr	lr, [sp, #13*4]; \
	add	r12, r12, r3; \
	eor	r2, r2, r11, ror #23; \
	add	r11, r4, r0; \
	eor	r7, r7, r12, ror #23; \
	add	r12, r9, r5; \
	str	r9, [sp, #9*4]; \
	eor	r8, r8, r11, ror #23; \
	str	r10, [sp, #14*4]; \
	eor	lr, lr, r12, ror #23; \
	/* ror #19 step */ \
	ldr	r11, [sp, #11*4]; \
	add	r9, lr, r9; \
	ldr	r12, [sp, #12*4]; \
	add	r10, r2, r10; \
	eor	r1, r1, r9, ror #19; \
	add	r9, r7, r3; \
	eor	r6, r6, r10, ror #19; \
	add	r10, r8, r4; \
	str	r8, [sp, #8*4]; \
	eor	r11, r11, r9, ror #19; \
	str	lr, [sp, #13*4]; \
	eor	r12, r12, r10, ror #19; \
	/* ror #14 step */ \
	ldr	r9, [sp, #10*4]; \
	add	r8, r12, r8; \
	ldr	r10, [sp, #15*4]; \
	add	lr, r1, lr; \
	eor	r0, r0, r8, ror #14; \
	add	r8, r6, r2; \
	eor	r5, r5, lr, ror #14; \
	add	lr, r11, r7; \
	eor	r9, r9, r8, ror #14; \
	ldr	r8, [sp, #9*4]; \
	eor	r10, r10, lr, ror #14; \
	/* next pass: ror #25 step */ \
	ldr	lr, [sp, #14*4]; \
	add	r8, r9, r8; \
	str	r9, [sp, #10*4]; \
	add	lr, r10, lr; \
	str	r10, [sp, #15*4]; \
	eor	r11, r11, r8, ror #25; \
	add	r8, r0, r3; \
	eor	r12, r12, lr, ror #25; \
	add	lr, r5, r4; \
	eor	r1, r1, r8, ror #25; \
	ldr	r8, [sp, #8*4]; \
	eor	r6, r6, lr, ror #25; \
	/* ror #23 step */ \
	add	r9, r11, r9; \
	ldr	lr, [sp, #13*4]; \
	add	r10, r12, r10; \
	eor	r8, r8, r9, ror #23; \
	add	r9, r1, r0; \
	eor	lr, lr, r10, ror #23; \
	add	r10, r6, r5; \
	str	r11, [sp, #11*4]; \
	eor	r2, r2, r9, ror #23; \
	str	r12, [sp, #12*4]; \
	eor	r7, r7, r10, ror #23; \
	/* ror #19 step */ \
	ldr	r9, [sp, #9*4]; \
	add	r11, r8, r11; \
	ldr	r10, [sp, #14*4]; \
	add	r12, lr, r12; \
	eor	r9, r9, r11, ror #19; \
	add	r11, r2, r1; \
	eor	r10, r10, r12, ror #19; \
	add	r12, r7, r6; \
	str	r8, [sp, #8*4]; \
	eor	r3, r3, r11, ror #19; \
	str	lr, [sp, #13*4]; \
	eor	r4, r4, r12, ror #19
  /*
   * salsa8_core() - variant for architectures outside the ARMv5E/v6
   * group.
   *
   * Runs salsa8_core_doubleround_body() four times over the 16-word
   * block stored at sp and leaves the result in the same 16 words.
   * Each body invocation is preceded by a ror #25 step and followed by
   * a ror #14 step; between two invocations those two steps are
   * identical and are provided by the private helper
   * salsa8_core_round_glue(), so the expanded instruction stream is the
   * same as with the sequence written out three times.
   *
   * In:    sp = address of the 16-word block
   * Out:   block at sp updated in place
   * Clobb: r0-r12, lr
   *
   * Neither definition ends with a continuation backslash, so each one
   * stops at its last instruction.
   */

/* Private helper: ror #14 step closing one doubleround, then the */
/* ror #25 step opening the next one.                              */
#define salsa8_core_round_glue() \
	ldr	r11, [sp, #10*4]; \
	add	r8, r9, r8; \
	ldr	r12, [sp, #15*4]; \
	add	lr, r10, lr; \
	eor	r11, r11, r8, ror #14; \
	add	r8, r3, r2; \
	eor	r12, r12, lr, ror #14; \
	add	lr, r4, r7; \
	eor	r0, r0, r8, ror #14; \
	ldr	r8, [sp, #11*4]; \
	eor	r5, r5, lr, ror #14; \
	ldr	lr, [sp, #12*4]; \
	add	r8, r8, r12; \
	str	r11, [sp, #10*4]; \
	add	lr, lr, r0; \
	str	r12, [sp, #15*4]; \
	eor	r3, r3, r8, ror #25; \
	add	r8, r5, r1; \
	eor	r4, r4, lr, ror #25; \
	add	lr, r11, r6; \
	str	r9, [sp, #9*4]; \
	eor	r9, r9, r8, ror #25; \
	str	r10, [sp, #14*4]; \
	eor	r10, r10, lr, ror #25

#define salsa8_core() \
	/* load words 0..7 and the operands of the first ror #25 step */ \
	ldmia	sp, {r0-r7}; \
	ldr	r12, [sp, #15*4]; \
	ldr	r8, [sp, #11*4]; \
	ldr	lr, [sp, #12*4]; \
	ldr	r9, [sp, #9*4]; \
	add	r8, r8, r12; \
	ldr	r11, [sp, #10*4]; \
	add	lr, lr, r0; \
	eor	r3, r3, r8, ror #25; \
	add	r8, r5, r1; \
	ldr	r10, [sp, #14*4]; \
	eor	r4, r4, lr, ror #25; \
	add	lr, r11, r6; \
	eor	r9, r9, r8, ror #25; \
	eor	r10, r10, lr, ror #25; \
	salsa8_core_doubleround_body(); \
	salsa8_core_round_glue(); \
	salsa8_core_doubleround_body(); \
	salsa8_core_round_glue(); \
	salsa8_core_doubleround_body(); \
	salsa8_core_round_glue(); \
	salsa8_core_doubleround_body(); \
	/* final ror #14 step, then write back words 9, 14, 10, 15 and 0..7 */ \
	ldr	r11, [sp, #10*4]; \
	add	r8, r9, r8; \
	ldr	r12, [sp, #15*4]; \
	add	lr, r10, lr; \
	str	r9, [sp, #9*4]; \
	eor	r11, r11, r8, ror #14; \
	eor	r12, r12, lr, ror #14; \
	add	r8, r3, r2; \
	str	r10, [sp, #14*4]; \
	add	lr, r4, r7; \
	str	r11, [sp, #10*4]; \
	eor	r0, r0, r8, ror #14; \
	str	r12, [sp, #15*4]; \
	eor	r5, r5, lr, ror #14; \
	stmia	sp, {r0-r7}
  330. #endif
  /*
   * scrypt_core_macro1a_x4() - process four words:
   *   copy 4 words from [r0] to [r1] and 4 words from [lr] to [r3],
   *   then store their XOR to both [r0] and [r12].
   *
   * In/Out: r0, r1, r3, r12, lr are each advanced by 16 bytes.
   * Clobb:  r4-r11
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it.
   */
#define scrypt_core_macro1a_x4() \
	ldmia	r0, {r4-r7};                /* a = 4 words at r0              */ \
	ldmia	lr!, {r8-r11};              /* b = 4 words at lr              */ \
	stmia	r1!, {r4-r7};               /* copy a out through r1          */ \
	stmia	r3!, {r8-r11};              /* copy b out through r3          */ \
	eor	r4, r4, r8; \
	eor	r5, r5, r9; \
	eor	r6, r6, r10; \
	eor	r7, r7, r11; \
	stmia	r0!, {r4-r7};               /* a ^ b back to r0 ...           */ \
	stmia	r12!, {r4-r7}               /* ... and to the block at r12    */
  /*
   * scrypt_core_macro1b_x4() - process four words:
   *   t     = [r3] ^ [r2]          -> stored to [r2]
   *   u     = [r0] ^ t ^ [r1]      -> stored to [r0] and [r12]
   *
   * In/Out: r0, r1, r2, r3, r12 are each advanced by 16 bytes.
   * Clobb:  r4-r11
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it.
   */
#define scrypt_core_macro1b_x4() \
	ldmia	r3!, {r8-r11}; \
	ldmia	r2, {r4-r7}; \
	eor	r8, r8, r4;                 /* t = [r3] ^ [r2] */ \
	eor	r9, r9, r5; \
	eor	r10, r10, r6; \
	eor	r11, r11, r7; \
	ldmia	r0, {r4-r7}; \
	stmia	r2!, {r8-r11};              /* [r2] = t */ \
	eor	r4, r4, r8;                 /* u = [r0] ^ t */ \
	eor	r5, r5, r9; \
	eor	r6, r6, r10; \
	eor	r7, r7, r11; \
	ldmia	r1!, {r8-r11}; \
	eor	r4, r4, r8;                 /* u ^= [r1] */ \
	eor	r5, r5, r9; \
	eor	r6, r6, r10; \
	eor	r7, r7, r11; \
	stmia	r0!, {r4-r7};               /* [r0] = u  */ \
	stmia	r12!, {r4-r7}               /* [r12] = u */
  /*
   * scrypt_core_macro2_x4() - process four words:
   *   s = [r12] + [r0]             -> stored to [r0]
   *   x = s ^ [r2]                 -> stored to [r2] and [r12]
   *
   * In/Out: r0, r2, r12 are each advanced by 16 bytes.
   * Clobb:  r4-r11
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it.
   */
#define scrypt_core_macro2_x4() \
	ldmia	r12, {r4-r7}; \
	ldmia	r0, {r8-r11}; \
	add	r4, r4, r8;                 /* s = [r12] + [r0] */ \
	add	r5, r5, r9; \
	add	r6, r6, r10; \
	add	r7, r7, r11; \
	stmia	r0!, {r4-r7};               /* [r0] = s */ \
	ldmia	r2, {r8-r11}; \
	eor	r4, r4, r8;                 /* x = s ^ [r2] */ \
	eor	r5, r5, r9; \
	eor	r6, r6, r10; \
	eor	r7, r7, r11; \
	stmia	r2!, {r4-r7};               /* [r2] = x  */ \
	stmia	r12!, {r4-r7}               /* [r12] = x */
  /*
   * scrypt_core_macro3_x4() - add four words at [r1] into the four
   * words at [r0].
   *
   * In/Out: r0 and r1 are each advanced by 16 bytes.
   * Out:    r4-r7 hold the four sums that were stored.
   * Clobb:  r4-r11
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it.
   */
#define scrypt_core_macro3_x4() \
	ldmia	r1!, {r4-r7}; \
	ldmia	r0, {r8-r11}; \
	add	r4, r4, r8; \
	add	r5, r5, r9; \
	add	r6, r6, r10; \
	add	r7, r7, r11; \
	stmia	r0!, {r4-r7}
  /*
   * scrypt_core_macro3_x6() - add six words at [r1] into the six words
   * at [r0].
   *
   * In/Out: r0 and r1 are each advanced by 24 bytes.
   * Clobb:  r2-r12, lr
   *
   * The last body line has no continuation backslash so that the
   * definition ends with it and the following directive stays separate.
   */
#define scrypt_core_macro3_x6() \
	ldmia	r1!, {r2-r7}; \
	ldmia	r0, {r8-r12, lr}; \
	add	r2, r2, r8; \
	add	r3, r3, r9; \
	add	r4, r4, r10; \
	add	r5, r5, r11; \
	add	r6, r6, r12; \
	add	r7, r7, lr; \
	stmia	r0!, {r2-r7}
  /*
   * scrypt_core - ARM (A32) scrypt mixing core.
   *
   * In:    r0 = 32-word block X, updated in place
   *        r1 = scratchpad V, 1024 entries of 32 words (1024*32*4 bytes)
   * NOTE(review): presumably called from C as
   *        void scrypt_core(uint32_t *X, uint32_t *V) - confirm prototype.
   * Saves: r4-r11 and lr are pushed on entry and restored on exit.
   * Clobb: r0-r3, r12, flags
   *
   * Frame (sp is moved down by 21 words and rounded down to a 64-byte
   * boundary):
   *   sp+ 0*4 .. sp+15*4   16-word working block used by salsa8_core()
   *   sp+16*4              X pointer
   *   sp+17*4              loop 1: current V entry + 64 bytes
   *                        loop 2: V base address
   *   sp+18*4              loop 1: V end address
   *                        loop 2: iterations remaining
   *   sp+19*4              loop 2: address of the next V entry
   *   sp+20*4              sp value right after the register push
   */
	.text
	.code 32
	.align 2
	.globl scrypt_core
	.globl _scrypt_core
#ifdef __ELF__
	.type scrypt_core, %function
#endif
scrypt_core:
_scrypt_core:
	stmfd	sp!, {r4-r11, lr}
	mov	r12, sp
	sub	sp, sp, #21*4
	bic	sp, sp, #63                 /* 64-byte align the frame            */
	str	r12, [sp, #20*4]            /* saved before the shuffle clobbers r12 */

	scrypt_shuffle()

	str	r0, [sp, #16*4]
	add	r12, r1, #1024*32*4         /* r12 = one past the last V entry    */
	str	r12, [sp, #18*4]

	/* Loop 1: r0 = X, r1 = current V entry. Store X into the entry,   */
	/* then mix X; runs until r1 reaches the end address.               */
scrypt_core_loop1:
	add	lr, r0, #16*4               /* lr = second half of X              */
	add	r3, r1, #16*4               /* r3 = second half of the V entry    */
	mov	r12, sp
	scrypt_core_macro1a_x4()
	scrypt_core_macro1a_x4()
	scrypt_core_macro1a_x4()
	scrypt_core_macro1a_x4()
	str	r1, [sp, #17*4]             /* r1 = V entry + 64 at this point    */

	salsa8_core()

	ldr	r0, [sp, #16*4]
	mov	r12, sp
	add	r2, r0, #16*4
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()

	salsa8_core()

	ldr	r0, [sp, #16*4]
	mov	r1, sp
	add	r0, r0, #16*4               /* add the working block into X[16..31] */
	scrypt_core_macro3_x6()
	scrypt_core_macro3_x6()
	/* r3 and r12 are reloaded only now because the x6 macro clobbers them */
	ldr	r3, [sp, #17*4]
	ldr	r12, [sp, #18*4]
	scrypt_core_macro3_x4()

	add	r1, r3, #16*4               /* r1 = next V entry                  */
	sub	r0, r0, #32*4               /* r0 = X again                       */
	cmp	r1, r12
	bne	scrypt_core_loop1

	ldr	r4, [r0, #16*4]             /* r4 = X[16]                         */
	sub	r1, r1, #1024*32*4          /* r1 = V base                        */
	str	r1, [sp, #17*4]
	mov	r4, r4, lsl #32-10          /* keep only the low 10 bits ...      */
	mov	r12, #1024                  /* loop 2 iteration count             */
	add	r1, r1, r4, lsr #32-10-7    /* ... r1 = V + (X[16] & 1023) * 128  */

	/* Loop 2: r0 = X, r1 = selected V entry, r12 = iterations left.   */
scrypt_core_loop2:
	add	r2, r0, #16*4               /* r2 = second half of X              */
	add	r3, r1, #16*4               /* r3 = second half of the V entry    */
	str	r12, [sp, #18*4]
	mov	r12, sp
#ifdef __ARM_ARCH_5E_OR_6_OR_7__
	pld	[r1, #24*4]
	pld	[r1, #8*4]
#endif
	scrypt_core_macro1b_x4()
	scrypt_core_macro1b_x4()
	scrypt_core_macro1b_x4()
	scrypt_core_macro1b_x4()

	salsa8_core()

	ldr	r0, [sp, #16*4]
	mov	r12, sp
	add	r2, r0, #16*4
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()
	scrypt_core_macro2_x4()

	salsa8_core()

	ldr	r0, [sp, #16*4]
	mov	r1, sp
	ldr	r3, [sp, #17*4]             /* r3 = V base                        */
	add	r0, r0, #16*4
	/* the x4 step goes first here so that r4 = updated X[16] is       */
	/* available for selecting the next V entry                         */
	scrypt_core_macro3_x4()
	mov	r4, r4, lsl #32-10
	add	r3, r3, r4, lsr #32-10-7    /* r3 = V + (X[16] & 1023) * 128      */
	str	r3, [sp, #19*4]
#ifdef __ARM_ARCH_5E_OR_6_OR_7__
	pld	[r3, #16*4]
	pld	[r3]
#endif
	scrypt_core_macro3_x6()
	scrypt_core_macro3_x6()

	ldr	r12, [sp, #18*4]
	sub	r0, r0, #32*4               /* r0 = X again                       */
	ldr	r1, [sp, #19*4]             /* r1 = next V entry                  */
	subs	r12, r12, #1
	bne	scrypt_core_loop2

	scrypt_shuffle()

	ldr	sp, [sp, #20*4]             /* drop the aligned frame             */
#ifdef __thumb__
	/* return with bx so the caller instruction set is restored from lr */
	ldmfd	sp!, {r4-r11, lr}
	bx	lr
#else
	ldmfd	sp!, {r4-r11, pc}
#endif
#ifdef __ELF__
	/* record the symbol size for debuggers, profilers and disassemblers */
	.size scrypt_core, .-scrypt_core
#endif
  498. #endif
  499. #endif