scrypt-x86.S

# ECOin - Copyright (c) - 2014/2022 - GPLv3 - epsylon@riseup.net (https://03c8.net)
# Copyright 2011 pooler@litecoinpool.org
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

#if defined(OPTIMIZED_SALSA) && defined(__i386__)

#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif

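# gen_salsa8_core_quadround() expands to one quadround (four rounds, i.e. two
# Salsa20 double-rounds) of the Salsa20/8 core, using only general-purpose
# registers.  The sixteen 32-bit state words live on the stack and are
# addressed as 4(%esp)..64(%esp) from inside gen_salsa8_core.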
#define gen_salsa8_core_quadround() \
	movl 52(%esp), %ecx; \
	movl 4(%esp), %edx; \
	movl 20(%esp), %ebx; \
	movl 8(%esp), %esi; \
	leal (%ecx, %edx), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 4(%esp); \
	movl 36(%esp), %edi; \
	leal (%edx, %ebx), %ebp; \
	roll $9, %ebp; \
	xorl %ebp, %edi; \
	movl 24(%esp), %ebp; \
	movl %edi, 8(%esp); \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 40(%esp), %ebx; \
	movl %ecx, 20(%esp); \
	addl %edi, %ecx; \
	roll $18, %ecx; \
	leal (%esi, %ebp), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 24(%esp); \
	movl 56(%esp), %edi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %edi; \
	movl %edi, 36(%esp); \
	movl 28(%esp), %ecx; \
	movl %edx, 28(%esp); \
	movl 44(%esp), %edx; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %esi; \
	movl 60(%esp), %ebx; \
	movl %esi, 40(%esp); \
	addl %edi, %esi; \
	roll $18, %esi; \
	leal (%ecx, %edx), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 44(%esp); \
	movl 12(%esp), %edi; \
	xorl %esi, %ebp; \
	leal (%edx, %ebx), %esi; \
	roll $9, %esi; \
	xorl %esi, %edi; \
	movl %edi, 12(%esp); \
	movl 48(%esp), %esi; \
	movl %ebp, 48(%esp); \
	movl 64(%esp), %ebp; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 16(%esp), %ebx; \
	movl %ecx, 16(%esp); \
	addl %edi, %ecx; \
	roll $18, %ecx; \
	leal (%esi, %ebp), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl 32(%esp), %edi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %edi; \
	movl %edi, 32(%esp); \
	movl %ebx, %ecx; \
	movl %edx, 52(%esp); \
	movl 28(%esp), %edx; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %esi; \
	movl 40(%esp), %ebx; \
	movl %esi, 28(%esp); \
	addl %edi, %esi; \
	roll $18, %esi; \
	leal (%ecx, %edx), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 40(%esp); \
	movl 12(%esp), %edi; \
	xorl %esi, %ebp; \
	leal (%edx, %ebx), %esi; \
	roll $9, %esi; \
	xorl %esi, %edi; \
	movl %edi, 12(%esp); \
	movl 4(%esp), %esi; \
	movl %ebp, 4(%esp); \
	movl 48(%esp), %ebp; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 16(%esp), %ebx; \
	movl %ecx, 16(%esp); \
	addl %edi, %ecx; \
	roll $18, %ecx; \
	leal (%esi, %ebp), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 48(%esp); \
	movl 32(%esp), %edi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %edi; \
	movl %edi, 32(%esp); \
	movl 24(%esp), %ecx; \
	movl %edx, 24(%esp); \
	movl 52(%esp), %edx; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %esi; \
	movl 28(%esp), %ebx; \
	movl %esi, 28(%esp); \
	addl %edi, %esi; \
	roll $18, %esi; \
	leal (%ecx, %edx), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 52(%esp); \
	movl 8(%esp), %edi; \
	xorl %esi, %ebp; \
	leal (%edx, %ebx), %esi; \
	roll $9, %esi; \
	xorl %esi, %edi; \
	movl %edi, 8(%esp); \
	movl 44(%esp), %esi; \
	movl %ebp, 44(%esp); \
	movl 4(%esp), %ebp; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 20(%esp), %ebx; \
	movl %ecx, 4(%esp); \
	addl %edi, %ecx; \
	roll $18, %ecx; \
	leal (%esi, %ebp), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl 36(%esp), %edi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %edi; \
	movl %edi, 20(%esp); \
	movl %ebx, %ecx; \
	movl %edx, 36(%esp); \
	movl 24(%esp), %edx; \
	addl %edi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %esi; \
	movl 28(%esp), %ebx; \
	movl %esi, 24(%esp); \
	addl %edi, %esi; \
	roll $18, %esi; \
	leal (%ecx, %edx), %edi; \
	roll $7, %edi; \
	xorl %edi, %ebx; \
	movl %ebx, 28(%esp); \
	xorl %esi, %ebp; \
	movl 8(%esp), %esi; \
	leal (%edx, %ebx), %edi; \
	roll $9, %edi; \
	xorl %edi, %esi; \
	movl 40(%esp), %edi; \
	movl %ebp, 8(%esp); \
	movl 44(%esp), %ebp; \
	movl %esi, 40(%esp); \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 4(%esp), %ebx; \
	movl %ecx, 44(%esp); \
	addl %esi, %ecx; \
	roll $18, %ecx; \
	leal (%edi, %ebp), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl %ebx, 4(%esp); \
	movl 20(%esp), %esi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %esi; \
	movl %esi, 56(%esp); \
	movl 48(%esp), %ecx; \
	movl %edx, 20(%esp); \
	movl 36(%esp), %edx; \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %edi; \
	movl 24(%esp), %ebx; \
	movl %edi, 24(%esp); \
	addl %esi, %edi; \
	roll $18, %edi; \
	leal (%ecx, %edx), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl %ebx, 60(%esp); \
	movl 12(%esp), %esi; \
	xorl %edi, %ebp; \
	leal (%edx, %ebx), %edi; \
	roll $9, %edi; \
	xorl %edi, %esi; \
	movl %esi, 12(%esp); \
	movl 52(%esp), %edi; \
	movl %ebp, 36(%esp); \
	movl 8(%esp), %ebp; \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 16(%esp), %ebx; \
	movl %ecx, 16(%esp); \
	addl %esi, %ecx; \
	roll $18, %ecx; \
	leal (%edi, %ebp), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl 32(%esp), %esi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %esi; \
	movl %esi, 32(%esp); \
	movl %ebx, %ecx; \
	movl %edx, 48(%esp); \
	movl 20(%esp), %edx; \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %edi; \
	movl 24(%esp), %ebx; \
	movl %edi, 20(%esp); \
	addl %esi, %edi; \
	roll $18, %edi; \
	leal (%ecx, %edx), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl %ebx, 8(%esp); \
	movl 12(%esp), %esi; \
	xorl %edi, %ebp; \
	leal (%edx, %ebx), %edi; \
	roll $9, %edi; \
	xorl %edi, %esi; \
	movl %esi, 12(%esp); \
	movl 28(%esp), %edi; \
	movl %ebp, 52(%esp); \
	movl 36(%esp), %ebp; \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 16(%esp), %ebx; \
	movl %ecx, 16(%esp); \
	addl %esi, %ecx; \
	roll $18, %ecx; \
	leal (%edi, %ebp), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl %ebx, 28(%esp); \
	movl 32(%esp), %esi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %esi; \
	movl %esi, 32(%esp); \
	movl 4(%esp), %ecx; \
	movl %edx, 4(%esp); \
	movl 48(%esp), %edx; \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %edi; \
	movl 20(%esp), %ebx; \
	movl %edi, 20(%esp); \
	addl %esi, %edi; \
	roll $18, %edi; \
	leal (%ecx, %edx), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl %ebx, 48(%esp); \
	movl 40(%esp), %esi; \
	xorl %edi, %ebp; \
	leal (%edx, %ebx), %edi; \
	roll $9, %edi; \
	xorl %edi, %esi; \
	movl %esi, 36(%esp); \
	movl 60(%esp), %edi; \
	movl %ebp, 24(%esp); \
	movl 52(%esp), %ebp; \
	addl %esi, %ebx; \
	roll $13, %ebx; \
	xorl %ebx, %ecx; \
	movl 44(%esp), %ebx; \
	movl %ecx, 40(%esp); \
	addl %esi, %ecx; \
	roll $18, %ecx; \
	leal (%edi, %ebp), %esi; \
	roll $7, %esi; \
	xorl %esi, %ebx; \
	movl %ebx, 52(%esp); \
	movl 56(%esp), %esi; \
	xorl %ecx, %edx; \
	leal (%ebp, %ebx), %ecx; \
	roll $9, %ecx; \
	xorl %ecx, %esi; \
	movl %esi, 56(%esp); \
	addl %esi, %ebx; \
	movl %edx, 44(%esp); \
	roll $13, %ebx; \
	xorl %ebx, %edi; \
	movl %edi, 60(%esp); \
	addl %esi, %edi; \
	roll $18, %edi; \
	xorl %edi, %ebp; \
	movl %ebp, 64(%esp); \

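# gen_salsa8_core: Salsa20/8 (two quadrounds) over the sixteen words the
# caller placed at 0(%esp)..60(%esp); seen from here, past the return
# address, they sit at 4(%esp)..64(%esp).  %eax is left untouched, all other
# general-purpose registers are clobbered.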
	.text
	.align 32
gen_salsa8_core:
	gen_salsa8_core_quadround()
	gen_salsa8_core_quadround()
	ret
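
# scrypt_core(X, V): the scrypt ROMix loop for N = 1024, r = 1.  cdecl: after
# the four pushes below, the pointer to the 128-byte block X is at 20(%esp)
# and the 128 KiB scratchpad V at 24(%esp).  A rough C sketch of what this
# routine is assumed to compute (salsa8_xor(a, b) stands for a ^= b followed
# by the Salsa20/8 core applied to a; the names are illustrative, not part
# of this file):
#
#	for (i = 0; i < 1024; i++) {
#		memcpy(&V[i * 32], X, 128);
#		salsa8_xor(&X[0], &X[16]);
#		salsa8_xor(&X[16], &X[0]);
#	}
#	for (i = 0; i < 1024; i++) {
#		j = X[16] & 1023;
#		for (k = 0; k < 32; k++)
#			X[k] ^= V[j * 32 + k];
#		salsa8_xor(&X[0], &X[16]);
#		salsa8_xor(&X[16], &X[0]);
#	}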
	.text
	.align 32
	.globl scrypt_core
	.globl _scrypt_core
scrypt_core:
_scrypt_core:
	pushl %ebx
	pushl %ebp
	pushl %edi
	pushl %esi
	# Check for SSE2 availability
	movl $1, %eax
	cpuid
	andl $0x04000000, %edx
	jnz xmm_scrypt_core

gen_scrypt_core:
	movl 20(%esp), %edi
	movl 24(%esp), %esi
	subl $72, %esp

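# In the macros below %edi points at the working block X, %esi at the
# scratchpad, and p/q are the byte offsets of one word pair in the first and
# second 64-byte halves of X (q = p + 64).
# scrypt_core_macro1a: store the pair into the current scratchpad entry, then
# compute X[p] ^= X[q], keeping a copy on the stack for gen_salsa8_core.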
#define scrypt_core_macro1a(p, q) \
	movl p(%edi), %eax; \
	movl q(%edi), %edx; \
	movl %eax, p(%esi); \
	movl %edx, q(%esi); \
	xorl %edx, %eax; \
	movl %eax, p(%edi); \
	movl %eax, p(%esp); \

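# scrypt_core_macro1b: like macro1a, but instead of storing to the scratchpad
# it first XORs in the scratchpad entry selected by %edx (byte offset of the
# chosen block).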
#define scrypt_core_macro1b(p, q) \
	movl p(%edi), %eax; \
	xorl p(%esi, %edx), %eax; \
	movl q(%edi), %ebx; \
	xorl q(%esi, %edx), %ebx; \
	movl %ebx, q(%edi); \
	xorl %ebx, %eax; \
	movl %eax, p(%edi); \
	movl %eax, p(%esp); \

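# scrypt_core_macro2: after the first gen_salsa8_core call, add the Salsa
# output word back into X[p] (feed-forward), then set up X[q] ^= X[p] and a
# stack copy for the second call.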
#define scrypt_core_macro2(p, q) \
	movl p(%esp), %eax; \
	addl p(%edi), %eax; \
	movl %eax, p(%edi); \
	xorl q(%edi), %eax; \
	movl %eax, q(%edi); \
	movl %eax, p(%esp); \

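# scrypt_core_macro3: after the second gen_salsa8_core call, add the Salsa
# output word back into X[q].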
#define scrypt_core_macro3(p, q) \
	movl p(%esp), %eax; \
	addl q(%edi), %eax; \
	movl %eax, q(%edi); \

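# First pass: for i = 0..1023, V[i] = X, then X = BlockMix(X).  %esi walks the
# scratchpad in 128-byte steps; %ecx holds the end pointer V + 131072.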
	leal 131072(%esi), %ecx
gen_scrypt_core_loop1:
	movl %esi, 64(%esp)
	movl %ecx, 68(%esp)

	scrypt_core_macro1a(0, 64)
	scrypt_core_macro1a(4, 68)
	scrypt_core_macro1a(8, 72)
	scrypt_core_macro1a(12, 76)
	scrypt_core_macro1a(16, 80)
	scrypt_core_macro1a(20, 84)
	scrypt_core_macro1a(24, 88)
	scrypt_core_macro1a(28, 92)
	scrypt_core_macro1a(32, 96)
	scrypt_core_macro1a(36, 100)
	scrypt_core_macro1a(40, 104)
	scrypt_core_macro1a(44, 108)
	scrypt_core_macro1a(48, 112)
	scrypt_core_macro1a(52, 116)
	scrypt_core_macro1a(56, 120)
	scrypt_core_macro1a(60, 124)

	call gen_salsa8_core
	movl 92(%esp), %edi

	scrypt_core_macro2(0, 64)
	scrypt_core_macro2(4, 68)
	scrypt_core_macro2(8, 72)
	scrypt_core_macro2(12, 76)
	scrypt_core_macro2(16, 80)
	scrypt_core_macro2(20, 84)
	scrypt_core_macro2(24, 88)
	scrypt_core_macro2(28, 92)
	scrypt_core_macro2(32, 96)
	scrypt_core_macro2(36, 100)
	scrypt_core_macro2(40, 104)
	scrypt_core_macro2(44, 108)
	scrypt_core_macro2(48, 112)
	scrypt_core_macro2(52, 116)
	scrypt_core_macro2(56, 120)
	scrypt_core_macro2(60, 124)

	call gen_salsa8_core
	movl 92(%esp), %edi

	scrypt_core_macro3(0, 64)
	scrypt_core_macro3(4, 68)
	scrypt_core_macro3(8, 72)
	scrypt_core_macro3(12, 76)
	scrypt_core_macro3(16, 80)
	scrypt_core_macro3(20, 84)
	scrypt_core_macro3(24, 88)
	scrypt_core_macro3(28, 92)
	scrypt_core_macro3(32, 96)
	scrypt_core_macro3(36, 100)
	scrypt_core_macro3(40, 104)
	scrypt_core_macro3(44, 108)
	scrypt_core_macro3(48, 112)
	scrypt_core_macro3(52, 116)
	scrypt_core_macro3(56, 120)
	scrypt_core_macro3(60, 124)

	movl 64(%esp), %esi
	movl 68(%esp), %ecx
	addl $128, %esi
	cmpl %ecx, %esi
	jne gen_scrypt_core_loop1

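# Second pass: 1024 iterations; j = X[16] & 1023 selects a stored block, which
# is XORed into X before the same two Salsa20/8 mixes.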
	movl 96(%esp), %esi
	movl $1024, %ecx
gen_scrypt_core_loop2:
	movl %ecx, 68(%esp)

	movl 64(%edi), %edx
	andl $1023, %edx
	shll $7, %edx

	scrypt_core_macro1b(0, 64)
	scrypt_core_macro1b(4, 68)
	scrypt_core_macro1b(8, 72)
	scrypt_core_macro1b(12, 76)
	scrypt_core_macro1b(16, 80)
	scrypt_core_macro1b(20, 84)
	scrypt_core_macro1b(24, 88)
	scrypt_core_macro1b(28, 92)
	scrypt_core_macro1b(32, 96)
	scrypt_core_macro1b(36, 100)
	scrypt_core_macro1b(40, 104)
	scrypt_core_macro1b(44, 108)
	scrypt_core_macro1b(48, 112)
	scrypt_core_macro1b(52, 116)
	scrypt_core_macro1b(56, 120)
	scrypt_core_macro1b(60, 124)

	call gen_salsa8_core
	movl 92(%esp), %edi

	scrypt_core_macro2(0, 64)
	scrypt_core_macro2(4, 68)
	scrypt_core_macro2(8, 72)
	scrypt_core_macro2(12, 76)
	scrypt_core_macro2(16, 80)
	scrypt_core_macro2(20, 84)
	scrypt_core_macro2(24, 88)
	scrypt_core_macro2(28, 92)
	scrypt_core_macro2(32, 96)
	scrypt_core_macro2(36, 100)
	scrypt_core_macro2(40, 104)
	scrypt_core_macro2(44, 108)
	scrypt_core_macro2(48, 112)
	scrypt_core_macro2(52, 116)
	scrypt_core_macro2(56, 120)
	scrypt_core_macro2(60, 124)

	call gen_salsa8_core
	movl 92(%esp), %edi
	movl 96(%esp), %esi

	scrypt_core_macro3(0, 64)
	scrypt_core_macro3(4, 68)
	scrypt_core_macro3(8, 72)
	scrypt_core_macro3(12, 76)
	scrypt_core_macro3(16, 80)
	scrypt_core_macro3(20, 84)
	scrypt_core_macro3(24, 88)
	scrypt_core_macro3(28, 92)
	scrypt_core_macro3(32, 96)
	scrypt_core_macro3(36, 100)
	scrypt_core_macro3(40, 104)
	scrypt_core_macro3(44, 108)
	scrypt_core_macro3(48, 112)
	scrypt_core_macro3(52, 116)
	scrypt_core_macro3(56, 120)
	scrypt_core_macro3(60, 124)

	movl 68(%esp), %ecx
	subl $1, %ecx
	ja gen_scrypt_core_loop2

	addl $72, %esp
	popl %esi
	popl %edi
	popl %ebp
	popl %ebx
	ret

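# SSE2 path.  xmm_salsa8_core_doubleround: two Salsa20 rounds with the state
# held in %xmm0..%xmm3, one diagonal of the 4x4 state matrix per register
# (this is the word order set up by the shuffle in xmm_scrypt_core below);
# %xmm4 and %xmm5 are scratch.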
#define xmm_salsa8_core_doubleround() \
	movdqa %xmm1, %xmm4; \
	paddd %xmm0, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $7, %xmm4; \
	psrld $25, %xmm5; \
	pxor %xmm4, %xmm3; \
	pxor %xmm5, %xmm3; \
	movdqa %xmm0, %xmm4; \
	paddd %xmm3, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $9, %xmm4; \
	psrld $23, %xmm5; \
	pxor %xmm4, %xmm2; \
	movdqa %xmm3, %xmm4; \
	pshufd $0x93, %xmm3, %xmm3; \
	pxor %xmm5, %xmm2; \
	paddd %xmm2, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $13, %xmm4; \
	psrld $19, %xmm5; \
	pxor %xmm4, %xmm1; \
	movdqa %xmm2, %xmm4; \
	pshufd $0x4e, %xmm2, %xmm2; \
	pxor %xmm5, %xmm1; \
	paddd %xmm1, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $18, %xmm4; \
	psrld $14, %xmm5; \
	pxor %xmm4, %xmm0; \
	pshufd $0x39, %xmm1, %xmm1; \
	pxor %xmm5, %xmm0; \
	movdqa %xmm3, %xmm4; \
	paddd %xmm0, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $7, %xmm4; \
	psrld $25, %xmm5; \
	pxor %xmm4, %xmm1; \
	pxor %xmm5, %xmm1; \
	movdqa %xmm0, %xmm4; \
	paddd %xmm1, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $9, %xmm4; \
	psrld $23, %xmm5; \
	pxor %xmm4, %xmm2; \
	movdqa %xmm1, %xmm4; \
	pshufd $0x93, %xmm1, %xmm1; \
	pxor %xmm5, %xmm2; \
	paddd %xmm2, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $13, %xmm4; \
	psrld $19, %xmm5; \
	pxor %xmm4, %xmm3; \
	movdqa %xmm2, %xmm4; \
	pshufd $0x4e, %xmm2, %xmm2; \
	pxor %xmm5, %xmm3; \
	paddd %xmm3, %xmm4; \
	movdqa %xmm4, %xmm5; \
	pslld $18, %xmm4; \
	psrld $14, %xmm5; \
	pxor %xmm4, %xmm0; \
	pshufd $0x39, %xmm3, %xmm3; \
	pxor %xmm5, %xmm0; \

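# xmm_salsa8_core: four double-rounds, i.e. the eight rounds of Salsa20/8.
# The feed-forward addition is done by the caller with paddd.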
#define xmm_salsa8_core() \
	xmm_salsa8_core_doubleround(); \
	xmm_salsa8_core_doubleround(); \
	xmm_salsa8_core_doubleround(); \
	xmm_salsa8_core_doubleround(); \

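# xmm_scrypt_core: SSE2 implementation of the same ROMix loop.  %ebp keeps the
# caller's %esp while the frame is realigned to 16 bytes for movdqa; each
# 64-byte half of X is stored with its words reordered so that every %xmm
# register holds one diagonal of the Salsa20 state.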
	.align 32
xmm_scrypt_core:
	movl 20(%esp), %edi
	movl 24(%esp), %esi
	movl %esp, %ebp
	subl $128, %esp
	andl $-16, %esp

	# shuffle 1st block to (%esp)
	movl 60(%edi), %edx
	movl 44(%edi), %ecx
	movl 28(%edi), %ebx
	movl 12(%edi), %eax
	movl %edx, 12(%esp)
	movl %ecx, 28(%esp)
	movl %ebx, 44(%esp)
	movl %eax, 60(%esp)
	movl 40(%edi), %ecx
	movl 24(%edi), %ebx
	movl 8(%edi), %eax
	movl 56(%edi), %edx
	movl %ecx, 8(%esp)
	movl %ebx, 24(%esp)
	movl %eax, 40(%esp)
	movl %edx, 56(%esp)
	movl 20(%edi), %ebx
	movl 4(%edi), %eax
	movl 52(%edi), %edx
	movl 36(%edi), %ecx
	movl %ebx, 4(%esp)
	movl %eax, 20(%esp)
	movl %edx, 36(%esp)
	movl %ecx, 52(%esp)
	movl 0(%edi), %eax
	movl 48(%edi), %edx
	movl 32(%edi), %ecx
	movl 16(%edi), %ebx
	movl %eax, 0(%esp)
	movl %edx, 16(%esp)
	movl %ecx, 32(%esp)
	movl %ebx, 48(%esp)

	# shuffle 2nd block to 64(%esp)
	movl 124(%edi), %edx
	movl 108(%edi), %ecx
	movl 92(%edi), %ebx
	movl 76(%edi), %eax
	movl %edx, 76(%esp)
	movl %ecx, 92(%esp)
	movl %ebx, 108(%esp)
	movl %eax, 124(%esp)
	movl 104(%edi), %ecx
	movl 88(%edi), %ebx
	movl 72(%edi), %eax
	movl 120(%edi), %edx
	movl %ecx, 72(%esp)
	movl %ebx, 88(%esp)
	movl %eax, 104(%esp)
	movl %edx, 120(%esp)
	movl 84(%edi), %ebx
	movl 68(%edi), %eax
	movl 116(%edi), %edx
	movl 100(%edi), %ecx
	movl %ebx, 68(%esp)
	movl %eax, 84(%esp)
	movl %edx, 100(%esp)
	movl %ecx, 116(%esp)
	movl 64(%edi), %eax
	movl 112(%edi), %edx
	movl 96(%edi), %ecx
	movl 80(%edi), %ebx
	movl %eax, 64(%esp)
	movl %edx, 80(%esp)
	movl %ecx, 96(%esp)
	movl %ebx, 112(%esp)

	movl %esi, %edx
	leal 131072(%esi), %ecx

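# First pass: store the (shuffled) X into the scratchpad at %edx, then mix:
# first half ^= second half, Salsa20/8, feed-forward; then the second half
# against the freshly mixed first half.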
xmm_scrypt_core_loop1:
	movdqa 0(%esp), %xmm0
	movdqa 16(%esp), %xmm1
	movdqa 32(%esp), %xmm2
	movdqa 48(%esp), %xmm3
	movdqa 64(%esp), %xmm4
	movdqa 80(%esp), %xmm5
	movdqa 96(%esp), %xmm6
	movdqa 112(%esp), %xmm7
	movdqa %xmm0, 0(%edx)
	movdqa %xmm1, 16(%edx)
	movdqa %xmm2, 32(%edx)
	movdqa %xmm3, 48(%edx)
	movdqa %xmm4, 64(%edx)
	movdqa %xmm5, 80(%edx)
	movdqa %xmm6, 96(%edx)
	movdqa %xmm7, 112(%edx)
	pxor %xmm4, %xmm0
	pxor %xmm5, %xmm1
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)
	xmm_salsa8_core()
	paddd 0(%esp), %xmm0
	paddd 16(%esp), %xmm1
	paddd 32(%esp), %xmm2
	paddd 48(%esp), %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)
	pxor 64(%esp), %xmm0
	pxor 80(%esp), %xmm1
	pxor 96(%esp), %xmm2
	pxor 112(%esp), %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, 96(%esp)
	movdqa %xmm3, 112(%esp)
	xmm_salsa8_core()
	paddd 64(%esp), %xmm0
	paddd 80(%esp), %xmm1
	paddd 96(%esp), %xmm2
	paddd 112(%esp), %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, 96(%esp)
	movdqa %xmm3, 112(%esp)

	addl $128, %edx
	cmpl %ecx, %edx
	jne xmm_scrypt_core_loop1

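# Second pass: the low dword of %xmm4 is X[16] in the shuffled layout;
# j = X[16] & 1023, giving the byte offset j * 128 into the scratchpad.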
	movl $1024, %ecx
xmm_scrypt_core_loop2:
	movdqa 0(%esp), %xmm0
	movdqa 16(%esp), %xmm1
	movdqa 32(%esp), %xmm2
	movdqa 48(%esp), %xmm3
	movdqa 64(%esp), %xmm4
	movdqa 80(%esp), %xmm5
	movdqa 96(%esp), %xmm6
	movdqa 112(%esp), %xmm7
	movd %xmm4, %edx
	andl $1023, %edx
	shll $7, %edx
	pxor 0(%esi, %edx), %xmm0
	pxor 16(%esi, %edx), %xmm1
	pxor 32(%esi, %edx), %xmm2
	pxor 48(%esi, %edx), %xmm3
	pxor 64(%esi, %edx), %xmm4
	pxor 80(%esi, %edx), %xmm5
	pxor 96(%esi, %edx), %xmm6
	pxor 112(%esi, %edx), %xmm7
	movdqa %xmm4, 64(%esp)
	movdqa %xmm5, 80(%esp)
	movdqa %xmm6, 96(%esp)
	movdqa %xmm7, 112(%esp)
	pxor %xmm4, %xmm0
	pxor %xmm5, %xmm1
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)
	xmm_salsa8_core()
	paddd 0(%esp), %xmm0
	paddd 16(%esp), %xmm1
	paddd 32(%esp), %xmm2
	paddd 48(%esp), %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)
	pxor 64(%esp), %xmm0
	pxor 80(%esp), %xmm1
	pxor 96(%esp), %xmm2
	pxor 112(%esp), %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, 96(%esp)
	movdqa %xmm3, 112(%esp)
	xmm_salsa8_core()
	paddd 64(%esp), %xmm0
	paddd 80(%esp), %xmm1
	paddd 96(%esp), %xmm2
	paddd 112(%esp), %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, 96(%esp)
	movdqa %xmm3, 112(%esp)

	subl $1, %ecx
	ja xmm_scrypt_core_loop2

	# re-shuffle 1st block back
	movl 60(%esp), %edx
	movl 44(%esp), %ecx
	movl 28(%esp), %ebx
	movl 12(%esp), %eax
	movl %edx, 12(%edi)
	movl %ecx, 28(%edi)
	movl %ebx, 44(%edi)
	movl %eax, 60(%edi)
	movl 40(%esp), %ecx
	movl 24(%esp), %ebx
	movl 8(%esp), %eax
	movl 56(%esp), %edx
	movl %ecx, 8(%edi)
	movl %ebx, 24(%edi)
	movl %eax, 40(%edi)
	movl %edx, 56(%edi)
	movl 20(%esp), %ebx
	movl 4(%esp), %eax
	movl 52(%esp), %edx
	movl 36(%esp), %ecx
	movl %ebx, 4(%edi)
	movl %eax, 20(%edi)
	movl %edx, 36(%edi)
	movl %ecx, 52(%edi)
	movl 0(%esp), %eax
	movl 48(%esp), %edx
	movl 32(%esp), %ecx
	movl 16(%esp), %ebx
	movl %eax, 0(%edi)
	movl %edx, 16(%edi)
	movl %ecx, 32(%edi)
	movl %ebx, 48(%edi)

	# re-shuffle 2nd block back
	movl 124(%esp), %edx
	movl 108(%esp), %ecx
	movl 92(%esp), %ebx
	movl 76(%esp), %eax
	movl %edx, 76(%edi)
	movl %ecx, 92(%edi)
	movl %ebx, 108(%edi)
	movl %eax, 124(%edi)
	movl 104(%esp), %ecx
	movl 88(%esp), %ebx
	movl 72(%esp), %eax
	movl 120(%esp), %edx
	movl %ecx, 72(%edi)
	movl %ebx, 88(%edi)
	movl %eax, 104(%edi)
	movl %edx, 120(%edi)
	movl 84(%esp), %ebx
	movl 68(%esp), %eax
	movl 116(%esp), %edx
	movl 100(%esp), %ecx
	movl %ebx, 68(%edi)
	movl %eax, 84(%edi)
	movl %edx, 100(%edi)
	movl %ecx, 116(%edi)
	movl 64(%esp), %eax
	movl 112(%esp), %edx
	movl 96(%esp), %ecx
	movl 80(%esp), %ebx
	movl %eax, 64(%edi)
	movl %edx, 80(%edi)
	movl %ecx, 96(%edi)
	movl %ebx, 112(%edi)

	movl %ebp, %esp
	popl %esi
	popl %edi
	popl %ebp
	popl %ebx
	ret

#endif