Skip to content

Commit e3460ad

Browse files
committed
Smaller stack usage for SHA-1, SHA-256 and SHA-512.
1 parent 9a13375 commit e3460ad

File tree

3 files changed

+134
-106
lines changed

3 files changed

+134
-106
lines changed

src/hashes/sha1.c

Lines changed: 35 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ static int ss_sha1_compress(hash_state *md, const unsigned char *buf)
3939
static int s_sha1_compress(hash_state *md, const unsigned char *buf)
4040
#endif
4141
{
42-
ulong32 a,b,c,d,e,W[80],i;
42+
ulong32 a,b,c,d,e,W[16],i;
4343
#ifdef LTC_SMALL_CODE
4444
ulong32 t;
4545
#endif
@@ -48,6 +48,7 @@ static int s_sha1_compress(hash_state *md, const unsigned char *buf)
4848
for (i = 0; i < 16; i++) {
4949
LOAD32H(W[i], buf + (4*i));
5050
}
51+
#define Wi(i) W[(i) % 16] = ROL(W[((i) - 3) % 16] ^ W[((i) - 8) % 16] ^ W[((i) - 14) % 16] ^ W[((i) - 16) % 16], 1);
5152

5253
/* copy state */
5354
a = md->sha1.state[0];
@@ -56,78 +57,82 @@ static int s_sha1_compress(hash_state *md, const unsigned char *buf)
5657
d = md->sha1.state[3];
5758
e = md->sha1.state[4];
5859

59-
/* expand it */
60-
for (i = 16; i < 80; i++) {
61-
W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
62-
}
63-
6460
/* compress */
6561
/* round one */
66-
#define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30);
67-
#define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30);
68-
#define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30);
69-
#define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30);
62+
#define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[(i) % 16] + 0x5a827999UL); b = ROLc(b, 30);
63+
#define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[(i) % 16] + 0x6ed9eba1UL); b = ROLc(b, 30);
64+
#define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[(i) % 16] + 0x8f1bbcdcUL); b = ROLc(b, 30);
65+
#define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[(i) % 16] + 0xca62c1d6UL); b = ROLc(b, 30);
7066

7167
#ifdef LTC_SMALL_CODE
7268

73-
for (i = 0; i < 20; ) {
69+
for (i = 0; i < 16; ) {
7470
FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
7571
}
72+
for (; i < 20; ) {
73+
Wi(i); FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
74+
}
7675

7776
for (; i < 40; ) {
78-
FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
77+
Wi(i); FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
7978
}
8079

8180
for (; i < 60; ) {
82-
FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
81+
Wi(i); FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
8382
}
8483

8584
for (; i < 80; ) {
86-
FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
85+
Wi(i); FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
8786
}
8887

8988
#else
9089

91-
for (i = 0; i < 20; ) {
90+
for (i = 0; i < 15; ) {
9291
FF0(a,b,c,d,e,i++);
9392
FF0(e,a,b,c,d,i++);
9493
FF0(d,e,a,b,c,i++);
9594
FF0(c,d,e,a,b,i++);
9695
FF0(b,c,d,e,a,i++);
9796
}
97+
FF0(a,b,c,d,e,i++);
98+
Wi(i); FF0(e,a,b,c,d,i++);
99+
Wi(i); FF0(d,e,a,b,c,i++);
100+
Wi(i); FF0(c,d,e,a,b,i++);
101+
Wi(i); FF0(b,c,d,e,a,i++);
98102

99103
/* round two */
100104
for (; i < 40; ) {
101-
FF1(a,b,c,d,e,i++);
102-
FF1(e,a,b,c,d,i++);
103-
FF1(d,e,a,b,c,i++);
104-
FF1(c,d,e,a,b,i++);
105-
FF1(b,c,d,e,a,i++);
105+
Wi(i); FF1(a,b,c,d,e,i++);
106+
Wi(i); FF1(e,a,b,c,d,i++);
107+
Wi(i); FF1(d,e,a,b,c,i++);
108+
Wi(i); FF1(c,d,e,a,b,i++);
109+
Wi(i); FF1(b,c,d,e,a,i++);
106110
}
107111

108112
/* round three */
109113
for (; i < 60; ) {
110-
FF2(a,b,c,d,e,i++);
111-
FF2(e,a,b,c,d,i++);
112-
FF2(d,e,a,b,c,i++);
113-
FF2(c,d,e,a,b,i++);
114-
FF2(b,c,d,e,a,i++);
114+
Wi(i); FF2(a,b,c,d,e,i++);
115+
Wi(i); FF2(e,a,b,c,d,i++);
116+
Wi(i); FF2(d,e,a,b,c,i++);
117+
Wi(i); FF2(c,d,e,a,b,i++);
118+
Wi(i); FF2(b,c,d,e,a,i++);
115119
}
116120

117121
/* round four */
118122
for (; i < 80; ) {
119-
FF3(a,b,c,d,e,i++);
120-
FF3(e,a,b,c,d,i++);
121-
FF3(d,e,a,b,c,i++);
122-
FF3(c,d,e,a,b,i++);
123-
FF3(b,c,d,e,a,i++);
123+
Wi(i); FF3(a,b,c,d,e,i++);
124+
Wi(i); FF3(e,a,b,c,d,i++);
125+
Wi(i); FF3(d,e,a,b,c,i++);
126+
Wi(i); FF3(c,d,e,a,b,i++);
127+
Wi(i); FF3(b,c,d,e,a,i++);
124128
}
125129
#endif
126130

127131
#undef FF0
128132
#undef FF1
129133
#undef FF2
130134
#undef FF3
135+
#undef Wi
131136

132137
/* store */
133138
md->sha1.state[0] = md->sha1.state[0] + a;

src/hashes/sha2/sha256.c

Lines changed: 66 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ static int ss_sha256_compress(hash_state * md, const unsigned char *buf)
6363
static int s_sha256_compress(hash_state * md, const unsigned char *buf)
6464
#endif
6565
{
66-
ulong32 S[8], W[64], t0, t1;
66+
ulong32 S[8], W[16], t0, t1;
6767
#ifdef LTC_SMALL_CODE
6868
ulong32 t;
6969
#endif
@@ -78,30 +78,32 @@ static int s_sha256_compress(hash_state * md, const unsigned char *buf)
7878
for (i = 0; i < 16; i++) {
7979
LOAD32H(W[i], buf + (4*i));
8080
}
81-
82-
/* fill W[16..63] */
83-
for (i = 16; i < 64; i++) {
84-
W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
85-
}
81+
#define Wi(i) W[(i) % 16] = Gamma1(W[((i) - 2) % 16]) + W[((i) - 7) % 16] + Gamma0(W[((i) - 15) % 16]) + W[((i) - 16) % 16]
8682

8783
/* Compress */
8884
#ifdef LTC_SMALL_CODE
89-
#define RND(a,b,c,d,e,f,g,h,i) \
90-
t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
91-
t1 = Sigma0(a) + Maj(a, b, c); \
92-
d += t0; \
85+
#define RND(a,b,c,d,e,f,g,h,i) \
86+
t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[(i) % 16]; \
87+
t1 = Sigma0(a) + Maj(a, b, c); \
88+
d += t0; \
9389
h = t0 + t1;
9490

95-
for (i = 0; i < 64; ++i) {
91+
for (i = 0; i < 16; ++i) {
92+
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
93+
t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
94+
S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
95+
}
96+
for (; i < 64; ++i) {
97+
Wi(i);
9698
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
9799
t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
98100
S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
99101
}
100102
#else
101-
#define RND(a,b,c,d,e,f,g,h,i,ki) \
102-
t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \
103-
t1 = Sigma0(a) + Maj(a, b, c); \
104-
d += t0; \
103+
#define RND(a,b,c,d,e,f,g,h,i,ki) \
104+
t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[(i) % 16]; \
105+
t1 = Sigma0(a) + Maj(a, b, c); \
106+
d += t0; \
105107
h = t0 + t1;
106108

107109
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);
@@ -120,56 +122,57 @@ static int s_sha256_compress(hash_state * md, const unsigned char *buf)
120122
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe);
121123
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7);
122124
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174);
123-
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
124-
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
125-
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
126-
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
127-
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
128-
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
129-
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
130-
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
131-
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
132-
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
133-
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
134-
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
135-
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
136-
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
137-
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
138-
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
139-
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
140-
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
141-
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
142-
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
143-
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
144-
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
145-
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
146-
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
147-
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
148-
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
149-
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
150-
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
151-
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
152-
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
153-
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
154-
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
155-
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
156-
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
157-
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
158-
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
159-
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
160-
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
161-
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
162-
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
163-
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
164-
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
165-
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
166-
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
167-
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
168-
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
169-
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
170-
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
125+
Wi(16); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
126+
Wi(17); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
127+
Wi(18); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
128+
Wi(19); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
129+
Wi(20); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
130+
Wi(21); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
131+
Wi(22); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
132+
Wi(23); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
133+
Wi(24); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
134+
Wi(25); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
135+
Wi(26); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
136+
Wi(27); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
137+
Wi(28); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
138+
Wi(29); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
139+
Wi(30); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
140+
Wi(31); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
141+
Wi(32); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
142+
Wi(33); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
143+
Wi(34); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
144+
Wi(35); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
145+
Wi(36); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
146+
Wi(37); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
147+
Wi(38); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
148+
Wi(39); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
149+
Wi(40); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
150+
Wi(41); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
151+
Wi(42); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
152+
Wi(43); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
153+
Wi(44); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
154+
Wi(45); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
155+
Wi(46); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
156+
Wi(47); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
157+
Wi(48); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
158+
Wi(49); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
159+
Wi(50); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
160+
Wi(51); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
161+
Wi(52); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
162+
Wi(53); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
163+
Wi(54); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
164+
Wi(55); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
165+
Wi(56); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
166+
Wi(57); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
167+
Wi(58); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
168+
Wi(59); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
169+
Wi(60); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
170+
Wi(61); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
171+
Wi(62); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
172+
Wi(63); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
171173
#endif
172174
#undef RND
175+
#undef Wi
173176

174177
/* feedback */
175178
for (i = 0; i < 8; i++) {

src/hashes/sha2/sha512.c

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ static int ss_sha512_compress(hash_state * md, const unsigned char *buf)
8888
static int s_sha512_compress(hash_state * md, const unsigned char *buf)
8989
#endif
9090
{
91-
ulong64 S[8], W[80], t0, t1;
91+
ulong64 S[8], W[16], t0, t1;
9292
int i;
9393

9494
/* copy state into S */
@@ -100,16 +100,25 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf)
100100
for (i = 0; i < 16; i++) {
101101
LOAD64H(W[i], buf + (8*i));
102102
}
103-
104-
/* fill W[16..79] */
105-
for (i = 16; i < 80; i++) {
106-
W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
107-
}
103+
#define Wi(i) W[(i) % 16] = Gamma1(W[((i) - 2) % 16]) + W[((i) - 7) % 16] + Gamma0(W[((i) - 15) % 16]) + W[((i) - 16) % 16];
108104

109105
/* Compress */
110106
#ifdef LTC_SMALL_CODE
111-
for (i = 0; i < 80; i++) {
112-
t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
107+
for (i = 0; i < 16; i++) {
108+
t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i % 16];
109+
t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
110+
S[7] = S[6];
111+
S[6] = S[5];
112+
S[5] = S[4];
113+
S[4] = S[3] + t0;
114+
S[3] = S[2];
115+
S[2] = S[1];
116+
S[1] = S[0];
117+
S[0] = t0 + t1;
118+
}
119+
for (; i < 80; i++) {
120+
Wi(i);
121+
t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i % 16];
113122
t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
114123
S[7] = S[6];
115124
S[6] = S[5];
@@ -121,13 +130,13 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf)
121130
S[0] = t0 + t1;
122131
}
123132
#else
124-
#define RND(a,b,c,d,e,f,g,h,i) \
125-
t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
126-
t1 = Sigma0(a) + Maj(a, b, c); \
127-
d += t0; \
133+
#define RND(a,b,c,d,e,f,g,h,i) \
134+
t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[(i) % 16]; \
135+
t1 = Sigma0(a) + Maj(a, b, c); \
136+
d += t0; \
128137
h = t0 + t1;
129138

130-
for (i = 0; i < 80; i += 8) {
139+
for (i = 0; i < 16; i += 8) {
131140
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
132141
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
133142
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
@@ -137,7 +146,18 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf)
137146
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
138147
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
139148
}
149+
for (; i < 80; i += 8) {
150+
Wi(i+0); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
151+
Wi(i+1); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
152+
Wi(i+2); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
153+
Wi(i+3); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
154+
Wi(i+4); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
155+
Wi(i+5); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
156+
Wi(i+6); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
157+
Wi(i+7); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
158+
}
140159
#endif
160+
#undef Wi
141161

142162

143163
/* feedback */

0 commit comments

Comments
 (0)