Posted: Fri Jul 06, 2007 4:09 pm
I got it to h/s: 1165755.132529; the barrel rolls were holding it back.
[edit]
Oh, I am sorry, I mean 4663020.530116 hashes per second, but that is still in line with my thinking of maybe reaching twice the speed with rigorous optimizing.
[edit]
Oh, I am sorry, I mean 4663020.530116 hashes per second, but that is still in line with my thinking of maybe reaching twice the speed with rigorous optimizing.
Code: Select all
/*
 * Four-lane MD5 (RFC 1321) for single-block messages.
 *
 * Four independent messages are hashed side by side.  Every step of the
 * algorithm is applied to all four lanes in a short fixed-count loop.
 */

/*
 * Rotate the 32-bit unsigned value x left by n bits, 0 <= n <= 31.
 * The right-shift count is masked so that n == 0 does not shift by the full
 * width of the type (undefined behaviour).  Both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> ((32 - (n)) & 31)))

/* MD5 auxiliary functions, one per round (RFC 1321, section 3.4). */
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
#define H(x,y,z) ((x) ^ (y) ^ (z))
#define I(x,y,z) ((y) ^ ((x) | (~(z))))

/*
 * Per-round left-rotation amounts: md5_s<round>_<step mod 4>.
 */
#define md5_s1_0 7
#define md5_s1_1 12
#define md5_s1_2 17
#define md5_s1_3 22
#define md5_s2_0 5
#define md5_s2_1 9
#define md5_s2_2 14
#define md5_s2_3 20
#define md5_s3_0 4
#define md5_s3_1 11
#define md5_s3_2 16
#define md5_s3_3 23
#define md5_s4_0 6
#define md5_s4_1 10
#define md5_s4_2 15
#define md5_s4_3 21

/*
 * One MD5 step applied to all four lanes:
 *
 *     dst = add + ROTATE_LEFT(src + fn(in1, in2, in3) + X[word] + k, s)
 *
 *   dst, add, src, in1, in2, in3  four-element uint32_t state vectors
 *   fn                            one of F, G, H, I
 *   word                          message word index, 0..15
 *   k                             additive constant for this step
 *   s                             left-rotation amount
 *
 * The message buffer is taken from a variable named X in the enclosing
 * scope; word `word` of lane `lane` lives at X[word * 4 + lane].
 *
 * The intermediate sum is held in a macro-local scalar.  The parameter names
 * are deliberately distinct from every identifier used in the body, so no
 * argument can be substituted for a temporary by accident.
 */
#define MD5_OP4(dst, add, src, fn, in1, in2, in3, word, k, s) \
    do { \
        uint32_t md5_lane_; \
        for (md5_lane_ = 0; md5_lane_ < 4; ++md5_lane_) { \
            uint32_t md5_sum_ = (src)[md5_lane_] \
                + fn((in1)[md5_lane_], (in2)[md5_lane_], (in3)[md5_lane_]) \
                + X[(word) * 4 + md5_lane_] + (uint32_t)(k); \
            (dst)[md5_lane_] = ROTATE_LEFT(md5_sum_, (s)) + (add)[md5_lane_]; \
        } \
    } while (0)

/*
 * Hash four single-block messages with MD5 and compare each result with a
 * target digest.
 *
 * X       64 words: four already padded 16-word MD5 blocks, interleaved so
 *         that word w of message m is X[w * 4 + m].  Words are used exactly
 *         as given (RFC 1321 assembles them little-endian from the bytes).
 * digest  the four target state words A, B, C, D in that order.
 *
 * Returns m + 1 for the lowest-numbered message m (0..3) whose digest equals
 * the target, or 0 when none matches.
 *
 * Only one block per message is processed: the chaining state always starts
 * from the standard MD5 initial values.
 */
uint32_t md5_hash4(uint32_t *X, uint32_t *digest)
{
    /*
     * Alignment request kept from the original declaration; presumably it
     * is meant to help the compiler vectorise the four-lane loops.
     */
    uint32_t a[4] __attribute__((aligned(128)));
    uint32_t b[4] __attribute__((aligned(128)));
    uint32_t c[4] __attribute__((aligned(128)));
    uint32_t d[4] __attribute__((aligned(128)));
    uint32_t lane;

    /* Standard MD5 initial chaining values, replicated into every lane. */
    for (lane = 0; lane < 4; ++lane) {
        a[lane] = 0x67452301;
        b[lane] = 0xefcdab89;
        c[lane] = 0x98badcfe;
        d[lane] = 0x10325476;
    }

    /* Round 1: F, message words 0..15 in order. */
    MD5_OP4(a, b, a, F, b, c, d,  0, 0xd76aa478, md5_s1_0);
    MD5_OP4(d, a, d, F, a, b, c,  1, 0xe8c7b756, md5_s1_1);
    MD5_OP4(c, d, c, F, d, a, b,  2, 0x242070db, md5_s1_2);
    MD5_OP4(b, c, b, F, c, d, a,  3, 0xc1bdceee, md5_s1_3);
    MD5_OP4(a, b, a, F, b, c, d,  4, 0xf57c0faf, md5_s1_0);
    MD5_OP4(d, a, d, F, a, b, c,  5, 0x4787c62a, md5_s1_1);
    MD5_OP4(c, d, c, F, d, a, b,  6, 0xa8304613, md5_s1_2);
    MD5_OP4(b, c, b, F, c, d, a,  7, 0xfd469501, md5_s1_3);
    MD5_OP4(a, b, a, F, b, c, d,  8, 0x698098d8, md5_s1_0);
    MD5_OP4(d, a, d, F, a, b, c,  9, 0x8b44f7af, md5_s1_1);
    MD5_OP4(c, d, c, F, d, a, b, 10, 0xffff5bb1, md5_s1_2);
    MD5_OP4(b, c, b, F, c, d, a, 11, 0x895cd7be, md5_s1_3);
    MD5_OP4(a, b, a, F, b, c, d, 12, 0x6b901122, md5_s1_0);
    MD5_OP4(d, a, d, F, a, b, c, 13, 0xfd987193, md5_s1_1);
    MD5_OP4(c, d, c, F, d, a, b, 14, 0xa679438e, md5_s1_2);
    MD5_OP4(b, c, b, F, c, d, a, 15, 0x49b40821, md5_s1_3);

    /* Round 2: G, message word (1 + 5*i) mod 16. */
    MD5_OP4(a, b, a, G, b, c, d,  1, 0xf61e2562, md5_s2_0);
    MD5_OP4(d, a, d, G, a, b, c,  6, 0xc040b340, md5_s2_1);
    MD5_OP4(c, d, c, G, d, a, b, 11, 0x265e5a51, md5_s2_2);
    MD5_OP4(b, c, b, G, c, d, a,  0, 0xe9b6c7aa, md5_s2_3);
    MD5_OP4(a, b, a, G, b, c, d,  5, 0xd62f105d, md5_s2_0);
    MD5_OP4(d, a, d, G, a, b, c, 10, 0x02441453, md5_s2_1);
    MD5_OP4(c, d, c, G, d, a, b, 15, 0xd8a1e681, md5_s2_2);
    MD5_OP4(b, c, b, G, c, d, a,  4, 0xe7d3fbc8, md5_s2_3);
    MD5_OP4(a, b, a, G, b, c, d,  9, 0x21e1cde6, md5_s2_0);
    MD5_OP4(d, a, d, G, a, b, c, 14, 0xc33707d6, md5_s2_1);
    MD5_OP4(c, d, c, G, d, a, b,  3, 0xf4d50d87, md5_s2_2);
    MD5_OP4(b, c, b, G, c, d, a,  8, 0x455a14ed, md5_s2_3);
    MD5_OP4(a, b, a, G, b, c, d, 13, 0xa9e3e905, md5_s2_0);
    MD5_OP4(d, a, d, G, a, b, c,  2, 0xfcefa3f8, md5_s2_1);
    MD5_OP4(c, d, c, G, d, a, b,  7, 0x676f02d9, md5_s2_2);
    MD5_OP4(b, c, b, G, c, d, a, 12, 0x8d2a4c8a, md5_s2_3);

    /* Round 3: H, message word (5 + 3*i) mod 16. */
    MD5_OP4(a, b, a, H, b, c, d,  5, 0xfffa3942, md5_s3_0);
    MD5_OP4(d, a, d, H, a, b, c,  8, 0x8771f681, md5_s3_1);
    MD5_OP4(c, d, c, H, d, a, b, 11, 0x6d9d6122, md5_s3_2);
    MD5_OP4(b, c, b, H, c, d, a, 14, 0xfde5380c, md5_s3_3);
    MD5_OP4(a, b, a, H, b, c, d,  1, 0xa4beea44, md5_s3_0);
    MD5_OP4(d, a, d, H, a, b, c,  4, 0x4bdecfa9, md5_s3_1);
    MD5_OP4(c, d, c, H, d, a, b,  7, 0xf6bb4b60, md5_s3_2);
    MD5_OP4(b, c, b, H, c, d, a, 10, 0xbebfbc70, md5_s3_3);
    MD5_OP4(a, b, a, H, b, c, d, 13, 0x289b7ec6, md5_s3_0);
    MD5_OP4(d, a, d, H, a, b, c,  0, 0xeaa127fa, md5_s3_1);
    MD5_OP4(c, d, c, H, d, a, b,  3, 0xd4ef3085, md5_s3_2);
    MD5_OP4(b, c, b, H, c, d, a,  6, 0x04881d05, md5_s3_3);
    MD5_OP4(a, b, a, H, b, c, d,  9, 0xd9d4d039, md5_s3_0);
    MD5_OP4(d, a, d, H, a, b, c, 12, 0xe6db99e5, md5_s3_1);
    MD5_OP4(c, d, c, H, d, a, b, 15, 0x1fa27cf8, md5_s3_2);
    MD5_OP4(b, c, b, H, c, d, a,  2, 0xc4ac5665, md5_s3_3);

    /* Round 4: I, message word (7*i) mod 16. */
    MD5_OP4(a, b, a, I, b, c, d,  0, 0xf4292244, md5_s4_0);
    MD5_OP4(d, a, d, I, a, b, c,  7, 0x432aff97, md5_s4_1);
    MD5_OP4(c, d, c, I, d, a, b, 14, 0xab9423a7, md5_s4_2);
    MD5_OP4(b, c, b, I, c, d, a,  5, 0xfc93a039, md5_s4_3);
    MD5_OP4(a, b, a, I, b, c, d, 12, 0x655b59c3, md5_s4_0);
    MD5_OP4(d, a, d, I, a, b, c,  3, 0x8f0ccc92, md5_s4_1);
    MD5_OP4(c, d, c, I, d, a, b, 10, 0xffeff47d, md5_s4_2);
    MD5_OP4(b, c, b, I, c, d, a,  1, 0x85845dd1, md5_s4_3);
    MD5_OP4(a, b, a, I, b, c, d,  8, 0x6fa87e4f, md5_s4_0);
    MD5_OP4(d, a, d, I, a, b, c, 15, 0xfe2ce6e0, md5_s4_1);
    MD5_OP4(c, d, c, I, d, a, b,  6, 0xa3014314, md5_s4_2);
    MD5_OP4(b, c, b, I, c, d, a, 13, 0x4e0811a1, md5_s4_3);
    MD5_OP4(a, b, a, I, b, c, d,  4, 0xf7537e82, md5_s4_0);
    MD5_OP4(d, a, d, I, a, b, c, 11, 0xbd3af235, md5_s4_1);
    MD5_OP4(c, d, c, I, d, a, b,  2, 0x2ad7d2bb, md5_s4_2);
    MD5_OP4(b, c, b, I, c, d, a,  9, 0xeb86d391, md5_s4_3);

    /* Feed-forward: add the initial chaining values back in. */
    for (lane = 0; lane < 4; ++lane) {
        a[lane] += 0x67452301;
        b[lane] += 0xefcdab89;
        c[lane] += 0x98badcfe;
        d[lane] += 0x10325476;
    }

    /* Report the first lane whose digest equals the target (1-based). */
    for (lane = 0; lane < 4; ++lane) {
        if (digest[0] == a[lane] && digest[1] == b[lane] &&
            digest[2] == c[lane] && digest[3] == d[lane]) {
            return lane + 1;
        }
    }

    /*
     * No match.  A fixed 0 is returned rather than a hash word, which could
     * itself be 1..4 and be mistaken for a matching lane.
     */
    return 0;
}