brute forcer
-
- Member
- Posts: 62
- Joined: Fri Jun 29, 2007 8:36 pm
well the code is:it should make 900150983cd24fb0d6963f7d28e17f72, but it doesn't, i still dont understand a all md5 hashing, so i am having a hard time debugging this.
thx
Code: Select all
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
#define H(x,y,z) ((x) ^ (y) ^ (z))
#define I(x,y,z) ((y) ^ ((x) | (~(z))))
void md5_genT(uint32_t *T)
{
uint_fast32_t i;
for(i = 0; i < 64; ++i)
{
T[i] = 4294967296.0 * fabs(sin((float)(i+1)));
//printf("%08x\n", T[i]);
//sleep(1);
}
}
void md5_hash(uint8_t *message, uint_fast32_t mlength, uint32_t *T)
{
uint8_t *nm, nmlength;
uint32_t __s1[] = {7, 12, 17, 22};
uint32_t __s2[] = {5, 9, 14, 20};
uint32_t __s3[] = {4, 11, 16, 23};
uint32_t __s4[] = {6, 10, 15, 21};
uint32_t __k1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
uint32_t __k2[] = {1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12};
uint32_t __k3[] = {5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2};
uint32_t __k4[] = {0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9};
uint32_t X[16];
uint32_t i, j, ii, k;
uint32_t AA, BB, CC, DD;
uint32_t A, B, C, D;
// do padding so message is a multiple of 512 bits (64 bytes)
if((mlength%64) == 0)
{
nmlength = mlength + 64;
}else{
nmlength = mlength + (64 - (mlength%64));
}
nm = (uint8_t*)malloc(nmlength);
memset(nm, 0, nmlength);
memcpy(nm, message, mlength);
printf("nmlength:%u mlength:%u\n", nmlength, mlength);
nm[mlength] = 0x80;
((uint32_t*)&nm[nmlength - 8])[0] = mlength << 3;
((uint32_t*)&nm[nmlength - 8])[1] = 0;
// hash padded message (nmessage)
AA = 0x67452301; BB = 0xefcdab89; CC = 0x98badcfe; DD = 0x10325476;
for(i = 0; i < (nmlength/64); ++i)
{
// create table
for(j = 0; j < 16; ++j)
{
X[j] = nm[i*64+j*4+0] |
(nm[i*64+j*4+1] << 8) |
(nm[i*64+j*4+2] << 16) |
(nm[i*64+j*4+3] << 24);
}
A = AA;
B = BB;
C = CC;
D = DD;
// round one
ii = 0;
for(k = 0; k < 4; ++k)
{
A = B + ROTATE_LEFT((A + F(B, C, D) + X[__k1[k*4+0]] + T[ii+0]), __s1[0]);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[__k1[k*4+1]] + T[ii+1]), __s1[1]);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[__k1[k*4+2]] + T[ii+2]), __s1[2]);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[__k1[k*4+3]] + T[ii+3]), __s1[3]);
printf("X[..]: %u\n", __k1[k*4+0]);
ii += 4;
}
// round two
for(k = 0; k < 4; ++k)
{
A = B + ROTATE_LEFT((A + G(B, C, D) + X[__k2[k*4+0]] + T[ii+0]), __s2[0]);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[__k2[k*4+1]] + T[ii+1]), __s2[1]);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[__k2[k*4+2]] + T[ii+2]), __s2[2]);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[__k2[k*4+3]] + T[ii+3]), __s2[3]);
ii += 4;
}
// round three
for(k = 0; k < 4; ++k)
{
A = B + ROTATE_LEFT((A + H(B, C, D) + X[__k3[k*4+0]] + T[ii+0]), __s3[0]);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[__k3[k*4+1]] + T[ii+1]), __s3[1]);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[__k3[k*4+2]] + T[ii+2]), __s3[2]);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[__k3[k*4+3]] + T[ii+3]), __s3[3]);
ii += 4;
}
// round four
for(k = 0; k < 4; ++k)
{
A = B + ROTATE_LEFT((A + I(B, C, D) + X[__k4[k*4+0]] + T[ii+0]), __s4[0]);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[__k4[k*4+1]] + T[ii+1]), __s4[2]);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[__k4[k*4+2]] + T[ii+2]), __s4[3]);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[__k4[k*4+3]] + T[ii+3]), __s4[4]);
ii += 4;
}
printf("----\n");
printf("ii:%u\n", ii);
AA = AA + A;
BB = BB + B;
CC = CC + C;
DD = DD + D;
}
printf("digest: ");
printf("%02x%02x%02x%02x", AA>>24, AA>>16&0xFF, AA>>8&0xFF, AA&0xFF);
printf("%02x%02x%02x%02x", BB>>24, BB>>16&0xFF, BB>>8&0xFF, BB&0xFF);
printf("%02x%02x%02x%02x", CC>>24, CC>>16&0xFF, CC>>8&0xFF, CC&0xFF);
printf("%02x%02x%02x%02x", DD>>24, DD>>16&0xFF, DD>>8&0xFF, DD&0xFF);
printf("\n");
return;
}
int main(int argc, char *argv[])
{
uint8_t *message = (uint8_t*)"abc";
uint32_t T[64];
md5_genT(&T[0]);
md5_hash(message, strlen(message), &T[0]);
return 1;
}
thx
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
I see it. Look at round four where it uses a index for __s4.
Here is a version that produces the hash as you have above.
Here is a version that produces the hash as you have above.
Code: Select all
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
#define H(x,y,z) ((x) ^ (y) ^ (z))
#define I(x,y,z) ((y) ^ ((x) | (~(z))))
void md5_genT(uint32_t *T)
{
uint_fast32_t i;
for(i = 0; i < 64; ++i)
{
T[i] = 4294967296.0 * fabs(sin((float)(i+1)));
//printf("%08x\n", T[i]);
//sleep(1);
}
}
void md5_hash(uint8_t *message, uint_fast32_t mlength, uint32_t *T)
{
uint8_t *nm, nmlength;
uint32_t __s1[] = {7, 12, 17, 22};
uint32_t __s2[] = {5, 9, 14, 20};
uint32_t __s3[] = {4, 11, 16, 23};
uint32_t __s4[] = {6, 10, 15, 21};
uint32_t __k1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
uint32_t __k2[] = {1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12};
uint32_t __k3[] = {5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2};
uint32_t __k4[] = {0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9};
uint32_t X[16];
uint32_t i, j, ii, k;
uint32_t AA, BB, CC, DD;
uint32_t A, B, C, D;
// do padding so message is a multiple of 512 bits (64 bytes)
if((mlength%64) == 0)
{
nmlength = mlength + 64;
}else{
nmlength = mlength + (64 - (mlength%64));
}
nm = (uint8_t*)malloc(nmlength);
memset(nm, 0, nmlength);
memcpy(nm, message, mlength);
printf("nmlength:%u mlength:%u\n", nmlength, mlength);
nm[mlength] = 0x80;
((uint32_t*)&nm[nmlength - 8])[0] = mlength * 8;
((uint32_t*)&nm[nmlength - 8])[1] = 0;
printf("nm[0]:%x\n", nmlength);
// hash padded message (nmessage)
AA = 0x67452301; BB = 0xefcdab89; CC = 0x98badcfe; DD = 0x10325476;
for(i = 0; i < (nmlength/64); ++i)
{
printf("i:%x\n", i);
// create table
for(j = 0; j < 64; j += 4)
{
X[j/4] = nm[i*64+j+0] |
(nm[i*64+j+1] << 8) |
(nm[i*64+j+2] << 16) |
(nm[i*64+j+3] << 24);
}
A = AA;
B = BB;
C = CC;
D = DD;
// round one
ii = 0;
for(k = 0; k < 4; ++k)
{
printf("T[ii+0]:%x\n", T[ii+0]);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[__k1[k*4+0]] + T[ii+0]), __s1[0]);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[__k1[k*4+1]] + T[ii+1]), __s1[1]);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[__k1[k*4+2]] + T[ii+2]), __s1[2]);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[__k1[k*4+3]] + T[ii+3]), __s1[3]);
ii += 4;
}
// round two
for(k = 0; k < 4; ++k)
{
A = B + ROTATE_LEFT((A + G(B, C, D) + X[__k2[k*4+0]] + T[ii+0]), __s2[0]);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[__k2[k*4+1]] + T[ii+1]), __s2[1]);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[__k2[k*4+2]] + T[ii+2]), __s2[2]);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[__k2[k*4+3]] + T[ii+3]), __s2[3]);
ii += 4;
}
// round three
for(k = 0; k < 4; ++k)
{
A = B + ROTATE_LEFT((A + H(B, C, D) + X[__k3[k*4+0]] + T[ii+0]), __s3[0]);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[__k3[k*4+1]] + T[ii+1]), __s3[1]);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[__k3[k*4+2]] + T[ii+2]), __s3[2]);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[__k3[k*4+3]] + T[ii+3]), __s3[3]);
ii += 4;
}
// round four
for(k = 0; k < 4; ++k)
{
printf("__k4[k*4+0]:%u\n", __k4[k*4+0]);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[__k4[k*4+0]] + T[ii+0]), __s4[0]);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[__k4[k*4+1]] + T[ii+1]), __s4[1]);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[__k4[k*4+2]] + T[ii+2]), __s4[2]);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[__k4[k*4+3]] + T[ii+3]), __s4[3]);
ii += 4;
}
printf("----\n");
printf("ii:%u\n", ii);
AA = AA + A;
BB = BB + B;
CC = CC + C;
DD = DD + D;
}
printf("digest: ");
printf("%02x%02x%02x%02x,", AA&0xFF, AA>>8&0xFF, AA>>16&0xFF, AA>>24&0xFF);
printf("%02x%02x%02x%02x,", BB&0xFF, BB>>8&0xFF, BB>>16&0xFF, BB>>24&0xFF);
printf("%02x%02x%02x%02x,", CC&0xFF, CC>>8&0xFF, CC>>16&0xFF, CC>>24&0xFF);
printf("%02x%02x%02x%02x,", DD&0xFF, DD>>8&0xFF, DD>>16&0xFF, DD>>24&0xFF);
printf("\n");
return;
}
int main(int argc, char *argv[])
{
uint8_t *message = (uint8_t*)"abc";
uint32_t T[64];
md5_genT(&T[0]);
md5_hash(message, strlen(message), &T[0]);
return 1;
}
-
- Member
- Posts: 62
- Joined: Fri Jun 29, 2007 8:36 pm
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
-
- Member
- Posts: 62
- Joined: Fri Jun 29, 2007 8:36 pm
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
-
- Member
- Posts: 62
- Joined: Fri Jun 29, 2007 8:36 pm
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
well, it works!, but it is slow, about ~600,000 h/s ( other libs are ~1,200,000 h/s), but this is ok because it has the possibility to be faster than any general porpoise md5 lib. now what it needs is optimizations, first the T values could be pre-computed: also the loops could be unrolled, hmmm... thats all i can think of.
Code: Select all
#define T_MASK ((md5_word_t)~0)
#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87)
#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
#define T3 0x242070db
#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
#define T6 0x4787c62a
#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
#define T9 0x698098d8
#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
#define T13 0x6b901122
#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
#define T16 0x49b40821
#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
#define T19 0x265e5a51
#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
#define T22 0x02441453
#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
#define T25 0x21e1cde6
#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
#define T28 0x455a14ed
#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
#define T31 0x676f02d9
#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
#define T35 0x6d9d6122
#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
#define T38 0x4bdecfa9
#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
#define T41 0x289b7ec6
#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
#define T44 0x04881d05
#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
#define T47 0x1fa27cf8
#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
#define T50 0x432aff97
#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
#define T53 0x655b59c3
#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
#define T57 0x6fa87e4f
#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
#define T60 0x4e0811a1
#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
#define T63 0x2ad7d2bb
#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
Code: Select all
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
#define H(x,y,z) ((x) ^ (y) ^ (z))
#define I(x,y,z) ((y) ^ ((x) | (~(z))))
#define md5_s1_0 7
#define md5_s1_1 12
#define md5_s1_2 17
#define md5_s1_3 22
#define md5_s2_0 5
#define md5_s2_1 9
#define md5_s2_2 14
#define md5_s2_3 20
#define md5_s3_0 4
#define md5_s3_1 11
#define md5_s3_2 16
#define md5_s3_3 23
#define md5_s4_0 6
#define md5_s4_1 10
#define md5_s4_2 15
#define md5_s4_3 21
void md5_hash(uint8_t *message, uint_fast32_t mlength)
{
uint32_t *nm32;
uint8_t *nm8;
uint32_t nmlength;
uint32_t i, j;
uint32_t AA, BB, CC, DD;
uint32_t A, B, C, D;
uint32_t X[16];
// do padding so message is a multiple of 512 bits (64 bytes)
if((mlength%64) == 0)
{
nmlength = mlength + 64;
}else{
nmlength = mlength + (64 - (mlength%64));
}
nm8 = (uint8_t*)malloc(nmlength);
nm32 = (uint32_t*)nm8;
// could gain speed doing this someother way.. (memset and memcpy)
memset(nm8, 0, nmlength);
memcpy(nm8, message, mlength);
// add a bit one after message, pad remaining with zeros.
nm8[mlength] = 0x80;
// append length in bits to message (low word first)
nm32[(nmlength/4) - 2] = mlength * 8;
nm32[(nmlength/4) - 1] = 0;
// hash padded message (nmessage)
AA = 0x67452301; BB = 0xefcdab89; CC = 0x98badcfe; DD = 0x10325476;
for(i = 0; i < (nmlength/64); ++i)
{
// move block from arbitrary memory location to stack. (could gain speed changing this)
for(j = 0; j < 16; ++j)
{
X[j] = nm32[i*64+j];
}
A = AA;
B = BB;
C = CC;
D = DD;
/// round one (unrolled)
A = B + ROTATE_LEFT((A + F(B, C, D) + X[0] + 0xd76aa478), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[1] + 0xe8c7b756), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[2] + 0x242070db), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[3] + 0xc1bdceee), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[4] + 0xf57c0faf), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[5] + 0x4787c62a), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[6] + 0xa8304613), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[7] + 0xfd469501), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[8] + 0x698098d8), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[9] + 0x8b44f7af), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[10] + 0xffff5bb1), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[11] + 0x895cd7be), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[12] + 0x6b901122), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[13] + 0xfd987193), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[14] + 0xa679438e), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[15] + 0x49b40821), md5_s1_3);
/// round two (unrolled)
A = B + ROTATE_LEFT((A + G(B, C, D) + X[1] + 0xf61e2562), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[6] + 0xc040b340), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[11] + 0x265e5a51), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[0] + 0xe9b6c7aa), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[5] + 0xd62f105d), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[10] + 0x02441453), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[15] + 0xd8a1e681), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[4] + 0xe7d3fbc8), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[9] + 0x21e1cde6), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[14] + 0xc33707d6), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[3] + 0xf4d50d87), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[8] + 0x455a14ed), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[13] + 0xa9e3e905), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[2] + 0xfcefa3f8), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[7] + 0x676f02d9), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[12] + 0x8d2a4c8a), md5_s2_3);
/// round three (unrolled)
A = B + ROTATE_LEFT((A + H(B, C, D) + X[5] + 0xfffa3942), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[8] + 0x8771f681), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[11] + 0x6d9d6122), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[14] + 0xfde5380c), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[1] + 0xa4beea44), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[4] + 0x4bdecfa9), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[7] + 0xf6bb4b60), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[10] + 0xbebfbc70), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[13] + 0x289b7ec6), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[0] + 0xeaa127fa), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[3] + 0xd4ef3085), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[6] + 0x04881d05), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[9] + 0xd9d4d039), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[12] + 0xe6db99e5), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[15] + 0x1fa27cf8), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[2] + 0xc4ac5665), md5_s3_3);
/// round four (unrolled)
A = B + ROTATE_LEFT((A + I(B, C, D) + X[0] + 0xf4292244), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[7] + 0x432aff97), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[14] + 0xab9423a7), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[5] + 0xfc93a039), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[12] + 0x655b59c3), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[3] + 0x8f0ccc92), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[10] + 0xffeff47d), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[1] + 0x85845dd1), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[8] + 0x6fa87e4f), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[15] + 0xfe2ce6e0), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[6] + 0xa3014314), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[13] + 0x4e0811a1), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[4] + 0xf7537e82), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[11] + 0xbd3af235), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[2] + 0x2ad7d2bb), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[9] + 0xeb86d391), md5_s4_3);
AA = AA + A;
BB = BB + B;
CC = CC + C;
DD = DD + D;
}
// release allocated memory.
free(nm8);
/// (REMOVE US PRINT FUNCTIONS DURING SPEED TESTING)
printf("digest: ");
printf("%02x%02x%02x%02x,", AA&0xFF, AA>>8&0xFF, AA>>16&0xFF, AA>>24&0xFF);
printf("%02x%02x%02x%02x,", BB&0xFF, BB>>8&0xFF, BB>>16&0xFF, BB>>24&0xFF);
printf("%02x%02x%02x%02x,", CC&0xFF, CC>>8&0xFF, CC>>16&0xFF, CC>>24&0xFF);
printf("%02x%02x%02x%02x,", DD&0xFF, DD>>8&0xFF, DD>>16&0xFF, DD>>24&0xFF);
printf("\n");
return;
}
int main(int argc, char *argv[])
{
uint8_t *message = (uint8_t*)"abc";
md5_hash(message, strlen(message));
return 1;
}
hay! look at that were in the millions again ~1,119,000 h/s! heres what it looks like: ( the defines are in the header, and the function now returns true if "input = md5(message)" ) hmm.. maybe getting rid of the system calls could help, then the biggest one: ASM! thought that will be a pain
p.s. i have a stupid question, my CPU is 1.4GHz and if this program calculates 1M h/s, doesn't that mean it sums an md5 in just 1000 instruction?
Code: Select all
#include "md5lib.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
int md5_hash(uint8_t *message, uint_fast32_t mlength, unsigned char input[16])
{
uint32_t *nm32;
uint8_t *nm8;
uint32_t nmlength;
uint32_t i, j;
uint32_t AA, BB, CC, DD;
uint32_t A, B, C, D;
uint32_t X[16];
// do padding so message is a multiple of 512 bits (64 bytes)
if((mlength%64) == 0)
{
nmlength = mlength + 64;
}else{
nmlength = mlength + (64 - (mlength%64));
}
nm8 = (uint8_t*)malloc(nmlength);
nm32 = (uint32_t*)nm8;
// could gain speed doing this someother way.. (memset and memcpy)
memset(nm8, 0, nmlength);
memcpy(nm8, message, mlength);
// add a bit one after message, pad remaining with zeros.
nm8[mlength] = 0x80;
// append length in bits to message (low word first)
nm32[(nmlength/4) - 2] = mlength * 8;
nm32[(nmlength/4) - 1] = 0;
// hash padded message (nmessage)
AA = 0x67452301; BB = 0xefcdab89; CC = 0x98badcfe; DD = 0x10325476;
for(i = 0; i < (nmlength/64); ++i)
{
// move block from arbitrary memory location to stack. (could gain speed changing this)
for(j = 0; j < 16; ++j)
{
X[j] = nm32[i*64+j];
}
A = AA;
B = BB;
C = CC;
D = DD;
/// round one (unrolled)
A = B + ROTATE_LEFT((A + F(B, C, D) + X[0] + 0xd76aa478), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[1] + 0xe8c7b756), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[2] + 0x242070db), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[3] + 0xc1bdceee), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[4] + 0xf57c0faf), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[5] + 0x4787c62a), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[6] + 0xa8304613), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[7] + 0xfd469501), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[8] + 0x698098d8), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[9] + 0x8b44f7af), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[10] + 0xffff5bb1), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[11] + 0x895cd7be), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[12] + 0x6b901122), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[13] + 0xfd987193), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[14] + 0xa679438e), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[15] + 0x49b40821), md5_s1_3);
/// round two (unrolled)
A = B + ROTATE_LEFT((A + G(B, C, D) + X[1] + 0xf61e2562), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[6] + 0xc040b340), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[11] + 0x265e5a51), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[0] + 0xe9b6c7aa), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[5] + 0xd62f105d), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[10] + 0x02441453), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[15] + 0xd8a1e681), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[4] + 0xe7d3fbc8), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[9] + 0x21e1cde6), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[14] + 0xc33707d6), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[3] + 0xf4d50d87), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[8] + 0x455a14ed), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[13] + 0xa9e3e905), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[2] + 0xfcefa3f8), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[7] + 0x676f02d9), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[12] + 0x8d2a4c8a), md5_s2_3);
/// round three (unrolled)
A = B + ROTATE_LEFT((A + H(B, C, D) + X[5] + 0xfffa3942), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[8] + 0x8771f681), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[11] + 0x6d9d6122), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[14] + 0xfde5380c), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[1] + 0xa4beea44), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[4] + 0x4bdecfa9), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[7] + 0xf6bb4b60), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[10] + 0xbebfbc70), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[13] + 0x289b7ec6), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[0] + 0xeaa127fa), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[3] + 0xd4ef3085), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[6] + 0x04881d05), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[9] + 0xd9d4d039), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[12] + 0xe6db99e5), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[15] + 0x1fa27cf8), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[2] + 0xc4ac5665), md5_s3_3);
/// round four (unrolled)
A = B + ROTATE_LEFT((A + I(B, C, D) + X[0] + 0xf4292244), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[7] + 0x432aff97), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[14] + 0xab9423a7), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[5] + 0xfc93a039), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[12] + 0x655b59c3), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[3] + 0x8f0ccc92), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[10] + 0xffeff47d), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[1] + 0x85845dd1), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[8] + 0x6fa87e4f), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[15] + 0xfe2ce6e0), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[6] + 0xa3014314), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[13] + 0x4e0811a1), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[4] + 0xf7537e82), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[11] + 0xbd3af235), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[2] + 0x2ad7d2bb), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[9] + 0xeb86d391), md5_s4_3);
AA = AA + A;
BB = BB + B;
CC = CC + C;
DD = DD + D;
}
// release allocated memory.
free(nm8);
/// (REMOVE US PRINT FUNCTIONS DURING SPEED TESTING)
if((*(unsigned long *)(input) == (AA)) &&
(*(unsigned long *)(input+4) == (BB)) &&
(*(unsigned long *)(input+8) == (CC)) &&
(*(unsigned long *)(input+12) == (DD)))
return 1;
return 0;
}
p.s. i have a stupid question, my CPU is 1.4GHz and if this program calculates 1M h/s, doesn't that mean it sums an md5 in just 1000 instruction?
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
Nope, all instructions are not one cycle on a CISC processor.
Did you try compiling it with -O2, -Os, or -O3 and also -march i686?
Did you try compiling it with -O2, -Os, or -O3 and also -march i686?
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
Give me a little bit and I will tell you how to split the sequence among processors, and use the POSIX threading library.
I am having a little trouble with my math at the moment.
a^b = c
Where I need to find a. I can find b using c^(1/a). I am sure it is on the internet or in a textbook somewhere but I am trying to exercise my brain on it. I will get it before long.
I am having a little trouble with my math at the moment.
a^b = c
Where I need to find a. I can find b using c^(1/a). I am sure it is on the internet or in a textbook somewhere but I am trying to exercise my brain on it. I will get it before long.