Code: Select all
#include <asm/processor.h>
void __attribute__((__fastcall__)) __attribute__((__always_inline__))
md5_hash(uint8_t *message, uint32_t mlength, uint32_t *digest)
{
uint32_t *nm32;
uint8_t *nm8;
uint32_t nmlength;
uint32_t i, j;
uint32_t AA, BB, CC, DD;
register uint32_t A, B, C, D;
uint32_t X[16];
uint8_t TMP[mlength%64 == 0 ? mlength + 64 : mlength + (64 - (mlength%64))];
// do padding so message is a multiple of 512 bits (64 bytes)
nmlength = mlength%64 == 0 ? mlength + 64 : mlength + (64 - (mlength%64));
//if((mlength%64) == 0)
//{
// nmlength = mlength + 64;
//}else{
// nmlength = mlength + (64 - (mlength%64));
//}
//nm8 = (uint8_t*)malloc(nmlength);
prefetchw(&TMP[0]);
nm8 = &TMP[0];
nm32 = (uint32_t*)nm8;
for(i = 0; i < (nmlength/4); ++i)
{
nm32[i] = 0;
}
for(i = 0; i < mlength; ++i)
{
nm8[i] = message[i];
}
// add a bit one after message, pad remaining with zeros.
nm8[mlength] = 0x80;
// append length in bits to message (low word first)
nm32[(nmlength/4) - 2] = mlength * 8;
nm32[(nmlength/4) - 1] = 0;
// hash padded message (nmessage)
AA = 0x67452301; BB = 0xefcdab89; CC = 0x98badcfe; DD = 0x10325476;
prefetch(&X[0]);
prefetchw(&X[0]);
for(i = 0; i < (nmlength/64); ++i)
{
// move block from arbitrary memory location to stack. (could gain speed changing this)
for(j = 0; j < 16; ++j)
{
X[j] = nm32[i*64+j];
}
A = AA;
B = BB;
C = CC;
D = DD;
/// round one (unrolled)
A = B + ROTATE_LEFT((A + F(B, C, D) + X[0] + 0xd76aa478), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[1] + 0xe8c7b756), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[2] + 0x242070db), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[3] + 0xc1bdceee), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[4] + 0xf57c0faf), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[5] + 0x4787c62a), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[6] + 0xa8304613), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[7] + 0xfd469501), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[8] + 0x698098d8), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[9] + 0x8b44f7af), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[10] + 0xffff5bb1), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[11] + 0x895cd7be), md5_s1_3);
A = B + ROTATE_LEFT((A + F(B, C, D) + X[12] + 0x6b901122), md5_s1_0);
D = A + ROTATE_LEFT((D + F(A, B, C) + X[13] + 0xfd987193), md5_s1_1);
C = D + ROTATE_LEFT((C + F(D, A, B) + X[14] + 0xa679438e), md5_s1_2);
B = C + ROTATE_LEFT((B + F(C, D, A) + X[15] + 0x49b40821), md5_s1_3);
/// round two (unrolled)
A = B + ROTATE_LEFT((A + G(B, C, D) + X[1] + 0xf61e2562), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[6] + 0xc040b340), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[11] + 0x265e5a51), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[0] + 0xe9b6c7aa), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[5] + 0xd62f105d), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[10] + 0x02441453), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[15] + 0xd8a1e681), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[4] + 0xe7d3fbc8), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[9] + 0x21e1cde6), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[14] + 0xc33707d6), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[3] + 0xf4d50d87), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[8] + 0x455a14ed), md5_s2_3);
A = B + ROTATE_LEFT((A + G(B, C, D) + X[13] + 0xa9e3e905), md5_s2_0);
D = A + ROTATE_LEFT((D + G(A, B, C) + X[2] + 0xfcefa3f8), md5_s2_1);
C = D + ROTATE_LEFT((C + G(D, A, B) + X[7] + 0x676f02d9), md5_s2_2);
B = C + ROTATE_LEFT((B + G(C, D, A) + X[12] + 0x8d2a4c8a), md5_s2_3);
/// round three (unrolled)
A = B + ROTATE_LEFT((A + H(B, C, D) + X[5] + 0xfffa3942), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[8] + 0x8771f681), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[11] + 0x6d9d6122), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[14] + 0xfde5380c), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[1] + 0xa4beea44), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[4] + 0x4bdecfa9), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[7] + 0xf6bb4b60), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[10] + 0xbebfbc70), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[13] + 0x289b7ec6), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[0] + 0xeaa127fa), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[3] + 0xd4ef3085), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[6] + 0x04881d05), md5_s3_3);
A = B + ROTATE_LEFT((A + H(B, C, D) + X[9] + 0xd9d4d039), md5_s3_0);
D = A + ROTATE_LEFT((D + H(A, B, C) + X[12] + 0xe6db99e5), md5_s3_1);
C = D + ROTATE_LEFT((C + H(D, A, B) + X[15] + 0x1fa27cf8), md5_s3_2);
B = C + ROTATE_LEFT((B + H(C, D, A) + X[2] + 0xc4ac5665), md5_s3_3);
/// round four (unrolled)
A = B + ROTATE_LEFT((A + I(B, C, D) + X[0] + 0xf4292244), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[7] + 0x432aff97), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[14] + 0xab9423a7), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[5] + 0xfc93a039), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[12] + 0x655b59c3), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[3] + 0x8f0ccc92), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[10] + 0xffeff47d), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[1] + 0x85845dd1), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[8] + 0x6fa87e4f), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[15] + 0xfe2ce6e0), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[6] + 0xa3014314), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[13] + 0x4e0811a1), md5_s4_3);
A = B + ROTATE_LEFT((A + I(B, C, D) + X[4] + 0xf7537e82), md5_s4_0);
D = A + ROTATE_LEFT((D + I(A, B, C) + X[11] + 0xbd3af235), md5_s4_1);
C = D + ROTATE_LEFT((C + I(D, A, B) + X[2] + 0x2ad7d2bb), md5_s4_2);
B = C + ROTATE_LEFT((B + I(C, D, A) + X[9] + 0xeb86d391), md5_s4_3);
AA = AA + A;
BB = BB + B;
CC = CC + C;
DD = DD + D;
}
// release allocated memory.
//free(nm8);
digest[0] = AA;
digest[1] = BB;
digest[2] = CC;
digest[3] = DD;
return;
}