murmur hash,一个更快的hash算法

在打算搭建memcache集群的时候,使用了crc3算法来对key进行hash,然而发现该算法性能比较低,于是寻找一个高性能,低碰撞的hash算,很高兴有前人已经为我们发明了这种算法——murmur。

MurmurHash算法:高运算性能,低碰撞率,由Austin Appleby创建于2008年,现已应用到Hadoop、libstdc++、nginx、libmemcached等开源系统。2011年 Appleby被Google雇佣,随后Google推出其变种的CityHash算法。

MurmurHash算法,自称超级快的hash算法,是FNV的4-5倍。官方数据如下:

OneAtATime – 354.163715 mb/sec
FNV – 443.668038 mb/sec
SuperFastHash – 985.335173 mb/sec
lookup3 – 988.080652 mb/sec
MurmurHash 1.0 – 1363.293480 mb/sec
MurmurHash 2.0 – 2056.885653 mb/sec

    //-----------------------------------------------------------------------------  
    // MurmurHash2, 64-bit versions, by Austin Appleby  
      
    // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment   
    // and endian-ness issues if used across multiple platforms.  
      
    typedef unsigned long int uint64_t;  
      
    // 64-bit hash for 64-bit platforms  
    uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )  
    {  
            const uint64_t m = 0xc6a4a7935bd1e995;  
            const int r = 47;  
      
            uint64_t h = seed ^ (len * m);  
      
            const uint64_t * data = (const uint64_t *)key;  
            const uint64_t * end = data + (len/8);  
      
            while(data != end)  
            {  
                    uint64_t k = *data++;  
      
                    k *= m;   
                    k ^= k >> r;   
                    k *= m;   
      
                    h ^= k;  
                    h *= m;   
            }  
      
            const unsigned char * data2 = (const unsigned char*)data;  
      
            switch(len & 7)  
            {  
            case 7: h ^= uint64_t(data2[6]) << 48;  
            case 6: h ^= uint64_t(data2[5]) << 40;  
            case 5: h ^= uint64_t(data2[4]) << 32;  
            case 4: h ^= uint64_t(data2[3]) << 24;  
            case 3: h ^= uint64_t(data2[2]) << 16;  
            case 2: h ^= uint64_t(data2[1]) << 8;  
            case 1: h ^= uint64_t(data2[0]);  
                    h *= m;  
            };  
       
            h ^= h >> r;  
            h *= m;  
            h ^= h >> r;  
      
            return h;  
    }   
      
      
    // 64-bit hash for 32-bit platforms  
    uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed )  
    {  
            const unsigned int m = 0x5bd1e995;  
            const int r = 24;  
      
            unsigned int h1 = seed ^ len;  
            unsigned int h2 = 0;  
      
            const unsigned int * data = (const unsigned int *)key;  
      
            while(len >= 8)  
            {  
                    unsigned int k1 = *data++;  
                    k1 *= m; k1 ^= k1 >> r; k1 *= m;  
                    h1 *= m; h1 ^= k1;  
                    len -= 4;  
      
                    unsigned int k2 = *data++;  
                    k2 *= m; k2 ^= k2 >> r; k2 *= m;  
                    h2 *= m; h2 ^= k2;  
                    len -= 4;  
            }  
      
            if(len >= 4)  
            {  
                    unsigned int k1 = *data++;  
                    k1 *= m; k1 ^= k1 >> r; k1 *= m;  
                    h1 *= m; h1 ^= k1;  
                    len -= 4;  
            }  
      
            switch(len)  
            {  
            case 3: h2 ^= ((unsigned char*)data)[2] << 16;  
            case 2: h2 ^= ((unsigned char*)data)[1] << 8;  
            case 1: h2 ^= ((unsigned char*)data)[0];  
                            h2 *= m;  
            };  
      
            h1 ^= h2 >> 18; h1 *= m;  
            h2 ^= h1 >> 22; h2 *= m;  
            h1 ^= h2 >> 17; h1 *= m;  
            h2 ^= h1 >> 19; h2 *= m;  
      
            uint64_t h = h1;  
      
            h = (h << 32) | h2;  
      
            return h;  
    }
赞赏

微信赞赏支付宝赞赏

发表评论

您的电子邮箱地址不会被公开。