001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.activemq.util; 018 019public class JenkinsHash { 020 021 private static final long INT_MASK = 0x00000000ffffffffL; 022 private static final long BYTE_MASK = 0x00000000000000ffL; 023 024 private static final JenkinsHash _instance = new JenkinsHash(); 025 026 public static JenkinsHash getInstance() { 027 return _instance; 028 } 029 030 private static long rot(long val, int pos) { 031 return ((Integer.rotateLeft((int) (val & INT_MASK), pos)) & INT_MASK); 032 } 033 034 /** 035 * Calculate a hash using all bytes from the input argument, and 036 * a seed of -1. 037 * @param bytes input bytes 038 * @return hash value 039 */ 040 public int hash(byte[] bytes) { 041 return hash(bytes, bytes.length, -1); 042 } 043 044 /** 045 * Calculate a hash using all bytes from the input argument, and 046 * a seed of -1. 047 * @param bytes input bytes 048 * @return hash value 049 */ 050 public int hash(byte[] bytes, int initVal) { 051 return hash(bytes, bytes.length, initVal); 052 } 053 054 /** 055 * taken from hashlittle() -- hash a variable-length key into a 32-bit value 056 * 057 * @param key the key (the unaligned variable-length array of bytes) 058 * @param nbytes number of bytes to include in hash 059 * @param initval can be any integer value 060 * @return a 32-bit value. Every bit of the key affects every bit of the 061 * return value. Two keys differing by one or two bits will have totally 062 * different hash values. 063 * <p> 064 * <p>The best hash table sizes are powers of 2. There is no need to do mod 065 * a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. 066 * For example, if you need only 10 bits, do 067 * <code>h = (h & hashmask(10));</code> 068 * In which case, the hash table should have hashsize(10) elements. 069 * <p> 070 * <p>If you are hashing n strings byte[][] k, do it like this: 071 * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); 072 * <p> 073 * <p>By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 074 * code any way you wish, private, educational, or commercial. It's free. 075 * <p> 076 * <p>Use for hash table lookup, or anything where one collision in 2^^32 is 077 * acceptable. Do NOT use for cryptographic purposes. 078 */ 079 public int hash(byte[] key, int nbytes, int initval) { 080 int length = nbytes; 081 long a, b, c; // We use longs because we don't have unsigned ints 082 a = b = c = (0x00000000deadbeefL + length + initval) & INT_MASK; 083 int offset = 0; 084 for (; length > 12; offset += 12, length -= 12) { 085 a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; 086 a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; 087 a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; 088 a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; 089 b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; 090 b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; 091 b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; 092 b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; 093 c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; 094 c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; 095 c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; 096 c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; 097 098 /* 099 * mix -- mix 3 32-bit values reversibly. 100 * This is reversible, so any information in (a,b,c) before mix() is 101 * still in (a,b,c) after mix(). 102 * 103 * If four pairs of (a,b,c) inputs are run through mix(), or through 104 * mix() in reverse, there are at least 32 bits of the output that 105 * are sometimes the same for one pair and different for another pair. 106 * 107 * This was tested for: 108 * - pairs that differed by one bit, by two bits, in any combination 109 * of top bits of (a,b,c), or in any combination of bottom bits of 110 * (a,b,c). 111 * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 112 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 113 * is commonly produced by subtraction) look like a single 1-bit 114 * difference. 115 * - the base values were pseudorandom, all zero but one bit set, or 116 * all zero plus a counter that starts at zero. 117 * 118 * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 119 * satisfy this are 120 * 4 6 8 16 19 4 121 * 9 15 3 18 27 15 122 * 14 9 3 7 17 3 123 * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for 124 * "differ" defined as + with a one-bit base and a two-bit delta. I 125 * used http://burtleburtle.net/bob/hash/avalanche.html to choose 126 * the operations, constants, and arrangements of the variables. 127 * 128 * This does not achieve avalanche. There are input bits of (a,b,c) 129 * that fail to affect some output bits of (a,b,c), especially of a. 130 * The most thoroughly mixed value is c, but it doesn't really even 131 * achieve avalanche in c. 132 * 133 * This allows some parallelism. Read-after-writes are good at doubling 134 * the number of bits affected, so the goal of mixing pulls in the 135 * opposite direction as the goal of parallelism. I did what I could. 136 * Rotates seem to cost as much as shifts on every machine I could lay 137 * my hands on, and rotates are much kinder to the top and bottom bits, 138 * so I used rotates. 139 * 140 * #define mix(a,b,c) \ 141 * { \ 142 * a -= c; a ^= rot(c, 4); c += b; \ 143 * b -= a; b ^= rot(a, 6); a += c; \ 144 * c -= b; c ^= rot(b, 8); b += a; \ 145 * a -= c; a ^= rot(c,16); c += b; \ 146 * b -= a; b ^= rot(a,19); a += c; \ 147 * c -= b; c ^= rot(b, 4); b += a; \ 148 * } 149 * 150 * mix(a,b,c); 151 */ 152 a = (a - c) & INT_MASK; 153 a ^= rot(c, 4); 154 c = (c + b) & INT_MASK; 155 b = (b - a) & INT_MASK; 156 b ^= rot(a, 6); 157 a = (a + c) & INT_MASK; 158 c = (c - b) & INT_MASK; 159 c ^= rot(b, 8); 160 b = (b + a) & INT_MASK; 161 a = (a - c) & INT_MASK; 162 a ^= rot(c, 16); 163 c = (c + b) & INT_MASK; 164 b = (b - a) & INT_MASK; 165 b ^= rot(a, 19); 166 a = (a + c) & INT_MASK; 167 c = (c - b) & INT_MASK; 168 c ^= rot(b, 4); 169 b = (b + a) & INT_MASK; 170 } 171 172 //-------------------------------- last block: affect all 32 bits of (c) 173 switch (length) { // all the case statements fall through 174 case 12: 175 c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; 176 case 11: 177 c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; 178 case 10: 179 c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; 180 case 9: 181 c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; 182 case 8: 183 b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; 184 case 7: 185 b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; 186 case 6: 187 b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; 188 case 5: 189 b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; 190 case 4: 191 a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; 192 case 3: 193 a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; 194 case 2: 195 a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; 196 case 1: 197 a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; 198 break; 199 case 0: 200 return (int) (c & INT_MASK); 201 } 202 203 /* 204 * final -- final mixing of 3 32-bit values (a,b,c) into c 205 * 206 * Pairs of (a,b,c) values differing in only a few bits will usually 207 * produce values of c that look totally different. This was tested for 208 * - pairs that differed by one bit, by two bits, in any combination 209 * of top bits of (a,b,c), or in any combination of bottom bits of 210 * (a,b,c). 211 * 212 * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 213 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 214 * is commonly produced by subtraction) look like a single 1-bit 215 * difference. 216 * 217 * - the base values were pseudorandom, all zero but one bit set, or 218 * all zero plus a counter that starts at zero. 219 * 220 * These constants passed: 221 * 14 11 25 16 4 14 24 222 * 12 14 25 16 4 14 24 223 * and these came close: 224 * 4 8 15 26 3 22 24 225 * 10 8 15 26 3 22 24 226 * 11 8 15 26 3 22 24 227 * 228 * #define final(a,b,c) \ 229 * { 230 * c ^= b; c -= rot(b,14); \ 231 * a ^= c; a -= rot(c,11); \ 232 * b ^= a; b -= rot(a,25); \ 233 * c ^= b; c -= rot(b,16); \ 234 * a ^= c; a -= rot(c,4); \ 235 * b ^= a; b -= rot(a,14); \ 236 * c ^= b; c -= rot(b,24); \ 237 * } 238 * 239 */ 240 c ^= b; 241 c = (c - rot(b, 14)) & INT_MASK; 242 a ^= c; 243 a = (a - rot(c, 11)) & INT_MASK; 244 b ^= a; 245 b = (b - rot(a, 25)) & INT_MASK; 246 c ^= b; 247 c = (c - rot(b, 16)) & INT_MASK; 248 a ^= c; 249 a = (a - rot(c, 4)) & INT_MASK; 250 b ^= a; 251 b = (b - rot(a, 14)) & INT_MASK; 252 c ^= b; 253 c = (c - rot(b, 24)) & INT_MASK; 254 255 return (int) (c & INT_MASK); 256 } 257 258}