Pearson.java
/*
* Copyright 2020 Keve Müller
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package app.keve.ktlsh.impl;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* Implementation of Pearson's hash function. Follows original publication:
* https://web.archive.org/web/20120704025921/http://cs.mwsu.edu/~griffin/courses/2133/downloads/Spring11/p677-pearson.pdf
*
*/
public final class Pearson {
/** Pearson's sample random table from aforementioned publication. */
public static final int[] T = {1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163, 14, 197, 213,
181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200, 110, 177, 104, 103, 141, 253, 255, 50, 77, 101,
81, 18, 45, 96, 31, 222, 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235, 97, 234,
57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248, 174, 169, 211, 58, 66, 154, 106, 195, 245,
171, 17, 187, 182, 179, 0, 243, 132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10, 138, 30, 48, 183, 156, 35, 61, 26,
143, 74, 251, 94, 129, 162, 63, 152, 170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123, 118, 73, 2, 157, 46, 116, 9, 145,
134, 228, 207, 212, 202, 215, 69, 229, 27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39,
203, 233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76, 140, 36, 210, 172, 41, 54,
159, 8, 185, 232, 113, 196, 231, 47, 146, 120, 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71,
109, 184, 209};
/** The permutation to use for hashing. */
public final int[] t;
/** The pre-computed hashCode of the permutation array. */
private final int hashCode;
private Pearson(final int[] t) {
this.t = t;
hashCode = Arrays.hashCode(t);
}
/**
* Hash a byte buffer.
*
* @param buf the buffer to hash
* @return the unsigned byte hash value
*/
public int hash(final byte[] buf) {
int hash = 0;
for (final int b : buf) {
hash = t[hash ^ b & 0xFF];
}
return hash;
}
/**
* Hash a single byte.
*
* @param i the byte to hash
* @return the unsigned byte hash value
*/
public int hash(final int i) {
return t[i];
}
/**
* Hash a two bytes.
*
* @param i the first byte to hash
* @param j the second byte.
* @return the unsigned byte hash value
*/
public int hash(final int i, final int j) {
return t[t[i] ^ j];
}
/**
* Hash a three bytes.
*
* @param i the first byte to hash
* @param j the second byte.
* @param k the third byte.
* @return the unsigned byte hash value
*/
public int hash(final int i, final int j, final int k) {
return t[t[t[i] ^ j] ^ k];
}
/**
* Hash a sequence of bytes.
*
* @param buf the bytes to hash
* @return the unsigned byte hash value
*/
public int hash(final int... buf) {
int hash = 0;
for (final int b : buf) {
hash = t[hash ^ b];
}
return hash;
}
/**
* Obtain a Pearson hash instance for the default permutation.
*
* @return the instance.
*/
public static Pearson defaultInstance() {
return Pearson.of(T);
}
/**
* Obtain a Pearson hash instance for a random permutation.
*
* @return the instance.
*/
public static Pearson randomInstance() {
final int[] t = new int[256];
final List<Integer> l = IntStream.range(0, 256).boxed().collect(Collectors.toList());
Collections.shuffle(l);
int i = 0;
final Iterator<Integer> it = l.iterator();
while (it.hasNext()) {
t[i++] = it.next();
}
return Pearson.of(t);
}
/**
* Obtain a Pearson hash instance the given permutation.
*
* @param t the permutation
* @return the instance.
*/
public static Pearson of(final int[] t) {
if (256 != t.length || 256 != IntStream.of(t).distinct().count()) {
throw new IllegalArgumentException("Bad permutation!");
}
for (final int b : t) {
if (b < 0 || b >= 256) {
throw new IllegalArgumentException("Bad value " + b + ".");
}
}
return new Pearson(t.clone());
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(final Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof Pearson)) {
return false;
}
final Pearson other = (Pearson) obj;
return hashCode == other.hashCode && Arrays.equals(t, other.t);
}
@Override
public String toString() {
final StringBuffer stringBuffer = new StringBuffer("Pearson");
stringBuffer.append(Arrays.toString(t));
return stringBuffer.toString();
}
}