AbstractTLSHDigest.java
/*
* Copyright 2020 Keve Müller
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package app.keve.ktlsh.impl;
import java.util.Arrays;
/**
* Base implementation of the TLSH digester.
*
* @author keve
*
*/
public abstract class AbstractTLSHDigest implements TLSHDigest {
/** The Pearson default hash of 0. */
protected static final int T0 = 1 /* T[0] */;
/** The Pearson default hash of 2. */
protected static final int T2 = 49 /* T[2] */;
/** The Pearson default hash of 3. */
protected static final int T3 = 12 /* T[3] */;
/** The Pearson default hash of 5. */
protected static final int T5 = 178 /* T[5] */;
/** The Pearson default hash of 7. */
protected static final int T7 = 166 /* T[7] */;
/** The Pearson default hash of 11. */
protected static final int T11 = 84 /* T[11] */;
/** The Pearson default hash of 13. */
protected static final int T13 = 230 /* T[13] */;
/** The Pearson default hash of 17. */
protected static final int T17 = 197 /* T[17] */;
/** The Pearson default hash of 19. */
protected static final int T19 = 181 /* T[19] */;
/** The Pearson default hash of 23. */
protected static final int T23 = 80 /* T[23] */;
/** The Pearson default hash of 29. */
protected static final int T29 = 142 /* T[29] */;
/** The Pearson default hash of 31. */
protected static final int T31 = 200 /* T[31] */;
/** The Pearson default hash of 37. */
protected static final int T37 = 253 /* T[37] */;
/** The Pearson default hash of 41. */
protected static final int T41 = 101 /* T[41] */;
/** The Pearson default hash of 43. */
protected static final int T43 = 18 /* T[43] */;
/** The Pearson default hash of 47. */
protected static final int T47 = 222 /* T[47] */;
/** The Pearson default hash of 53. */
protected static final int T53 = 237 /* T[53] */;
/** The Pearson default hash of 59. */
protected static final int T59 = 214 /* T[59] */;
/** The Pearson default hash of 61. */
protected static final int T61 = 227 /* T[61] */;
/** The Pearson default hash of 67. */
protected static final int T67 = 22 /* T[67] */;
/** The Pearson default hash of 71. */
protected static final int T71 = 175 /* T[71] */;
/** The Pearson default hash of 73. */
protected static final int T73 = 5 /* T[73] */;
/** the window length [4-8]. */
protected final int windowLength;
/** the bucket count (128|256). */
protected final int bucketCount;
/** the number of checksum bytes (1|3). */
protected final int checkSumLength;
/** the buckets to accumulate the histogram in. */
protected final long[] aBucket;
/** the checksum bytes. */
protected final int[] checksum;
/** the number of bytes processed so far. */
protected long count;
protected AbstractTLSHDigest(final int windowLength, final int bucketCount, final int checkSumLength) {
this.windowLength = windowLength;
this.bucketCount = bucketCount;
this.checkSumLength = checkSumLength;
aBucket = new long[256];
checksum = new int[checkSumLength];
count = 0;
}
@Override
public final void reset() {
count = 0;
Arrays.fill(aBucket, 0);
Arrays.fill(checksum, 0);
}
@Override
public final TLSH digest() {
// findQuartiles
final long[] bucketCopy = Arrays.copyOf(aBucket, bucketCount);
Arrays.sort(bucketCopy);
final int quartile = bucketCount / 4;
final int p1 = quartile - 1;
final long q1 = bucketCopy[p1];
final long q2 = bucketCopy[p1 + quartile];
final long q3 = bucketCopy[p1 + 2 * quartile];
// compress buckets
final int codeSize = bucketCount / 4;
final int[] code = new int[codeSize];
for (int i = 0; i < codeSize; i++) {
int h = 0;
for (int j = 0; j < 4; j++) {
final long k = aBucket[4 * i + j];
if (q3 < k) {
h += 3 << j * 2;
} else if (q2 < k) {
h += 2 << j * 2;
} else if (q1 < k) {
h += 1 << j * 2;
}
}
code[i] = h;
}
return TLSH.of(checksum.clone(), count, q1, q2, q3, code);
}
protected final int bMapping(final int salt, final int i, final int j, final int k) {
return Pearson.T[Pearson.T[Pearson.T[Pearson.T[salt] ^ i] ^ j] ^ k];
}
protected final int sMapping(final int h, final int i, final int j, final int k) {
return Pearson.T[Pearson.T[Pearson.T[h ^ i] ^ j] ^ k];
}
/**
* Obtain the lag window as an array.
*
* @return the lag window array.
*/
protected abstract int[] getLag();
@Override
public final String toString() {
final StringBuffer stringBuffer = new StringBuffer(64);
stringBuffer.append("aBucket=\n");
for (int i = 0; i < aBucket.length; i++) {
if (aBucket[i] > 0) {
stringBuffer.append(i).append(':').append(aBucket[i]).append('\n');
}
}
stringBuffer.append('\n');
final int[] lag = getLag();
stringBuffer.append("lag=").append(Arrays.toString(lag)).append('\n');
stringBuffer.append("count=").append(count).append('\n');
stringBuffer.append("checksum=").append(Arrays.toString(checksum)).append('\n');
return stringBuffer.toString();
}
}