/*******************************************************************************
* Copyright (c) 2011 Subgraph.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Subgraph - initial API and implementation
******************************************************************************/
package com.subgraph.vega.internal.http.requests;
import java.util.Arrays;
import com.subgraph.vega.api.http.requests.IPageFingerprint;
public class PageFingerprint implements IPageFingerprint {
public static PageFingerprint generateFromCodeAndString(int code, String body) {
final PageFingerprint fp = new PageFingerprint();
fp.setCode(code);
if(body == null || body.isEmpty())
return fp;
boolean inSpace = false;
int clen = 0;
for(int i = 0; i < body.length(); i++) {
char c = body.charAt(i);
if(c <= 0x20 || c == '<' || c == '>' || c == '\'' || c == '"') {
if(!inSpace) {
inSpace = true;
fp.addWordLength(clen);
clen = 0;
} else {
clen++;
}
} else {
if(inSpace) {
inSpace = false;
fp.addWordLength(clen);
clen = 0;
} else {
clen++;
}
}
}
fp.addWordLength(clen);
return fp;
}
private final static int FP_SIZE = 10;
private final static int FP_MAX_LEN = 15;
private final static int FP_T_REL = 5;
private final static int FP_T_ABS = 6;
private final static int FP_B_FAIL = 3;
private final int[] fpData = new int[FP_SIZE];
private int fpCode;
public void setCode(int code) {
this.fpCode = code;
}
public int getCode() {
return fpCode;
}
public int[] getData() {
return fpData;
}
public void addWordLength(int length) {
if(length <= FP_MAX_LEN) {
fpData[length % FP_SIZE]++;
}
}
public boolean isSame(IPageFingerprint other) {
if(other == null || other.getCode() != fpCode)
return false;
int totalDiff = 0;
int totalScale = 0;
int bucketFail = 0;
for(int i = 0; i < FP_SIZE; i++) {
int diff = fpData[i] - other.getData()[i];
int scale = fpData[i] + other.getData()[i];
if(!isRelativeMatch(diff, scale) || (Math.abs(diff) > FP_T_ABS)) {
bucketFail++;
if(bucketFail > FP_B_FAIL)
return false;
}
totalDiff += diff;
totalScale += scale;
}
return isRelativeMatch(totalDiff, totalScale);
}
private boolean isRelativeMatch(int diff, int scale) {
return Math.abs(diff) <= (1 + (scale * FP_T_REL / 100));
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("FP: (code=");
sb.append(fpCode);
sb.append(") [");
for(int i = 0; i < FP_SIZE; i++) {
if(i > 0)
sb.append(", ");
sb.append(fpData[i]);
}
sb.append("]");
return sb.toString();
}
@Override
public boolean equals(Object other) {
if(this == other) {
return true;
} else if(other instanceof PageFingerprint) {
final PageFingerprint that = (PageFingerprint) other;
return that.fpCode == this.fpCode && Arrays.equals(that.fpData, this.fpData);
} else {
return false;
}
}
@Override
public int hashCode() {
return fpCode * 47 + Arrays.hashCode(fpData);
}
}