package focusedCrawler.target.model;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.HashMap;
import org.apache.commons.codec.digest.DigestUtils;
/*
* Proposed schema by IST:
*
* "url" : "",
* "timestamp": "",
* "request": {
* "method": "",
* "client": {
* "hostname": "",
* "address": "",
* "software": "",
* "robots": "",
* "contact": {
* "name": "",
* "email": "",
* },
* },
* "headers": {
* "Accept": "",
* "Accept-Encoding": "",
* "Accept-Language": "",
* "User-Agent": "",
* },
* "body": null,
* },
* "response": {
* "status": "",
* "server": {
* "hostname": "",
* "address": "",
* },
* "headers": {
* "Content-Encoding": "",
* "Content-Type": "",
* "Date": "",
* "Expires": "",
* "Server": "",
* },
* "body": "",
* },
* "key": "",
* "imported": "",
*/
/**
 * Model of a single fetched page whose public fields mirror the top-level
 * keys of the schema outlined in the comment preceding this class
 * ({@code url}, {@code timestamp}, {@code request}, {@code response},
 * {@code key}, {@code imported}).
 *
 * <p>The fields are public and mutable and no synchronization is performed.
 * NOTE(review): the class name suggests instances are serialized as CBOR by
 * an external mapper that reads the public fields -- confirm against callers.
 */
public class TargetModelCbor {

    /** Host name of the local machine, or {@code "localhost"} if it cannot be resolved. */
    private static final String HOST_NAME;

    /** Textual IP address of the local machine, or {@code "127.0.0.1"} if it cannot be resolved. */
    private static final String HOST_ADDRESS;

    static {
        // Resolve into locals first so each constant is assigned exactly once,
        // which is what allows the fields to be declared final.
        String name;
        String address;
        try {
            InetAddress localhost = InetAddress.getLocalHost();
            name = localhost.getHostName();
            address = localhost.getHostAddress();
        } catch (UnknownHostException e) {
            // Best effort: fall back to loopback values rather than failing class loading.
            name = "localhost";
            address = "127.0.0.1";
        }
        HOST_NAME = name;
        HOST_ADDRESS = address;
    }

    /** String form of the page URL. */
    public String url;

    /** Value for the schema's {@code imported} key; never assigned by this class. */
    public String imported;

    /** Record key in the form produced by {@link #computeReverseKey(URL)}. */
    public String key;

    /** Creation time in seconds since the Unix epoch. */
    public long timestamp;

    /** Request section of the record ({@code method}, {@code client}, {@code headers}, {@code body}). */
    public HashMap<String, Object> request;

    /** Response section of the record; the 4-argument constructor stores the page under {@code body}. */
    public HashMap<String, Object> response;

    /**
     * Creates an empty record: {@code request} and {@code response} are empty
     * maps and {@code timestamp} is the current time in seconds.
     */
    public TargetModelCbor() {
        response = new HashMap<String, Object>();
        request = new HashMap<String, Object>();
        timestamp = System.currentTimeMillis() / 1000L;
    }

    /**
     * Creates a record for a page, filling in the request metadata (method
     * {@code GET}, client description and headers), the response body, the
     * URL and the key.
     *
     * @param contactName  stored under {@code request.client.contact.name}
     * @param contactEmail stored under {@code request.client.contact.email}
     * @param url          URL of the page; must not be {@code null}
     * @param body         page content, stored under {@code response.body}
     * @throws NullPointerException if {@code url} is {@code null}
     */
    public TargetModelCbor(String contactName, String contactEmail, URL url, String body) {
        this();

        // Fail before any state is populated, with an explicit message.
        if (url == null) {
            throw new NullPointerException("url must not be null");
        }

        HashMap<String, Object> contact = new HashMap<String, Object>();
        contact.put("name", contactName);
        contact.put("email", contactEmail);

        HashMap<String, Object> client = new HashMap<String, Object>();
        client.put("software", "ACHE");
        client.put("contact", contact);
        client.put("hostname", HOST_NAME);
        client.put("address", HOST_ADDRESS);
        client.put("robots", "classic");

        HashMap<String, Object> headers = new HashMap<String, Object>();
        headers.put("Accept-Language", "en-US,en");

        this.request.put("method", "GET");
        this.request.put("client", client);
        this.request.put("headers", headers);
        this.request.put("body", null);

        this.response.put("body", body);

        this.url = url.toString();
        // timestamp was already set by this(); the key below embeds that same value.
        this.key = computeReverseKey(url);
    }

    /**
     * Parses {@code url} and delegates to {@link #computeReverseKey(URL)}.
     *
     * @param url textual URL
     * @return the key for the parsed URL
     * @throws IllegalArgumentException if {@code url} cannot be parsed as a URL
     */
    public String computeReverseKey(String url) {
        try {
            return this.computeReverseKey(new URL(url));
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException("Invalid URL.", e);
        }
    }

    /**
     * Builds a key of the form
     * {@code <reversed host>_<SHA-1 hex of url.toString()>_<timestamp>}, where
     * the reversed host has its dot-separated labels in reverse order joined
     * by underscores and the timestamp is this instance's current
     * {@link #timestamp} value.
     *
     * <p>When the URL has no host, the first component is the literal text
     * {@code "null"}, because the reversed host is {@code null} and is
     * concatenated as a string.
     *
     * @param url URL to derive the key from
     * @return the computed key
     */
    public String computeReverseKey(URL url) {
        String urlSha1Hash = DigestUtils.sha1Hex(url.toString());
        String reverseDomain = reverseDomain(url.getHost());
        return reverseDomain + "_" + urlSha1Hash + "_" + timestamp;
    }

    /**
     * Reverses the dot-separated labels of a host name and joins them with
     * underscores, e.g. {@code "www.example.com"} becomes
     * {@code "com_example_www"}.
     *
     * @param domain host name; may be {@code null} or empty
     * @return the reversed form, or {@code null} if {@code domain} is
     *         {@code null}, empty, or splits into no labels (e.g. {@code "."})
     */
    private static String reverseDomain(String domain) {
        if (domain == null || domain.isEmpty()) {
            return null;
        }
        String[] hostParts = domain.split("\\.");
        if (hostParts.length == 0) {
            // split() drops trailing empty strings, so a dots-only input yields no parts.
            return null;
        }
        StringBuilder reversed = new StringBuilder();
        reversed.append(hostParts[hostParts.length - 1]);
        for (int i = hostParts.length - 2; i >= 0; i--) {
            reversed.append('_');
            reversed.append(hostParts[i]);
        }
        return reversed.toString();
    }
}