package com.opensoc.tldextractor;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the suffix ("TLD") and the suffix plus one preceding label ("2LD")
 * from a host name, using a rule list read from a classpath resource.
 *
 * <p>The rule file is read line by line. Blank lines, lines starting with
 * {@code //} and lines starting with {@code !} are skipped; every other line
 * is treated as one suffix rule. A rule of the form {@code *.rest} matches
 * exactly one arbitrary label followed by {@code .rest}. All rules are joined
 * into a single regular expression of the shape
 * {@code [^.]+?(\.rule1|\.rule2|...)$}.
 *
 * <p>NOTE(review): the default resource name and the comment / exception
 * syntax look like the Mozilla Public Suffix List format - confirm. Exception
 * rules ({@code !}) are ignored, not applied.
 */
public class BasicTldExtractor implements Serializable {

    private static final long serialVersionUID = -7440226111118873815L;

    /**
     * Accumulates the regex alternation while rules are added. Set to
     * {@code null} by {@link #compile()} once the pattern has been built.
     */
    private StringBuilder sb = new StringBuilder();

    /** Pattern covering every loaded rule; assigned by {@link #compile()}. */
    private Pattern pattern;

    /**
     * The inputFile: name of the classpath resource holding the rule list.
     */
    private String inputFile = "effective_tld_names.dat";

    /**
     * Creates an extractor from the given classpath resource.
     *
     * @param filePath resource name, resolved through this class's class loader
     * @throws IllegalStateException if the resource is missing or cannot be read
     */
    public BasicTldExtractor(String filePath) {
        this.inputFile = filePath;
        this.init();
    }

    /**
     * Creates an extractor from the default resource
     * {@code effective_tld_names.dat}.
     *
     * @throws IllegalStateException if the resource is missing or cannot be read
     */
    public BasicTldExtractor() {
        this.init();
    }

    /**
     * Loads the rule resource, registers every rule (longest first) and
     * compiles the resulting pattern.
     */
    private void init() {
        InputStream in = getClass().getClassLoader().getResourceAsStream(inputFile);
        if (in == null) {
            // Previously this surfaced as a bare NullPointerException from InputStreamReader.
            throw new IllegalStateException(
                    "TLD rule resource not found on classpath: " + inputFile);
        }
        try {
            // Decode explicitly as UTF-8 so non-ASCII rules do not depend on the platform charset.
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            List<String> terms = new ArrayList<String>();
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0 || line.startsWith("//") || line.startsWith("!")) {
                    continue;
                }
                terms.add(line);
            }
            // Longest rules first, so they come first in the regex alternation.
            Collections.sort(terms, new StringLengthComparator());
            for (String term : terms) {
                add(term);
            }
            compile();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } finally {
            // Always release the stream, including when reading or compiling fails.
            try {
                in.close();
            } catch (IOException ignored) {
                // Reading has already finished or failed; a close failure adds nothing actionable.
            }
        }
    }

    /**
     * Appends one rule to the alternation being built.
     *
     * <p>A rule starting with {@code *.} is translated so that the wildcard
     * stands for exactly one label ({@code [^.]+}); any other rule text is
     * matched literally. Every alternative starts with an escaped dot, so the
     * captured suffix always begins with {@code .}.
     *
     * @param s the rule as it appears in the rule file, e.g. {@code co.uk}
     *          or {@code *.ck}
     */
    protected void add(String s) {
        sb.append("\\.");
        if (s.startsWith("*.")) {
            // The wildcard check must happen before any escaping or prefixing;
            // otherwise "*" ends up as a regex quantifier on the leading dot.
            sb.append("[^.]+\\.").append(Pattern.quote(s.substring(2)));
        } else {
            sb.append(Pattern.quote(s));
        }
        sb.append('|');
    }

    /**
     * Builds the final pattern from the rules added so far and discards the
     * builder. After this call {@link #getSb()} returns {@code null} until a
     * new builder is supplied through {@link #setSb(StringBuilder)}.
     */
    public void compile() {
        if (sb.length() > 0) {
            // Drop the trailing '|' left behind by the last add().
            sb.deleteCharAt(sb.length() - 1);
        }
        sb.insert(0, "[^.]+?(");
        sb.append(")$");
        pattern = Pattern.compile(sb.toString());
        sb = null;
    }

    /**
     * Returns the label directly preceding the matched suffix together with
     * that suffix, e.g. {@code example.co.uk} for {@code www.example.co.uk}
     * when {@code co.uk} is a rule.
     *
     * @param host the host name to inspect; may be {@code null}
     * @return the matched text, or {@code null} if {@code host} is
     *         {@code null} or no rule matches
     */
    public String extract2LD(String host) {
        return match(host, 0);
    }

    /**
     * Returns the matched suffix including its leading dot, e.g.
     * {@code .co.uk} for {@code www.example.co.uk} when {@code co.uk} is a rule.
     *
     * @param host the host name to inspect; may be {@code null}
     * @return the matched suffix, or {@code null} if {@code host} is
     *         {@code null} or no rule matches
     */
    public String extractTLD(String host) {
        return match(host, 1);
    }

    /** Runs the compiled pattern against {@code host} and returns the requested group, or null. */
    private String match(String host, int group) {
        if (host == null) {
            return null;
        }
        Matcher m = pattern.matcher(host);
        return m.find() ? m.group(group) : null;
    }

    /**
     * Orders strings by descending length; strings of equal length compare
     * as equal.
     */
    public static class StringLengthComparator implements Comparator<String> {
        @Override
        public int compare(String s1, String s2) {
            if (s1.length() > s2.length()) {
                return -1;
            }
            if (s1.length() < s2.length()) {
                return 1;
            }
            return 0;
        }
    }

    /**
     * Returns the sb.
     * @return the sb; {@code null} once {@link #compile()} has run.
     */
    public StringBuilder getSb() {
        return sb;
    }

    /**
     * Sets the sb.
     * @param sb the sb.
     */
    public void setSb(StringBuilder sb) {
        this.sb = sb;
    }

    /**
     * Returns the inputFile.
     * @return the inputFile.
     */
    public String getInputFile() {
        return inputFile;
    }

    /**
     * Sets the inputFile. This only stores the name; the rules are not
     * reloaded.
     * @param inputFile the inputFile.
     */
    public void setInputFile(String inputFile) {
        this.inputFile = inputFile;
    }
}