package doser.tools;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;
/**
 * Command-line utility that reads a triple file line by line and rewrites the
 * IRIs in subject and object position using
 * {@link #dbpediaEncoding(String)}.
 *
 * <p>Each input line is expected to look like
 * {@code <subject> <predicate> <object> .}; the predicate is copied through
 * unchanged. The class only has static members and cannot be instantiated.
 */
public final class NTToDbPediaUrlEncoding {

    /** Characters that {@link #dbpediaEncoding(String)} copies through unchanged. */
    private static final String PASS_THROUGH = "!$&'()*+,-/:;=@_~";

    /** Separator between the terms of a line: a run of spaces and/or tabs. */
    private static final Pattern SEPARATOR = Pattern.compile("[ \\t]+");

    /** Not instantiable: static utility class. */
    private NTToDbPediaUrlEncoding() {
    }

    /**
     * Percent-encodes a string character by character.
     *
     * <p>The characters {@code ! $ & ' ( ) * + , - / : ; = @ _ ~} are copied
     * verbatim. Every other character is passed through
     * {@link URLEncoder#encode(String, String)} with UTF-8, which leaves ASCII
     * letters, digits and {@code .} untouched, turns a space into {@code +}
     * and turns everything else into {@code %XX} byte sequences.
     *
     * <p>The input is walked by Unicode code point, not by UTF-16
     * {@code char}, so a character outside the Basic Multilingual Plane is
     * encoded as its four UTF-8 bytes instead of two {@code %3F} placeholders.
     *
     * <p>NOTE(review): because a literal {@code +} is passed through and a
     * space is also encoded as {@code +}, the two are indistinguishable in the
     * result. This is kept for backward compatibility.
     *
     * @param url the text to encode; must not be {@code null}
     * @return the encoded text, empty if {@code url} is empty
     * @throws NullPointerException if {@code url} is {@code null}
     * @throws IllegalStateException if the JVM does not support UTF-8
     */
    public static String dbpediaEncoding(final String url) {
        final StringBuilder encoded = new StringBuilder(url.length());
        int index = 0;
        while (index < url.length()) {
            final int codePoint = url.codePointAt(index);
            final int width = Character.charCount(codePoint);
            if (width == 1 && PASS_THROUGH.indexOf(codePoint) >= 0) {
                encoded.append((char) codePoint);
            } else {
                // Hand URLEncoder the whole code point so that surrogate
                // pairs are encoded together.
                final String character = url.substring(index, index + width);
                try {
                    encoded.append(URLEncoder.encode(character, "UTF-8"));
                } catch (final UnsupportedEncodingException e) {
                    // Failing loudly beats silently dropping the character.
                    throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
                }
            }
            index += width;
        }
        return encoded.toString();
    }

    /**
     * Converts the file named by {@code args[0]} and writes the result to the
     * file named by {@code args[1]}. Both files are read and written as UTF-8.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped; lines with
     * fewer than three terms are skipped with a warning. Each written line has
     * the form {@code subject predicate object}.
     *
     * <p>NOTE(review): as before this change, the trailing {@code " ."}
     * terminator is not written to the output - confirm that consumers of the
     * output expect that.
     *
     * @param args input path followed by output path
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException if the input cannot be read or the output cannot be
     *         written (including a missing input file)
     */
    public static void main(final String[] args) throws IOException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: NTToDbPediaUrlEncoding <input file> <output file>");
        }
        // The reader is opened first so that a missing input file does not
        // create or truncate the output file. try-with-resources closes both
        // streams on every exit path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                        new FileInputStream(args[0]), StandardCharsets.UTF_8));
                Writer writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(args[1]), StandardCharsets.UTF_8))) {
            convert(reader, writer);
        }
    }

    /** Copies every convertible line from {@code reader} to {@code writer}. */
    private static void convert(final BufferedReader reader, final Writer writer)
            throws IOException {
        final String lineSeparator = System.getProperty("line.separator");
        int lineNumber = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            lineNumber++;
            final String trimmed = line.trim();
            if (trimmed.isEmpty() || trimmed.charAt(0) == '#') {
                continue;
            }
            final String converted = convertLine(trimmed);
            if (converted == null) {
                Logger.getRootLogger().warn(
                        "Skipping malformed line " + lineNumber + ": " + line);
                continue;
            }
            writer.write(converted);
            writer.write(lineSeparator);
        }
    }

    /**
     * Converts one trimmed, non-empty line, or returns {@code null} when the
     * line has fewer than three terms.
     */
    private static String convertLine(final String line) {
        final String[] parts = SEPARATOR.split(line, 3);
        if (parts.length < 3) {
            return null;
        }
        final String object = extractObject(parts[2]);
        return encodeTerm(parts[0]) + " " + parts[1] + " " + encodeTerm(object);
    }

    /**
     * Returns the object term at the start of {@code rest}, dropping whatever
     * follows it (such as the statement terminator).
     */
    private static String extractObject(final String rest) {
        if (rest.startsWith("<")) {
            final int close = rest.indexOf('>');
            if (close >= 0) {
                return rest.substring(0, close + 1);
            }
        } else if (rest.startsWith("\"")) {
            final int close = closingQuote(rest);
            if (close >= 0) {
                // Keep a directly attached suffix such as @en or ^^<type>.
                int end = close + 1;
                while (end < rest.length() && !isSeparator(rest.charAt(end))) {
                    end++;
                }
                return rest.substring(0, end);
            }
        }
        return firstToken(rest);
    }

    /** Index of the first unescaped {@code "} after position 0, or -1. */
    private static int closingQuote(final String text) {
        for (int i = 1; i < text.length(); i++) {
            final char c = text.charAt(i);
            if (c == '\\') {
                i++; // skip the escaped character
            } else if (c == '"') {
                return i;
            }
        }
        return -1;
    }

    /** Returns the prefix of {@code text} up to the first space or tab. */
    private static String firstToken(final String text) {
        int end = 0;
        while (end < text.length() && !isSeparator(text.charAt(end))) {
            end++;
        }
        return text.substring(0, end);
    }

    /** Tells whether {@code c} is one of the characters matched by {@link #SEPARATOR}. */
    private static boolean isSeparator(final char c) {
        return c == ' ' || c == '\t';
    }

    /**
     * Re-encodes a term of the form {@code <...>}: the content is unescaped
     * with {@link StringEscapeUtils#unescapeJava(String)} and then encoded
     * with {@link #dbpediaEncoding(String)}. Any other term is returned
     * unchanged.
     */
    private static String encodeTerm(final String term) {
        final int last = term.length() - 1;
        if (last >= 1 && term.charAt(0) == '<' && term.charAt(last) == '>') {
            final String iri = StringEscapeUtils.unescapeJava(term.substring(1, last));
            return "<" + dbpediaEncoding(iri) + ">";
        }
        return term;
    }
}