package wikilib;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Background worker that downloads wiki pages and converts their HTML into
 * the application's {@code $tag[args]{text}} markup.
 *
 * <p>The thread starts itself from the constructor. Callers hand work over with
 * {@link #search(Request)}, which may be called from any thread. For every
 * request the worker reads {@code req.url}, fills {@code req.result} (and
 * {@code req.title} for article pages) and finally calls
 * {@code req.complete()} on the worker thread. When the download or the
 * conversion fails, {@code req.result} holds the exception's
 * {@code toString()} instead.
 *
 * <p>Interrupting the thread ends the worker loop.
 */
public class WikiLib extends Thread {

    private static final String SEARCH_RESULTS_OPEN = "<div class='searchresults'>";
    private static final String SEARCH_LIST_OPEN = "<ul class='mw-search-results'>";
    private static final String PRINT_FOOTER_OPEN = "<div class=\"printfooter\">";
    private static final String CONTENT_START = "<!-- start content -->";
    private static final String SORT_KEY_OPEN = "<span class=\"smwsortkey\">";

    /** Bullet used for unordered list items; written as an escape so the file encoding cannot garble it. */
    private static final String BULLET = "\u2022";

    /**
     * Entity name (without the leading ampersand and trailing semicolon) and
     * its replacement. The full entity is assembled at runtime so that a tool
     * which HTML-unescapes this source file cannot silently turn the
     * replacements into no-ops. The ampersand entity must stay last, otherwise
     * an escaped entity in the page text would be decoded twice.
     */
    private static final String[][] ENTITIES = {
        {"gt", ">"},
        {"lt", "<"},
        {"nbsp", " "},
        {"#160", " "},
        {"#39", "'"},
        {"#039", "'"},
        {"apos", "'"},
        {"quot", "\""},
        {"amp", "&"},
    };

    private static final Pattern REMOVE_HTML = Pattern.compile("\\<.*?\\>");
    private static final Pattern FIND_KEYWORDS =
            Pattern.compile("\\[\\[([\\sa-zA-Z0-9]+)(\\|\\s*([\\sa-zA-Z0-9]+))?\\]\\]");
    private static final Pattern FIND_SPACES = Pattern.compile("\\s");
    // Reluctant quantifiers: the whole page is a single line, so greedy ones
    // would run past the first heading and lose titles that contain tags.
    private static final Pattern FIND_TITLE = Pattern.compile("<h1[^>]*>(.*?)</h1>");
    private static final Pattern LINE_BREAK = Pattern.compile("<br\\s*/?>");
    // Look-ahead keeps <thead> and similar from being rewritten into bogus cells.
    private static final Pattern HEADER_CELL_OPEN = Pattern.compile("<th(?=[\\s>/])");

    /** Pending requests; the queue provides the hand-over between callers and the worker. */
    private final BlockingQueue<Request> requests = new LinkedBlockingQueue<Request>();

    /** Creates the worker and starts it immediately. */
    public WikiLib() {
        super("WikiLib Thread");
        start();
    }

    /**
     * Worker loop: blocks until a request is queued, processes it, repeats.
     * Returns when the thread is interrupted.
     */
    @Override
    public void run() {
        while (true) {
            Request req;
            try {
                req = requests.take();
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever inspects the thread afterwards.
                Thread.currentThread().interrupt();
                return;
            }
            try {
                searchPage(req);
            } catch (RuntimeException e) {
                // Only req.complete() can get here; report it, but keep serving
                // the remaining requests instead of letting the worker die.
                getUncaughtExceptionHandler().uncaughtException(this, e);
            }
        }
    }

    /**
     * Queues a request for processing on the worker thread.
     *
     * @param req the request to process; must not be {@code null}
     * @throws NullPointerException if {@code req} is {@code null}
     */
    public void search(Request req) {
        if (req == null) {
            throw new NullPointerException("req");
        }
        requests.add(req);
    }

    /**
     * Downloads {@code req.url}, converts the page and completes the request.
     * {@code req.complete()} is called exactly once, whether or not the
     * download and conversion succeeded.
     */
    private void searchPage(Request req) {
        try {
            String content = download(req);
            if (content.indexOf(SEARCH_RESULTS_OPEN) != -1) {
                req.result = formatSearchResults(content);
            } else {
                req.title = extractTitle(content);
                req.result = formatPage(content);
            }
        } catch (IOException e) {
            req.result = e.toString();
        } catch (RuntimeException e) {
            // Unexpected page structure must not leave the request hanging.
            req.result = e.toString();
        }
        req.complete();
    }

    /**
     * Reads the whole page behind {@code req.url} into one string. Line
     * terminators are dropped, so the result is a single line.
     */
    private String download(Request req) throws IOException {
        // NOTE(review): decodes as UTF-8 on the assumption that the wiki is a
        // MediaWiki installation (which serves UTF-8) - confirm for other servers.
        BufferedReader in = new BufferedReader(
                new InputStreamReader(req.url.openStream(), "UTF-8"));
        try {
            StringBuilder content = new StringBuilder();
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                content.append(inputLine);
            }
            return content.toString();
        } finally {
            in.close();
        }
    }

    /** Returns the text of the first {@code h1} heading, or an empty string when there is none. */
    private String extractTitle(String content) {
        Matcher ma = FIND_TITLE.matcher(content);
        if (!ma.find()) {
            return "";
        }
        return formatSymbols(stripTags(ma.group(1)));
    }

    /**
     * Converts an article page. Only the part between the content-start marker
     * and the print footer is processed; a missing marker falls back to the
     * start or end of the page respectively.
     */
    private String formatPage(String content) {
        int start = content.indexOf(CONTENT_START);
        if (start < 0) {
            start = 0;
        }
        int end = content.indexOf(PRINT_FOOTER_OPEN, start);
        if (end < 0) {
            end = content.length();
        }
        content = content.substring(start, end);

        content = removeElements(content, SORT_KEY_OPEN, "</span>");
        content = formatTables(content);
        content = removeElements(content, "<script", "</script>");
        // Braces are markup delimiters in the output format, so none may survive from the page.
        content = content.replace("{", "").replace("}", "");
        content = LINE_BREAK.matcher(content).replaceAll("\n");
        content = content.replace("</p>", "\n");
        content = wrapElements(content, "<h2>", "</h2>", "\n$size[13]{$b{", "}}\n\n");
        content = wrapElements(content, "<h3>", "</h3>", "\n$b{", "}");
        content = formatLinks(content);
        content = formatLists(content, "<ol>", "</ol>", true);
        content = formatLists(content, "<ul>", "</ul>", false);
        content = stripTags(content);
        return formatSymbols(content);
    }

    /**
     * Decodes the HTML entities for {@code >}, {@code <}, non-breaking space,
     * apostrophe, double quote and ampersand.
     */
    private String formatSymbols(String content) {
        for (int i = 0; i < ENTITIES.length; i++) {
            content = content.replace("&" + ENTITIES[i][0] + ";", ENTITIES[i][1]);
        }
        return content;
    }

    /**
     * Converts a search-result page: one heading per result list, followed by
     * a link line and a description for every hit.
     */
    private String formatSearchResults(String content) {
        int start = content.indexOf(SEARCH_RESULTS_OPEN);
        if (start < 0) {
            start = 0;
        }
        int end = content.indexOf(PRINT_FOOTER_OPEN, start);
        if (end < 0) {
            end = content.length();
        }
        content = content.substring(start, end).replace("{", "").replace("}", "");

        // Section headings, in page order.
        List<String> headings = new ArrayList<String>();
        Element heading = findElement(content, "<h2>", "</h2>", 0);
        while (heading != null) {
            headings.add(stripTags(content.substring(heading.innerStart, heading.innerEnd)));
            heading = findElement(content, "<h2>", "</h2>", heading.end);
        }

        // The n-th result list belongs to the n-th heading.
        StringBuilder buf = new StringBuilder();
        int section = 0;
        Element list = findElement(content, SEARCH_LIST_OPEN, "</ul>", 0);
        while (list != null) {
            if (section < headings.size()) {
                buf.append("$size[14]{$b{").append(headings.get(section)).append(":}}\n\n");
            }
            appendSearchItems(buf, content.substring(list.innerStart, list.innerEnd));
            section++;
            list = findElement(content, SEARCH_LIST_OPEN, "</ul>", list.end);
        }
        return buf.toString();
    }

    /** Appends the link line and description of every {@code li} item in {@code list} to {@code buf}. */
    private void appendSearchItems(StringBuilder buf, String list) {
        Element item = findElement(list, "<li>", "</li>", 0);
        while (item != null) {
            String html = list.substring(item.innerStart, item.innerEnd);

            Element anchor = findElement(html, "<a", "</a>", 0);
            if (anchor != null) {
                String href = extractHref(html.substring(anchor.openStart, anchor.innerStart));
                String title = stripTags(html.substring(anchor.innerStart, anchor.innerEnd));
                buf.append("$b{")
                   .append(formatLink(href == null ? null : formatSymbols(href), formatSymbols(title)))
                   .append("}\n");
            }

            String desc = "";
            Element div = findElement(html, "<div", "</div>", 0);
            if (div != null) {
                desc = stripTags(html.substring(div.innerStart, div.innerEnd));
                desc = formatSymbols(formatKeywords(desc));
            }
            buf.append(desc).append("\n\n");

            item = findElement(list, "<li>", "</li>", item.end);
        }
    }

    /**
     * Replaces every table with a text rendering ({@code " | "} before each
     * cell, {@code " |"} at the end of each row). Tables whose opening tag
     * contains {@code toc} are dropped.
     */
    private String formatTables(String content) {
        StringBuilder buf = new StringBuilder(content.length());
        int pos = 0;
        Element table;
        while ((table = findElement(content, "<table", "</table>", pos)) != null) {
            buf.append(content, pos, table.openStart);
            String openTag = content.substring(table.openStart, table.innerStart);
            if (openTag.indexOf("toc") < 0) {
                // Header cells are rendered exactly like data cells.
                String rows = content.substring(table.innerStart, table.innerEnd);
                rows = HEADER_CELL_OPEN.matcher(rows).replaceAll("<td").replace("</th>", "</td>");
                buf.append(formatTR(rows)).append("\n\n");
            }
            pos = table.end;
        }
        return buf.append(content, pos, content.length()).toString();
    }

    /** Renders each {@code tr} row on its own line; text outside rows is discarded. */
    private String formatTR(String content) {
        StringBuilder buf = new StringBuilder();
        int pos = 0;
        Element row;
        while ((row = findElement(content, "<tr", "</tr>", pos)) != null) {
            buf.append('\n')
               .append(formatTD(content.substring(row.innerStart, row.innerEnd)))
               .append(" |");
            pos = row.end;
        }
        return buf.toString();
    }

    /** Renders each {@code td} cell prefixed with {@code " | "}; text outside cells is discarded. */
    private String formatTD(String content) {
        StringBuilder buf = new StringBuilder();
        int pos = 0;
        Element cell;
        while ((cell = findElement(content, "<td", "</td>", pos)) != null) {
            buf.append(" | ").append(content, cell.innerStart, cell.innerEnd);
            pos = cell.end;
        }
        return buf.toString();
    }

    /**
     * Replaces every anchor with link markup. Anchors without an
     * {@code href} attribute (for example named anchors) are reduced to their text.
     */
    private String formatLinks(String content) {
        StringBuilder buf = new StringBuilder(content.length());
        int pos = 0;
        Element anchor;
        while ((anchor = findElement(content, "<a", "</a>", pos)) != null) {
            buf.append(content, pos, anchor.openStart);
            String href = extractHref(content.substring(anchor.openStart, anchor.innerStart));
            String title = stripTags(content.substring(anchor.innerStart, anchor.innerEnd));
            buf.append(formatLink(href, title));
            pos = anchor.end;
        }
        return buf.append(content, pos, content.length()).toString();
    }

    /**
     * Replaces every list delimited by {@code open}/{@code close} with a line
     * break followed by its formatted items.
     *
     * @param ordered {@code true} to number the items, {@code false} for bullets
     */
    private String formatLists(String content, String open, String close, boolean ordered) {
        StringBuilder buf = new StringBuilder(content.length());
        int pos = 0;
        Element list;
        while ((list = findElement(content, open, close, pos)) != null) {
            buf.append(content, pos, list.openStart);
            buf.append('\n')
               .append(formatLI(content.substring(list.innerStart, list.innerEnd), ordered));
            pos = list.end;
        }
        return buf.append(content, pos, content.length()).toString();
    }

    /**
     * Turns every {@code li} item into one line starting with a bold number
     * (ordered) or bullet (unordered). Text between items is kept.
     */
    private String formatLI(String content, boolean ordered) {
        StringBuilder buf = new StringBuilder(content.length());
        int pos = 0;
        int number = 0;
        Element item;
        while ((item = findElement(content, "<li>", "</li>", pos)) != null) {
            number++;
            buf.append(content, pos, item.openStart);
            buf.append("$b{").append(ordered ? number + "." : BULLET).append("} ")
               .append(content, item.innerStart, item.innerEnd)
               .append('\n');
            pos = item.end;
        }
        return buf.append(content, pos, content.length()).toString();
    }

    /** Converts {@code [[Page]]} and {@code [[Page|label]]} keywords into links to {@code /wiki/Page}. */
    private String formatKeywords(String text) {
        Matcher ma = FIND_KEYWORDS.matcher(text);
        // StringBuffer because Matcher.appendReplacement accepts nothing else before Java 9.
        StringBuffer buffer = new StringBuffer(text.length());
        while (ma.find()) {
            String href = "/wiki/" + FIND_SPACES.matcher(ma.group(1)).replaceAll("_");
            String title = (ma.group(3) == null) ? ma.group(1) : ma.group(3);
            ma.appendReplacement(buffer, Matcher.quoteReplacement(formatLink(href, title)));
        }
        ma.appendTail(buffer);
        return buffer.toString();
    }

    /** Removes every element delimited by {@code open}/{@code close}, including its content. */
    private static String removeElements(String content, String open, String close) {
        StringBuilder buf = new StringBuilder(content.length());
        int pos = 0;
        Element element;
        while ((element = findElement(content, open, close, pos)) != null) {
            buf.append(content, pos, element.openStart);
            pos = element.end;
        }
        return buf.append(content, pos, content.length()).toString();
    }

    /** Replaces every {@code open}/{@code close} element with {@code prefix + inner text + suffix}. */
    private static String wrapElements(String content, String open, String close,
                                       String prefix, String suffix) {
        StringBuilder buf = new StringBuilder(content.length());
        int pos = 0;
        Element element;
        while ((element = findElement(content, open, close, pos)) != null) {
            buf.append(content, pos, element.openStart);
            buf.append(prefix).append(content, element.innerStart, element.innerEnd).append(suffix);
            pos = element.end;
        }
        return buf.append(content, pos, content.length()).toString();
    }

    /** Removes everything that looks like an HTML tag. */
    private static String stripTags(String html) {
        return REMOVE_HTML.matcher(html).replaceAll("");
    }

    /** Builds the underlined blue link markup, or returns just {@code title} when there is no target. */
    private static String formatLink(String href, String title) {
        if (href == null) {
            return title;
        }
        return "$u{$col[0,0,192]{$a[" + href + "]{" + title + "}}}";
    }

    /** Returns the value of the first double-quoted {@code href} attribute in {@code openTag}, or {@code null}. */
    private static String extractHref(String openTag) {
        String marker = "href=\"";
        int start = openTag.indexOf(marker);
        if (start < 0) {
            return null;
        }
        start += marker.length();
        int end = openTag.indexOf('"', start);
        if (end < 0) {
            return null;
        }
        return openTag.substring(start, end);
    }

    /**
     * Finds the next element at or after {@code from}.
     *
     * @param open  either a complete opening tag ending in {@code >}, or a tag
     *              prefix such as {@code <table} whose attributes are skipped
     * @param close the closing tag; the first occurrence after the opening tag
     *              is used, so nested elements of the same kind are not paired up
     * @return the element's position, or {@code null} when no complete element
     *         remains (callers then keep the rest of the text untouched, which
     *         also guarantees that their loops terminate)
     */
    private static Element findElement(String content, String open, String close, int from) {
        int openStart = indexOfTag(content, open, from);
        if (openStart < 0) {
            return null;
        }
        int innerStart;
        if (open.endsWith(">")) {
            innerStart = openStart + open.length();
        } else {
            int tagEnd = content.indexOf('>', openStart + open.length());
            if (tagEnd < 0) {
                return null;
            }
            innerStart = tagEnd + 1;
        }
        int innerEnd = content.indexOf(close, innerStart);
        if (innerEnd < 0) {
            return null;
        }
        return new Element(openStart, innerStart, innerEnd, innerEnd + close.length());
    }

    /**
     * Like {@link String#indexOf(String, int)}, but a tag prefix (anything not
     * ending in {@code >}) only matches when the tag name ends right there, so
     * that {@code <a} does not match {@code <abbr}.
     */
    private static int indexOfTag(String content, String open, int from) {
        int at = content.indexOf(open, from);
        if (open.endsWith(">")) {
            return at;
        }
        while (at >= 0) {
            int next = at + open.length();
            if (next >= content.length()) {
                return -1;
            }
            char c = content.charAt(next);
            if (c == '>' || c == '/' || Character.isWhitespace(c)) {
                return at;
            }
            at = content.indexOf(open, next);
        }
        return -1;
    }

    /** Position of one element inside a string, as found by {@link #findElement}. */
    private static final class Element {
        /** Index of the first character of the opening tag. */
        final int openStart;
        /** Index just past the opening tag. */
        final int innerStart;
        /** Index of the first character of the closing tag. */
        final int innerEnd;
        /** Index just past the closing tag. */
        final int end;

        Element(int openStart, int innerStart, int innerEnd, int end) {
            this.openStart = openStart;
            this.innerStart = innerStart;
            this.innerEnd = innerEnd;
            this.end = end;
        }
    }
}