package com.sikulix.htmlparse;
import net.htmlparser.jericho.*;
import java.util.*;
import java.io.*;
import java.net.*;
/**
 * Small command line scraper built on the Jericho HTML parser.
 *
 * <p>The first command line argument selects the task:
 * <ul>
 *   <li>{@code tess} - scan the Tesseract download list and print the
 *       3.02 language packages found there,</li>
 *   <li>{@code lp...} (anything starting with "lp") - list the Launchpad bugs
 *       matched by {@link #lpURL}; any additional argument switches from the
 *       1.1.0 to the 1.2.0 milestone id,</li>
 *   <li>{@code dump <url>} - print every element of the page at the URL.</li>
 * </ul>
 * Without a usable task "Nothing to do" is printed and the exit status is 1.
 */
public class App {

  static boolean debug = false;
  /** URL prefix that is put in front of the scraped ids / file names when printing links. */
  static String start;
  static String sourceUrlString;
  /** Marker value of {@link #todo} meaning that no task was requested. */
  final static String notodo = "NADA";
  static String todo = notodo;
  /**
   * Launchpad search URL template. {@code ##version##} is replaced by a
   * milestone id, {@code ##start##} by the paging parameters.
   * The paging placeholder has to be a query parameter ({@code &##start##});
   * written as {@code #start##} it would never be replaced, so the same first
   * page would be fetched over and over.
   */
  static String lpURL =
      "https://bugs.launchpad.net/sikuli/+bugs?field.searchtext=&orderby=-importance&field.status%3Alist=INPROGRESS&"
      + "field.status%3Alist=FIXCOMMITTED&assignee_option=any&field.assignee=&field.bug_reporter=&field.bug_commenter=&"
      + "field.subscriber=&field.structural_subscriber=&field.milestone%3Alist=##version##&field.tag=&field.tags_combinator=ANY&"
      + "field.has_cve.used=&field.omit_dupes.used=&field.omit_dupes=on&field.affects_me.used=&field.has_patch.used=&"
      + "field.has_branches.used=&field.has_no_branches.used=&field.has_blueprints.used=&field.has_no_blueprints.used=&"
      + "search=Search&orderby=-id&##start##";
  /** Milestone id used for version 1.1.0. */
  static String lp110 = "59266";
  /** Milestone id used for version 1.2.0. */
  static String lp120 = "63602";
  /**
   * Maps the first letter of an importance text to a one digit sort rank.
   * NOTE(review): the letters presumably stand for Undecided, Wishlist, Low,
   * Medium, High and Critical - confirm against the Launchpad page.
   */
  static Map<String, String> imps = new HashMap<String, String>();
  /** Command line arguments that have not been consumed yet. */
  static List<String> options = new ArrayList<String>();
  /** True once {@link #options} has been filled from the command line. */
  static boolean isOptionsValid = false;

  /** Width the bug numbers are zero-padded to, so that they sort as strings. */
  private static final int BUG_NUMBER_WIDTH = 7;
  /** Placeholder letter for a status or importance that could not be read. */
  private static final String UNKNOWN = "?";

  static {
    imps.put("U", "0");
    imps.put("W", "1");
    imps.put("L", "2");
    imps.put("M", "3");
    imps.put("H", "4");
    imps.put("C", "5");
  }

  /** Prints a formatted line to standard out. */
  private static void p(String msg, Object... args) {
    System.out.println(String.format(msg, args));
  }

  /**
   * Returns the next unconsumed command line argument.
   *
   * @param args the command line; only used by the first call that passes a
   *             non-empty array, later calls read from the stored list
   * @return the next argument, {@code null} when all arguments are consumed,
   *         or {@link #notodo} when no arguments were ever supplied
   */
  private static String nextArg(String[] args) {
    if (!isOptionsValid) {
      if (args.length == 0) {
        return notodo;
      }
      options.addAll(Arrays.asList(args));
      isOptionsValid = true;
    }
    if (options.isEmpty()) {
      return null;
    }
    return options.remove(0);
  }

  /** Returns the next unconsumed argument, see {@link #nextArg(String[])}. */
  private static String nextArg() {
    return nextArg(new String[]{});
  }

  /** Returns the next unconsumed argument or {@code preset} when there is none. */
  private static String nextArg(String preset) {
    String next = nextArg();
    return next == null ? preset : next;
  }

  /**
   * Entry point: runs the task named by the first argument.
   * Exits with status 0 after a task was run and with status 1 (after
   * printing "Nothing to do") when no task could be run.
   *
   * @param args task name followed by task specific arguments
   * @throws Exception on malformed URLs or I/O problems while loading a page
   */
  public static void main(String[] args) throws Exception {
    todo = nextArg(args);
    boolean handled = false;
    if ("tess".equals(todo)) {
      start = "http://tesseract-ocr.googlecode.com/files/";
      sourceUrlString = "http://code.google.com/p/tesseract-ocr/downloads/list";
      Source source = new Source(new URL(sourceUrlString));
      scanSegmentsTess(source.getAllElements(HTMLElementName.TD));
      handled = true;
    }
    if (todo.startsWith("lp")) {
      listLaunchpadBugs();
      handled = true;
    }
    if ("dump".equals(todo)) {
      String url = nextArg();
      // without a URL there is nothing to dump; do not try to open null
      if (url != null) {
        displaySegments(new Source(new URL(url)));
        handled = true;
      }
    }
    if (!handled) {
      todo = notodo;
      p("Nothing to do");
      System.exit(1);
    }
    System.exit(0);
  }

  /**
   * Loads all result pages of the Launchpad search and prints the bugs found,
   * sorted by descending key (see {@link #scanSegmentsLP}).
   */
  private static void listLaunchpadBugs() throws IOException {
    start = "https://bugs.launchpad.net/sikuli/+bug/";
    Map<String, String> bugs = new HashMap<String, String>();
    String vers = lp110;
    String vfor = "1.1.0";
    // any further argument selects the 1.2.0 milestone
    if (null != nextArg()) {
      vers = lp120;
      vfor = "1.2.0";
    }
    String url = lpURL.replace("##version##", vers);
    int first = 0;
    int count = 1;
    String from = "start=0";
    // page through the result list until a page yields no more rows
    while (count > 0) {
      Source source = new Source(new URL(url.replace("##start##", from)));
      count = scanSegmentsLP(source, first, bugs);
      if (count > 0) {
        first += count;
        from = String.format("memo=%d&start=%d", first, first);
      }
    }
    p("*** %s has entries: %d", vfor, first);
    List<String> keys = new ArrayList<String>(bugs.keySet());
    Collections.sort(keys, Collections.reverseOrder());
    for (String key : keys) {
      String val = bugs.get(key);
      // key = 2 sort characters + bug number, value = status + importance + title
      String bnum = key.substring(2);
      String flags = val.substring(0, 2);
      p("(%s - %s) %s", bnum, flags, val.substring(2));
      p("link: `%s - %s <%s>`_", bnum, flags, start + bnum);
    }
  }

  /** Prints every element of the document. */
  private static void displaySegments(Source source) {
    for (Segment segment : source.getAllElements()) {
      displaySegment(segment);
    }
  }

  /** Prints every element of the document that has the given element name. */
  private static void displaySegments(Source source, String elem) {
    for (Segment segment : source.getAllElements(elem)) {
      displaySegment(segment);
    }
  }

  /** Prints a separator line, the debug info and the source text of a segment. */
  private static void displaySegment(Segment segment) {
    p("-------------------------------------------------------------------------------");
    p("%s", segment.getDebugInfo());
    p("%s", segment.toString());
  }

  /**
   * Scans table cells of the Tesseract download list and prints one line
   * {@code lang = description (link)} for every cell whose class starts with
   * "vt col_1" and whose text contains "language" and ends with "3.02".
   * Cells that do not have the expected link layout are skipped.
   *
   * @param segments the cells to inspect
   */
  private static void scanSegmentsTess(List<? extends Segment> segments) {
    for (Segment segment : segments) {
      if (debug) {
        displaySegment(segment);
        p("-------------------------------------------------------------------------------");
      }
      Element cell = segment.getFirstElement();
      String cls = cell == null ? null : cell.getAttributeValue("class");
      if (cls == null || !cls.startsWith("vt col_1")) {
        continue;
      }
      String text = segment.getTextExtractor().toString();
      if (!text.endsWith("3.02") || !text.contains("language")) {
        continue;
      }
      String description = text.split(" language")[0];
      List<Element> anchors = segment.getAllElements(HTMLElementName.A);
      if (anchors.isEmpty()) {
        continue;
      }
      String file = firstQueryValue(anchors.get(0).getAttributeValue("href"));
      if (file == null) {
        continue;
      }
      // the language code is the third last dot separated part of the file name
      String[] parts = file.split("\\.");
      if (parts.length < 3) {
        continue;
      }
      String lang = parts[parts.length - 3];
      p("%s = %s (%s)", lang, description, start + file);
    }
  }

  /**
   * Returns the value of the first query parameter of a link.
   *
   * @param href the link, may be {@code null}
   * @return the text after the first "=" of the first parameter following "?",
   *         or {@code null} when the link has no such parameter
   */
  private static String firstQueryValue(String href) {
    if (href == null) {
      return null;
    }
    String[] urlParts = href.split("\\?");
    if (urlParts.length < 2) {
      return null;
    }
    String[] keyValue = urlParts[1].split("&")[0].split("=");
    return keyValue.length < 2 ? null : keyValue[1];
  }

  /**
   * Collects the bugs listed on one Launchpad result page.
   *
   * <p>Each bug is stored as {@code key -> status + importance + title}, where
   * status and importance are the first letter of the respective text (or "?"
   * when missing). The key is built from "2" for status letter "F" and "1"
   * otherwise, the importance rank from {@link #imps} ("0" when unknown) and
   * the zero-padded bug number, so that sorting the keys in reverse order
   * lists status "F" first, then higher importance, then higher bug numbers.
   *
   * <p>Status and importance must precede the title inside a row, because the
   * bug is recorded as soon as its title element is seen.
   *
   * @param source the loaded result page
   * @param first  number of rows seen on earlier pages (currently not used)
   * @param bugs   receives the bugs found
   * @return number of bugs found on this page
   */
  private static int scanSegmentsLP(Source source, int first, Map<String, String> bugs) {
    int count = 0;
    for (Segment segment : source.getAllElementsByClass("buglisting-row")) {
      if (debug) {
        displaySegment(segment);
        p("-------------------------------------------------------------------------------");
      }
      String bn = "";
      String st = UNKNOWN;
      String im = UNKNOWN;
      for (Element e : segment.getAllElements()) {
        String cls = e.getAttributeValue("class");
        if (cls == null) {
          // elements without a class attribute carry none of the wanted fields
          continue;
        }
        String text = e.getTextExtractor().toString();
        if ("bugnumber".equals(cls)) {
          // drop the leading character (presumably a '#') and pad for sorting
          bn = padBugNumber(text.isEmpty() ? "" : text.substring(1));
        } else if ("bugtitle".equals(cls)) {
          if (!text.isEmpty()) {
            count++;
            String rank = imps.get(im);
            if (rank == null) {
              rank = "0";
            }
            String key = ("F".equals(st) ? "2" : "1") + rank + bn;
            bugs.put(key, st + im + text);
            bn = "";
          }
        } else if (cls.startsWith("importance")) {
          im = firstLetter(text);
        } else if (cls.startsWith("status")) {
          st = firstLetter(text);
        }
      }
      // in debug mode stop after the first row has been shown
      if (debug) System.exit(1);
    }
    return count;
  }

  /** Returns the first character of {@code text} or "?" for an empty text. */
  private static String firstLetter(String text) {
    return text.isEmpty() ? UNKNOWN : text.substring(0, 1);
  }

  /**
   * Left-pads a bug number with zeros to {@link #BUG_NUMBER_WIDTH} characters.
   * Longer numbers are returned unchanged.
   */
  private static String padBugNumber(String number) {
    StringBuilder padded = new StringBuilder(number);
    while (padded.length() < BUG_NUMBER_WIDTH) {
      padded.insert(0, '0');
    }
    return padded.toString();
  }
}