package at.tugraz.examreminder.crawler; import java.io.*; import java.net.URLEncoder; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.*; import android.util.Log; import at.tugraz.examreminder.ExamReminderApplication; import at.tugraz.examreminder.core.Course; import at.tugraz.examreminder.core.Exam; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.StatusLine; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; public class TuGrazSearchCrawler implements Crawler { private final static String LOGCAT_TAG = "TuGrazSearchCrawler"; private final static String SEARCH_MACHINE_URI = "http://search.tugraz.at/search"; private HashMap<String, String> SEARCH_MACHINE_URLI_ATTRIBUTES = new HashMap<String, String>() {{ put("q", ""); // Searchstring put("site", "Alle"); put("btnG", "Suchen"); put("client", "tug_portal"); put("output", "xml_no_dtd"); put("sort", "date%3AD%3AL%3Ad1"); put("entqr", "3"); put("entqrm", "0"); put("entsp", "a"); put("oe", "UTF-8"); put("ie", "UTF-8"); put("ud", "1"); put("filter", "1"); put("hl", "de"); // Language }}; private final static SimpleDateFormat SEARCH_MACHINE_RESULTS_DATE_FORMAT = new SimpleDateFormat("dd.MM.yyyy HH:mm"); private final static String tempCoursesSearchDataXmlFilename = "courses.tmp"; private final static String tempExamsSearchDataXmlFilename = "exams.tmp"; private String generateSearchUrl(String searchTerm) throws UnsupportedEncodingException { String searchUrl = ""; searchUrl += SEARCH_MACHINE_URI; SEARCH_MACHINE_URLI_ATTRIBUTES.remove("q"); SEARCH_MACHINE_URLI_ATTRIBUTES.put("q", searchTerm); boolean isFirstAttribute = true; for (Map.Entry<String, String> entry : SEARCH_MACHINE_URLI_ATTRIBUTES.entrySet()) { if (isFirstAttribute) { searchUrl += "?" + entry.getKey() + "=" + URLEncoder.encode(entry.getValue(), "UTF8"); isFirstAttribute = false; } searchUrl += "&" + entry.getKey() + "=" + URLEncoder.encode(entry.getValue(), "UTF8"); } return searchUrl; } private void getResponseXmlAndWriteToFile(String searchTerm, File file) throws IOException{ String searchUrl; try { searchUrl = generateSearchUrl(searchTerm); } catch (UnsupportedEncodingException e) { Log.v(LOGCAT_TAG, "UnsupportedEncodingException"); return; } HttpClient httpClient = new DefaultHttpClient(); HttpResponse httpResponse; try { httpResponse = httpClient.execute(new HttpGet(searchUrl)); StatusLine statusLine = httpResponse.getStatusLine(); if (statusLine.getStatusCode() == HttpStatus.SC_OK) { FileOutputStream fileOutputStream = new FileOutputStream(file); httpResponse.getEntity().writeTo(fileOutputStream); fileOutputStream.close(); } else { httpResponse.getEntity().getContent().close(); throw new IOException(statusLine.getReasonPhrase()); } } catch (ClientProtocolException e) { Log.v(LOGCAT_TAG, e.toString()); } } @Override public List<Course> getCourseList(String searchTerm) { Log.d(LOGCAT_TAG, "- get courses for searchterm "+ searchTerm); File tempFileOnDevice = new File(ExamReminderApplication.getAppContext().getExternalFilesDir(null), tempCoursesSearchDataXmlFilename); List<Course> foundCourse; try { getResponseXmlAndWriteToFile(searchTerm, tempFileOnDevice); foundCourse = getCourseListFromFile(new FileInputStream(tempFileOnDevice)); setExamsFromFile(new FileInputStream(tempFileOnDevice), foundCourse); } catch (IOException e) { Log.v(LOGCAT_TAG, e.toString()); return null; } Log.d(LOGCAT_TAG, "- found "+foundCourse.size()+ " courses for searchterm "+ searchTerm); Collections.sort(foundCourse); return foundCourse; } @Override public SortedSet<Exam> getExams(Course course) { Log.d(LOGCAT_TAG, "- get exams for course "+ course.name + "("+ course.number + ")"); File tempFileOnDevice = new File(ExamReminderApplication.getAppContext().getExternalFilesDir(null), tempExamsSearchDataXmlFilename); SortedSet<Exam> foundExams; try { getResponseXmlAndWriteToFile(course.name, tempFileOnDevice); foundExams = getExamsFromFile(new FileInputStream(tempFileOnDevice), course); } catch (IOException e) { Log.v(LOGCAT_TAG, e.toString()); return null; } Log.d(LOGCAT_TAG, "- found "+ foundExams.size() + " courses for course "+course.number); return foundExams; } public List<Course> getCourseListFromFile(InputStream inputstream) throws IOException { List<Course> foundCourse = new ArrayList<Course>(); Map<String, String> currentModuleMap = new HashMap<String, String>(); DataInputStream in = new DataInputStream(inputstream); InputStreamReader isr = new InputStreamReader(in); BufferedReader br = new BufferedReader(isr); String currentTagValue; String currentTagAttribute; String currentLine; Course currentCourse; while ((currentLine = br.readLine()) != null) { if (currentLine.contains("<MODULE_RESULT>")) { currentModuleMap.clear(); while ((currentLine = br.readLine()) != null) { if (currentLine.contains("</MODULE_RESULT>")) { if (currentModuleMap.containsKey("WEB SERVICE") && (currentModuleMap.get("WEB SERVICE").toString().equals("CBO"))) { currentCourse = new at.tugraz.examreminder.core.Course(); currentCourse.id = currentModuleMap.get("id_c"); currentCourse.name = currentModuleMap.get("courseName"); currentCourse.number = currentModuleMap.get("courseCode"); currentCourse.term = currentModuleMap.get("teachingTerm"); currentCourse.type = currentModuleMap.get("teachingActivityID"); if (currentModuleMap.containsKey("persons_name")) { currentCourse.lecturer = currentModuleMap.get("persons_name"); } else if (currentModuleMap.containsKey("persons_name1")) { currentCourse.lecturer = currentModuleMap.get("persons_name1"); } foundCourse.add(currentCourse); } currentModuleMap.clear(); } if (currentLine.contains("<Field") && currentLine.contains("</Field>")) { currentTagAttribute = currentLine.substring(currentLine.indexOf("=\"") + 2, currentLine.indexOf("\">")); currentTagValue = currentLine.substring(currentLine.indexOf("\">") + 2, currentLine.indexOf("</Field>")); currentModuleMap.put(currentTagAttribute, currentTagValue); } else if (currentLine.contains("<Field>")) { throw new IOException("Format of returned data not recognized!"); } } } } br.close(); isr.close(); in.close(); return foundCourse; } public SortedSet<Exam> getExamsFromFile(InputStream inputstream, Course course) throws IOException{ SortedSet<Exam> foundExams = new TreeSet<Exam>(); Map<String, String> currentModuleMap = new HashMap<String, String>(); DataInputStream in = new DataInputStream(inputstream); InputStreamReader isr = new InputStreamReader(in); BufferedReader br = new BufferedReader(isr); String currentTagValue; String currentTagAttribute; String currentLine; Exam currentExam; String currentCourseId; String currentCourseName; while (((currentLine = br.readLine()) != null)) { if (currentLine.contains("<MODULE_RESULT>")) { currentModuleMap.clear(); while (((currentLine = br.readLine()) != null)) { if (currentLine.contains("</MODULE_RESULT>")) { if (currentModuleMap.containsKey("WEB SERVICE") && (currentModuleMap.get("WEB SERVICE").toString().equals("EBO"))) { currentCourseId = currentModuleMap.get("courseID"); currentCourseName = currentModuleMap.get("courseCode"); if(currentCourseName.equals(course.name) && currentCourseId.equals(course.number)) { currentExam = new Exam(course); try { if(currentModuleMap.containsKey("examStart")) { if(currentModuleMap.containsKey("examLocation")) { currentExam.place = currentModuleMap.get("examLocation").replaceAll(""","\""); } else currentExam.place = ""; currentExam.term = currentModuleMap.get("teachingTerm"); currentExam.lecturer = currentModuleMap.get("lecturer"); currentExam.examinar = currentModuleMap.get("examinerName"); currentExam.participants = Integer.parseInt(currentModuleMap.get("numberOfParticipants")); currentExam.participants_max = Integer.parseInt(currentModuleMap.get("maximumNumberOfParticipants")); currentExam.updated_at = null; GregorianCalendar calendar = new GregorianCalendar(); calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("examStart"))); currentExam.setFrom((GregorianCalendar)calendar.clone()); if(currentModuleMap.containsKey("examEnd")) { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("examStart"))); calendar.add(Calendar.HOUR_OF_DAY, 1); currentExam.setTo((GregorianCalendar)calendar.clone()); } else { calendar.clear(); currentExam.setTo(calendar); } if(currentModuleMap.containsKey("registerDeadline")) { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("registerDeadline"))); currentExam.registerDeadline = (GregorianCalendar)calendar.clone(); } else { calendar.clear(); currentExam.registerDeadline = calendar; } if(currentModuleMap.containsKey("cancelDeadline")) { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("cancelDeadline"))); currentExam.cancelDeadline = (GregorianCalendar)calendar.clone(); } else { calendar.clear(); currentExam.cancelDeadline = calendar; } foundExams.add(currentExam); } } catch (ParseException e) { throw new IOException("Dateformat of returned data not recognized!"); } } } currentModuleMap.clear(); } if (currentLine.contains("<Field") && currentLine.contains("</Field>")) { currentTagAttribute = currentLine.substring(currentLine.indexOf("=\"") + 2, currentLine.indexOf("\">")); currentTagValue = currentLine.substring(currentLine.indexOf("\">") + 2, currentLine.indexOf("</Field>")); currentModuleMap.put(currentTagAttribute, currentTagValue); } else if (currentLine.contains("<Field>")) { throw new IOException("Format of returned data not recognized!"); } } } } br.close(); isr.close(); in.close(); return foundExams; } public void setExamsFromFile(InputStream inputstream, List<Course> courses) throws IOException{ SortedSet<Exam> foundExams = new TreeSet<Exam>(); Map<String, String> currentModuleMap = new HashMap<String, String>(); DataInputStream in = new DataInputStream(inputstream); InputStreamReader isr = new InputStreamReader(in); BufferedReader br = new BufferedReader(isr); String currentTagValue; String currentTagAttribute; String currentLine; Exam currentExam; String currentCourseId; String currentCourseName; while (((currentLine = br.readLine()) != null)) { if (currentLine.contains("<MODULE_RESULT>")) { currentModuleMap.clear(); while (((currentLine = br.readLine()) != null)) { if (currentLine.contains("</MODULE_RESULT>")) { if (currentModuleMap.containsKey("WEB SERVICE") && (currentModuleMap.get("WEB SERVICE").toString().equals("EBO"))) { currentCourseId = currentModuleMap.get("courseID"); currentCourseName = currentModuleMap.get("courseCode"); for(Course courseitem : courses) { if(currentCourseName.equals(courseitem.name) && currentCourseId.equals(courseitem.number)) { currentExam = new Exam(courseitem); try { if(currentModuleMap.containsKey("examStart")) { if(currentModuleMap.containsKey("examLocation")) { currentExam.place = currentModuleMap.get("examLocation").replaceAll(""","\""); } else currentExam.place = ""; currentExam.term = currentModuleMap.get("teachingTerm"); currentExam.lecturer = currentModuleMap.get("lecturer"); currentExam.examinar = currentModuleMap.get("examinerName"); currentExam.participants = Integer.parseInt(currentModuleMap.get("numberOfParticipants")); currentExam.participants_max = Integer.parseInt(currentModuleMap.get("maximumNumberOfParticipants")); currentExam.updated_at = null; GregorianCalendar calendar = new GregorianCalendar(); calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("examStart"))); currentExam.setFrom((GregorianCalendar)calendar.clone()); if(currentModuleMap.containsKey("examEnd")) { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("examEnd"))); currentExam.setTo((GregorianCalendar)calendar.clone()); } else { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("examStart"))); calendar.add(Calendar.HOUR_OF_DAY, 1); currentExam.setTo((GregorianCalendar)calendar.clone()); } if(currentModuleMap.containsKey("registerDeadline")) { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("registerDeadline"))); currentExam.registerDeadline = (GregorianCalendar)calendar.clone(); } else { calendar.clear(); currentExam.registerDeadline = calendar; } if(currentModuleMap.containsKey("cancelDeadline")) { calendar.setTime(SEARCH_MACHINE_RESULTS_DATE_FORMAT.parse(currentModuleMap.get("cancelDeadline"))); currentExam.cancelDeadline = (GregorianCalendar)calendar.clone(); } else { calendar.clear(); currentExam.cancelDeadline = calendar; } courseitem.exams.add(currentExam); Log.v(LOGCAT_TAG, "...add exam "+currentExam.getFromFormated() + " to " + courseitem.name + "(" + courseitem.number + ")"); } } catch (ParseException e) { throw new IOException("Dateformat of returned data not recognized!"); } } } } currentModuleMap.clear(); } if (currentLine.contains("<Field") && currentLine.contains("</Field>")) { currentTagAttribute = currentLine.substring(currentLine.indexOf("=\"") + 2, currentLine.indexOf("\">")); currentTagValue = currentLine.substring(currentLine.indexOf("\">") + 2, currentLine.indexOf("</Field>")); currentModuleMap.put(currentTagAttribute, currentTagValue); } else if (currentLine.contains("<Field>")) { throw new IOException("Format of returned data not recognized!"); } } } } br.close(); isr.close(); in.close(); } }