/***************************************************** * RequestTask * This method accepts a string of a valid URL request to DB. * It gathers the HTML request, and parses it. * It returns an ArrayList of Profiles of the entries. * ***************************************************/ package edu.grinnell.appdev.grinnelldirectory.Tasks; import android.app.Activity; import android.app.ProgressDialog; import android.content.Context; import android.content.Intent; import android.os.AsyncTask; import android.widget.Toast; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.StatusLine; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import edu.grinnell.appdev.grinnelldirectory.Activities.ProfileListActivity; import edu.grinnell.appdev.grinnelldirectory.Models.Profile; import edu.grinnell.appdev.grinnelldirectory.R; public class RequestTask extends AsyncTask<String, Void, ArrayList<Profile>> { final public static int NO_ERROR = 0; final public static int NO_ENTRIES = 1; final public static int TOO_MANY_ENTRIES = 2; final public static int NO_RESPONSE_STRING = 3; final public static int OTHER = 4; public int errorCode = NO_ERROR; private Context mActivity; ProgressDialog loadingDialog; String responseString; // makeRequest() stores its response here ArrayList<Profile> profileList; // The final product, a list of downloaded // Profile objects String currentUri; // The current page content is being downloaded from final String UNAVAILABLE = "This data is unavailable off-campus."; public interface ParserErrorMessage { public void setErrorMessage(int message); } public RequestTask(Context context) { super(); mActivity = context; } /* Setup the progress bar. */ @Override protected void onPreExecute() { // Show a loading dialoge loadingDialog = new ProgressDialog(mActivity); loadingDialog.setCancelable(true); loadingDialog.setMessage("Loading Results ..."); loadingDialog.show(); } // In AsyncTasks, doInBackground is called first, analogous to a main // method. protected ArrayList<Profile> doInBackground(String... uri) { profileList = new ArrayList<Profile>(); currentUri = uri[0]; iterativelyScrapePages(); return profileList; } /* * Stop the dialog and notify the main thread that the results are loaded. */ @Override protected void onPostExecute(ArrayList<Profile> result) { // dismiss loading.. if (loadingDialog != null) { if (loadingDialog.isShowing()) { loadingDialog.dismiss(); } } if (errorCode == TOO_MANY_ENTRIES) { Toast toast = Toast .makeText(mActivity, "Too many results. Please refine search", Toast.LENGTH_LONG); toast.show(); } else if (errorCode == NO_ENTRIES) { Toast toast = Toast.makeText(mActivity, "No results found", Toast.LENGTH_LONG); toast.show(); } else if (errorCode == NO_RESPONSE_STRING) { Toast toast = Toast.makeText(mActivity, "Network Error. Please Try Again.", Toast.LENGTH_LONG); toast.show(); } else { Intent listIntent = new Intent(mActivity, ProfileListActivity.class); ProfileListActivity.setData(profileList); mActivity.startActivity(listIntent); ((Activity) mActivity).overridePendingTransition( R.anim.left_slide_in, R.anim.left_slide_out); } super.onPostExecute(result); } // Adds the queried entries to profileList private void iterativelyScrapePages() { do { makeRequest(); // download the next page of content } while (parseResponse()); // Parse the content. If parseResponse() // returns true, a next page exists. } // This method is a basic HTTP request. It saves the HTML response to // responseString. private int makeRequest(String... uri) { HttpClient httpclient = new DefaultHttpClient(); HttpResponse response; try { response = httpclient.execute(new HttpGet(currentUri)); StatusLine statusLine = response.getStatusLine(); if (statusLine.getStatusCode() == HttpStatus.SC_OK) { ByteArrayOutputStream out = new ByteArrayOutputStream(); response.getEntity().writeTo(out); out.close(); responseString = out.toString(); return 0; } else { // Closes the connection. response.getEntity().getContent().close(); throw new IOException(statusLine.getReasonPhrase()); } } catch (ClientProtocolException e) { // TODO Handle problems.. } catch (IOException e) { // TODO Handle problems.. } return -1; } // This method parses out entry information an HTML response, and adds // Profile objects to profileList. // responseString must be a valid grinnell College db page // This method does not know how to handle the "too many entries" response // and the off-campus response. private boolean parseResponse() { if (responseString == null) { errorCode = NO_RESPONSE_STRING; return false; } // Set up the tokenizer, seperating by token '\n'. You should find out // what a tokenizer is. StringTokenizer strTok = new StringTokenizer(responseString, "\n"); String curTok, picurl, firstName, lastName, username, dept, phonenum, campusaddress, boxno, stufacstatus, sgapos; // boolean indicating if there exists a next page. boolean anotherPage = false; boolean onCampus = false; if (!responseString.contains("off campus viewers")) { onCampus = true; strTok.nextToken(); curTok = strTok.nextToken(); } curTok = strTok.nextToken(); while (!curTok.contains("<p>")) { curTok = strTok.nextToken(); } if (curTok.contains("pages")) { errorCode = TOO_MANY_ENTRIES; return false; } else if (curTok.contains("<strong>no</strong>")) { errorCode = NO_ENTRIES; return false; } // skip useless information for (int i = 0; i < 9; i++) { strTok.nextToken(); } curTok = strTok.nextToken(); // If a next page button exsts, then there is a next page. // Grab URL of next pageand set return value of method to true. if (curTok.contains("Next Page")) { anotherPage = true; currentUri = "https://itwebapps.grinnell.edu" + curTok.substring(53, curTok.length() - 38); for (int i = 0; i < 22; i++) strTok.nextToken(); curTok = strTok.nextToken(); } else { anotherPage = false; for (int i = 0; i < 20; i++) { strTok.nextToken(); } curTok = strTok.nextToken(); } // loop, keeps adding entries to profileList until there are none. do { if (onCampus) { // parse entries // parse image URL. If no image, save " ". if (curTok.contains("image1")) picurl = curTok.substring( curTok.indexOf("img src=\"") + 9, curTok.indexOf("\" alt=\"")); else picurl = ""; curTok = strTok.nextToken(); } else { picurl = ""; curTok = strTok.nextToken(); } String fullName; // parse full name if (onCampus) { fullName = curTok.substring( curTok.substring(40).indexOf('>') + 41, curTok .substring(40).indexOf('<') + 40); } else { String rawName = dataParser(curTok); fullName = rawName; } if (fullName == null) { errorCode = NO_ENTRIES; return false; } firstName = fullName.substring(0, fullName.indexOf(',')); lastName = fullName.substring(fullName.indexOf(',') + 2); curTok = strTok.nextToken(); if (onCampus) { // parse student major or faculty department dept = curTok.substring(35, curTok.indexOf("</td>")); String smallerdeptString = curTok.substring(curTok .indexOf("tny") + 6); // some faculty/staff have multiple titles if (dept.contains("tny")) { dept = facStaffTitle(dept); } curTok = strTok.nextToken(); // parse phone number, username, campus address, box #, // student/faculty status if (curTok.charAt(37) != '<') { phonenum = curTok.substring(37, 41); if (phonenum.contains("-")) { phonenum = ""; } } else { phonenum = ""; } } else { dept = UNAVAILABLE; phonenum = UNAVAILABLE; curTok = strTok.nextToken(); } curTok = strTok.nextToken(); if (!curTok.contains(" ")) { username = curTok.substring(53, curTok.indexOf('@')); } else { username = ""; } strTok.nextToken(); curTok = strTok.nextToken(); if (onCampus) { campusaddress = curTok .substring(0, curTok.indexOf("</TD>")); campusaddress = campusaddress.trim(); curTok = strTok.nextToken(); boxno = curTok.substring(36, curTok.indexOf("</TD>")); if (boxno.equals(" ")) boxno = "Not Available"; curTok = strTok.nextToken(); stufacstatus = curTok.substring(37, curTok.indexOf(" </TD>")); strTok.nextToken(); } else { campusaddress = UNAVAILABLE; boxno = UNAVAILABLE; stufacstatus = UNAVAILABLE; curTok = strTok.nextToken(); curTok = strTok.nextToken(); curTok = strTok.nextToken(); } curTok = strTok.nextToken(); // parse SGA status sgapos = ""; if (curTok.equals("<tr>\r")) { // senator for (int i = 0; i < 3; i++) curTok = strTok.nextToken(); sgapos = curTok.substring(18, curTok.indexOf("</span>")); while (!curTok.contains("window.open") && !curTok.contains("New Search") && !curTok.contains("style=\"text-align:center;\"")) { curTok = strTok.nextToken(); } } // Adds a new Profile to profileList containing all the newly // parsed information profileList.add(new Profile(picurl, firstName, lastName, username, dept, phonenum, campusaddress, boxno, stufacstatus, sgapos)); } while (curTok.contains(" ")); // determine if there is another // entry to be parsed return anotherPage;// returns boolean indicating if there exists a // next page. /* * if(anotherPage){ for(int i=0; i<6; i++) strTok.nextToken(); * curTok = strTok.nextToken(); * * String beginningOfURL = curTok.substring(66); return * "https://itwebapps.grinnell.edu" + beginningOfURL.substring(0, * beginningOfURL.indexOf('"')); * * } */ } /** * * @param str * @return * * A simple Regex parser for stripping out numbers */ private String numberParser(String str) { String match = "[0-9]+"; Pattern pattern = Pattern.compile("-?\\d+"); Matcher m = pattern.matcher(str); while (m.find()) { return m.group(0); } return null; } private static String stripBrackets(String str) { return str.substring(1, str.length() - 1); } private static String imageParser(String str) { Pattern pattern = Pattern.compile("-?\\d+"); Matcher m = pattern.matcher(str); while (m.find()) { return m.group(0); } return null; } /** * * @param str * @return * * HTML parser when content includes name and email (Regex) */ private static String[] nameEmailRoleParser (String str) { int addIndex = 0; String[] returnArr = new String[3]; String match = ">([A-z].*?)<"; Pattern pattern = Pattern.compile(match); Matcher m = pattern.matcher(str); while (m.find()) { returnArr[addIndex++] = stripBrackets(m.group()); } return returnArr; } /** * * @param str * @return * * Generic HTML parser using Regex */ private String dataParser (String str) { String match = ">([A-z].*?)<"; Pattern pattern = Pattern.compile(match); Matcher m = pattern.matcher(str); if (m.find()) { return m.group(0).substring(1, m.group(0).length() - 1); } else { return null; } } /** * * @param title * @return * * Get relevantdata regarding Staff and faculty */ private String facStaffTitle(String title) { boolean inBracket = false; String tmp = ""; boolean lastcharintempissemicolon = false; for (int i = 0; i < title.length(); i++) { if (!inBracket && title.charAt(i) != '<') { tmp += title.charAt(i); lastcharintempissemicolon = true; } if (title.charAt(i) == '>') { inBracket = false; if (!lastcharintempissemicolon) tmp += ";"; lastcharintempissemicolon = false; } if (title.charAt(i) == '<') { inBracket = true; } } return tmp; } }