package com.jobmineplus.mobile.widgets.table; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.Locale; import com.jobmineplus.mobile.exceptions.HiddenColumnsException; import com.jobmineplus.mobile.exceptions.InfiniteLoopException; import com.jobmineplus.mobile.exceptions.JbmnplsParsingException; import com.jobmineplus.mobile.widgets.Job.APPLY_STATUS; import com.jobmineplus.mobile.widgets.Job.INTERVIEW_TYPE; import com.jobmineplus.mobile.widgets.Job.STATE; import com.jobmineplus.mobile.widgets.Job.STATUS; import com.jobmineplus.mobile.widgets.Job.HEADER; /** * For this class, you need to have one for each table you are parsing * You should declare an object of this class final. To parse the job * data you must use ColumnInfo to specify each column's data of interest * by their type. * DO NOT include id as the first column as all tables have that - * and it is redundant to declare it all the time. The id will be gained * from the constructor column * @author matthewn4444 * */ public class TableParser { private final int INFINITE_LOOP_LIMIT = 1000; private OnTableParseListener listener; private static final SimpleDateFormat DATE_FORMAT_SPACE = new SimpleDateFormat("d MMM yyyy", Locale.getDefault()); private static final SimpleDateFormat DATE_FORMAT_DASH = new SimpleDateFormat("d-MMM-yyyy", Locale.getDefault()); //=============== // Constructor //=============== public TableParser() { } //================== // Public Methods //================== /** * You must attach a listener if you want to execute each outline * @param listener */ public void setOnTableRowParse(OnTableParseListener listener) { this.listener = listener; } /** * The activities call this inside parseWebpage to help them get the * column data from the table. * This takes the raw html from the HttpResponse and gets the table * html from it based on the id of the table. * This will parse the table by getting the <tr>....</tr> code from * the html and serve it to parseTable. * Passing an outline as a backup tries to parse the html with a second * outline if the first one fails * @param html */ public void execute(TableParserOutline outline, String html) { TableParserOutline[] outlines = {outline}; execute(outlines, html); } public void execute(TableParserOutline[] outlines, String html) { boolean verifySingleOutline = outlines.length == 1; SimpleHtmlParser parser = new SimpleHtmlParser(html); String text, tableID = outlines[0].getTableId(); int i, index, start, end, columnNum = 0; index = parser.skipText(tableID); // Set the position to the table headers start = parser.skipText("<th"); end = parser.skipText("<tr"); parser.setPosition(index); // Now check to see if the outline(s) is/are valid for this table TableParserOutline passOutline = null; if (verifySingleOutline) { // See if each header matches HEADER[] headers = outlines[0].getHeaders(); for (i = 0; i < headers.length; i++) { text = parser.getTextInNextElement("th").toLowerCase(Locale.getDefault()).replace(" ", " "); if (parser.getPosition() < end) { if (!text.equals(headers[i].toString())) { throw new HiddenColumnsException("Outline does not match html"); } } else { throw new HiddenColumnsException("Outline has more columns than html"); } } passOutline = outlines[0]; } else { ArrayList<TableParserOutline> outlinesArr = new ArrayList<TableParserOutline>( Arrays.asList(outlines)); // Decide which outline fits the given html and use that one while (outlinesArr.size() > 1) { text = parser.getTextInNextElement("th").toLowerCase(Locale.getDefault()).replace(" ", " ");; if (parser.getPosition() < end) { // Not enough headers in html for (i = outlinesArr.size() - 1; i >= 0; i--) { HEADER[] headers = outlinesArr.get(i).getHeaders(); if (columnNum < headers.length) { // Not enough headers in outline String curHeader = headers[columnNum].toString(); if (!curHeader.equals(text)) { outlinesArr.remove(i); } } else { outlinesArr.remove(i); } } columnNum++; } else { break; } } if (outlinesArr.size() != 1) { throw new HiddenColumnsException("Cannot find a suitable outline to parse table."); } passOutline = outlinesArr.get(0); } // Get the html for the table itself end = parser.skipText("</table>"); html = html.substring(start, end); internalExecute(passOutline, html); } private void internalExecute(TableParserOutline outline, String html) { if (listener == null) { throw new JbmnplsParsingException("You did not attach a listener to the table parsing function."); } // Parse the table now try { parseTable(outline, html); } catch (NumberFormatException e) { throw new JbmnplsParsingException(e.getMessage()); } } /** * This parses the table based on the table HTML. This requires the HTML to be * <tr>....</tr>. The execute method in this class calls it correctly. Any errors * in parsing will throw JbmnplsParsingException. This is much faster than using * a 3rd party HTML parser because this is 4 times faster. When complete it will * return mid-code. * @param html */ private void parseTable(TableParserOutline outline, String html) { SimpleHtmlParser parser = new SimpleHtmlParser(html); HEADER[] headers = outline.getHeaders(); int row = 0; Object[] passedObj = new Object[outline.columnLength()]; while(!parser.isEndOfContent() && row < INFINITE_LOOP_LIMIT) { // Check if there is another TD, if not we are done int position = html.indexOf("<tr", parser.getPosition()); if (position == -1) { return; } parser.setPosition(position); // Parse the job id of the table, if no id, then table is empty String text = parser.getTextInNextTD(); if (text == "") { return; } try { passedObj[0] = Integer.parseInt(text); } catch (NumberFormatException e) { e.printStackTrace(); throw new HiddenColumnsException("Cannot get id from table."); } for (int i = 1; i < outline.columnLength(); i++) { text = parser.getTextInNextTD(); // Convert the value to the column type and type Object value = null; switch(headers[i]) { // Strings case JOB_TITLE: case EMPLOYER: case EMPLOYER_NAME: case UNIT: case UNIT_NAME: case UNIT_NAME_1: case TERM: case ROOM: case INSTRUCTIONS: case INTERVIEWER: case LOCATION: case START_TIME: // Handled by interviews case END_TIME: // Handled by interviews case SHORTLIST: value = text; break; // Integers case OPENINGS: case NUM_APPS: case LENGTH: value = text == "" ? 0 : Integer.parseInt(text); break; case JOB_ID: case JOB_IDENTIFIER: if (text == "") { // No data in table/row if (row != 0) { throw new JbmnplsParsingException("Cannot parse id because it is empty on row= " + row); } return; } value = text == "" ? 0 : Integer.parseInt(text); break; // Dates case LAST_DAY_TO_APPLY: case LAST_DATE_TO_APPLY: case DATE: if (text.equals("")) { value = new Date(0); } else { try { if (text.contains("-")) { value = DATE_FORMAT_DASH.parse(text); } else { value = DATE_FORMAT_SPACE.parse(text); } } catch (ParseException e) { e.printStackTrace(); value = new Date(0); } } break; // Interview Type case INTER_TYPE: value = INTERVIEW_TYPE.getTypefromString(text); break; // Application Status case APPLY: value = APPLY_STATUS.getApplicationStatusfromString(text); break; // Job Status case APP_STATUS: value = STATUS.getStatusfromString(text); break; // Job State case JOB_STATUS: value = STATE.getStatefromString(text); break; // Ignore case VIEW_DETAILS: case VIEW_PACKAGE: case SELECT_TIME: case BLANK: break; default: throw new JbmnplsParsingException( "Cannot parse column with invalid type. Row= " + row + ", type= " + headers[i] + ", did you add them here?"); } passedObj[i] = value; } // Now we pass the values back to the activities to make jobs listener.onRowParse(outline, passedObj); row++; } // Chances are, these parts will never run if (row >= INFINITE_LOOP_LIMIT) { throw new InfiniteLoopException("We ran an infinite loop looking for column data."); } throw new JbmnplsParsingException("Went to end of table but found no information."); } //============= // Interface //============= public interface OnTableParseListener { /** * This is needed once you parse each row * Parameter 1 will always be the job id, everything else will follow just * the same as the table columns * @param data: the parameters for a job shown from TableParsingOutline class */ public void onRowParse(TableParserOutline outline, Object... jobData); } }