package com.mehmetakiftutuncu.eshotroid.utilities;
import java.util.ArrayList;
import android.util.Log;
/**
* A utility class for parsing the pages
*
* @author Mehmet Akif Tütüncü
*/
public class Parser
{
// Tags for a busses in Ulasim.aspx
public static final String BUS_OPEN_START = "<option value=";
public static final String BUS_OPEN_END = "\">";
public static final String BUS_CLOSE = "</option>";
// Tags for a bus time in Saatler.aspx
public static final String BUSTIME_OPEN_START = "<span ";
public static final String BUSTIME_OPEN_END = "\">";
public static final String BUSTIME_CLOSE = "</span>";
// Tags for a bus route in Saatler.aspx
public static final String BUSROUTE_OPEN_START = "<span ";
public static final String BUSROUTE_OPEN_END = "Guzergah\">";
public static final String BUSROUTE_CLOSE = "</span>";
// Extra tags to be ignored for a bus time
public static final String BUSTIME_FONT_OPEN_START = "<font ";
public static final String BUSTIME_FONT_OPEN_END = "\">";
public static final String BUSTIME_FONT_CLOSE = "</font>";
// Tag that encloses bus times
public static final String BUSTIME_TABLE_TAG = "<table cellspacing="; //public static final String BUSTIME_TABLE_TAG = "<table "; This causes problems if there is no time for the selected bus with selected day (736P is an example)
/**
* Tag for debugging
*/
public static final String LOG_TAG = "Eshotroid_Parser";
/**
* Gets the route of the bus from the times page of that bus
*
* @param page Source of the page
*
* @return Route of the bus, null if any error occurs
*/
public static String parseBusRoute(String page)
{
String result = null;
// Start looking for bus route
int start = 0, end = 0;
// Get the bus route information start point
start = page.indexOf(BUSROUTE_OPEN_START);
if(start != -1)
{
// Get where the start point ends
start = page.indexOf(BUSROUTE_OPEN_END, start);
if(start != -1)
{
// This is the actual start point of the bus route information
start += BUSROUTE_OPEN_END.length();
// Now look for where the bus route information ends
end = page.indexOf(BUSROUTE_CLOSE, start);
if(end != -1)
{
// Now we extract the bus route information using the found start and end points
result = page.substring(start, end);
}
}
}
return result;
}
/**
* Gets the list of the bus times from the specified page
*
* @param page Source of the page
*
* @return List of the bus times, null if any error occurs
*/
public static ArrayList<String> parseBusTimes(String page)
{
try
{
// Resulting list
ArrayList<String> list = new ArrayList<String>();
// First find where the actual bus times start
int beginning = page.lastIndexOf(BUSTIME_TABLE_TAG);
if(beginning != -1)
{
page = page.substring(beginning);
}
else
{
Log.e(LOG_TAG, "No times were found in the page!");
return null;
}
// Start looking for bus times
int start = 0, end = 0, last = 0;
do
{
// Get the next bus time information start point starting from the last known position
start = page.indexOf(BUSTIME_OPEN_START, last);
if(start != -1)
{
// Get where the start point ends
start = page.indexOf(BUSTIME_OPEN_END, start);
if(start != -1)
{
// This is the actual start point of the bus time information
start += BUSTIME_OPEN_END.length();
// Now look for where the bus time information ends
end = page.indexOf(BUSTIME_CLOSE, start);
if(end != -1)
{
// Now we extract the bus time information using the found start and end points
String item = page.substring(start, end);
// If the bus time information still has tags
if(item.contains("<"))
{
// Extract the bus time information between extra tags
item = extractFromTags(item, BUSTIME_FONT_OPEN_START, BUSTIME_FONT_OPEN_END, BUSTIME_FONT_CLOSE);
}
// Finally add the found and extracted bus time information to the list
list.add(item);
// Move the end point of the bus time information forward
end += BUSTIME_CLOSE.length();
}
}
// Mark the last known position and continue
last = end;
}
} while(start != -1);
return list;
}
catch(Exception e)
{
Log.e(LOG_TAG, "Error occured while parsing bus times!", e);
return null;
}
}
/**
* Gets the list of the busses from the specified page
*
* @param page Source of the page
*
* @return List of the busses, null if any error occurs
*/
public static ArrayList<String> parseBusses(String page)
{
try
{
// Resulting list
ArrayList<String> list = new ArrayList<String>();
// Start looking for busses
int start = 0, end = 0, last = 0;
do
{
// Get the next bus information start point starting from the last known position
start = page.indexOf(BUS_OPEN_START, last);
if(start != -1)
{
// Get where the start point ends
start = page.indexOf(BUS_OPEN_END, start);
if(start != -1)
{
// This is the actual start point of the bus information
start += BUS_OPEN_END.length();
// Now look for where the bus information ends
end = page.indexOf(BUS_CLOSE, start);
if(end != -1)
{
// Now we extract the bus information using the found start and end points
// Plus we fix the Turkish characters in the result
String item = fixTurkishHtmlEntityCharacters(page.substring(start, end));
// Finally add the found and extracted bus information to the list
list.add(item);
// Move the end point of the bus information forward
end += BUS_CLOSE.length();
}
}
// Mark the last known position and continue
last = end;
}
} while(start != -1);
return list;
}
catch(Exception e)
{
Log.e(LOG_TAG, "Error occured while parsing busses!", e);
return null;
}
}
/**
* Extracts the information in a string between the specified tags
*
* @param source Original string
* @param openStart Beginning of the open tag
* @param openEnd Ending of the open tag
* @param close Close tag
*
* @return Information between the specified tags
*/
private static String extractFromTags(String source, String openStart, String openEnd, String close)
{
String result = source;
int start = 0, end = 0;
start = source.indexOf(openStart);
if(start != -1)
{
start = source.indexOf(openEnd, start);
if(start != -1)
{
start += openEnd.length();
end = source.indexOf(close, start);
if(end != -1)
{
result = fixTurkishHtmlEntityCharacters(source.substring(start, end));
}
}
}
return result;
}
/**
* Replaces all HTML entity characters in Turkish
*
* @param source Source string
*
* @return Source string with fixed Turkish characters
*/
private static String fixTurkishHtmlEntityCharacters(String source)
{
String result = source;
result = result.replaceAll("İ", "İ");
result = result.replaceAll("ı", "ı");
result = result.replaceAll("Ö", "Ö");
result = result.replaceAll("ö", "ö");
result = result.replaceAll("Ü", "Ü");
result = result.replaceAll("ü", "ü");
result = result.replaceAll("Ç", "Ç");
result = result.replaceAll("ç", "ç");
result = result.replaceAll("Ğ", "Ğ");
result = result.replaceAll("ğ", "ğ");
result = result.replaceAll("Ş", "Ş");
result = result.replaceAll("ş", "ş");
return result;
}
}