package com.abmash.parser;
import com.abmash.parser.content.ContentElement;
import com.abmash.parser.content.Header;
import com.abmash.parser.content.Image;
import com.abmash.parser.content.Input;
import com.abmash.parser.content.Link;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * Base class for parsers that extract pieces of a document from its source
 * text.
 *
 * <p>The public getters are lazy: each one checks its backing field and only
 * calls the matching abstract {@code parse...()} hook when that field is still
 * {@code null}. The hooks return {@code void}, so implementations presumably
 * publish their results by writing to the fields declared here.
 *
 * <p>All fields are package-private and non-final.
 */
public abstract class DocumentParser {

    /** Document URL; {@code null} until {@link #parseUrl()} assigns it. */
    String url;

    /** Source text handed to the constructor. */
    String sourceText;

    /** Visible text; {@code null} until {@link #parseVisibleText()} assigns it. */
    String visibleText;

    /** Document title; {@code null} until {@link #parseTitle()} assigns it. */
    String title;

    /** Meta tags; created on first use by {@link #getMetaTags()}. */
    HashMap<String, String> metaTags;

    /** Headers; created on first use by {@link #getHeaders()}. */
    ArrayList<Header> headers;

    /** Links; created on first use by {@link #getLinks()}. */
    ArrayList<Link> links;

    /** Images; created on first use by {@link #getImages()}. */
    ArrayList<Image> images;

    /** Inputs; created on first use by {@link #getInputs()}. */
    ArrayList<Input> inputs;

    /** Not read or written anywhere in this class. */
    ArrayList<ContentElement> customElements;

    /** Periodic elements; replaced on every {@link #getPeriodicElements()} call. */
    ArrayList<ContentElement> periodicElements;

    /**
     * Creates a parser for the given source text.
     *
     * <p>Nothing is parsed here; the value is only stored. It is not checked
     * for {@code null}.
     *
     * @param source the source text to keep for later parsing
     */
    public DocumentParser(String source) {
        this.sourceText = source;
    }

    // general parser methods

    /**
     * Parses the URL. Called by {@link #getUrl()} while {@code url} is
     * {@code null}.
     */
    protected abstract void parseUrl();

    /**
     * Parses the visible text. Called by {@link #getVisibleText()} while
     * {@code visibleText} is {@code null}.
     */
    protected abstract void parseVisibleText();

    /**
     * Parses the title. Called by {@link #getTitle()} while {@code title} is
     * {@code null}.
     */
    protected abstract void parseTitle();

    /**
     * Parses the meta tags. Called once by {@link #getMetaTags()}, right after
     * it has assigned a new empty map to {@code metaTags}.
     */
    protected abstract void parseMetaTags();

    /**
     * Parses the headers. Called once by {@link #getHeaders()}, right after it
     * has assigned a new empty list to {@code headers}.
     */
    protected abstract void parseHeaders();

    /**
     * Parses the links. Called once by {@link #getLinks()}, right after it has
     * assigned a new empty list to {@code links}.
     */
    protected abstract void parseLinks();

    /**
     * Parses the images. Called once by {@link #getImages()}, right after it
     * has assigned a new empty list to {@code images}.
     */
    protected abstract void parseImages();

    /**
     * Parses the inputs. Called once by {@link #getInputs()}, right after it
     * has assigned a new empty list to {@code inputs}.
     */
    protected abstract void parseInputs();

    // custom parser methods
    // TODO general methods with custom selector query

    /**
     * Parses the periodic elements.
     *
     * <p>Unlike the other hooks this one is public. When it is reached through
     * {@link #getPeriodicElements()}, {@code periodicElements} has just been
     * set to a new empty list; a direct call gets no such preparation from
     * this class.
     */
    public abstract void parsePeriodicElements();

    // getter methods

    /**
     * Returns the URL, calling {@link #parseUrl()} first if it is not set.
     *
     * <p>If the hook leaves {@code url} as {@code null}, the next call invokes
     * it again.
     *
     * @return the current value of {@code url}, possibly {@code null}
     */
    public String getUrl() {
        if (url != null) {
            return url;
        }
        parseUrl();
        return url;
    }

    /**
     * Returns the source text exactly as it was passed to the constructor.
     *
     * @return the stored source text
     */
    public String getSourceText() {
        return sourceText;
    }

    /**
     * Returns the visible text, calling {@link #parseVisibleText()} first if
     * it is not set.
     *
     * <p>If the hook leaves {@code visibleText} as {@code null}, the next call
     * invokes it again.
     *
     * @return the current value of {@code visibleText}, possibly {@code null}
     */
    public String getVisibleText() {
        if (visibleText != null) {
            return visibleText;
        }
        parseVisibleText();
        return visibleText;
    }

    /**
     * Returns the title, calling {@link #parseTitle()} first if it is not set.
     *
     * <p>If the hook leaves {@code title} as {@code null}, the next call
     * invokes it again.
     *
     * @return the current value of {@code title}, possibly {@code null}
     */
    public String getTitle() {
        if (title != null) {
            return title;
        }
        parseTitle();
        return title;
    }

    /**
     * Returns the meta tags. The first call creates an empty map and then runs
     * {@link #parseMetaTags()}; later calls return the stored map.
     *
     * @return the internal map itself, not a copy
     */
    public HashMap<String, String> getMetaTags() {
        if (metaTags != null) {
            return metaTags;
        }
        metaTags = new HashMap<String, String>();
        parseMetaTags();
        return metaTags;
    }

    /**
     * Returns the headers. The first call creates an empty list and then runs
     * {@link #parseHeaders()}; later calls return the stored list.
     *
     * @return the internal list itself, not a copy
     */
    public ArrayList<Header> getHeaders() {
        if (headers != null) {
            return headers;
        }
        headers = new ArrayList<Header>();
        parseHeaders();
        return headers;
    }

    /**
     * Returns the links. The first call creates an empty list and then runs
     * {@link #parseLinks()}; later calls return the stored list.
     *
     * @return the internal list itself, not a copy
     */
    public ArrayList<Link> getLinks() {
        if (links != null) {
            return links;
        }
        links = new ArrayList<Link>();
        parseLinks();
        return links;
    }

    /**
     * Returns the images. The first call creates an empty list and then runs
     * {@link #parseImages()}; later calls return the stored list.
     *
     * @return the internal list itself, not a copy
     */
    public ArrayList<Image> getImages() {
        if (images != null) {
            return images;
        }
        images = new ArrayList<Image>();
        parseImages();
        return images;
    }

    /**
     * Returns the inputs. The first call creates an empty list and then runs
     * {@link #parseInputs()}; later calls return the stored list.
     *
     * @return the internal list itself, not a copy
     */
    public ArrayList<Input> getInputs() {
        if (inputs != null) {
            return inputs;
        }
        inputs = new ArrayList<Input>();
        parseInputs();
        return inputs;
    }

    /**
     * Returns the periodic elements.
     *
     * <p>Every call replaces {@code periodicElements} with a new empty list
     * and runs {@link #parsePeriodicElements()} again; nothing is cached.
     * NOTE(review): the other collection getters parse only once - confirm
     * that re-parsing on each call is intended here.
     *
     * @return the internal list as left by the latest parse run, not a copy
     */
    public ArrayList<ContentElement> getPeriodicElements() {
        this.periodicElements = new ArrayList<ContentElement>();
        parsePeriodicElements();
        return this.periodicElements;
    }
}