package com.vtence.molecule.decoration;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits an HTML document into named chunks using regular expressions.
 *
 * <p>The resulting map may contain the following entries, each present only when the
 * corresponding markup is found:
 * <ul>
 *   <li>{@code "head"} - the content of the {@code <head>} element with its first
 *       {@code <title>} element removed</li>
 *   <li>{@code "title"} - the trimmed text of the first {@code <title>} element found
 *       inside the head</li>
 *   <li>{@code "body"} - the content of the {@code <body>} element</li>
 *   <li>one entry per {@code <meta name="..." content="...">} tag found anywhere in the
 *       document, keyed by the meta name as written in the document</li>
 * </ul>
 *
 * <p>Tag and attribute names are matched case-insensitively. This is pattern matching,
 * not HTML parsing: only the first occurrence of each element is considered and a
 * {@code >} inside an attribute value of an opening tag is not understood.
 */
public class HtmlDocumentProcessor implements ContentProcessor {

    // Capturing group indexes. TEXT applies to HEAD, TITLE and BODY; NAME and CONTENT apply to META.
    private static final int TEXT = 1;
    private static final int NAME = 1;
    private static final int CONTENT = 2;

    // DOTALL lets element content span several lines; CASE_INSENSITIVE because HTML tag
    // names are not case sensitive.
    private static final int ELEMENT_FLAGS = Pattern.DOTALL | Pattern.CASE_INSENSITIVE;

    // The (?=[\s/>]) lookahead requires the tag name to end right there, so that "<head"
    // does not match the opening of a "<header>" element. A single newline right after the
    // opening tag and right before the closing tag is left out of the captured text.
    private static final Pattern HEAD =
            Pattern.compile("<head(?=[\\s/>])[^>]*>\n?(.*?)\n?</head>", ELEMENT_FLAGS);
    // Also swallows the whitespace following the closing tag, so that stripping the title
    // from the head does not leave a blank line behind.
    private static final Pattern TITLE =
            Pattern.compile("<title(?=[\\s/>])[^>]*>\n?(.*?)\n?</title>\\s*\n?", ELEMENT_FLAGS);
    private static final Pattern BODY =
            Pattern.compile("<body(?=[\\s/>])[^>]*>\n?(.*?)\n?</body>", ELEMENT_FLAGS);
    // Only recognizes the name attribute immediately followed by the content attribute,
    // both double-quoted, separated by any amount of whitespace.
    private static final Pattern META =
            Pattern.compile("<meta\\s+name=\"([^\"]*)\"\\s+content=\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

    /**
     * Extracts the head, title, body and meta data chunks of an HTML document.
     *
     * <p>When a meta tag is named like one of the structural chunks ({@code head},
     * {@code title} or {@code body}), the structural chunk takes precedence. When several
     * meta tags share the same name, the last one wins.
     *
     * @param html the HTML document to process
     * @return a new mutable map of chunk name to chunk content; empty when nothing matches
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public Map<String, String> process(String html) {
        final Map<String, String> chunks = new HashMap<>();
        // Meta data is stored first so that a tag such as <meta name="title" ...> can never
        // replace the real title, head or body chunk added below.
        addMetaData(chunks, html);

        // The head is located once and shared by the head and title extraction.
        String head = extract(html, HEAD);
        if (head != null) {
            addHead(chunks, head);
            addTitle(chunks, head);
        }

        addBody(chunks, html);
        return chunks;
    }

    /**
     * Stores the head content, minus its title element, under the {@code "head"} key.
     *
     * @param chunks the map to add the chunk to
     * @param head the content of the head element
     */
    private void addHead(Map<String, String> chunks, String head) {
        chunks.put("head", stripTitle(head));
    }

    /**
     * Removes the first title element, and the whitespace that follows it, from the head content.
     *
     * @param head the content of the head element
     * @return the head content without its first title element
     */
    private String stripTitle(String head) {
        return TITLE.matcher(head).replaceFirst("");
    }

    /**
     * Returns the text captured by the first match of the given pattern.
     *
     * @param html the text to search
     * @param pattern a pattern whose first capturing group holds the text of interest
     * @return the captured text, or {@code null} when the pattern does not match
     */
    private String extract(String html, Pattern pattern) {
        Matcher matcher = pattern.matcher(html);
        return matcher.find() ? matcher.group(TEXT) : null;
    }

    /**
     * Stores the trimmed text of the first title element of the head under the
     * {@code "title"} key. Does nothing when the head has no title element.
     *
     * @param chunks the map to add the chunk to
     * @param head the content of the head element
     */
    private void addTitle(Map<String, String> chunks, String head) {
        String title = extract(head, TITLE);
        if (title != null) {
            chunks.put("title", title.trim());
        }
    }

    /**
     * Stores the content of the body element under the {@code "body"} key.
     * Does nothing when the document has no body element.
     *
     * @param chunks the map to add the chunk to
     * @param html the whole HTML document
     */
    private void addBody(Map<String, String> chunks, String html) {
        String body = extract(html, BODY);
        if (body != null) {
            chunks.put("body", body);
        }
    }

    /**
     * Stores the content of every recognized meta tag under the name of that tag.
     * The whole document is scanned, not only its head.
     *
     * @param chunks the map to add the entries to
     * @param html the whole HTML document
     */
    private void addMetaData(Map<String, String> chunks, String html) {
        Matcher matcher = META.matcher(html);
        while (matcher.find()) {
            chunks.put(matcher.group(NAME), matcher.group(CONTENT));
        }
    }
}