package gov.nysenate.openleg.util;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
/** Utilities for scraping html */
public abstract class ScrapeUtils {
/**
* Given a jsoup element, gets all contained text preserving formatting by tags such as <br>
* @param element An html element
* @return String
*/
public static String getFormattedText(Element element) {
StringBuilder stringBuilder = new StringBuilder();
element.childNodes().forEach(node -> {
if (node instanceof TextNode) {
stringBuilder.append(((TextNode) node).text());
} else if (node instanceof Element) {
if ("br".equalsIgnoreCase(((Element) node).tag().getName())) {
stringBuilder.append("\n");
} else {
stringBuilder.append(getFormattedText((Element) node));
}
}
});
return stringBuilder.toString();
}
}