package com.geel9.facepunch.misc; import java.io.IOException; import java.io.StringBufferInputStream; import java.util.ArrayList; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import com.geel9.facepunch.APISession; public class PostSplitter { /** * The different types of post "sections" * * @author Geel9 */ public static enum PostSection{ QUOTE, TEXT, IMAGE, CODE, LINK, RELEASE, IFRAME, MEDIA } private static String attributesToString(NamedNodeMap attributes){ String ret = ""; if(attributes == null) return ""; for(int i = 0; i < attributes.getLength(); i++){ String name = attributes.item(i).getNodeName(); String content = getTextContent(attributes.item(i)); String prefix = "\""; if(content.contains(prefix)) prefix = "'"; ret += attributes.item(i).getNodeName() + "=" + prefix + getTextContent(attributes.item(i)) + prefix + " "; } return ret; } private static String getTextContent(Node node){ int nodeLength = node.getChildNodes().getLength(); String ret = ""; if(nodeLength > 0){ for(int i = 0; i < nodeLength; i++){ Node n = node.getChildNodes().item(i); ret += n.getNodeValue(); } } else{ ret = node.getNodeValue(); if(ret == null) ret = ""; } return ret; } private static String nodeToHTML(Node node, boolean ignoreFirst){ int childrenNum = node.getChildNodes().getLength(); String nodeName = node.getNodeName(); String attributes = attributesToString(node.getAttributes()); String textContent = getTextContent(node); String prefix = "<" + nodeName + " " + attributes + ">"; if(attributes.equals("")) prefix = "<" + nodeName + ">"; String suffix = "</" + nodeName + ">"; if(nodeName.equals("#text") || ignoreFirst){ prefix = ""; suffix = ""; } if(childrenNum == 0){ return prefix + textContent + suffix; } else{ String totalRet = prefix; for(int i = 0; i < childrenNum; i++){ totalRet += nodeToHTML(node.getChildNodes().item(i), false); } totalRet += suffix; return totalRet; } } public static ArrayList<FPPostSection> splitPost(String source) throws SAXException, IOException, ParserConfigurationException{ ArrayList<FPPostSection> sections = new ArrayList<FPPostSection>(); FPPostSection currentSection = new FPPostSection(); source = "<root>" + source + "</root>"; //Highlighting! source = source.replaceAll("<span class=\"highlight\">(.*?)</span>", "<b><font color='red'>$1</font></b>"); source = source.replaceAll("<img src=\"/fp", "<img src=\"http://www.facepunch.com/fp"); source = source.replaceAll("border=(\\d+?)", "border=\"$1\""); source = source.replaceAll("font size=(\\d+?)", "font size=\"$1\""); source = source.replaceAll("<video (.+?) muted loop autoplay onclick", "<video $1 onclick"); source = source.replaceAll("frameborder=\"0\" allowfullscreen>", "frameborder=\"0\">"); source = source.replaceAll("�", "..."); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); StringBufferInputStream s = new StringBufferInputStream(source); Document dom = builder.parse(s); Element root = dom.getDocumentElement(); NodeList items = root.getChildNodes(); for(int i = 0; i < items.getLength(); i++){ Node node = items.item(i); String nodeName = node.getNodeName(); String nodeText = getTextContent(node); NamedNodeMap attributes = node.getAttributes(); if(nodeName.equals("img") && node.hasAttributes()){ Node attributeImgSrc = attributes.getNamedItem("src"); Node attributeImgClass = attributes.getNamedItem("class"); if(attributeImgSrc != null){ if(currentSection.postSection != null) sections.add(currentSection); String imgSource = getTextContent(attributeImgSrc); boolean thumbnail = attributeImgClass != null && getTextContent(attributeImgClass).equals("thumbnail"); currentSection = new FPPostSection(); currentSection.postSection = PostSection.IMAGE; currentSection.imgUrl = imgSource; currentSection.imageIsThumb = thumbnail; sections.add(currentSection); currentSection = new FPPostSection(); } } else if(nodeName.equals("div") && node.hasAttributes()){ Node attributeDivClass = attributes.getNamedItem("class"); String attributeString = ""; if(attributeDivClass != null){ attributeString = getTextContent(attributeDivClass); } if(attributeString.equals("quote")){ //We have a quote int numChildren = node.getChildNodes().getLength(); boolean hasInformationDiv = false; for(int j = 0; j < node.getChildNodes().getLength(); j++){ Node test = node.getChildNodes().item(j); if(!test.hasAttributes()) continue; Node t = test.getAttributes().getNamedItem("class"); if(t != null && getTextContent(t).equals("information")) hasInformationDiv = true; } System.out.println(hasInformationDiv ? "Information div " : "No information div"); //No information div if(!hasInformationDiv){ Node divQuoteMessage = node.getFirstChild(); String quoteMessage = nodeToHTML(divQuoteMessage, true); if(currentSection.postSection != null) sections.add(currentSection); currentSection = new FPPostSection(); currentSection.postSection = PostSection.QUOTE; currentSection.quoteContent = quoteMessage; sections.add(currentSection); currentSection = new FPPostSection(); } //Information div! else{ Node divQuoteInfo = node.getFirstChild(); Node divQuoteMessage = divQuoteInfo.getNextSibling(); Node divQuoteInfoUrl = divQuoteInfo.getFirstChild(); String quotedPerson = ""; if(divQuoteInfoUrl.getChildNodes().getLength() != 0) quotedPerson = divQuoteInfoUrl.getFirstChild().getNodeValue().replace(" posted:", ""); else quotedPerson = divQuoteInfo.getFirstChild().getNodeValue().replace(" posted:", ""); String quoteMessage = nodeToHTML(divQuoteMessage, true); if(currentSection.postSection != null) sections.add(currentSection); currentSection = new FPPostSection(); currentSection.postSection = PostSection.QUOTE; currentSection.quoteAuthor = quotedPerson; currentSection.quoteContent = quoteMessage; sections.add(currentSection); currentSection = new FPPostSection(); } } else if(attributeString.equals("center")){ Node videoDiv = node.getFirstChild(); String className = getTextContent(videoDiv.getAttributes().getNamedItem("class")); if(!className.equals("video")) continue; Node iFrame = videoDiv.getFirstChild(); NamedNodeMap iFrameAttributes = iFrame.getAttributes(); String src = getTextContent(iFrameAttributes.getNamedItem("src")); if(currentSection.postSection != null){ sections.add(currentSection); currentSection = new FPPostSection(); } String embedCode = APISession.quickMatch("/embed/(.+?)\\?", src); currentSection.mediaName = "Youtube video"; currentSection.mediaURL = embedCode != null ? "http://www.youtube.com/watch?v=" + embedCode : src; currentSection.postSection = PostSection.MEDIA; sections.add(currentSection); currentSection = new FPPostSection(); } else if(attributeString.equals("video")){ Node iFrame = node.getFirstChild(); NamedNodeMap iFrameAttributes = iFrame.getAttributes(); String src = getTextContent(iFrameAttributes.getNamedItem("src")); if(currentSection.postSection != null){ sections.add(currentSection); currentSection = new FPPostSection(); } currentSection.linkHTML = "Youtube Video"; currentSection.linkUrl = src; currentSection.postSection = PostSection.TEXT; sections.add(currentSection); currentSection = new FPPostSection(); } else if(attributeString.equals("bbcode_container")){ Node bollocks = node.getFirstChild(); if(bollocks != null){ Node code = bollocks.getNextSibling(); if(code != null){ if(currentSection.postSection != null){ if(currentSection.postSection == PostSection.CODE){ currentSection.sectionHTML += getTextContent(code); } else { sections.add(currentSection); currentSection = new FPPostSection(); currentSection.postSection = PostSection.CODE; currentSection.sectionHTML = getTextContent(code); } } else{ currentSection = new FPPostSection(); currentSection.postSection = PostSection.CODE; currentSection.sectionHTML = getTextContent(code); } } } }else if(attributeString.equals("bbcode_release")){ Node code = node; if(code != null){ if(currentSection.postSection != null){ sections.add(currentSection); currentSection = new FPPostSection(); currentSection.postSection = PostSection.RELEASE; currentSection.sectionHTML = nodeToHTML(code, true); } else{ currentSection = new FPPostSection(); currentSection.postSection = PostSection.RELEASE; currentSection.sectionHTML = nodeToHTML(code, true); } } } } else if(nodeName.equals("a") && node.hasAttributes()){ String contents = nodeToHTML(node, true); Node linkNode = attributes.getNamedItem("href"); Node child = node.getFirstChild(); String childName = child.getNodeName(); if(linkNode != null){ if(childName.equals("img") || childName.equals("a")){ if(currentSection.postSection != null) sections.add(currentSection); currentSection = new FPPostSection(); currentSection.linkUrl = getTextContent(linkNode); currentSection.linkHTML = contents; currentSection.postSection = PostSection.LINK; sections.add(currentSection); currentSection = new FPPostSection(); } else{ if(currentSection.postSection != null){ if(currentSection.postSection != PostSection.TEXT){ sections.add(currentSection); currentSection = new FPPostSection(); currentSection.postSection = PostSection.TEXT; } } else{ currentSection = new FPPostSection(); currentSection.postSection = PostSection.TEXT; } currentSection.sectionHTML += nodeToHTML(node, false); } } } else if(nodeName.equals("iframe") && node.hasAttributes()){ Node srcNode = attributes.getNamedItem("src"); if(srcNode != null){ if(currentSection.postSection != null) sections.add(currentSection); currentSection = new FPPostSection(); currentSection.iFrameLink = getTextContent(srcNode); currentSection.postSection = PostSection.IFRAME; sections.add(currentSection); currentSection = new FPPostSection(); } } else{ currentSection.postSection = PostSection.TEXT; currentSection.sectionHTML += nodeToHTML(node, false); } } if(currentSection.postSection != null){ FPPostSection p = (sections.size() != 0) ? sections.get(sections.size() - 1) : new FPPostSection(); if(!p.equals(currentSection)) sections.add(currentSection); } return sections; } }