package eu.dnetlib.iis.wf.ingest.webcrawl.fundings; import java.util.Stack; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /** * Webcrawl XML SAX handler providing fundings related text. * * @author mhorst * */ public class WebcrawlFundingsHandler extends DefaultHandler { private static final String ELEM_CSVRECORD = "csvRecord"; private static final String ELEM_COLUMN = "column"; private static final String ATTR_NAME = "name"; private static final String FUNDING_ATTR_VALUE = "FX"; private Stack<String> parents; private StringBuilder currentValue; private String currentColumnName; private boolean enteredFundingColumn; private StringBuffer fundingText; @Override public void startDocument() throws SAXException { this.parents = new Stack<String>(); this.currentValue = new StringBuilder(); this.fundingText = new StringBuffer(); this.enteredFundingColumn = false; } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (isWithinElement(qName, ELEM_COLUMN, ELEM_CSVRECORD)) { this.currentColumnName = attributes.getValue(ATTR_NAME); if (FUNDING_ATTR_VALUE.equalsIgnoreCase(this.currentColumnName)) { this.enteredFundingColumn = true; } } this.parents.push(qName); } @Override public void endElement(String uri, String localName, String qName) throws SAXException { this.parents.pop(); if (isWithinElement(qName, ELEM_COLUMN, ELEM_CSVRECORD) && FUNDING_ATTR_VALUE.equalsIgnoreCase(this.currentColumnName)) { if (currentValue!=null && currentValue.length()>0) { if (fundingText.length()>0) { fundingText.append('\n'); } fundingText.append(currentValue.toString()); } currentValue = new StringBuilder(); enteredFundingColumn = false; } } @Override public void endDocument() throws SAXException { parents.clear(); parents = null; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (this.enteredFundingColumn) { this.currentValue.append(ch, start, length); } } boolean isWithinElement(String qName, String expectedElement, String expectedParent) { return qName.equals(expectedElement) && (expectedParent==null || !this.parents.isEmpty() && expectedParent.equals(this.parents.peek())); } public CharSequence getFundingText() { if (fundingText.length()>0) { return fundingText.toString(); } else { return null; } } }