package eu.dnetlib.iis.wf.ingest.pmc.metadata;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.JatsXmlConstants.*;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.TagHierarchyUtils.*;
import java.util.HashMap;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.google.common.collect.Maps;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ExtractedDocumentMetadata;
/**
 * SAX handler for JATS XML documents.
 * <p>
 * Stores the entity type in the supplied {@link ExtractedDocumentMetadata.Builder}
 * and forwards every SAX event to an {@link XmlSwitcherHandler} created with
 * handlers registered under {@code ELEM_JOURNAL_META}, {@code ELEM_ARTICLE_META}
 * and {@code ELEM_REF_LIST}.
 *
 * @author mhorst
 *
 */
public class JatsXmlHandler extends DefaultHandler {

    /** Entity type stored when the inspected element has no article-type attribute. */
    public static final String ENTITY_TYPE_UNKNOWN = "unknown";

    /** True until the first element of the current document has been seen. */
    private boolean rootElement = true;

    private final ExtractedDocumentMetadata.Builder builder;

    private final XmlSwitcherHandler xmlSwitcherHandler;

    //------------------------ CONSTRUCTORS --------------------------

    /**
     * Creates a handler that writes into the given metadata builder.
     * When the builder has no external identifiers yet, an empty map is set on it
     * before the delegate handlers are created.
     *
     * @param builder metadata builder, also passed to each delegate handler
     */
    public JatsXmlHandler(ExtractedDocumentMetadata.Builder builder) {
        super();
        this.builder = builder;
        if (!builder.hasExternalIdentifiers()) {
            builder.setExternalIdentifiers(new HashMap<CharSequence, CharSequence>());
        }
        this.xmlSwitcherHandler = new XmlSwitcherHandler(buildElementHandlers(builder));
    }

    //------------------------ LOGIC --------------------------

    /**
     * Resets the root element marker, so the first element of the document is
     * inspected for the article type, then notifies the switcher handler.
     */
    @Override
    public void startDocument() throws SAXException {
        this.rootElement = true;
        xmlSwitcherHandler.startDocument();
    }

    /**
     * Sets the entity type when the element is the first one of the document or
     * matches {@code ELEM_ARTICLE}, then forwards the event to the switcher handler.
     * A later matching element overwrites the entity type set by an earlier one.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        boolean inspectForArticleType = rootElement || isElement(qName, ELEM_ARTICLE);
        if (inspectForArticleType) {
            rootElement = false;
            // article-type comes from the root element or from an article element nested in oai record
            String declaredType = attributes.getValue(ATTR_ARTICLE_TYPE);
            builder.setEntityType(declaredType != null ? declaredType : ENTITY_TYPE_UNKNOWN);
        }
        xmlSwitcherHandler.startElement(uri, localName, qName, attributes);
    }

    /** Forwards the character data to the switcher handler unchanged. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        xmlSwitcherHandler.characters(ch, start, length);
    }

    /** Forwards the end-of-element event to the switcher handler unchanged. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        xmlSwitcherHandler.endElement(uri, localName, qName);
    }

    /** Forwards the end-of-document event to the switcher handler. */
    @Override
    public void endDocument() throws SAXException {
        xmlSwitcherHandler.endDocument();
    }

    //------------------------ PRIVATE --------------------------

    /**
     * Builds the element name to handler mapping given to the switcher handler.
     *
     * @param builder metadata builder passed to each created handler
     * @return mutable map with one handler per supported element name
     */
    private static Map<String, ProcessingFinishedAwareXmlHandler> buildElementHandlers(
            ExtractedDocumentMetadata.Builder builder) {
        Map<String, ProcessingFinishedAwareXmlHandler> handlersByElement = Maps.newHashMap();
        handlersByElement.put(ELEM_JOURNAL_META, new JournalMetaXmlHandler(builder));
        handlersByElement.put(ELEM_ARTICLE_META, new ArticleMetaXmlHandler(builder));
        handlersByElement.put(ELEM_REF_LIST, new RefListXmlHandler(builder));
        return handlersByElement;
    }
}