/*
 * Copyright 2011 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.occurrence.parsing.response_file;

import org.gbif.api.vocabulary.OccurrenceSchemaType;
import org.gbif.occurrence.constants.ResponseElementEnum;
import org.gbif.occurrence.parsing.RawXmlOccurrence;

import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Collects the records of a search response as {@link RawXmlOccurrence} objects. Each record is
 * handed in as a DOM node, serialized to a namespace-stripped xml string, and stored together with
 * the schema type and the institution code, collection code and catalog number found in the node.
 * The schema type is detected from the first serialized record unless it was set explicitly.
 */
public class ParsedSearchResponse {

  private static final Logger LOG = LoggerFactory.getLogger(ParsedSearchResponse.class);

  /** Matches a whitespace-preceded xmlns declaration; compiled once because it is applied to every record. */
  private static final Pattern XMLNS_DECLARATION = Pattern.compile("[\\s]xmlns[:[a-zA-Z0-9]*]*=\".*?\"");

  /** Literal namespace prefix that is removed from the serialized xml. */
  private static final String NS0_PREFIX = "ns0:";

  private List<RawXmlOccurrence> records;
  private OccurrenceSchemaType schemaType;
  private final Transformer transformer;
  private final ResponseSchemaDetector schemaDetector;
  private Map<ResponseElementEnum, String> responseElements;
  private Node abcd1Header;
  private final DocumentBuilder docBuilder;

  /**
   * Creates an empty response with a transformer configured to emit UTF-8 xml without an xml
   * declaration.
   *
   * @throws TransformerException if the transformer can't be created
   * @throws ParserConfigurationException if the document builder can't be created
   */
  public ParsedSearchResponse() throws TransformerException, ParserConfigurationException {
    records = new ArrayList<RawXmlOccurrence>();
    transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    transformer.setOutputProperty(OutputKeys.METHOD, "xml");
    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
    schemaDetector = new ResponseSchemaDetector();
    docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
  }

  /**
   * Serializes the given node and, if the schema of the response is known (detected or set through
   * {@link #setSchemaType(OccurrenceSchemaType)}), adds it to the list of records. When a header has
   * been set through {@link #setAbcd1Header(Node)}, the record and the header are first moved into a
   * new document and wrapped as {@code <occurrence><DataSource>header</DataSource>record</occurrence>}.
   * Records are skipped when no schema can be determined.
   *
   * @param rawRecord the DOM node holding a single record
   */
  public void addRecordAsXml(Node rawRecord) {
    Node workingNode = rawRecord;
    if (abcd1Header != null) {
      Document doc = docBuilder.newDocument();
      Node record = moveInto(doc, rawRecord);
      Node header = moveInto(doc, abcd1Header);
      Element root = doc.createElement("occurrence");
      Element dataSource = doc.createElement("DataSource");
      dataSource.appendChild(header);
      root.appendChild(dataSource);
      root.appendChild(record);
      workingNode = root;
    }

    String xml = nodeToString(workingNode);
    LOG.debug("Serialized record: [{}]", xml);
    checkSchema(xml);

    if (responseElements == null) {
      // without the element names for the schema the record codes can't be extracted
      LOG.debug("Skipping record because the schema of the response could not be determined");
      return;
    }

    RawXmlOccurrence record = new RawXmlOccurrence();
    record.setSchemaType(schemaType);
    populateRecordCodes(workingNode, record);
    record.setXml(xml);
    records.add(record);
  }

  /**
   * Transfers the node to the given document. {@link Document#adoptNode(Node)} is tried first; it
   * returns null when adoption is not possible (e.g. the node comes from a different DOM
   * implementation), in which case a deep copy is imported instead so that the node can still be
   * appended to elements of the target document.
   *
   * @param target the document that will own the node
   * @param node the node to transfer
   *
   * @return the node to append within the target document
   */
  private static Node moveInto(Document target, Node node) {
    Node adopted = target.adoptNode(node);
    return adopted != null ? adopted : target.importNode(node, true);
  }

  /**
   * Makes sure the schema type and the matching response element names are available. The schema is
   * detected from the given xml if none is known yet. If the schema type is already known but its
   * element names have not been loaded (the type was set through the setter), they are loaded here.
   *
   * @param xml the serialized record used for schema detection
   */
  private void checkSchema(String xml) {
    if (schemaType == null) {
      schemaType = schemaDetector.detectSchema(xml);
      if (schemaType != null) {
        LOG.debug("Setting schema to [{}]", schemaType);
        responseElements = schemaDetector.getResponseElements(schemaType);
      }
    } else if (responseElements == null) {
      responseElements = schemaDetector.getResponseElements(schemaType);
    }
  }

  /**
   * Recursively traverse the node, returning the text value of the first node that has a name
   * matching targetElement. If node isn't found, returns null. Saves constructing elaborate,
   * namespace aware machinery for quick traverse of typically small data (a single occurrence record).
   *
   * @param node the parsed xml to traverse
   * @param targetElement the name of the node to find, may be null in which case nothing is searched
   *
   * @return the text value of the target node, or null if there is no such node
   */
  private String fakeXPath(Node node, String targetElement) {
    if (targetElement == null) {
      // no node name equals null, so the traversal could never find anything
      return null;
    }
    if (node.getNodeName().equals(targetElement)) {
      return node.getTextContent();
    }
    if (node.hasChildNodes()) {
      NodeList children = node.getChildNodes();
      for (int i = 0; i < children.getLength(); i++) {
        String result = fakeXPath(children.item(i), targetElement);
        if (result != null) {
          return result;
        }
      }
    }
    return null;
  }

  /**
   * @return the header node that is wrapped together with every added record, or null if none is set
   */
  public Node getAbcd1Header() {
    return abcd1Header;
  }

  /**
   * @return the records collected so far
   */
  public List<RawXmlOccurrence> getRecords() {
    return records;
  }

  /**
   * @return the schema type that was detected or set, or null if it is not known yet
   */
  public OccurrenceSchemaType getSchemaType() {
    return schemaType;
  }

  /**
   * Serializes the node to a string and strips xmlns declarations and the "ns0:" prefix from it.
   * A transformation failure is logged and whatever was written up to that point is returned.
   *
   * @param node the node to serialize
   *
   * @return the serialized xml without namespace declarations
   */
  private String nodeToString(Node node) {
    StringWriter sw = new StringWriter();
    try {
      transformer.transform(new DOMSource(node), new StreamResult(sw));
    } catch (TransformerException e) {
      LOG.warn("Failed to transform node to string", e);
    }

    // now strip out all namespacing
    String result = XMLNS_DECLARATION.matcher(sw.toString()).replaceAll("");
    return result.replace(NS0_PREFIX, "");
  }

  /**
   * Copies institution code, collection code and catalog number from the node to the record, using
   * the element names of the current schema.
   *
   * @param node the parsed record
   * @param record the record to populate
   */
  private void populateRecordCodes(Node node, RawXmlOccurrence record) {
    // the null checks are to guard against overwriting something already set on the record
    String instCode = fakeXPath(node, responseElements.get(ResponseElementEnum.INSTITUTION_CODE));
    if (instCode != null) {
      record.setInstitutionCode(instCode);
    }

    String collectionCode = fakeXPath(node, responseElements.get(ResponseElementEnum.COLLECTION_CODE));
    if (collectionCode != null) {
      record.setCollectionCode(collectionCode);
    }

    String catNum = fakeXPath(node, responseElements.get(ResponseElementEnum.CATALOG_NUMBER));
    if (catNum != null) {
      record.setCatalogNumber(catNum);
    }
  }

  /**
   * @param abcd1Header the header node to wrap together with every record added afterwards
   */
  public void setAbcd1Header(Node abcd1Header) {
    this.abcd1Header = abcd1Header;
  }

  /**
   * @param records the list that replaces the records collected so far
   */
  public void setRecords(List<RawXmlOccurrence> records) {
    this.records = records;
  }

  /**
   * Sets the schema type explicitly. The cached element names are discarded so that they are
   * reloaded for the new schema type when the next record is added; setting null re-enables schema
   * detection.
   *
   * @param schemaType the schema type of the response
   */
  public void setSchemaType(OccurrenceSchemaType schemaType) {
    this.schemaType = schemaType;
    // element names belong to a specific schema type and must not outlive a change of it
    this.responseElements = null;
  }
}