/*
 * Copyright 2007 T-Rank AS
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package no.trank.openpipe.solr.producer.xml;

import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import no.trank.openpipe.api.document.BaseAnnotatedField;
import no.trank.openpipe.api.document.Document;
import no.trank.openpipe.api.document.PreResolvedAnnotation;

/**
 * Exposes the <code>doc</code> elements of an XML stream as an {@link Iterable} of
 * {@link Document}s, parsing each document lazily as the iteration advances.
 * <p>
 * The look-ahead document, the detected operation and the failure flag are kept on
 * this instance, so every iterator obtained from {@link #iterator()} shares the same
 * underlying reader and the same state.
 *
 * @version $Revision$
 */
public class XmlStreamDocumentReader implements Iterable<Document> {
   private static final Logger log = LoggerFactory.getLogger(XmlStreamDocumentReader.class);
   private static final String TAG_DOC = "doc";
   private static final String TAG_FIELD = "field";
   private static final String TAG_FIELD_NAME = "name";
   private static final String TAG_BOOST = "boost";

   private final XMLStreamReader reader;
   /** Local name of the first start tag encountered in the stream (add, delete). */
   private String operation;
   /** Document parsed by <code>hasNext()</code> and not yet handed out by <code>next()</code>. */
   private Document nextDocument;
   /** Set once an {@link XMLStreamException} has been caught while reading. */
   private boolean failure = false;

   /**
    * Creates a document reader on top of the given xml stream reader.
    *
    * @param reader the xml stream to pull documents from
    */
   public XmlStreamDocumentReader(XMLStreamReader reader) {
      this.reader = reader;
   }

   /**
    * Reads from the xml stream, ie the request input stream, and constructs
    * <code>Document</code> instances on demand rather than keeping an internal
    * list, which could potentially be very large.
    * The operation is always set to the first start tag in the xml document.
    *
    * @return a document Iterator
    */
   @Override
   public Iterator<Document> iterator() {
      return new DocumentIterator();
   }

   /**
    * Tells whether reading the xml stream has failed.
    *
    * @return <code>true</code> if an {@link XMLStreamException} was caught during iteration
    */
   public boolean isFailure() {
      return failure;
   }

   /**
    * Pull-style iterator that works directly on the state of the enclosing reader.
    */
   private final class DocumentIterator implements Iterator<Document> {

      /**
       * Advances the xml stream until a document has been parsed, the stream is
       * exhausted, or a failure has been recorded.
       *
       * @return <code>true</code> if a document is waiting to be returned by {@link #next()}
       */
      @Override
      public boolean hasNext() {
         try {
            while (!failure && nextDocument == null && reader.hasNext()) {
               if (reader.next() != XMLStreamConstants.START_ELEMENT) {
                  continue;
               }
               final String tagName = reader.getLocalName();
               if (operation == null) {
                  // The very first start tag names the operation; it is never parsed as a document.
                  operation = tagName;
               } else if (TAG_DOC.equals(tagName)) {
                  nextDocument = parseDocument();
                  nextDocument.setOperation(operation);
               }
            }
         } catch (XMLStreamException e) {
            log.warn("Error reading posted Solr xml", e);
            failure = true;
            return false;
         }
         return nextDocument != null;
      }

      /**
       * Hands out the pending document and clears the look-ahead slot.
       *
       * @return the next document of the stream
       * @throws NoSuchElementException if no further document is available
       */
      @Override
      public Document next() {
         if (!hasNext()) {
            throw new NoSuchElementException();
         }
         final Document pending = nextDocument;
         nextDocument = null;
         return pending;
      }

      /**
       * Not supported.
       *
       * @throws UnsupportedOperationException always
       */
      @Override
      public void remove() {
         throw new UnsupportedOperationException();
      }

      /**
       * Builds a document from the <code>doc</code> element the reader is currently
       * positioned on, consuming events up to and including its end tag.
       *
       * @return the parsed document
       * @throws XMLStreamException if the stream ends before the closing <code>doc</code>
       *         tag, or if the underlying reader fails
       */
      private Document parseDocument() throws XMLStreamException {
         final Document document = new Document();
         // NOTE(review): the document-level boost is stored even when the attribute is
         // absent (value would then be null), whereas the field-level boost is
         // null-checked - confirm this is intended.
         document.addFieldValue(TAG_BOOST, reader.getAttributeValue(null, TAG_BOOST));
         while (reader.hasNext()) {
            switch (reader.next()) {
               case XMLStreamConstants.START_ELEMENT:
                  if (TAG_FIELD.equals(reader.getLocalName())) {
                     appendField(document);
                  }
                  break;
               case XMLStreamConstants.END_ELEMENT:
                  if (TAG_DOC.equals(reader.getLocalName())) {
                     return document;
                  }
                  break;
               default:
                  break;
            }
         }
         throw new XMLStreamException("Unclosed <doc/> element");
      }

      /**
       * Reads the <code>field</code> element the reader is positioned on and adds it to
       * the given document, attaching the boost as an annotation when one is present.
       *
       * @param document the document receiving the field
       * @throws XMLStreamException if the field's text content cannot be read
       */
      private void appendField(Document document) throws XMLStreamException {
         // Attributes must be fetched before getElementText(), which moves the reader
         // off the start tag.
         final String name = reader.getAttributeValue(null, TAG_FIELD_NAME);
         final String fieldBoost = reader.getAttributeValue(null, TAG_BOOST);
         final BaseAnnotatedField annotated = new BaseAnnotatedField(reader.getElementText());
         if (fieldBoost != null) {
            annotated.add(TAG_BOOST, Arrays.asList(new PreResolvedAnnotation(fieldBoost)));
         }
         document.addField(name, annotated);
      }
   }
}