/*
 * Copyright (c)2006-2010 Mark Logic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * The use of the Apache License does not indicate that this project is
 * affiliated with the Apache Software Foundation.
 */
package com.marklogic.recordloader;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.MalformedInputException;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.logging.Logger;
import java.util.zip.ZipException;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import com.marklogic.ps.RecordLoader;
import com.marklogic.ps.Utilities;
import com.marklogic.ps.timing.TimedEvent;

/**
 * Loader that walks an XML input with an XPP3 pull parser, and hands every
 * record-level element that it finds to a new {@link Producer}. The
 * producer's output is then either streamed or read fully into the current
 * content object, and inserted.
 *
 * @author Michael Blakeley, michael.blakeley@marklogic.com
 *
 */
public class Loader extends TranscodingLoader {

    /**
     * Classpath resource that is looked up to locate the XPP3 jar.
     */
    private static final String XPP3_RESOURCE_NAME = "META-INF/services/org.xmlpull.v1.XmlPullParserFactory";

    private XmlPullParser xpp = null;

    // local cache for hot-loop configuration info
    private String idName;

    private String recordName;

    private String recordNamespace;

    // actual fields
    private Producer producer;

    private ProducerFactory producerFactory;

    private boolean foundRoot = false;

    private boolean useDocumentRoot = false;

    /**
     * Sets up the pull parser over the decoded input, caches the record
     * name, record namespace and document-root flag from the configuration,
     * and then processes every record in the input.
     *
     * Runtime exceptions are re-thrown untouched. For any other exception
     * the current file, uri and record are logged first, and the exception
     * is re-thrown as a LoaderException (wrapped, if it is not one
     * already). The input is always cleaned up afterwards.
     *
     * @throws LoaderException
     *             if record processing fails with a non-runtime exception
     *
     * @see com.marklogic.recordloader.AbstractLoader#process()
     */
    public void process() throws LoaderException {
        super.process();

        logger.fine("auto=" + config.isUseAutomaticIds());
        logger.fine("filename=" + config.isUseFilenameIds());

        try {
            xpp = config.getXppFactory().newPullParser();
            xpp.setInput(new InputStreamReader(input, decoder));
            // TODO feature isn't supported by xpp3 - look at xpp5?
            // xpp.setFeature(XmlPullParser.FEATURE_DETECT_ENCODING, true);
            // TODO feature isn't supported by xpp3 - look at xpp5?
            // xpp.setFeature(XmlPullParser.FEATURE_PROCESS_DOCDECL, true);
            xpp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
        } catch (XmlPullParserException e) {
            throw new FatalException(e);
        }

        producerFactory = new ProducerFactory(config, xpp);

        // cache certain info locally
        recordName = config.getRecordName();
        recordNamespace = config.getRecordNamespace();
        useDocumentRoot = config.isUseDocumentRoot();

        try {
            processRecords();
        } catch (Exception e) {
            // runtime exceptions (including fatal ones) pass straight through
            if (e instanceof RuntimeException) {
                throw (RuntimeException) e;
            }

            // log as much context as we have, before re-throwing
            if (null != inputFile) {
                logger.info("current file: \"" + inputFilePath + "\"");
            }
            if (null != currentFileBasename) {
                logger.info("current file basename: \""
                        + currentFileBasename + "\"");
            }
            logger.info("current uri: \"" + currentUri + "\"");
            if (producer != null) {
                logger.info("current record: " + producer + ", bytes = "
                        + producer.getByteBufferDescription());
            }
            if (e instanceof MalformedInputException) {
                // invalid character sequence, probably
                logger.warning("input could not be decoded: try setting "
                        + Configuration.INPUT_ENCODING_KEY + " (or set "
                        + Configuration.INPUT_MALFORMED_ACTION_KEY + " to "
                        + Configuration.INPUT_MALFORMED_ACTION_IGNORE
                        + " or "
                        + Configuration.INPUT_MALFORMED_ACTION_REPLACE
                        + ").");
            }
            if (e instanceof LoaderException) {
                throw (LoaderException) e;
            }
            throw new LoaderException(e);
        } finally {
            // NOTE(review): assumes event is non-null here - confirm
            // against the superclass
            cleanupInput(event.isError());
        }
    }

    /**
     * Pulls tokens from the parser until the end of the document, passing
     * every start tag to {@link #processStartElement()}.
     *
     * When an exception is caught, the available context is logged. If the
     * configuration says that errors are fatal, the exception is re-thrown
     * inside a FatalException. Otherwise the current event is stopped in
     * the error state and added to the monitor, any open content is
     * closed, and the loop continues - unless filename ids are in use or
     * the exception is an EOFException, in which case the loop ends.
     *
     * After the loop, leftover content is reported as an "end of document
     * before end of current record" error, which is fatal or merely logged
     * depending on the configuration.
     */
    private void processRecords() {
        boolean moreTokens = true;

        while (moreTokens) {
            try {
                switch (xpp.nextToken()) {
                // We *only* care about finding records,
                // then passing them off a new producer
                case XmlPullParser.START_TAG:
                    processStartElement();
                    break;
                case XmlPullParser.END_DOCUMENT:
                    moreTokens = false;
                    break;
                default:
                    break;
                }
            } catch (Exception e) {
                if (currentFileBasename != null) {
                    logger.warning("error in "
                            + currentFileBasename
                            + (currentRecordPath == null ? ""
                                    : (" at " + currentRecordPath)));
                }
                if (null != currentUri) {
                    logger.warning("current URI = " + currentUri);
                }
                if (producer != null) {
                    logger.warning("producer = "
                            + producer.getByteBufferDescription());
                    logger.warning("buffer = " + producer.getBuffer());
                }
                if (xpp != null) {
                    logger.warning("pos = " + xpp.getPositionDescription());
                    // this can throw StringIndexOutOfBoundsException
                    try {
                        logger.warning("text = " + xpp.getText());
                    } catch (StringIndexOutOfBoundsException se) {
                        logger.warning("text is unavailable");
                    }
                }

                // get to the init cause, if there is one
                logger.logException("exception", Utilities.getCause(e));

                if (!config.isFatalErrors()) {
                    // keep going
                    logger.logException("non-fatal: skipping", e);

                    // stop and set error state
                    // NOTE(review): assumes event is non-null, even if the
                    // error precedes the first record - confirm
                    event.stop(true);
                    monitor.add(currentUri, event);

                    if (content != null) {
                        content.close();
                    }

                    // presumably there is nothing more to find in this
                    // input when ids come from filenames - TODO confirm
                    if (config.isUseFilenameIds()) {
                        moreTokens = false;
                    }

                    if (e instanceof EOFException) {
                        // there was an error at the end of the file,
                        // so exit the loop
                        moreTokens = false;
                    }
                    continue;
                }

                // fatal
                logger.warning("re-throwing fatal error");
                throw new FatalException(e);
            }
        }

        if (null != content) {
            // built for its message: thrown (wrapped) or logged below
            XmlPullParserException e = new XmlPullParserException(
                    "end of document before end of current record!\n"
                            + "recordName = " + recordName
                            + ", recordNamespace = " + recordNamespace
                            + " at " + xpp.getPositionDescription() + "\n"
                            + currentUri);
            if (config.isFatalErrors()) {
                throw new FatalException(e);
            }
            logger.logException("non-fatal", e);
        }
    }

    /**
     * Handles the start tag that the parser is currently positioned on.
     *
     * The first start tag seen is the document root: unless the whole
     * document is being used as the record, it is only noted. If no record
     * name is known yet, the current element becomes the record element,
     * and the choice is published through the shared configuration. When
     * the element starts a record, a new producer is created for it, the
     * uri is composed from the producer's id, and the content is inserted
     * unless the record is to be skipped.
     *
     * @throws LoaderException
     *             if the producer does not supply an id
     * @throws XmlPullParserException
     * @throws IOException
     */
    private void processStartElement() throws LoaderException,
            XmlPullParserException, IOException {
        String name = xpp.getName();
        String namespace = xpp.getNamespace();
        logger.finest(name + " in '" + namespace + "'");

        if (!foundRoot) {
            // this must be the document root
            logger.fine("found document root: '" + name + "' in '"
                    + namespace + "'");
            foundRoot = true;
            // if we aren't swallowing the whole doc,
            // then there's nothing more to do here.
            if (!useDocumentRoot) {
                return;
            }
        }

        if (null == recordName) {
            // the configuration is shared, so check again under its lock
            synchronized (config) {
                if (null == config.getRecordName()) {
                    // this must be the record-level element
                    recordName = name;
                    recordNamespace = namespace;
                    config.setRecordName(recordName);
                    config.setRecordNamespace(namespace);
                    logger.fine("autodetected record element: '"
                            + recordName + "' in '" + recordNamespace
                            + "'");
                } else {
                    // another thread beat us to it
                    recordName = config.getRecordName();
                    recordNamespace = config.getRecordNamespace();
                }
            }
        }

        if (isRecordStart(name, namespace)) {
            // start of a new record
            logger.fine("found record element: '" + recordName + "' in '"
                    + recordNamespace + "'");
            event = new TimedEvent();

            // hand off the work to a new producer
            producer = producerFactory.newProducer();
            String id = producer.getCurrentId();
            logger.fine("found id " + id);

            if (null == id) {
                throw new LoaderException(
                        "producer exited without currentId");
            }

            // must create content object before checking its uri
            currentUri = composeUri(id);
            content = contentFactory.newContent(currentUri);
            producer.setSkippingRecord(checkIdAndUri(id));

            if (!producer.isSkippingRecord()) {
                // are we streaming this content?
                if (config.isInputStreaming()) {
                    content.setInputStream(producer);
                } else {
                    content.setBytes(Utilities.read(producer));
                }
                insert();
            }

            updateMonitor(producer.getBytesRead());
            cleanupRecord();
            return;
        }

        // handle unknown element
        // NOTE(review): when isIgnoreUnknown() is false, an unknown
        // element is passed over without any message - confirm that this
        // is intended.
        if (config.isIgnoreUnknown()) {
            logger.warning("skipping unknown non-record element: " + name);
            return;
        }
    }

    /**
     * Decides whether the named element starts a record.
     *
     * @param name
     *            local name of the element
     * @param namespace
     *            namespace of the element
     * @return true if the document root is used as the record, or if both
     *         name and namespace equal the cached record name and record
     *         namespace
     */
    private boolean isRecordStart(String name, String namespace) {
        return useDocumentRoot
                || (name.equals(recordName) && namespace
                        .equals(recordNamespace));
    }

    /**
     * Runs the inherited record cleanup, then drops the current producer.
     */
    @Override
    protected void cleanupRecord() {
        super.cleanupRecord();
        producer = null;
    }

    /**
     * Applies the configuration, and verifies that it names the id node.
     *
     * @param _config
     *            the configuration to use
     * @throws LoaderException
     */
    @Override
    public void setConfiguration(Configuration _config)
            throws LoaderException {
        super.setConfiguration(_config);

        // check required configuration
        idName = config.getIdNodeName();
        if (idName == null) {
            throw new FatalException("Missing required property: "
                    + Configuration.ID_NAME_KEY);
        }
    }

    /**
     * Verifies that XPP3 is on the classpath, and that it is version 1.1.4
     * or later. Any problem is reported as a FatalException, except for a
     * missing class loader, which is a NullPointerException.
     *
     * @param _logger
     *            receives the detected XPP3 version
     */
    public static void checkEnvironment(Logger _logger) {
        // check the XPP3 version
        ClassLoader loader = RecordLoader.getClassLoader();
        if (null == loader) {
            throw new NullPointerException("null class loader");
        }

        // the xppUrl should look something like...
        // jar:file:/foo/xpp3-1.1.4c.jar!/META-INF/services/org.xmlpull.v1.XmlPullParserFactory
        URL xppUrl = loader.getResource(XPP3_RESOURCE_NAME);
        if (null == xppUrl) {
            throw new FatalException(
                    "Please configure your classpath to include"
                            + " XPP3 (version 1.1.4 or later).");
        }

        checkXppVersion(getXppVersion(_logger, xppUrl));
    }

    /**
     * Locates the jar file behind the XPP3 resource url, and reads the
     * XPP3 version from it. The jar file is closed again before returning.
     *
     * @param _logger
     *            receives the detected XPP3 version
     * @param xppUrl
     *            url of the XPP3 resource: must be a jar url over a file
     *            url
     * @return the components of the XPP3 version
     */
    private static String[] getXppVersion(Logger _logger, URL xppUrl) {
        String proto = xppUrl.getProtocol();
        // TODO handle file protocol directly, too?
        if (!"jar".equals(proto)) {
            throw new FatalException("xppUrl protocol: " + proto);
        }

        String file = null;
        String jarPath = null;
        try {
            // the file portion should look something like...
            // file:/foo/xpp3-1.1.4c.jar!/META-INF/services/org.xmlpull.v1.XmlPullParserFactory
            // file=/C:/Program%20Files/MarkLogic/Demo/lib/xpp3-1.1.4c.jar!/META-INF/services/org.xmlpull.v1.XmlPullParserFactory
            file = xppUrl.getFile();
            URL fileUrl = new URL(file);
            proto = fileUrl.getProtocol();
            if (!"file".equals(proto)) {
                throw new FatalException("fileUrl protocol: " + proto);
            }
            file = fileUrl.getFile();
            // allow for "!/"
            jarPath = URLDecoder.decode(file.substring(0, file.length()
                    - XPP3_RESOURCE_NAME.length() - 2), "UTF-8");

            JarFile jar = new JarFile(jarPath);
            try {
                return getXppVersion(_logger, jar);
            } finally {
                // always release the file handle: the jar is only needed
                // for the version lookup
                try {
                    jar.close();
                } catch (IOException closeError) {
                    // the lookup has already succeeded or failed by now,
                    // so a failure to close must not change the outcome
                    if (null != _logger) {
                        _logger.warning("could not close " + jarPath
                                + ": " + closeError);
                    }
                }
            }
        } catch (LoaderException e) {
            throw new FatalException(e);
        } catch (ZipException e) {
            throw new FatalException("file=" + file + "; jar=" + jarPath,
                    e);
        } catch (IOException e) {
            throw new FatalException(file, e);
        }
    }

    /**
     * Searches the jar for an entry named like XPP3_1.1.4c_VERSION, and
     * splits the part between the prefix and the suffix on dots.
     *
     * @param _logger
     *            receives the detected XPP3 version
     * @param jar
     *            the XPP3 jar file: the caller is responsible for closing
     *            it
     * @return the components of the XPP3 version
     * @throws LoaderException
     *             if the jar has no version entry
     */
    private static String[] getXppVersion(Logger _logger, JarFile jar)
            throws LoaderException {
        String versionSuffix = "_VERSION";
        String versionPrefix = "XPP3_";
        String name;
        Enumeration<JarEntry> entries = jar.entries();
        while (entries.hasMoreElements()) {
            name = entries.nextElement().getName();
            if (name.startsWith(versionPrefix)
                    && name.endsWith(versionSuffix)) {
                name = name.substring(versionPrefix.length(), name
                        .length()
                        - versionSuffix.length());
                _logger.info("XPP3 version = " + name);
                return name.split("\\.");
            }
        }
        throw new LoaderException("no XPP3 version information in "
                + jar.getName());
    }

    /**
     * Verifies that the version is 1.1.4 or later.
     *
     * @param version
     *            the components of the XPP3 version: major, minor and
     *            patch, where the patch may carry a non-numeric suffix
     *            such as the "c" in "4c"
     */
    private static void checkXppVersion(String[] version) {
        if (null == version) {
            throw new FatalException(
                    "No version info found - XPP3 is probably too old.");
        }
        // fail with a clear message, rather than an index error
        if (version.length < 3) {
            throw new FatalException(
                    "The XPP3 version has too few components: "
                            + version.length);
        }

        // check major, minor, patch for 1+, 1+, and 4+
        int major = parseVersionComponent("major", version[0]);
        if (major < 1) {
            throw new FatalException(
                    "The XPP3 major version is too old: " + major);
        }

        int minor = parseVersionComponent("minor", version[1]);
        if (1 == major && minor < 1) {
            throw new FatalException(
                    "The XPP3 minor version is too old: " + minor);
        }

        // keep only the leading digits, so that "4c" is 4, and a value
        // like "3c9" is 3 rather than 39
        int patch = parseVersionComponent("patch", version[2]
                .replaceFirst("^(\\d+)\\D.*$", "$1"));
        if (1 == major && 1 == minor && patch < 4) {
            throw new FatalException(
                    "The XPP3 patch version is too old: " + version[2]);
        }
    }

    /**
     * Parses one numeric component of the XPP3 version.
     *
     * @param label
     *            which component this is, for the error message
     * @param text
     *            the digits to parse
     * @return the numeric value of the component
     */
    private static int parseVersionComponent(String label, String text) {
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            throw new FatalException("The XPP3 " + label
                    + " version is not a number: " + text, e);
        }
    }

}