/** * Copyright (c) 2008-2010 Mark Logic Corporation. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * The use of the Apache License does not indicate that this project is * affiliated with the Apache Software Foundation. */ package com.marklogic.recordloader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.CharsetDecoder; import java.util.logging.Logger; import com.marklogic.ps.SimpleLogger; import com.marklogic.ps.Utilities; import com.marklogic.ps.timing.TimedEvent; /** * @author Michael Blakeley, michael.blakeley@marklogic.com * */ public abstract class AbstractLoader implements LoaderInterface { protected CharsetDecoder decoder; protected SimpleLogger logger; protected TimedEvent event; protected Configuration config; protected Monitor monitor; protected String currentRecordPath; protected String currentFileBasename; protected String currentUri; protected ContentInterface content; protected ContentFactory contentFactory; protected String startId; protected String entryPath; protected String fileBasename; protected InputStream input; protected File inputFile; protected String inputFilePath; /** * @param _logger * * The abstract implementation does nothing. Subclasses may * overload as needed. If problems are encountered, the subclass * should throw a FatalException or another run-time exception. */ public static void checkEnvironment(Logger _logger) { // do nothing } /* * (non-Javadoc) * * @see com.marklogic.recordloader.AbstractLoader#call() */ public Object call() throws Exception { try { if (null != inputFile) { // time to instantiate the reader logger.fine("processing " + inputFilePath); setInput(new FileInputStream(inputFile), decoder); } event = new TimedEvent(); process(); // preempt the finally block cleanup(); return null; } catch (RuntimeException e) { // for NullPointerException, etc monitor.halt(e); return null; } catch (Exception e) { logger.warning("Exception " + e.toString() + " while processing " + (null != currentUri ? currentUri : currentRecordPath)); throw e; } catch (Throwable t) { // for OutOfMemoryError, etc monitor.halt(t); return null; } finally { cleanup(); } } /** * @throws IOException */ private void cleanup() throws IOException { // TODO test for entryPath isn't useful, since it's always set if (null != fileBasename && null != entryPath) { // clean up via monitor monitor.cleanup(fileBasename); fileBasename = null; entryPath = null; } if (null != input) { input.close(); input = null; } if (null != inputFile) { inputFile = null; } if (null != contentFactory) { contentFactory.close(); contentFactory = null; } } /* * (non-Javadoc) * * @see com.marklogic.recordloader.LoaderInterface#process() * * subclasses should override this method to extend it */ @SuppressWarnings("unused") public void process() throws LoaderException { // safety check if (null == input) { throw new NullPointerException("caller must set input"); } // cache some info locally startId = config.getStartId(); } /* * (non-Javadoc) * * @see * com.marklogic.recordloader.LoaderInterface#setInput(java.io.InputStream) */ @SuppressWarnings("unused") public void setInput(InputStream _is, CharsetDecoder _decoder) throws LoaderException { if (null == _is) { throw new NullPointerException("null input stream"); } if (null == _decoder) { throw new NullPointerException("null charset decoder"); } input = _is; decoder = _decoder; } /** * @param _isError * @throws LoaderException * */ protected void cleanupInput(boolean _isError) throws LoaderException { cleanupRecord(); if (null == input) { return; } try { input.close(); } catch (IOException e) { // nothing we can do about it logger.logException(e); } if (null == inputFile) { return; } if (!config.isDeleteInputFile()) { return; } // if there was a non-fatal error, delete it anyway if (_isError && config.isFatalErrors()) { return; } // remove the input file try { String path = inputFile.getCanonicalPath(); logger.info("deleting " + path); if (!inputFile.delete()) { throw new LoaderException("delete failed for " + path); } } catch (IOException e) { throw new LoaderException(e); } } /* * (non-Javadoc) * * @see com.marklogic.recordloader.LoaderInterface#setInput(java.io.File) */ public void setInput(File _file, CharsetDecoder _decoder) throws LoaderException { if (null == _file) { throw new NullPointerException("null input file"); } if (null == _decoder) { throw new NullPointerException("null charset decoder"); } // defer opening it until call() inputFile = _file; try { inputFilePath = inputFile.getCanonicalPath(); } catch (IOException e) { throw new LoaderException(e); } decoder = _decoder; } /* * (non-Javadoc) * * @see * com.marklogic.recordloader.LoaderInterface#setFileBasename(java.lang. * String) */ public void setFileBasename(String _name) throws LoaderException { fileBasename = _name; if (null == _name) { return; } currentFileBasename = Utilities.stripExtension(_name); logger.fine("using fileBasename = " + _name); // don't tell the contentFactory unless config says it's ok if (config.isUseFilenameCollection()) { contentFactory.setFileBasename(_name); } } /* * (non-Javadoc) * * @see * com.marklogic.recordloader.LoaderInterface#setRecordPath(java.lang.String * ) */ public void setRecordPath(String _path) throws LoaderException { entryPath = _path; // replace and coalesce any backslashes with slash if (config.isInputNormalizePaths()) { currentRecordPath = _path.replaceAll("[\\\\]+", "/"); } else { currentRecordPath = _path; } // this form of URI() does escaping nicely if (config.isEscapeUri()) { URI uri; try { // URI(schema, ssp, fragment) constructor cannot handle // ssp = 2008-11-07T12:23:47.617766-08:00/1 // (despite what the javadoc says)... // in this situation, treat the path as the fragment. uri = new URI(null, currentRecordPath, null); } catch (URISyntaxException e) { try { uri = new URI(null, null, currentRecordPath); } catch (URISyntaxException e1) { throw new LoaderException(e); } } currentRecordPath = uri.toString(); } } /** * @param len * */ protected void updateMonitor(long len) { // handle monitor accounting // note that we count skipped records, too event.increment(len); monitor.add(currentUri, event); } /** * @throws LoaderException */ protected void insert() throws LoaderException { logger.fine("inserting " + currentUri); content.insert(); } /** * */ protected void cleanupRecord() { // clean up logger.fine("content = " + content); if (null != content) { content.close(); } content = null; currentUri = null; } private boolean checkStartId(String id) { if (null == startId) { return false; } // we're still scanning for the startid: // is this my cow? if (!startId.equals(id)) { // don't bother to open the stream: skip this record monitor.incrementSkipped("id " + id + " != " + startId); return true; } logger.info("found START_ID " + id); startId = null; config.setStartId(null); // not needed for multithreaded start_id config, but doesn't hurt monitor.resetThreadPool(); return false; } /** * @param _id * @return * @throws IOException * @throws LoaderException */ protected boolean checkIdAndUri(String _id) throws LoaderException, IOException { return checkStartId(_id) || checkExistingUri(currentUri); } protected String composeUri(String id) throws IOException { logger.finest(id); if (null == id) { throw new IOException("id may not be null"); } String cleanId = id.trim(); // TODO move this to the end? String inputStripPrefix = config.getInputStripPrefix(); if (null != inputStripPrefix && inputStripPrefix.length() > 0) { cleanId = cleanId.replaceFirst(inputStripPrefix, ""); } if (cleanId.length() < 1) { throw new IOException("id may not be empty"); } // automatically use the current file, if available // note that config.getUriPrefix() will ensure that the uri ends in '/' // TODO differentiate between files and zip archives? StringBuilder baseName = new StringBuilder(config.getUriPrefix()); if (useFileBasename()) { baseName.append(currentFileBasename); } if (null != baseName && baseName.length() > 0 && ! cleanId.startsWith("/") && '/' != baseName.charAt(baseName.length() - 1)) { baseName.append("/"); } baseName.append(cleanId); baseName.append(config.getUriSuffix()); String finalName = baseName.toString(); logger.finest(finalName); return finalName; } /** * @return */ private boolean useFileBasename() { return null != currentFileBasename && !currentFileBasename.equals("") && !config.isUseFilenameIds() && !config.isIgnoreFileBasename(); } /** * @param uri * @return * @throws IOException * @throws LoaderException */ private boolean checkExistingUri(String uri) throws LoaderException, IOException { // return true if we're supposed to check, // and if the document already exists if (config.isSkipExisting() || config.isErrorExisting()) { boolean exists = content.checkDocumentUri(uri); logger.fine("checking for uri " + uri + " = " + exists); if (exists) { if (config.isErrorExisting()) { throw new LoaderException( "ERROR_EXISTING=true, cannot overwrite existing document: " + uri); } // ok, must be skipExisting... // count it and log the message monitor.incrementSkipped("existing uri " + uri); return true; } else if (config.isSkipExistingUntilFirstMiss()) { synchronized (monitor) { logger.info("resetting " + Configuration.SKIP_EXISTING_KEY + " at " + uri); config.setSkipExisting(false); config.configureThrottling(); monitor.resetTimer("skipped"); } } } return false; } /* * (non-Javadoc) * * @see * com.marklogic.recordloader.LoaderInterface#setConfiguration(com.marklogic * .recordloader.Configuration) */ @SuppressWarnings("unused") public void setConfiguration(Configuration _config) throws LoaderException { config = _config; logger = config.getLogger(); } /* * (non-Javadoc) * * @see * com.marklogic.recordloader.LoaderInterface#setConnectionUri(java.net.URI) */ public void setConnectionUri(URI _uri) throws LoaderException { if (null == config) { throw new NullPointerException( "must call setConfiguration() before setUri()"); } // load the correct content factory try { contentFactory = config.getContentFactoryConstructor() .newInstance(new Object[] {}); } catch (Exception e) { logger.logException(e); throw new FatalException(e); } contentFactory.setConfiguration(config); contentFactory.setConnectionUri(_uri); } /* * (non-Javadoc) * * @seecom.marklogic.recordloader.LoaderInterface#setMonitor(com.marklogic. * recordloader.Monitor) */ @SuppressWarnings("unused") public void setMonitor(Monitor _monitor) throws LoaderException { monitor = _monitor; } }