// Copyright 2006 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.enterprise.connector.traversal; import com.google.enterprise.connector.instantiator.MockInstantiator; import com.google.enterprise.connector.instantiator.ThreadPool; import com.google.enterprise.connector.jcr.JcrTraversalManager; import com.google.enterprise.connector.mock.MockRepository; import com.google.enterprise.connector.mock.MockRepositoryEventList; import com.google.enterprise.connector.mock.jcr.MockJcrQueryManager; import com.google.enterprise.connector.persist.ConnectorNotFoundException; import com.google.enterprise.connector.pusher.MockPusher; import com.google.enterprise.connector.pusher.PushException; import com.google.enterprise.connector.pusher.Pusher; import com.google.enterprise.connector.pusher.Pusher.PusherStatus; import com.google.enterprise.connector.pusher.PusherFactory; import com.google.enterprise.connector.spi.Document; import com.google.enterprise.connector.spi.DocumentList; import com.google.enterprise.connector.spi.Property; import com.google.enterprise.connector.spi.RepositoryDocumentException; import com.google.enterprise.connector.spi.RepositoryException; import com.google.enterprise.connector.spi.SkippedDocumentException; import com.google.enterprise.connector.spi.SpiConstants; import com.google.enterprise.connector.spi.TraversalContext; import com.google.enterprise.connector.spi.TraversalContextAware; import com.google.enterprise.connector.spi.TraversalManager; import com.google.enterprise.connector.spi.Value; import com.google.enterprise.connector.test.ConnectorTestUtils; import com.google.enterprise.connector.util.testing.AdjustableClock; import junit.framework.ComparisonFailure; import junit.framework.TestCase; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.Set; import javax.jcr.query.QueryManager; /** * Tests for {@link com.google.enterprise.connector.traversal.QueryTraverser}. */ public class QueryTraverserTest extends TestCase { protected static final File RESOURCE_DIR = new File("source/resources/"); // Common objects used by many tests. AdjustableClock clock; ThreadPool threadPool; MockInstantiator instantiator; ValidatingPusher pusher; TraversalStateStore stateStore; ProductionTraversalContext traversalContext; String connectorName; @Override protected void setUp() throws Exception { connectorName = getName(); clock = new AdjustableClock(); threadPool = new ThreadPool(5, clock); instantiator = new MockInstantiator(threadPool); pusher = new ValidatingPusher(); traversalContext = new ProductionTraversalContext(); traversalContext.setTraversalTimeLimitSeconds(1); stateStore = new RecordingTraversalStateStore(); } /** * Test method for * {@link com.google.enterprise.connector.traversal.QueryTraverser * #runBatch(BatchSize)}. */ public final void testRunBatch() { runTestBatches(1); runTestBatches(2); runTestBatches(3); runTestBatches(4); runTestBatches(5); } private void runTestBatches(int batchHint) { try { // Reset traversal state from previous tests. stateStore.storeTraversalState(null); MockRepositoryEventList mrel = new MockRepositoryEventList("MockRepositoryEventLog1.txt"); Traverser traverser = createTraverser(mrel); System.out.println(); BatchSize batchSize = new BatchSize(batchHint); System.out.println("Running batch test batchsize " + batchHint); int totalDocsProcessed = 0; int batchNumber = 0; while (true) { int docsProcessed = 0; docsProcessed = traverser.runBatch(batchSize).getCountProcessed(); if (docsProcessed <= 0) { break; } totalDocsProcessed += docsProcessed; String state = ""; try { state = instantiator.getConnectorState(connectorName); } catch (ConnectorNotFoundException e) { fail("Connector " + connectorName + " Not Found: " + e.toString()); } System.out.println("Batch# " + batchNumber + " docs " + docsProcessed + " checkpoint " + state); batchNumber++; } assertEquals(4, totalDocsProcessed); } finally { instantiator.shutdown(true, 5000); } } /** * Create a Traverser. * @param mrel * @param connectorName * @param instantiator * @return a Traverser instance */ private Traverser createTraverser(MockRepositoryEventList mrel) { MockRepository r = new MockRepository(mrel); QueryManager qm = new MockJcrQueryManager(r.getStore()); TraversalManager qtm = new JcrTraversalManager(qm); Traverser traverser = new QueryTraverser(new MockPusher(System.out), qtm, stateStore, connectorName, traversalContext, clock); instantiator.setupTraverser(connectorName, traverser); return traverser; } /** * Initialize a large file used for tests. This is to avoid * having giant files checked into the source code repository. * * @param fname the name of the large file to create (if it * doesn't already exist). * @throws IOException if creating the large file fails. */ private void makeLargeFile(String fname) throws IOException { File largeFile = new File(fname); if (!largeFile.exists()) { byte[] text = "abcdefghijklmnopqrstuvwxyz\n".getBytes(); FileOutputStream os = new FileOutputStream(largeFile); for (int i = 0; i < 100000; i++) { os.write(text); } os.close(); } } /** * Test that we are indeed streaming the file. */ public void testLargeFileStream() { try { // This has internal knowledge of the contents of // MockRepositoryEventLogLargeFile.txt used below. makeLargeFile("testdata/tmp/largefile.txt"); } catch (IOException e) { fail("Unable to initialize largefile.txt: " + e.toString()); } try { MockRepositoryEventList mrel = new MockRepositoryEventList("MockRepositoryEventLogLargeFile.txt"); Traverser traverser = createTraverser(mrel); int docsProcessed = 0; BatchSize batchSize = new BatchSize(1); do { docsProcessed = traverser.runBatch(batchSize).getCountProcessed(); } while (docsProcessed > 0); } finally { instantiator.shutdown(true, 5000); } } private void checkResult(long documentCount, BatchResult result) { assertEquals(documentCount, result.getCountProcessed()); assertEquals(Long.toString(documentCount), stateStore.getTraversalState()); assertEquals(documentCount, pusher.getPushCount()); } public void testNullStateStore() { NeverEndingDocumentlistTraversalManager traversalManager = new NeverEndingDocumentlistTraversalManager(100); QueryTraverser queryTraverser = new QueryTraverser(pusher, traversalManager, null, connectorName, traversalContext, clock, null); BatchResult result = queryTraverser.runBatch(new BatchSize(100)); assertEquals(0, result.getCountProcessed()); assertEquals(TraversalDelayPolicy.ERROR, result.getDelayPolicy()); } public void testTimeout() { NeverEndingDocumentlistTraversalManager traversalManager = new NeverEndingDocumentlistTraversalManager(100); QueryTraverser queryTraverser = new QueryTraverser(pusher, traversalManager, stateStore, connectorName, traversalContext, clock, null); BatchResult result = queryTraverser.runBatch(new BatchSize(100)); assertTrue(result.getCountProcessed() > 0); checkResult(traversalManager.getDocumentCount(), result); } /** * Tests a timeout before nextDocument is called, and ensures that * checkpoint() was called. */ public void testTimeoutBeforeNextDocument() { // This implementation waits before returning a DocumentList that // throws an exception if nextDocument is called. final long batchMillis = traversalContext.traversalTimeLimitSeconds() * 1000 * 2; NeverEndingDocumentlistTraversalManager traversalManager = new NeverEndingDocumentlistTraversalManager(100) { @Override public DocumentList startTraversal() { clock.adjustTime(batchMillis); return new DocumentList() { public Document nextDocument() { throw new RuntimeException(); } public String checkpoint() { return "0"; } }; } }; QueryTraverser queryTraverser = new QueryTraverser(pusher, traversalManager, stateStore, connectorName, traversalContext, clock, null); BatchResult result = queryTraverser.runBatch(new BatchSize(100)); assertEquals(0, result.getCountProcessed()); checkResult(traversalManager.getDocumentCount(), result); assertEquals(TraversalDelayPolicy.IMMEDIATE, result.getDelayPolicy()); } /** * Tests a cancellation before nextDocument is called, and ensures * that checkpoint() was not called. */ public void testCancellationBeforeNextDocument() { // This implementation cancels the batch before returning a // DocumentList that throws an exception if nextDocument is // called. final Traverser[] traverserHolder = new Traverser[1]; NeverEndingDocumentlistTraversalManager traversalManager = new NeverEndingDocumentlistTraversalManager(100) { @Override public DocumentList startTraversal() { traverserHolder[0].cancelBatch(); return new DocumentList() { public Document nextDocument() { throw new RuntimeException(); } public String checkpoint() { throw new RuntimeException(); } }; } }; QueryTraverser queryTraverser = new QueryTraverser(pusher, traversalManager, stateStore, connectorName, traversalContext, clock, null); traverserHolder[0] = queryTraverser; BatchResult result = queryTraverser.runBatch(new BatchSize(100)); assertEquals(0, result.getCountProcessed()); assertEquals(0, traversalManager.getDocumentCount()); assertEquals(null, stateStore.getTraversalState()); assertEquals(TraversalDelayPolicy.ERROR, result.getDelayPolicy()); } public void testBatchSize() { LargeDocumentlistTraversalManager traversalManager = new LargeDocumentlistTraversalManager(10); QueryTraverser queryTraverser = new QueryTraverser(pusher, traversalManager, stateStore, connectorName, traversalContext, clock, null); BatchResult result = queryTraverser.runBatch(new BatchSize(10)); assertTrue(result.getCountProcessed() > 10); assertTrue(result.getCountProcessed() <= 20); checkResult(traversalManager.getDocumentCount(), result); } private void checkExceptionHandling(Exception exception, Where where, long documentCount) { checkExceptionHandling(exception, where, documentCount, documentCount); } private void checkExceptionHandling(Exception exception, Where where, long documentCount, long pushCount) { ExceptionalTraversalManager traversalManager = new ExceptionalTraversalManager(exception, where); QueryTraverser queryTraverser = new QueryTraverser(pusher, traversalManager, stateStore, connectorName, traversalContext, clock); BatchResult result = queryTraverser.runBatch(new BatchSize(10)); assertEquals(documentCount, result.getCountProcessed()); assertEquals(pushCount, pusher.getPushCount()); pusher.throwAssertionError(); } public void testBatchSizeException() { checkExceptionHandling(new RepositoryException("BatchSizeException"), Where.SET_BATCH_HINT, 2); } public void testStartTraversalRepositoryException() { checkExceptionHandling(new RepositoryException("StartTraversalException"), Where.START_TRAVERSAL, 0); } public void testStartTraversalRuntimeException() { checkExceptionHandling(new RuntimeException("StartTraversalException"), Where.START_TRAVERSAL, 0); } public void testResumeTraversalRepositoryException() { // Create a checkpoint to force a resume traversal. stateStore.storeTraversalState("2"); checkExceptionHandling(new RepositoryException("ResumeTraversalException"), Where.RESUME_TRAVERSAL, 0); } public void testResumeTraversalRuntimeException() { // Create a checkpoint to force a resume traversal. stateStore.storeTraversalState("2"); checkExceptionHandling(new RuntimeException("ResumeTraversalException"), Where.RESUME_TRAVERSAL, 0); } public void testFirstDocumentRepositoryException() { checkExceptionHandling(new RepositoryException("FirstDocumentException"), Where.FIRST_DOCUMENT, 0); } public void testFirstDocumentRepositoryDocumentException() { checkExceptionHandling( new RepositoryDocumentException("FirstDocumentRepositoryDocumentException"), Where.FIRST_DOCUMENT, 1); } public void testFirstDocumentRuntimeException() { checkExceptionHandling(new RuntimeException("FirstDocumentException"), Where.FIRST_DOCUMENT, 1); } public void testNextDocumentRepositoryException() { checkExceptionHandling(new RepositoryException("NextDocumentException"), Where.NEXT_DOCUMENT, 0); } public void testNextDocumentRepositoryDocumentException() { checkExceptionHandling( new RepositoryDocumentException("NextDocumentRepositoryDocumentException"), Where.NEXT_DOCUMENT, 1); } public void testNextDocumentRuntimeException() { checkExceptionHandling(new RuntimeException("NextDocumentException"), Where.NEXT_DOCUMENT, 1); } public void testCheckpointRepositoryException() { checkExceptionHandling(new RepositoryException("CheckpointException"), Where.CHECKPOINT, 0); } public void testCheckpointRuntimeException() { checkExceptionHandling(new RuntimeException("CheckpointException"), Where.CHECKPOINT, 0); } public void testDocidRepositoryException() { checkExceptionHandling( new RepositoryException("DocidRepositoryException"), Where.DOCUMENT_DOCID, 0); } public void testDocidRuntimeException() { checkExceptionHandling( new IllegalArgumentException("DocidRuntimeException"), Where.DOCUMENT_DOCID, 2, 1); } public void testDocumentRepositoryException() { checkExceptionHandling( new RepositoryException("DocumentRepositoryException"), Where.DOCUMENT_CONTENT, 0); } public void testRepositoryDocumentException() { checkExceptionHandling( new RepositoryDocumentException("RepositoryDocumentException"), Where.DOCUMENT_CONTENT, 2, 1); } public void testSkipDocumentException() { checkExceptionHandling( new SkippedDocumentException("SkippedDocumentException"), Where.DOCUMENT_CONTENT, 2, 1); } public void testDocumentRuntimeException() { checkExceptionHandling(new RuntimeException("DocumentException"), Where.DOCUMENT_CONTENT, 0); } /** * A {@link TraversalManager} for a {@link NeverEndingDocumentList}. */ private class NeverEndingDocumentlistTraversalManager implements TraversalManager, TraversalContextAware { private final int docMillis; private long documentCount; public NeverEndingDocumentlistTraversalManager(int docMillis) { this.docMillis = docMillis; } public void setTraversalContext(TraversalContext traversalContext) { throw new UnsupportedOperationException(); } public void setBatchHint(int batchHint) { // Ignored. } public DocumentList startTraversal() { return new NeverEndingDocumentList(this); } public DocumentList resumeTraversal(String checkPoint) { throw new UnsupportedOperationException(); } // Return a new document every {@code docMillis} milliseconds. synchronized Document newDocument() { clock.adjustTime(docMillis); String id = Long.toString(documentCount++); return ConnectorTestUtils.createSimpleDocument(id); } synchronized long getDocumentCount() { return documentCount; } } /** * {@link DocumentList} that never runs out of documents - * returning new documents until interrupted. */ private class NeverEndingDocumentList implements DocumentList { private final NeverEndingDocumentlistTraversalManager traversalManager; public NeverEndingDocumentList( NeverEndingDocumentlistTraversalManager traversalManager) { this.traversalManager = traversalManager; } public String checkpoint() { return Long.toString(traversalManager.getDocumentCount()); } /** * Returns a new {@link Document} with an * SpiConstants.PROPNAME_DOCID property set to the number * previously returned. */ public Document nextDocument() throws RepositoryException { return traversalManager.newDocument(); } } /** * A {@link TraversalManager} for a {@link LargeDocumentList}. */ private class LargeDocumentlistTraversalManager extends NeverEndingDocumentlistTraversalManager { private int batchHint; public LargeDocumentlistTraversalManager(int docMillis) { super(docMillis); } @Override public void setBatchHint(int batchHint) { this.batchHint = batchHint; } synchronized int getBatchHint() { return batchHint; } @Override public DocumentList startTraversal() { return new LargeDocumentList(this); } } /** * {@link DocumentList} that returns twice the batchHint * number of documents. */ private class LargeDocumentList extends NeverEndingDocumentList { private final LargeDocumentlistTraversalManager traversalManager; public LargeDocumentList( LargeDocumentlistTraversalManager traversalManager) { super(traversalManager); this.traversalManager = traversalManager; } @Override public Document nextDocument() throws RepositoryException { if (traversalManager.getDocumentCount() < 2 * traversalManager.getBatchHint()) { return super.nextDocument(); } else { return null; } } } /** * Locations from where ExceptionalTraversalManager will throw * its exceptions. */ private static enum Where { NONE, SET_BATCH_HINT, START_TRAVERSAL, RESUME_TRAVERSAL, // TraversalManager FIRST_DOCUMENT, NEXT_DOCUMENT, CHECKPOINT, // DocumentList DOCUMENT_DOCID, DOCUMENT_CONTENT // Document } /** Throws either a RuntimeException or a RepositoryException. */ private void throwException(Exception exception) throws RepositoryException { if (exception instanceof RepositoryDocumentException) { throw (RepositoryDocumentException) exception; } else if (exception instanceof RepositoryException) { throw (RepositoryException) exception; } else if (exception instanceof RuntimeException) { // RuntimeExceptions don't need to be declared. throw (RuntimeException) exception; } } /** * A {@link TraversalManager} that throws configured Exceptions. */ private class ExceptionalTraversalManager implements TraversalManager { protected final Where where; protected final Exception exception; public ExceptionalTraversalManager(Exception exception, Where where) { this.exception = exception; this.where = where; } @Override public void setBatchHint(int batchHint) throws RepositoryException { if (where == Where.SET_BATCH_HINT) { throwException(exception); } } @Override public DocumentList startTraversal() throws RepositoryException { if (where == Where.START_TRAVERSAL) { throwException(exception); } return new ExceptionalDocumentList(0, exception, where); } @Override public DocumentList resumeTraversal(String checkpoint) throws RepositoryException { if (where == Where.RESUME_TRAVERSAL) { throwException(exception); } int docNum = Integer.parseInt(checkpoint); if (docNum > 3) { return null; // Only return two batches. } return new ExceptionalDocumentList(docNum, exception, where); } } /** * A {@link DocumentList} that throws configured Exceptions. */ private class ExceptionalDocumentList implements DocumentList { protected final Where where; protected final Exception exception; protected final int startNum; protected int docNum; public ExceptionalDocumentList(int docNum, Exception exception, Where where) { this.startNum = docNum; this.docNum = docNum; this.exception = exception; this.where = where; } @Override public Document nextDocument() throws RepositoryException { // Return no more than two documents per batch. if (docNum > startNum + 1) { return null; } int doc = docNum++; // This knows we are returning 2-document batches, so although // docNum keeps going up, even numbered docs are the first // in the batch, and odd numbered docs are the next. switch (where) { case FIRST_DOCUMENT: if ((doc & 1) == 0) { pusher.skipDocument(); throwException(exception); } break; case NEXT_DOCUMENT: if ((doc & 1) == 1) { pusher.skipDocument(); throwException(exception); } break; } return new ExceptionalDocument(doc, exception, where); } @Override public String checkpoint() throws RepositoryException { if (where == Where.CHECKPOINT) { throwException(exception); } return String.valueOf(docNum); } } /** * A {@link Document} that throws configured Exceptions. */ private class ExceptionalDocument implements Document { protected final Where where; protected final Exception exception; protected final int docNum; protected final Document doc; public ExceptionalDocument(int docNum, Exception exception, Where where) { this.exception = exception; this.where = where; this.docNum = docNum; this.doc = ConnectorTestUtils.createSimpleDocument(String.valueOf(docNum)); } @Override public Set<String> getPropertyNames() throws RepositoryException { return doc.getPropertyNames(); } @Override public Property findProperty(String name) throws RepositoryException { if (SpiConstants.PROPNAME_CONTENT.equals(name) && where == Where.DOCUMENT_CONTENT && (docNum & 1) == 0) { throwException(exception); } if (SpiConstants.PROPNAME_DOCID.equals(name) && where == Where.DOCUMENT_DOCID && (docNum & 1) == 1) { throwException(exception); } return doc.findProperty(name); } } /** * A {@link Pusher} that performs validations * @see ValidatingPusher#take(Document) for details. */ private static class ValidatingPusher implements Pusher, PusherFactory { private String connectorName = null; private volatile long pushCount = 0; private volatile long expectedId = 0; private volatile ComparisonFailure assertionError = null; /** * Performs the following validations: * <OL> * <LI>connectorName matches the connector name passed to * {@link ValidatingPusher#ValidatingPusher(String)}. * </OL> */ @Override public Pusher newPusher(String connectorName) { if (this.connectorName == null) { this.connectorName = connectorName; } else { assertEqualsAndStore(this.connectorName, connectorName); } return this; } /** * Performs the following validations and increments the count * of pushed documents if all the validations pass. * <OL> * <LI>SpiConstants.PROPNAME_DOCID property of {@link Document} * matches the number of documents pushed (formatted as a {@link String}). * </OL> */ @Override public synchronized PusherStatus take(Document document) throws RepositoryException, PushException { String gotId = Value.getSingleValueString(document, SpiConstants.PROPNAME_DOCID); assertEqualsAndStore(Long.toString(expectedId), gotId); try { Value.getSingleValue(document, SpiConstants.PROPNAME_CONTENT); expectedId++; pushCount++; } catch (RuntimeException re) { // Mimicking DocPusher's behavior for RuntimeExceptions. expectedId++; throw new RepositoryDocumentException(re); } catch (RepositoryDocumentException rde) { expectedId++; throw rde; } catch (Throwable t) { throw new PushException(t); } return PusherStatus.OK; } synchronized void skipDocument() { expectedId++; } @Override public void flush() { } @Override public synchronized void cancel() { pushCount = 0; expectedId = 0; } @Override public PusherStatus getPusherStatus() { return PusherStatus.OK; } /** * Returns the number of documents that have been pushed. */ synchronized long getPushCount() { return pushCount; } /** Checks an assertion and stores it if one occurs. */ void assertEqualsAndStore(String expected, String value) { try { assertEquals(expected, value); } catch (ComparisonFailure e) { assertionError = e; } } /** Throws and clears an assertion failure if one occurred. */ synchronized void throwAssertionError() { if (assertionError != null) { ComparisonFailure e = assertionError; assertionError = null; throw e; } } } /** * A {@link TraversalStateStore} that remembers the last saved state in * memory for testing purposes. */ private static class RecordingTraversalStateStore implements TraversalStateStore { private String state; public String getTraversalState() { return state; } public void storeTraversalState(String state) { this.state = state; } } }