// QueryTraversalUtil.java example
//
// Explorer
// manager.v3-master
// - projects
// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.test;

import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.DocumentList;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.TraversalManager;
import com.google.enterprise.connector.spi.Value;

/**
 * A small, self-contained end-to-end exerciser for an SPI implementation's
 * {@link TraversalManager}. It drives the traversal calls in roughly the same
 * order the connector manager does; the inline comments point out where the
 * real connector manager behaves differently from this simplified driver.
 */
public class QueryTraversalUtil {

  /**
   * Runs a complete traversal from the beginning, printing every document
   * that is returned, until a batch yields no documents or the traversal
   * manager returns a null document list.
   *
   * @param queryTraversalManager the traversal manager under test
   * @param batchHint the hint passed to the traversal manager; also the
   *        maximum number of documents this driver reads from one batch
   * @throws RepositoryException if any SPI call fails
   */
  public static void runTraversal(TraversalManager queryTraversalManager,
      int batchHint) throws RepositoryException {
    queryTraversalManager.setBatchHint(batchHint);

    // This driver always starts from the beginning. The real connector
    // manager only does so on an explicit admin command, or when it finds
    // no stored checkpoint for the connector (it persists every checkpoint
    // it receives, so a missing one means the connector has never run).
    DocumentList batch = queryTraversalManager.startTraversal();

    while (batch != null) {
      int processed = consumeBatch(batch, batchHint);

      // The real connector manager also asks for the checkpoint as soon as
      // possible after the last document it wants to process, and then
      // saves the returned string in persistent store.
      String resumePoint = batch.checkpoint();

      if (processed == 0) {
        // An empty batch ends this test run. The real connector manager
        // might wait a while and then try again.
        return;
      }

      // Unlike this driver, the real connector manager might not resume
      // right away: it may have received a shutdown command (and so will
      // not resume until it starts up again), or the connector may be on a
      // schedule that calls for a pause.
      batch = queryTraversalManager.resumeTraversal(resumePoint);
    }
  }

  /**
   * Reads and processes documents from one batch, stopping when the batch
   * runs out or when the number processed equals {@code batchHint}.
   *
   * @return the number of documents processed from this batch
   */
  private static int consumeBatch(DocumentList batch, int batchHint)
      throws RepositoryException {
    int processed = 0;
    for (Document next = batch.nextDocument(); next != null;
        next = batch.nextDocument()) {
      processOneDocument(next);
      processed++;
      if (processed == batchHint) {
        // This driver takes at most batchHint documents per batch. The real
        // connector manager may take fewer, for example if it receives a
        // shutdown request.
        return processed;
      }
    }
    return processed;
  }

  /**
   * Prints a short report for one document: its document id followed by
   * either its search URL or, when no search URL is available, its content.
   *
   * @param document the document to report on
   * @throws RepositoryException if reading a property fails
   */
  public static void processOneDocument(Document document)
      throws RepositoryException {
    // The document id serves as the document's name in the report.
    String docId = Value.getSingleValueString(document,
        SpiConstants.PROPNAME_DOCID);
    String displayText = lookUpDisplayText(document);

    // At this point the real connector manager would format the document
    // as required by the feed API and send it to the GSA.
    System.out.println("Document " + docId);
    System.out.println("Content");
    System.out.println(displayText);
  }

  /**
   * Chooses the text printed under the "Content" heading: the search URL if
   * the lookup returns one, otherwise the content property, otherwise a
   * fixed placeholder.
   */
  private static String lookUpDisplayText(Document document)
      throws RepositoryException {
    String searchUrl = Value.getSingleValueString(document,
        SpiConstants.PROPNAME_SEARCHURL);
    if (searchUrl != null) {
      return searchUrl;
    }

    // With no search URL, the connector manager would ask for the content
    // property and base-64 encode it. This driver simply prints the string
    // value of the content property as returned.
    String content = Value.getSingleValueString(document,
        SpiConstants.PROPNAME_CONTENT);
    return (content != null) ? content : "no content";
  }
}