/**
*
*/
package ecologylab.bigsemantics.metadata.builtins;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ecologylab.bigsemantics.actions.SemanticAction;
import ecologylab.bigsemantics.actions.SemanticActionHandler;
import ecologylab.bigsemantics.actions.SemanticsConstants;
import ecologylab.bigsemantics.collecting.DocumentDownloadedEventHandler;
import ecologylab.bigsemantics.collecting.DownloadStatus;
import ecologylab.bigsemantics.collecting.SemanticsDownloadMonitors;
import ecologylab.bigsemantics.collecting.SemanticsGlobalScope;
import ecologylab.bigsemantics.collecting.SemanticsSite;
import ecologylab.bigsemantics.documentcache.PersistentDocumentCache;
import ecologylab.bigsemantics.documentparsers.DocumentParser;
import ecologylab.bigsemantics.downloadcontrollers.CachedPageDownloadController;
import ecologylab.bigsemantics.downloadcontrollers.DownloadController;
import ecologylab.bigsemantics.html.documentstructure.SemanticInLinks;
import ecologylab.bigsemantics.httpclient.SimplHttpResponse;
import ecologylab.bigsemantics.logging.CachedHtmlStale;
import ecologylab.bigsemantics.logging.CachedMmdStale;
import ecologylab.bigsemantics.logging.ChangeLocation;
import ecologylab.bigsemantics.logging.DocumentLogRecord;
import ecologylab.bigsemantics.logging.PersistenceCacheDocHit;
import ecologylab.bigsemantics.logging.PersistenceCacheHtmlHit;
import ecologylab.bigsemantics.logging.PersistenceCacheMiss;
import ecologylab.bigsemantics.logging.Phase;
import ecologylab.bigsemantics.metametadata.FilterLocation;
import ecologylab.bigsemantics.metametadata.MetaMetadata;
import ecologylab.bigsemantics.metametadata.MetaMetadataCompositeField;
import ecologylab.bigsemantics.metametadata.MetaMetadataRepository;
import ecologylab.bigsemantics.model.text.ITermVector;
import ecologylab.bigsemantics.model.text.TermVectorFeature;
import ecologylab.bigsemantics.seeding.SearchResult;
import ecologylab.bigsemantics.seeding.Seed;
import ecologylab.bigsemantics.seeding.SeedDistributor;
import ecologylab.collections.SetElement;
import ecologylab.concurrent.Downloadable;
import ecologylab.generic.Continuation;
import ecologylab.io.DownloadProcessor;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.SIMPLTranslationException;
import ecologylab.serialization.SimplTypesScope;
import ecologylab.serialization.formatenums.StringFormat;
import ecologylab.serialization.library.geom.PointInt;
/**
 * New Container object. Mostly just a closure around Document. Used as a candidate and wrapper for
 * downloading.
 *
 * @author andruid
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public class DocumentClosure extends SetElement
implements TermVectorFeature, Downloadable, SemanticsConstants,
Continuation<DocumentClosure>
{
// Shared logger for all closures; initialized eagerly in the static block below.
static Logger logger;
static
{
logger = LoggerFactory.getLogger(DocumentClosure.class);
}
// Global semantics session state (repository, caches, download monitors).
private SemanticsGlobalScope semanticsScope;
/**
 * This is tracked mainly for debugging, so we can see what pURL was fed into the meta-metadata
 * address resolver machine.
 */
private ParsedURL initialPURL;
// The wrapped Document; may be swapped for a more specific instance via changeDocument().
private Document document;
// Guards reads and swaps of the document reference.
private final Object DOCUMENT_LOCK = new Object();
// Current life-cycle state; mirrored into the Document on every change.
private DownloadStatus downloadStatus = DownloadStatus.UNPROCESSED;
// Guards all reads/writes of downloadStatus.
private final Object DOWNLOAD_STATUS_LOCK = new Object();
// Parser chosen for this document; may be preset externally via setDocumentParser().
private DocumentParser documentParser;
// Inlink context for this document; refreshed when the document is swapped.
private SemanticInLinks semanticInlinks;
// Callbacks dispatched after download/parse completes; nulled once recycled.
private List<Continuation<DocumentClosure>> continuations;
/**
 * Keeps state about the search process, if this encapsulates a search result;
 */
private SearchResult searchResult;
// Drag-and-drop origin point, when this closure was created by a DnD gesture.
private PointInt dndPoint;
/**
 * If true (the normal case), then any MediaElements encountered will be added to the candidates
 * collection, for possible inclusion in the visual information space.
 */
private boolean collectMedia = true;
/**
 * If true (the normal case), then hyperlinks encountered will be fed to the web crawler, providing
 * that they are traversable() and of the right mime types.
 */
private boolean crawlLinks = true;
// Serializes synchronous downloads of this closure (see performDownloadSynchronously()).
private final Object DOWNLOAD_LOCK = new Object();
/**
 * Construct a closure around the given document, in the given semantics scope.
 *
 * @param document the Document to wrap; its current location seeds initialPURL.
 * @param semanticsSessionScope global semantics session state.
 * @param semanticInlinks inlink context for this document.
 */
private DocumentClosure(Document document,
SemanticsGlobalScope semanticsSessionScope,
SemanticInLinks semanticInlinks)
{
super();
this.semanticsScope = semanticsSessionScope;
this.initialPURL = document.getLocation();
this.document = document;
this.semanticInlinks = semanticInlinks;
this.continuations = new ArrayList<Continuation<DocumentClosure>>();
}
/**
 * Should only be called by Document.getOrCreateClosure().
 *
 * @param document the Document to wrap; supplies its own semantics scope.
 * @param semanticInlinks inlink context for this document.
 */
DocumentClosure(Document document, SemanticInLinks semanticInlinks)
{
this(document, document.getSemanticsScope(), semanticInlinks);
}
/**
 * @return the infoCollector
 */
public SemanticsGlobalScope getSemanticsScope()
{
return semanticsScope;
}
/**
 * @return the location originally fed into this closure, before any redirects or filtering.
 */
public ParsedURL getInitialPURL()
{
return initialPURL;
}
/**
 * @return the document
 */
public Document getDocument()
{
synchronized (DOCUMENT_LOCK)
{
return document;
}
}
/**
 * @return the parser currently associated with this closure, or null if none chosen yet.
 */
public DocumentParser getDocumentParser()
{
return documentParser;
}
/**
 * @param presetDocumentParser
 * the presetDocumentParser to set
 */
public void setDocumentParser(DocumentParser presetDocumentParser)
{
this.documentParser = presetDocumentParser;
}
@Override
public SemanticsSite getSite()
{
// Snapshot the field to avoid racing with changeDocument().
Document document = this.document;
return (document == null) ? null : document.getSite();
}
/**
 * The site used for download throttling/accounting. File-based documents are exempt from any
 * site's download policy, so they report no download site.
 *
 * @return the document's site, or null if there is no document or it is loaded from a file.
 */
@Override
public SemanticsSite getDownloadSite()
{
  Document snapshot = this.document;
  if (snapshot == null)
  {
    return null;
  }
  // Local files are not governed by any site's download policy.
  return snapshot.getDownloadLocation().isFile() ? null : snapshot.getSite();
}
/**
 * @return true iff this closure's site is the given (non-null) site instance.
 */
public boolean isFromSite(SemanticsSite site)
{
return site != null && site == getSite();
}
@Override
public ParsedURL location()
{
// Snapshot to avoid racing with changeDocument().
Document document = this.document;
return (document == null) ? null : document.getLocation();
}
@Override
public ParsedURL getDownloadLocation()
{
Document document = this.document;
return (document == null) ? null : document.getDownloadLocation();
}
/**
 * @return the semanticInlinks
 */
public SemanticInLinks getSemanticInlinks()
{
return semanticInlinks;
}
/**
 * Keeps state about the search process, if this Container is a search result;
 */
public SearchResult searchResult()
{
return searchResult;
}
/**
 * Associate search-result bookkeeping with this closure.
 *
 * @param resultDistributer
 *          distributor aggregating the (seeding) searches.
 * @param resultNum
 *          Result number among those returned by the search engine.
 */
public void setSearchResult(SeedDistributor resultDistributer, int resultNum)
{
searchResult = new SearchResult(resultDistributer, resultNum);
}
public SeedDistributor resultDistributer()
{
return (searchResult == null) ? null : searchResult.resultDistributer();
}
@Override
public DocumentLogRecord getLogRecord()
{
// NOTE(review): assumes document is non-null here; would NPE on a recycled closure.
return document.logRecord();
}
@Override
public boolean isImage()
{
return document.isImage();
}
public boolean isSeed()
{
return (document != null) && document.isSeed();
}
public Seed getSeed()
{
return document != null ? document.getSeed() : null;
}
/**
 * @return true iff this closure originated from a drag-and-drop gesture.
 */
public boolean isDnd()
{
return dndPoint != null;
}
public PointInt getDndPoint()
{
return dndPoint;
}
public void setDndPoint(PointInt dndPoint)
{
this.dndPoint = dndPoint;
}
/**
 * This method is called before we actually hit the website. Thus, it uses the initial URL to test
 * if we need to hit the website. If it returns true, we definitely don't need to hit the website;
 * if it returns false, we need to hit the website, but the actual document might have been cached
 * using another URL.
 */
@Override
public boolean isCached()
{
// Always report not-cached here; the persistent cache is consulted inside performDownload().
return false;
}
/**
 * @return the downloadStatus
 */
public DownloadStatus getDownloadStatus()
{
synchronized (DOWNLOAD_STATUS_LOCK)
{
return downloadStatus;
}
}
public boolean isUnprocessed()
{
return getDownloadStatus() == DownloadStatus.UNPROCESSED;
}
/**
 * Test state variable inside of QUEUE_DOWNLOAD_LOCK.
 *
 * @return true if result has already been queued, connected to, downloaded, ... so it should not
 * be operated on further.
 */
public boolean downloadHasBeenQueued()
{
return getDownloadStatus() != DownloadStatus.UNPROCESSED;
}
/**
 * Test and set state variable inside of QUEUE_DOWNLOAD_LOCK.
 *
 * @return true if this really queues the download, and false if it had already been queued.
 */
private boolean testAndSetQueueDownload()
{
synchronized (DOWNLOAD_STATUS_LOCK)
{
// Only the first caller wins the UNPROCESSED -> QUEUED transition; everyone else is rejected.
if (downloadStatus != DownloadStatus.UNPROCESSED)
return false;
setDownloadStatusInternal(DownloadStatus.QUEUED);
return true;
}
}
/**
 * Thread-safe setter for downloadStatus; also propagates the new status to the Document.
 *
 * @param newStatus the new life-cycle state.
 */
private void setDownloadStatus(DownloadStatus newStatus)
{
synchronized (DOWNLOAD_STATUS_LOCK)
{
setDownloadStatusInternal(newStatus);
}
}
/**
 * (this method does not lock DOWNLOAD_STATUS_LOCK!)
 *
 * @param newStatus
 */
private void setDownloadStatusInternal(DownloadStatus newStatus)
{
this.downloadStatus = newStatus;
// Keep the wrapped Document's status in sync with the closure's.
if (this.document != null)
{
document.setDownloadStatus(newStatus);
}
}
/**
 * @return the DownloadProcessor appropriate for this closure's kind (image / DnD / seed / GUI).
 */
public DownloadProcessor<DocumentClosure> downloadMonitor()
{
SemanticsDownloadMonitors downloadMonitors = semanticsScope.getDownloadMonitors();
return downloadMonitors.downloadProcessor(document.isImage(),
isDnd(),
isSeed(),
document.isGui());
}
/**
 * Download if necessary, using the {@link ecologylab.concurrent.DownloadMonitor DownloadMonitor}
 * if USE_DOWNLOAD_MONITOR is set (it seems it always is), or in a new thread. Control will be
 * passed to {@link #downloadAndParse() downloadAndParse()}. Does nothing if this has been
 * previously queued, if it has been recycled, or if it isMuted().
 *
 * @return true if this is actually queued for download. false if it was previously, if its been
 * recycled, or if it is muted.
 */
public boolean queueDownload()
{
if (recycled())
{
debugA("ERROR: cant queue download cause already recycled.");
return false;
}
// Nothing to fetch without a download location.
if (this.getDownloadLocation() == null)
return false;
final boolean result = !filteredOut(); // for dashboard type on the fly filtering
if (result)
{
// Atomically claim the UNPROCESSED -> QUEUED transition; losing the race means bail out.
if (!testAndSetQueueDownload())
return false;
delete(); // remove from candidate pools! (invokes deleteHook as well)
// Pass this closure as its own continuation only when there are callbacks to dispatch.
downloadMonitor().download(this, continuations == null ? null : this);
}
return result;
}
/**
 * In use cases such as the service, we want to be able to call performDownload() synchronously,
 * and in the same time make sure that the same closure will be downloaded by one thread at a
 * time. This method uses a lock to implement this.
 *
 * @param noCacheRead true to skip reading from the persistent cache.
 * @param noCacheWrite true to skip writing to the persistent cache.
 * @return the download status after the attempt completes.
 * @throws IOException
 */
public DownloadStatus performDownloadSynchronously(boolean noCacheRead, boolean noCacheWrite)
throws IOException
{
synchronized (DOWNLOAD_LOCK)
{
performDownload(noCacheRead, noCacheWrite);
return downloadStatus;
}
}
/**
 * Connect to the information resource. Figure out the appropriate MetaMetadata and DocumentType.
 * Download the information resource and parse it. Do cleanup afterwards.
 *
 * This method is typically called by DownloadMonitor.
 *
 * @throws IOException
 */
@Override
public void performDownload() throws IOException
{
// Default: both cache reads and cache writes are enabled.
performDownload(false, false);
}
/**
 * Core download-and-parse pipeline: consult the persistent cache, download raw content if
 * needed, resolve redirects, pick meta-metadata and a parser, extract, and persist results.
 *
 * @param noCacheRead true to bypass reading the persistent cache (forced if mmd is no-cache).
 * @param noCacheWrite true to bypass writing to the persistent cache (forced if mmd is no-cache).
 * @throws IOException
 */
public void performDownload(boolean noCacheRead, boolean noCacheWrite) throws IOException
{
MetaMetadata metaMetadata = (MetaMetadata) document.getMetaMetadata();
// A no-cache meta-metadata forces both cache reads and writes off.
if (metaMetadata.isNoCache())
{
noCacheRead = true;
noCacheWrite = true;
}
synchronized (DOWNLOAD_STATUS_LOCK)
{
logger.info("Entering performDownload(), downloadStatus = " + downloadStatus + " skipCache=" + noCacheRead);
if (noCacheRead)
{
// Even when bypassing the cache, never re-enter a download already in flight.
switch (downloadStatus)
{
case CONNECTING:
case PARSING:
return;
default:
break;
}
}
else
{
if (recycled() || document.isRecycled())
{
logger.error("Recycled document closure in performDownload(): " + document);
return;
}
// With cache reads enabled, also skip terminal states (done / error / recycled).
switch (downloadStatus)
{
case CONNECTING:
case PARSING:
case DOWNLOAD_DONE:
case IOERROR:
case RECYCLED:
return;
default:
break;
}
}
logger.info("Changing status from " + downloadStatus + " to connecting: " + this);
setDownloadStatusInternal(DownloadStatus.CONNECTING);
}
ParsedURL location = location();
DocumentLogRecord logRecord = getLogRecord();
PersistentDocumentCache pCache = semanticsScope.getPersistentDocumentCache();
logRecord.beginPhase(Phase.DOWNLOAD_AND_PARSE);
// Check the persistent cache first
PersistenceMetaInfo cacheMetaInfo = null;
String cachedRawContent = null;
Document cachedDoc = null;
if (pCache != null && !noCacheRead)
{
logRecord.beginPhase(Phase.PCACHE_READ);
try
{
cacheMetaInfo = pCache.getMetaInfo(location);
if (cacheMetaInfo != null)
{
logRecord.setPersistenceMetaInfo(cacheMetaInfo);
// check if cached raw content is too old.
Date accessTime = cacheMetaInfo.getAccessTime();
Date currentTime = new Date();
long diff = currentTime.getTime() - accessTime.getTime();
long cacheLifeMs = metaMetadata.getCacheLifeMs();
if (diff <= cacheLifeMs)
{
// it's not too old, we should use the cached raw content.
cachedRawContent = pCache.retrieveRawContent(cacheMetaInfo);
logRecord.logPost().addEventNow(new PersistenceCacheHtmlHit());
// check if cached document needs to be re-extracted
// (the extraction hash changes whenever the meta-metadata wrapper changes).
String currentHash = metaMetadata.getHashForExtraction();
if (currentHash.equals(cacheMetaInfo.getMmdHash()))
{
cachedDoc = pCache.retrieveDoc(cacheMetaInfo);
logRecord.logPost().addEventNow(new PersistenceCacheDocHit());
}
else
{
logRecord.logPost().addEventNow(new CachedMmdStale());
}
}
else
{
logRecord.logPost().addEventNow(new CachedHtmlStale());
}
}
else
{
logRecord.logPost().addEventNow(new PersistenceCacheMiss());
}
}
catch (Exception e)
{
// Cache failures are non-fatal: fall through to a fresh download.
String errMsg = "Error accessing persistence cache.";
logger.error(errMsg, e);
logRecord.addErrorRecord(errMsg, e);
}
logRecord.endPhase(Phase.PCACHE_READ);
}
// If not in the persistent cache, download the raw page and parse
if (cachedDoc != null)
{
// Full cache hit: swap in the cached document; no download or parse needed.
semanticsScope.getLocalDocumentCollection().remap(document, cachedDoc);
changeDocument(cachedDoc);
}
else
{
DownloadController downloadController = null;
boolean rawContentDownloaded = false;
if (cachedRawContent != null)
{
// Raw-content hit with a stale extraction: replay the cached page through the parser.
downloadController =
new CachedPageDownloadController(cacheMetaInfo.getLocation(),
cacheMetaInfo.getRawAdditionalLocations(),
cacheMetaInfo.getCharset(),
cacheMetaInfo.getMimeType(),
200,
"OK",
cachedRawContent);
}
else
{
downloadController = downloadRawPage(location);
rawContentDownloaded = true;
}
if (downloadController.isGood())
{
handleRedirections(downloadController, location);
// The response mime type may select a more specific meta-metadata (and document subtype).
metaMetadata =
changeMetaMetadataIfNeeded(downloadController.getHttpResponse().getMimeType());
findParser(metaMetadata, downloadController);
if (documentParser != null)
{
doParse(metaMetadata);
if (pCache != null && !noCacheWrite)
{
doPersist(pCache, downloadController, document, rawContentDownloaded);
}
documentParser = null;
}
}
else
{
logger.error("Network connection error: " + document);
setDownloadStatus(DownloadStatus.IOERROR);
logRecord.endPhase(Phase.DOWNLOAD_AND_PARSE);
return;
}
downloadController.recycle();
}
document.downloadAndParseDone(documentParser);
logRecord.endPhase(Phase.DOWNLOAD_AND_PARSE);
setDownloadStatus(DownloadStatus.DOWNLOAD_DONE);
}
/**
 * Download the raw page at the given location, recording download-phase timing and the HTTP
 * status code in the log record.
 *
 * @param location the URL to fetch.
 * @return the controller holding the response; callers check isGood() before use.
 * @throws IOException
 */
private DownloadController downloadRawPage(ParsedURL location) throws IOException
{
getLogRecord().beginPhase(Phase.DOWNLOAD);
String userAgent = document.getMetaMetadata().getUserAgentString();
DownloadController downloadController = semanticsScope.createDownloadController(this);
downloadController.setUserAgent(userAgent);
if (downloadController.accessAndDownload(location))
{
SimplHttpResponse httpResp = downloadController.getHttpResponse();
getLogRecord().setDownloadStatusCode(httpResp.getCode());
}
getLogRecord().endPhase(Phase.DOWNLOAD);
return downloadController;
}
/**
 * Resolve the final URL after redirects: switch to the Document for the landing URL, apply any
 * meta-metadata location filter, and register alternative locations with the local collection.
 *
 * @param downloadController source of the HTTP response (final URL, other locations).
 * @param location the original request location (currently unused; kept for context).
 */
private void handleRedirections(DownloadController downloadController, ParsedURL location)
{
String newUrl = downloadController.getHttpResponse().getUrl();
ParsedURL newPurl = ParsedURL.getAbsolute(newUrl);
Document newDoc = semanticsScope.getOrConstructDocument(newPurl);
// From here on, the 'document' field refers to the landing-URL document.
changeDocument(newDoc);
MetaMetadataCompositeField mmd = newDoc.getMetaMetadata();
if (mmd instanceof MetaMetadata)
{
FilterLocation filter = ((MetaMetadata) mmd).getFilterLocation();
if (filter != null)
{
ArrayList<ParsedURL> altLocs = new ArrayList<ParsedURL>();
try
{
// The filter may normalize the location and emit alternative locations into altLocs.
newPurl = filter.filter(newPurl, altLocs);
if (newPurl != null && !newPurl.equals(document.getLocation()))
{
document.setLocation(newPurl);
}
for (ParsedURL altLoc : altLocs)
{
document.addAdditionalLocation(altLoc);
semanticsScope.getLocalDocumentCollection().addMapping(altLoc, document);
}
}
catch (Exception e)
{
logger.error("Exception filtering location " + newPurl, e);
}
}
}
// handle other locations:
List<ParsedURL> otherLocations = downloadController.getHttpResponse().getOtherPurls();
if (otherLocations != null)
{
for (ParsedURL otherLocation : otherLocations)
{
if (otherLocation != null)
{
document.addAdditionalLocation(otherLocation);
semanticsScope.getLocalDocumentCollection().addMapping(otherLocation, document);
}
}
}
}
/**
 * If the current meta-metadata is generic, try to find a more specific one by mime type; when
 * found, possibly replace the document with an instance of the more specific metadata class.
 *
 * @param mimeType the mime type reported by the HTTP response.
 * @return the (possibly more specific) meta-metadata now governing this document.
 */
private MetaMetadata changeMetaMetadataIfNeeded(String mimeType)
{
MetaMetadata metaMetadata = (MetaMetadata) document.getMetaMetadata();
// check for more specific meta-metadata
if (metaMetadata.isGenericMetadata())
{ // see if we can find more specifc meta-metadata using mimeType
MetaMetadataRepository repository = semanticsScope.getMetaMetadataRepository();
MetaMetadata mimeMmd = repository.getMMByMime(mimeType);
if (mimeMmd != null && !mimeMmd.equals(metaMetadata))
{
// new meta-metadata!
if (!mimeMmd.getMetadataClass().isAssignableFrom(document.getClass()))
{
// more specific so we need new metadata!
Document document = (Document) mimeMmd.constructMetadata(); // set temporary on stack
changeDocument(document);
}
metaMetadata = mimeMmd;
// note: the local 'document' above is out of scope here, so this refers to the field,
// which changeDocument() updated when a new instance was constructed.
document.setMetaMetadata(mimeMmd);
}
}
return metaMetadata;
}
/**
 * Locate a DocumentParser for the given meta-metadata, unless one was preset via
 * setDocumentParser(). Leaves documentParser null (with a warning) when none is found.
 *
 * @param metaMetadata the meta-metadata to find a parser for.
 * @param downloadController carries the downloaded content the parser will consume.
 */
private void findParser(MetaMetadata metaMetadata, DownloadController downloadController)
{
if (documentParser == null)
{
boolean noParser = false;
// // First check if registered no parser
// noParser = DocumentParser.isRegisteredNoParser(document.getLocation());
// List<MetadataParsedURL> additionalLocations = document.getAdditionalLocations();
// if (additionalLocations != null)
// {
// for (int i = 0; i < additionalLocations.size() && !noParser; ++i)
// {
// noParser |= DocumentParser.isRegisteredNoParser(additionalLocations.get(i).getValue());
// }
// }
// NOTE(review): the no-parser registry check above is disabled, so noParser is always false
// and the warning branch below is currently unreachable.
if (noParser)
{
logger.warn("Registered no parser: " + document);
}
else
{
// If not registered no parser, try to find one
documentParser =
DocumentParser.getByMmd(metaMetadata, semanticsScope, this, downloadController);
if (documentParser == null)
{
logger.warn("No parser found: " + metaMetadata);
}
}
}
}
/**
 * Run the extraction phase: before-actions, parse, after-actions, and doc-graph callbacks.
 *
 * @param metaMetadata the meta-metadata whose semantic actions bracket the parse.
 * @throws IOException
 */
private void doParse(MetaMetadata metaMetadata) throws IOException
{
getLogRecord().beginPhase(Phase.EXTRACT);
// container or not (it could turn out to be an image or some other mime type), parse the baby!
setDownloadStatus(DownloadStatus.PARSING);
takeSemanticActions(metaMetadata, metaMetadata.getBeforeSemanticActions());
documentParser.parse();
takeSemanticActions(metaMetadata, metaMetadata.getAfterSemanticActions());
addDocGraphCallbacksIfNeeded();
getLogRecord().endPhase(Phase.EXTRACT);
}
/**
 * Run the given semantic actions (before- or after-parse hooks) against the current document.
 * A no-op when either the meta-metadata or the action list is absent.
 *
 * @param metaMetadata the meta-metadata the actions belong to.
 * @param actions the actions to execute; may be null.
 */
private void takeSemanticActions(MetaMetadata metaMetadata, ArrayList<SemanticAction> actions)
{
  if (metaMetadata == null || actions == null)
  {
    return;
  }
  new SemanticActionHandler(semanticsScope, documentParser)
      .takeSemanticActions(metaMetadata, document, actions);
}
/**
 * When the scope auto-updates document references, register all listeners waiting on this
 * document as continuations, so references get refreshed once download/parse completes.
 */
private void addDocGraphCallbacksIfNeeded()
{
if (this.getSemanticsScope().ifAutoUpdateDocRefs())
{
// add callbacks so that when this document is downloaded and parsed, references to it will
// be updated automatically.
Set<DocumentDownloadedEventHandler> listeners =
semanticsScope.getDocumentDownloadingMonitor().getListenersForDocument(document);
if (listeners != null && listeners.size() > 0)
{
addContinuations(listeners);
}
}
}
/**
 * Write results to the persistent cache: store raw content + extracted doc after a fresh
 * download, or just update the extracted doc when the raw content came from the cache.
 * Failures are logged and recorded, not propagated.
 *
 * @param pCache the persistent cache (caller guarantees non-null).
 * @param downloadController holds the raw content / charset / mime type to store.
 * @param doc the document to persist.
 * @param rawContentDownloaded true if raw content was freshly downloaded (store everything);
 *          false if it came from the cache (update the extracted doc only).
 * @throws IOException
 */
private void doPersist(PersistentDocumentCache pCache,
DownloadController downloadController,
Document doc,
boolean rawContentDownloaded)
throws IOException
{
getLogRecord().beginPhase(Phase.PCACHE_WRITE);
try
{
if (rawContentDownloaded)
{
PersistenceMetaInfo metaInfo =
pCache.store(doc,
downloadController.getHttpResponse().getContent(),
downloadController.getHttpResponse().getCharset(),
downloadController.getHttpResponse().getMimeType(),
doc.getMetaMetadata().getHashForExtraction());
getLogRecord().setId(metaInfo.getDocId());
getLogRecord().setPersistenceMetaInfo(metaInfo);
}
else
{
PersistenceMetaInfo metaInfo = pCache.getMetaInfo(doc.getLocation());
pCache.updateDoc(metaInfo, doc);
}
}
catch (Exception e)
{
// Persistence problems must not fail the download pipeline.
String errMsg = "Error storing to persistence cache.";
logger.error(errMsg, e);
getLogRecord().addErrorRecord(errMsg, e);
}
getLogRecord().endPhase(Phase.PCACHE_WRITE);
}
/**
 * Dispatch all of our registered callbacks.
 *
 * Iterates over a snapshot of the continuation list so that callbacks may register further
 * continuations without ConcurrentModificationException. An exception thrown by one
 * continuation is logged and does not stop the remaining ones.
 *
 * @param o the closure to pass to each continuation (normally this).
 */
@Override
public void callback(DocumentClosure o)
{
  // Snapshot the field once: another thread may null it after recycling, so the original's
  // pattern of re-reading the field for synchronized(continuations) could NPE.
  List<Continuation<DocumentClosure>> continuations = this.continuations;
  if (continuations == null)
    return;
  List<Continuation<DocumentClosure>> currentContinuations;
  synchronized (continuations)
  {
    // Copy so callbacks can safely add more continuations while we iterate.
    currentContinuations = new ArrayList<Continuation<DocumentClosure>>(continuations);
  }
  // (The original re-checked currentContinuations != null here; it was just assigned from a
  // constructor call, so that check was dead code and has been removed.)
  for (Continuation<DocumentClosure> continuation : currentContinuations)
  {
    try
    {
      continuation.callback(o);
    }
    catch (Exception e)
    {
      logger.error("Error calling back: " + o + ": " + continuation, e);
    }
  }
  // wait to recycle continuations until after they have been called.
  if (isRecycled())
  {
    // Clear under the same monitor the add* methods use, to avoid racing with registration.
    synchronized (continuations)
    {
      continuations.clear();
    }
    this.continuations = null;
  }
}
/**
 * @return the live continuation list (not a copy); null once the closure has been recycled.
 */
public List<Continuation<DocumentClosure>> getContinuations()
{
return continuations;
}
// Internal accessor; callers must hold the list's monitor while mutating it.
private List<Continuation<DocumentClosure>> continuations()
{
return continuations;
}
/**
 * Register a callback to run after download/parse completes.
 * NOTE(review): throws NPE if called after 'continuations' has been nulled by recycling;
 * prefer addContinuationBeforeDownloadDone() where applicable.
 */
public void addContinuation(Continuation<DocumentClosure> continuation)
{
synchronized (continuations)
{
continuations().add(continuation);
}
}
/**
 * Register a batch of callbacks to run after download/parse completes.
 *
 * @param incomingContinuations the continuations to append, in iteration order.
 */
public void addContinuations(Collection<? extends Continuation<DocumentClosure>> incomingContinuations)
{
  synchronized (continuations)
  {
    // addAll appends in iteration order, identical to the former element-by-element loop.
    continuations().addAll(incomingContinuations);
  }
}
/**
 * Register a callback at the front of the list, so it runs before previously added ones.
 */
public void addContinuationBefore(Continuation<DocumentClosure> continuation)
{
synchronized (continuations)
{
continuations().add(0, continuation);
}
}
/**
 * Add a continuation to this closure before it is downloaded (i.e. before its performDownload()
 * method finishes).
 *
 * This gives the client the possibility of making sure the continuation will be called when the
 * closure finishes downloading.
 *
 * @param continuation the callback to register.
 * @return true if the continuation is added before the closure finishes downloading; false if the
 *         closure is already downloaded (or failed / recycled).
 */
public boolean addContinuationBeforeDownloadDone(Continuation<DocumentClosure> continuation)
{
  // Unsynchronized fast path; the authoritative check is repeated under the lock below.
  if (!isDownloadTerminated())
  {
    synchronized (DOWNLOAD_STATUS_LOCK)
    {
      if (!isDownloadTerminated())
      {
        addContinuation(continuation);
        return true;
      }
    }
  }
  return false;
}
/**
 * @return true if the download has reached a terminal state (done, I/O error, or recycled).
 *         (Extracted to remove the condition formerly duplicated verbatim in the method above.)
 */
private boolean isDownloadTerminated()
{
  return downloadStatus == DownloadStatus.DOWNLOAD_DONE
      || downloadStatus == DownloadStatus.IOERROR
      || downloadStatus == DownloadStatus.RECYCLED;
}
/**
 * Document metadata object must change, because we learned something new about its type.
 *
 * Swaps in newDocument under DOCUMENT_LOCK, fixes up site download accounting, inherits values
 * and the log record from the old document, logs a location change, and recycles the old
 * document. A no-op when newDocument is already the current document.
 *
 * @param newDocument
 */
public void changeDocument(Document newDocument)
{
synchronized (DOCUMENT_LOCK)
{
if (newDocument != document)
{
Document oldDocument = document;
document = newDocument;
logger.info("Changing {} to {}", oldDocument, newDocument);
SemanticsSite oldSite = oldDocument.site();
SemanticsSite newSite = newDocument.site();
if (oldSite != null && oldSite != newSite)
{
// calling changeDocument() because of redirecting?
if (oldSite.isDownloading())
oldSite.endDownload(oldDocument.getDownloadLocation());
}
newDocument.inheritValues(oldDocument);
semanticInlinks = newDocument.getSemanticInlinks(); // probably not needed, but just in
// case.
newDocument.setLogRecord(oldDocument.getLogRecord());
ParsedURL oldLoc = oldDocument.getLocation();
ParsedURL newLoc = newDocument.getLocation();
if (oldLoc != null && !oldLoc.equals(newLoc))
{
ChangeLocation changeLocationEvent = new ChangeLocation(oldLoc, newLoc);
getLogRecord().logPost().addEventNow(changeLocationEvent);
}
oldDocument.recycle();
}
}
}
/**
 * Close the current connection. Re-open a connection to the same location. Use the same Document
 * object; don't process re-directs, or anything like that. Re-connect simply.
 *
 * @return DownloadController for the new connection.
 * @throws IOException
 */
public DownloadController reConnect() throws IOException
{
DownloadController downloadController = semanticsScope.createDownloadController(this);
// Best effort: the success flag from accessAndDownload() is intentionally ignored here;
// callers inspect the returned controller.
downloadController.accessAndDownload(document.getLocation());
return downloadController;
}
/**
 * Recycle this closure without recycling the wrapped Document.
 */
@Override
public void recycle()
{
recycle(false);
}
/**
 * Recycle this closure, releasing the parser and inlink references. Idempotent: returns
 * immediately once status is already RECYCLED.
 *
 * @param recycleDocument true to also recycle the wrapped Document.
 */
@Override
public synchronized void recycle(boolean recycleDocument)
{
synchronized (DOWNLOAD_STATUS_LOCK)
{
if (downloadStatus == DownloadStatus.RECYCLED)
return;
setDownloadStatusInternal(DownloadStatus.RECYCLED);
}
if (documentParser != null)
documentParser.recycle();
semanticInlinks = null;
initialPURL = null;
// ??? should we recycle Document here -- under what circumstances???
if (recycleDocument)
document.recycle();
}
/**
 * @return true if there is no document, or the document has been recycled.
 */
@Override
public boolean recycled()
{
  // Snapshot to avoid racing with changeDocument() / recycle() between check and dereference.
  Document document = this.document;
  return document == null || document.isRecycled();
}
/**
 * Same predicate as {@link #recycled()}; kept as a separate override for interface
 * compatibility. Delegates so both paths share the race-safe snapshot (the original duplicated
 * the logic here without the local snapshot).
 */
@Override
public boolean isRecycled()
{
  return recycled();
}
/**
 * Resets this closure as if it is newly created.
 */
public void reset()
{
setDownloadStatus(DownloadStatus.UNPROCESSED);
if (document != null)
{
// Allow a previously recycled document to be processed again.
document.resetRecycleStatus();
}
}
/**
 * Identity string plus the wrapped document's location, e.g. "DocumentClosure@1a2b[http://...]".
 * Null-safe: renders "[recycled]" when there is no document (the original dereferenced the
 * field unconditionally and could NPE; message() shows a null document is a reachable state).
 */
@Override
public String toString()
{
  Document document = this.document;
  return super.toString() + "[" + (document == null ? "recycled" : document.getLocation()) + "]";
}
/**
 * Hash on the wrapped document, so a closure hashes consistently with its document.
 * NOTE(review): the hash basis is a mutable field (changeDocument() swaps it); avoid mutating
 * while this closure is a key in a hash-based collection.
 */
@Override
public int hashCode()
{
return (document == null) ? -1 : document.hashCode();
}
@Override
public ITermVector termVector()
{
return (document == null) ? null : document.termVector();
}
/**
 * Called by DownloadMonitor in case a timeout happens.
 *
 * Marks this closure as IOERROR, forwards the error to the parser (if any), then recycles.
 */
@Override
public void handleIoError(Throwable e)
{
setDownloadStatus(DownloadStatus.IOERROR);
if (documentParser != null)
{
documentParser.handleIoError(e);
}
recycle();
}
/**
 * Short human-readable status string for monitoring UIs.
 *
 * @return the document's location, "recycled" when there is no document, or "no location"
 *         when the document has none.
 */
@Override
public String message()
{
  Document document = this.document;
  if (document == null)
    return "recycled";
  ParsedURL location = document.getLocation();
  // Guard against documents without a location: the original called toString() on a
  // possibly-null ParsedURL and could NPE.
  return (location == null) ? "no location" : location.toString();
}
/**
 * Serialize the wrapped document in XML, delegating to the two-argument overload.
 */
public void serialize(OutputStream stream)
{
serialize(stream, StringFormat.XML);
}
/**
 * Serialize the wrapped document to the given stream in the given format.
 *
 * Fixes a defect in the original implementation, which ignored the stream parameter and always
 * wrote to System.out (plus a stray blank line on stdout).
 *
 * @param stream the destination stream; not closed by this method.
 * @param format the serialization format (e.g. XML, JSON).
 */
public void serialize(OutputStream stream, StringFormat format)
{
  Document document = getDocument();
  try
  {
    // Write to the caller-supplied stream (previously hard-coded to System.out).
    SimplTypesScope.serialize(document, stream, format);
  }
  catch (SIMPLTranslationException e)
  {
    error("Could not serialize " + document);
    e.printStackTrace();
  }
}
/**
 * Serialize the wrapped document as XML into the given StringBuilder.
 *
 * @param buffy destination buffer; the XML is appended to it.
 */
public void serialize(StringBuilder buffy)
{
Document document = getDocument();
try
{
SimplTypesScope.serialize(document, buffy, StringFormat.XML);
// Progress newline on the console; buffy itself receives only the XML.
System.out.println("\n");
}
catch (SIMPLTranslationException e)
{
error("Could not serialize " + document);
e.printStackTrace();
}
}
}