package org.icij.extract.document; import org.apache.tika.metadata.Metadata; import java.io.IOException; import java.io.Reader; import java.nio.file.Path; import java.util.*; import java.util.function.Supplier; public class Document { private final Path path; private Supplier<String> id; private String foreignId = null; private final Metadata metadata; private Identifier identifier; private List<EmbeddedDocument> embeds = new LinkedList<>(); private Map<String, EmbeddedDocument> lookup = new HashMap<>(); private Reader reader = null; private ReaderGenerator readerGenerator = null; /** * Instantiate a document with a pre-generated ID. In this case, the ID generator is only used when adding * embedded documents to this parent. * * @param id a pre-generated ID * @param identifier an identifier generator * @param path the path to the document * @param metadata document metadata */ public Document(final String id, final Identifier identifier, final Path path, final Metadata metadata) { Objects.requireNonNull(identifier, "The identifier generator must not be null."); Objects.requireNonNull(path, "The path must not be null."); this.metadata = metadata; this.path = path; this.identifier = identifier; this.id = ()-> id; } /** * @see Document(String, Identifier, Path, Metadata) */ public Document(final String id, final Identifier identifier, final Path path) { this(id, identifier, path, new Metadata()); } /** * Instantiate a document when the ID has not yet been generated. * * @param identifier for generating the ID * @param path the path to the document * @param metadata document metadata */ public Document(final Identifier identifier, final Path path, final Metadata metadata) { Objects.requireNonNull(identifier, "The identifier generator must not be null."); Objects.requireNonNull(path, "The path must not be null."); this.metadata = metadata; this.path = path; this.identifier = identifier; // Create a supplier that will cache the result of the generator after the first invocation. this.id = ()-> { final String id; try { id = this.generateId(); } catch (Exception e) { throw new RuntimeException("Unable to generate document ID.", e); } this.id = ()-> id; return id; }; } /** * @see Document(Identifier, Path, Metadata) */ public Document(final Identifier identifier, final Path path) { this(identifier, path, new Metadata()); } String generateId() throws Exception { return identifier.generate(this); } public String getId() { return id.get(); } public String getHash() { return identifier.retrieveHash(getMetadata()); } Identifier getIdentifier() { return identifier; } public Path getPath() { return path; } public Metadata getMetadata() { return metadata; } public EmbeddedDocument addEmbed(final Metadata metadata) { return addEmbed(new EmbeddedDocument(this, metadata)); } private EmbeddedDocument addEmbed(final Identifier identifier, final Path path, final Metadata metadata) { return addEmbed(new EmbeddedDocument(this, identifier, path, metadata)); } public EmbeddedDocument addEmbed(final String key, final Identifier identifier, final Path path, final Metadata metadata) { return lookup.put(key, addEmbed(identifier, path, metadata)); } private EmbeddedDocument addEmbed(final EmbeddedDocument embed) { embeds.add(embed); return embed; } public List<EmbeddedDocument> getEmbeds() { return embeds; } public EmbeddedDocument getEmbed(final String key) { return lookup.get(key); } public void setReader(final Reader reader) { this.reader = reader; } public void setReader(final ReaderGenerator readerGenerator) { this.readerGenerator = readerGenerator; } public void setForeignId(final String foreignId) { this.foreignId = foreignId; } public String getForeignId() { return foreignId; } public synchronized Reader getReader() throws IOException { if (null == reader && null != readerGenerator) { reader = readerGenerator.generate(); } return reader; } @Override public boolean equals(final Object other) { if (!(other instanceof Document)) { return false; } // Only documents with the same ID are equal, as paths are not globally unique unless explicitly declared so, // if, for example, the PathIdentifier is used. final String id = getId(); return null != id && id.equals(((Document) other).getId()); } @Override public String toString() { return path.toString(); } @FunctionalInterface public interface ReaderGenerator { Reader generate() throws IOException; } }