/* * Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br) * Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com) * * For further information check the LICENSE file. */ package bio.pih.genoogle.io; import java.io.File; import java.io.IOException; import java.util.NoSuchElementException; import bio.pih.genoogle.encoder.MaskEncoder; import bio.pih.genoogle.encoder.SequenceEncoder; import bio.pih.genoogle.index.IndexConstructionException; import bio.pih.genoogle.index.MemoryInvertedIndex; import bio.pih.genoogle.index.SubSequenceIndexInfo; import bio.pih.genoogle.index.ValueOutOfBoundsException; import bio.pih.genoogle.index.builder.InvertedIndexBuilder; import bio.pih.genoogle.io.proto.Io.StoredDatabank; import bio.pih.genoogle.io.reader.ParseException; import bio.pih.genoogle.seq.Alphabet; import bio.pih.genoogle.seq.IllegalSymbolException; import bio.pih.genoogle.seq.SymbolList; /** * A data bank witch index its sequences and uses similar subsequences index. * * @author albrecht * */ public class IndexedSequenceDataBank extends AbstractSimpleSequenceDataBank { protected final MemoryInvertedIndex index; protected InvertedIndexBuilder indexBuilder; protected final MaskEncoder maskEncoder; private final String mask; private final int subSequenceOffset; public IndexedSequenceDataBank(String name, Alphabet alphabet, int subSequenceLength, String mask, File path, AbstractDatabankCollection<? extends AbstractSimpleSequenceDataBank> parent) { this(name, alphabet, subSequenceLength, parent.getEncoder(), mask, path,parent); } public IndexedSequenceDataBank(String name, Alphabet alphabet, int subSequenceLength, SequenceEncoder indexedSequenceEncoder, String mask, File path, AbstractDatabankCollection<? extends AbstractSimpleSequenceDataBank> parent) throws ValueOutOfBoundsException { super(name, alphabet, subSequenceLength, path, parent); this.mask = mask; this.subSequenceOffset = indexedSequenceEncoder.getSubSequenceLength(); if (mask != null) { maskEncoder = new MaskEncoder(mask, encoder); } else { maskEncoder = null; } index = new MemoryInvertedIndex(this, indexedSequenceEncoder); } @Override public synchronized boolean load() throws IOException, ValueOutOfBoundsException { boolean b = super.load(); if (b == false) { return false; } index.loadFromFile(); return true; } public void encodeSequences(boolean forceFormating) throws IOException, NoSuchElementException, ValueOutOfBoundsException, IndexConstructionException, ParseException, IllegalSymbolException { beginIndexBuild(); super.encodeSequences(forceFormating); endIndexBuild(); } public void beginIndexBuild() throws IndexConstructionException { indexBuilder = new InvertedIndexBuilder(this); indexBuilder.constructIndex(); } public void endIndexBuild() throws IndexConstructionException { indexBuilder.finishConstruction(); indexBuilder = null; } @Override public int doSequenceProcessing(int sequenceId, int[] encodedSequence) throws IndexConstructionException, IllegalSymbolException { int size = SequenceEncoder.getSequenceLength(encodedSequence); if (maskEncoder == null) { indexBuilder.addSequence(sequenceId, encodedSequence); } else { SymbolList sequence = encoder.decodeIntegerArrayToSymbolList(encodedSequence); int[] filteredSequence = maskEncoder.applySequenceMask(sequence); indexBuilder.addSequence(sequenceId, filteredSequence); } return size; } public MemoryInvertedIndex getIndex() { return index; } /** * The offset between each subsequence of this data bank. * @return offset between the begin of two together subsequences. */ @Override public int getSubSequencesOffset() { if (maskEncoder == null) { return subSequenceOffset; } else { return maskEncoder.getPatternLength(); } } public MaskEncoder getMaskEncoder() { return maskEncoder; } /** * Receive an encodedSubSequence, that is a sub-sequence 8 bases length encoded into a short, * and return an Array of integer containing the sequence and position that is <b>exactly equals</b> the subsequence. * @param encodedSubSequence * @return a list containing the {@link SubSequenceIndexInfo} encoded, use {@link SubSequenceIndexInfo} to decode it. */ public long[] getMatchingSubSequence(int encodedSubSequence) throws ValueOutOfBoundsException, IOException { return index.getMatchingSubSequence(encodedSubSequence); } @Override public boolean check() { if (!index.check()) { return false; } return super.check(); } @Override public void delete() { super.delete(); index.delete(); } protected void setStoredDatabankInfo(StoredDatabank.Builder storedDatabankBuilder) { super.setStoredDatabankInfo(storedDatabankBuilder); if (mask == null) { storedDatabankBuilder.setMask(""); } else { storedDatabankBuilder.setMask(mask); } storedDatabankBuilder.setLowComplexityFilter(getLowComplexityFilter()); } }