/* * Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br) * Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com) * * For further information check the LICENSE file. */ package bio.pih.genoogle.index; import java.io.BufferedInputStream; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel.MapMode; import org.apache.log4j.Logger; import bio.pih.genoogle.encoder.SequenceEncoder; import bio.pih.genoogle.io.AbstractSequenceDataBank; import bio.pih.genoogle.io.proto.Io.InvertedIndexBuck; import bio.pih.genoogle.seq.SymbolList; /** * An inverted sub-sequences index stored in the memory. * * @author albrecht */ public class MemoryInvertedIndex extends AbstractInvertedIndex { protected long[][] index = null; private static Logger logger = Logger.getLogger(MemoryInvertedIndex.class.getCanonicalName()); /** * @param databank * @param subSequenceLength */ public MemoryInvertedIndex(AbstractSequenceDataBank databank, SequenceEncoder indexedSequenceEncoder) { super(databank, indexedSequenceEncoder); } @Override public long[] getMatchingSubSequence(SymbolList subSequence) throws ValueOutOfBoundsException { if (subSequence.getLength() != subSequenceLength) { throw new ValueOutOfBoundsException("The length (" + subSequence.getLength() + ") of the given sequence is different from the sub-sequence (" + subSequenceLength + ")"); } int encodedSubSequence = encoder.encodeSubSequenceToInteger(subSequence); return getMatchingSubSequence(encodedSubSequence); } @Override public long[] getMatchingSubSequence(int encodedSubSequence) { return index[encodedSubSequence]; } @Override public String indexStatus() { StringBuilder sb = new StringBuilder(); for (long[] bucket : index) { if (bucket != null) { for (long subSequenceInfoEncoded : bucket) { sb.append("\t"); sb.append(SubSequenceIndexInfo.getSequenceId(subSequenceInfoEncoded)); sb.append(": "); sb.append(SubSequenceIndexInfo.getStart(subSequenceInfoEncoded)); sb.append("\n"); } } } return sb.toString(); } @Override public void loadFromFile() throws IOException { long b = System.currentTimeMillis(); logger.info("Loading inverted index."); this.index = new long[indexSize][]; File memoryInvertedIndexFile = getMemoryInvertedIndexFile(); FileInputStream memoryInvertedIndexIS = new FileInputStream(memoryInvertedIndexFile); FileInputStream in = new FileInputStream(getMemoryInvertedOffsetIndexFile()); DataInputStream fileInputStream = new DataInputStream(new BufferedInputStream(in)); long mmOffset = 0; final long TWO_GB = Integer.MAX_VALUE; final long invertedIndexFileLength = memoryInvertedIndexFile.length(); MappedByteBuffer map = null; if (invertedIndexFileLength > TWO_GB) { map = memoryInvertedIndexIS.getChannel().map(MapMode.READ_ONLY, mmOffset, TWO_GB); } else { map = memoryInvertedIndexIS.getChannel().map(MapMode.READ_ONLY, mmOffset, invertedIndexFileLength); } long totalSubSequences = 0; while (fileInputStream.available() > 0) { IndexFileOffset indexFilePosition = IndexFileOffset.newFrom(fileInputStream); int subSequence = indexFilePosition.subSequence; int length = indexFilePosition.length; long offset = indexFilePosition.offset; //System.out.println(subSequence); if (offset + length > mmOffset + TWO_GB) { mmOffset += (TWO_GB - length); if (mmOffset + TWO_GB > invertedIndexFileLength) { long l = invertedIndexFileLength - mmOffset; System.out.println(l); map = memoryInvertedIndexIS.getChannel().map(MapMode.READ_ONLY, mmOffset, l); } else { map = memoryInvertedIndexIS.getChannel().map(MapMode.READ_ONLY, mmOffset, TWO_GB); } } long reOffset = offset - mmOffset; if (reOffset > Integer.MAX_VALUE) { logger.fatal(reOffset + " is too big for " + reOffset); System.exit(-2); } byte[] data = new byte[length]; map.position((int) reOffset); map.get(data); InvertedIndexBuck invertedIndexBuck = InvertedIndexBuck.parseFrom(data); long[] entries = new long[invertedIndexBuck.getBuckCount()]; for (int j = 0; j < invertedIndexBuck.getBuckCount(); j++) { entries[j] = invertedIndexBuck.getBuck(j); } totalSubSequences += invertedIndexBuck.getBuckCount(); index[subSequence] = entries; } for (int i = 0; i < indexSize; i++) { if (index[i] == null) { index[i] = EMPTY_ARRAY; } } this.loaded = true; logger.info(totalSubSequences + " sub sequences was loaded into the inverted index."); logger.info("Inverted index loaded in " + (System.currentTimeMillis() - b)); } @Override public boolean fileExists() { return memoryInvertedIndexFileExisits() && memoryInvertedOffsetIndexFileExisits(); } private boolean memoryInvertedIndexFileExisits() { return getMemoryInvertedIndexFile().exists(); } private boolean memoryInvertedOffsetIndexFileExisits() { return getMemoryInvertedOffsetIndexFile().exists(); } public File getMemoryInvertedIndexFile() { return new File(databank.getFullPath() + ".midx"); } public File getMemoryInvertedOffsetIndexFile() { return new File(databank.getFullPath() + ".oidx"); } public boolean check() { if (getMemoryInvertedIndexFile().exists() && getMemoryInvertedOffsetIndexFile().exists()) { return true; } return false; } public void delete() { if (getMemoryInvertedIndexFile().exists()) { boolean delete = getMemoryInvertedIndexFile().delete(); if (!delete) { logger.error(getMemoryInvertedOffsetIndexFile() + " can not be deleted."); } } if (getMemoryInvertedOffsetIndexFile().exists()) { boolean delete = getMemoryInvertedOffsetIndexFile().delete(); if (!delete) { logger.error(getMemoryInvertedOffsetIndexFile() + " can not be deleted."); } } } }