/*
 * The MIT License
 *
 * Copyright (c) 2011 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.illumina.parser;

import picard.PicardException;

import java.io.File;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

/**
 * Parse various formats and versions of Illumina Basecall files, and use them to populate
 * ClusterData objects. Clients of this code should use IlluminaDataProviderFactory to create an IlluminaDataProvider.
 * <p>
 * All fields of the provider are assigned once, at construction. Iteration state lives in the underlying
 * parsers, and {@link #iterator()} returns this same object, so an instance supports a single pass over
 * the data (optionally repositioned with {@link #seekToTile(int)}).
 *
 * @author jburke@broadinstitute.org
 */
public class IlluminaDataProvider implements Iterator<ClusterData>, Iterable<ClusterData> {

    /** contains QSeqs, bcls, or other Illumina file types that will be parsed by this class; used here for error reporting only */
    private final File basecallDirectory;

    /** Lane number: written into every ClusterData produced and included in error messages */
    private final int lane;

    /** A list of parsers (already initialized) that should output data in a format consistent with readStructure */
    private final IlluminaParser[] parsers;

    /**
     * for each parser in this.parsers there is an array of IlluminaDataTypes that specifies what datatypes that parser is providing in
     * this particular run. A parser may be able to provide data types which may not be listed here because client code may not
     * have specified these data types
     */
    private final IlluminaDataType[][] dataTypes;

    /** Calculated once, outputReadTypes describes the type of read data for each ReadData that will be found in output ClusterData objects */
    private final ReadType[] outputReadTypes;

    /** Number of reads in each ClusterData */
    private final int numReads;

    /**
     * Create an IlluminaDataProvider given a map of parsersToDataTypes for particular file formats. Compute once the miscellaneous data for the
     * run that will be passed to each ClusterData.
     *
     * @param outputMapping      Supplies the number of output reads and the descriptors from which the per-read types are taken.
     * @param parsersToDataTypes For each parser, the set of data types it should contribute to every ClusterData. Parsers are
     *                           stored in the iteration order of this map's entry set.
     * @param basecallDirectory  For error reporting only.
     * @param lane               Set on each ClusterData and used in error reporting.
     * @throws PicardException if parsersToDataTypes is empty
     */
    IlluminaDataProvider(final OutputMapping outputMapping,
                         final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataTypes,
                         final File basecallDirectory, final int lane) {
        this.basecallDirectory = basecallDirectory;
        this.lane = lane;
        numReads = outputMapping.numOutputReads();

        final int numParsers = parsersToDataTypes.size();
        if (numParsers == 0) {
            throw new PicardException("There were 0 parsers passed to IlluminaDataProvider!");
        }

        // Flatten the map into two parallel arrays so next() can walk them by index.
        parsers = new IlluminaParser[numParsers];
        dataTypes = new IlluminaDataType[numParsers][];
        int parserIndex = 0;
        for (final Map.Entry<IlluminaParser, Set<IlluminaDataType>> pToD : parsersToDataTypes.entrySet()) {
            parsers[parserIndex] = pToD.getKey();
            final Set<IlluminaDataType> dts = pToD.getValue();
            dataTypes[parserIndex] = dts.toArray(new IlluminaDataType[dts.size()]);
            parserIndex++;
        }

        this.outputReadTypes = new ReadType[numReads];
        int readIndex = 0;
        for (final ReadDescriptor rd : outputMapping.getOutputDescriptors()) {
            outputReadTypes[readIndex++] = rd.type;
        }
    }

    /**
     * Reports whether another cluster is available, based on the first parser. Only when the first parser is
     * exhausted are the remaining parsers checked; any of them still having data is treated as an error.
     * A parser that runs out <em>before</em> the first one is not detected here.
     *
     * @return True if we have more clusters to read
     * @throws PicardException if the first parser is exhausted but another parser still has data
     */
    @Override
    public boolean hasNext() {
        final boolean more = parsers[0].hasNext();
        if (!more) {
            for (int i = 1; i < parsers.length; i++) {
                if (parsers[i].hasNext()) {
                    throw new PicardException("Unequal length Illumina files in " + basecallDirectory + ", lane " + lane + ". Failing parser: " + parsers[i].getClass().getName());
                }
            }
        }

        return more;
    }

    /**
     * Advances every parser by one record and merges the requested data types into a new ClusterData.
     *
     * @return Current cluster data populated with only the data that matches one of the data types in dataTypes.
     * @throws NoSuchElementException if {@link #hasNext()} is false
     * @throws PicardException        if a configured data type is not one this method knows how to transfer
     */
    @Override
    public ClusterData next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }

        final ClusterData cluster = new ClusterData(outputReadTypes);
        cluster.setLane(lane);

        //IMPORTANT NOTE: This assignment to tile MUST happen BEFORE the loop below because getTileOfNextCluster
        //returns the tile for the next cluster and if we call this after the loop then whenever we pass a tile
        //boundary the last cluster in the previous tile will have the wrong tile number
        cluster.setTile(parsers[0].getTileOfNextCluster());

        for (int i = 0; i < parsers.length; i++) {
            final IlluminaData ilData = parsers[i].next();
            for (final IlluminaDataType ilDataType : dataTypes[i]) {
                switch (ilDataType) {
                    case Position:
                        addData(cluster, (PositionalData) ilData);
                        break;

                    case PF:
                        addData(cluster, (PfData) ilData);
                        break;

                    case Barcodes:
                        addData(cluster, (BarcodeData) ilData);
                        break;

                    case BaseCalls:
                        addReadData(cluster, numReads, (BaseData) ilData);
                        break;

                    case QualityScores:
                        addReadData(cluster, numReads, (QualityData) ilData);
                        break;

                    default:
                        throw new PicardException("Unknown data type " + ilDataType + " requested by IlluminaDataProviderFactory");
                }
            }
        }

        return cluster;
    }

    /*
     * Methods that transfer data from the IlluminaData objects to the current cluster
     */

    /** Copies the x/y coordinates from posData onto the cluster. */
    private void addData(final ClusterData clusterData, final PositionalData posData) {
        clusterData.setX(posData.getXCoordinate());
        clusterData.setY(posData.getYCoordinate());
    }

    /** Copies the PF flag from pfData onto the cluster. */
    private void addData(final ClusterData clusterData, final PfData pfData) {
        clusterData.setPf(pfData.isPf());
    }

    /** Copies the barcode from barcodeData onto the cluster as its matched barcode. */
    private void addData(final ClusterData clusterData, final BarcodeData barcodeData) {
        clusterData.setMatchedBarcode(barcodeData.getBarcode());
    }

    /** Assigns bases[i] to read i of the cluster, for the first numReads reads. */
    private void addReadData(final ClusterData clusterData, final int numReads, final BaseData baseData) {
        final byte[][] bases = baseData.getBases();
        for (int i = 0; i < numReads; i++) {
            clusterData.getRead(i).setBases(bases[i]);
        }
    }

    /** Assigns qualities[i] to read i of the cluster, for the first numReads reads. */
    private void addReadData(final ClusterData clusterData, final int numReads, final QualityData qualityData) {
        final byte[][] qualities = qualityData.getQualities();
        for (int i = 0; i < numReads; i++) {
            clusterData.getRead(i).setQualities(qualities[i]);
        }
    }

    /**
     * Assigns the i-th raw intensity record to read i of the cluster, for the first numReads reads.
     * NOTE(review): next() has no case that dispatches to this overload; kept in case it is still needed.
     */
    private void addReadData(final ClusterData clusterData, final int numReads, final RawIntensityData rawIntensityData) {
        final FourChannelIntensityData[] fcids = rawIntensityData.getRawIntensities();
        for (int i = 0; i < numReads; i++) {
            clusterData.getRead(i).setRawIntensities(fcids[i]);
        }
    }

    /**
     * Assigns the i-th noise record to read i of the cluster, for the first numReads reads.
     * NOTE(review): next() has no case that dispatches to this overload; kept in case it is still needed.
     */
    private void addReadData(final ClusterData clusterData, final int numReads, final NoiseData noiseData) {
        final FourChannelIntensityData[] fcids = noiseData.getNoise();
        for (int i = 0; i < numReads; i++) {
            clusterData.getRead(i).setNoise(fcids[i]);
        }
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /** Jump so that the next record returned will be from the specified tile. */
    public void seekToTile(final int oneBasedTileNumber) {
        for (final IlluminaParser parser : parsers) {
            parser.seekToTile(oneBasedTileNumber);
        }
    }

    /**
     * @return this provider itself; no independent iterator is created, so repeated calls share one position.
     */
    @Override
    public Iterator<ClusterData> iterator() {
        return this;
    }

    /**
     * Closes every underlying parser. A failure in one parser does not prevent the remaining parsers from
     * being closed; once all have been attempted, the first failure (if any) is rethrown.
     */
    public void close() {
        RuntimeException firstFailure = null;
        for (final IlluminaParser parser : parsers) {
            try {
                parser.close();
            } catch (final RuntimeException e) {
                // Keep going so the other parsers still get closed; only the first failure is reported.
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}