/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2007-2015 Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.broad.igv.data;

//~--- JDK imports ------------------------------------------------------------

import java.io.*;

/**
 * Static helpers for locating positions in sorted integer arrays and for
 * estimating the size of tab-delimited ascii data files.
 *
 * @author jrobinso
 */
public class DataUtils {

    /** Maximum number of data lines (after the first line) sampled by {@link #estimateFileMetrics(String)}. */
    private static final int MAX_SAMPLE_LINES = 100;

    /**
     * Searches the whole array; equivalent to
     * {@code getIndexBefore(values, x, 0, values.length)}.
     *
     * @param values array to search, expected to be sorted in ascending order
     * @param x      value to locate
     * @return see {@link #getIndexBefore(int[], int, int, int)}
     */
    public static int getIndexBefore(int[] values, int x) {
        return getIndexBefore(values, x, 0, values.length);
    }

    /**
     * Bisects {@code values} between the given bounds looking for the position
     * at or just before {@code x}.
     * <p>
     * The result is the probed index whose element equals {@code x}, or the index
     * of the element immediately preceding the first probed element that is
     * {@code >= x}. Consequently, when {@code x} occurs in the array the result is
     * either the position of an occurrence or the position directly before it,
     * depending on where the probes land. The result is {@code 0} when the search
     * midpoint reaches index 0 (including for an empty array, in which case no
     * element is read), and the last index when every element is smaller than
     * {@code x}.
     *
     * @param values     array to search, expected to be sorted in ascending order
     * @param x          value to locate
     * @param leftBound  lower search bound; expected to satisfy
     *                   {@code 0 <= leftBound <= rightBound}
     * @param rightBound upper search bound; expected to be at most {@code values.length}
     * @return the index described above
     */
    public static int getIndexBefore(int[] values, int x, int leftBound, int rightBound) {
        final int last = values.length - 1;
        int lo = leftBound;
        int hi = rightBound;

        while (true) {
            // Unsigned shift keeps the midpoint correct even if lo + hi overflows int.
            int mid = (lo + hi) >>> 1;
            if (mid == 0) {
                return 0;
            }

            int probe = values[mid];
            if (probe == x) {
                return mid;
            }

            if (probe < x) {
                // The last index is only a valid answer once we know its element is
                // below x; there is no right neighbour to inspect in that case.
                // "mid == lo" means the window can no longer shrink (degenerate
                // bounds or unsorted data), so stop rather than spin forever.
                if (mid == last || values[mid + 1] >= x || mid == lo) {
                    return mid;
                }
                lo = mid;
            } else {
                // probe > x: the answer is the left neighbour if it is not above x.
                // "mid == hi" is the matching no-progress guard for this direction.
                if (values[mid - 1] <= x || mid == hi) {
                    return mid - 1;
                }
                hi = mid;
            }
        }
    }

    /**
     * Estimate the number of rows in an ascii data file. The estimate is based on
     * up to the first 100 lines following the first line (which is read but not
     * included in the sample) and assumes the line length is approximately
     * constant. The column count is taken from the last line read, split on tabs.
     * <p>
     * Line lengths are measured in characters, excluding line terminators, while
     * the file size is measured in bytes, so the row count is approximate.
     *
     * @param textFile path of the file to examine
     * @return the estimated metrics, or {@code null} if the file could not be
     *         opened or read (the stack trace is printed in that case)
     */
    public static AsciiFileMetrics estimateFileMetrics(String textFile) {
        File file = new File(textFile);

        // try-with-resources guarantees the reader is closed on every path.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            // The first line is skipped for length sampling; it is kept only as a
            // fallback source for the column count when no other line exists.
            String sampleLine = reader.readLine();

            int lineCount = 0;
            long nChars = 0;
            String nextLine;
            while ((nextLine = reader.readLine()) != null) {
                sampleLine = nextLine;
                if (lineCount >= MAX_SAMPLE_LINES) {
                    break;
                }
                nChars += nextLine.length();
                lineCount++;
            }

            // sampleLine is null only for a completely empty file.
            int columnCount = (sampleLine == null) ? 0 : sampleLine.split("\t").length;

            double charsPerLine = (lineCount > 0) ? ((double) nChars / lineCount) : 0;

            // Avoid dividing by zero when nothing (or only blank lines) was sampled;
            // the number of lines actually seen is then the best available estimate.
            int estRowCount = (charsPerLine > 0)
                    ? (int) (file.length() / charsPerLine)
                    : lineCount;

            return new AsciiFileMetrics(estRowCount, columnCount, charsPerLine);
        } catch (IOException ex) {
            // Covers FileNotFoundException as well; callers receive null.
            ex.printStackTrace();
        }
        return null;
    }

    /**
     * Heuristic preprocessing-time estimate derived from the estimated number of
     * cells (rows x columns): a constant of 8 plus 0.0036 per hundred cells,
     * truncated to an int. The unit of the result is not established in this class.
     *
     * @param fileMetrics metrics for the file; must not be {@code null}
     * @return the estimated preprocessing time
     */
    public static int estimatePreprocessingTime(AsciiFileMetrics fileMetrics) {
        return 8 + (int) ((0.0036 * fileMetrics.getEstRowCount() * fileMetrics.getColumnCount()) / 100);
    }

    /**
     * This class has some useful metrics for optimizing reading of large ascii files
     *
     * @author jrobinso
     */
    public static class AsciiFileMetrics {

        private final int estRowCount;
        private final int columnCount;
        private final double estBytesPerLine;

        /**
         * @param estRowCount     estimated number of rows in the file
         * @param columnCount     number of columns
         * @param estBytesPerLine estimated length of one line
         */
        public AsciiFileMetrics(int estRowCount, int columnCount, double estBytesPerLine) {
            this.estRowCount = estRowCount;
            this.columnCount = columnCount;
            this.estBytesPerLine = estBytesPerLine;
        }

        public double getEstBytesPerLine() {
            return estBytesPerLine;
        }

        public int getEstRowCount() {
            return estRowCount;
        }

        public int getColumnCount() {
            return columnCount;
        }
    }
}