/*
 * Copyright (C) Justo Montiel, David Torres, Sergio Gomez, Alberto Fernandez
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see
 * <http://www.gnu.org/licenses/>
 */

package importExport;

import genomeObjects.ExtendedCRON;
import inicial.FesLog;
import inicial.Language;

import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.StringTokenizer;
import java.util.logging.Level;

import javax.swing.JOptionPane;
import javax.swing.JProgressBar;
import javax.swing.tree.DefaultMutableTreeNode;

import ContextForest.DissimilarityMatrixData;

import errors.FitxerIncompatible;

/**
 * <p>
 * <b>MultiDendrograms</b>
 * </p>
 *
 * Reads a distances matrix in either list or matrix format, with and without
 * headers. The rows come from a text file, from an {@link ExtendedCRON} or
 * from a {@link DissimilarityMatrixData}, depending on the constructor used.
 * All parsing happens in the constructor; {@link #read()} only hands back the
 * result.
 *
 * @author Justo Montiel, David Torres, Sergio Gómez, Alberto Fernández
 *
 * @since JDK 6.0
 */
public class ReadTXT implements PropertyChangeListener, Serializable {

    /** Sentinel stored in the working matrices for "no distance assigned". */
    private static final double NULL = -1.0;

    /** Characters that separate two tokens inside one row. */
    private static final String DELIMS = " ,;|\t\n";

    private final String nomfitx;
    private int numElements = 0;
    private String[] TaulaNoms;
    private final LinkedList<String[]> lstdades;
    private LinkedList<StructIn<String>> lst;
    private final ExtendedCRON EC;

    // Never assigned anywhere in this class, so propertyChange is currently
    // a no-op.
    private JProgressBar progressBar = null;

    /**
     * Reads and parses the text file found at the given path.
     *
     * @param pathFichero
     *            file path
     * @throws Exception
     *             a {@link FitxerIncompatible} when the file is empty,
     *             unreadable or malformed; any other exception raised by the
     *             parsers is propagated unchanged
     */
    public ReadTXT(final String pathFichero) throws Exception {
        nomfitx = pathFichero;
        lstdades = this.PosaEnMemoria();
        EC = new ExtendedCRON();
        // Legacy rule: 3 columns x 2 rows is read as a list, not as a matrix.
        lst = this.detectFormatAndParse(false);
    }

    /**
     * Use this constructor for context trees: the rows are taken from the
     * matrix-formatted dissimilarities of the given object (the list-format
     * loader {@code PosaEnMemoria_noFile} is the unused alternative).
     *
     * @param EC
     *            source of the dissimilarity rows
     * @throws Exception
     *             a {@link FitxerIncompatible} when there are no rows or the
     *             rows are malformed; any other exception raised by the
     *             parsers is propagated unchanged
     */
    public ReadTXT(final ExtendedCRON EC) throws Exception {
        this.EC = EC;
        nomfitx = "";
        lstdades = this.PosaEnMemoria_noFile_2();
        lst = this.detectFormatAndParse(true);
    }

    /**
     * Use this constructor for context forests: the rows are taken from the
     * matrix-formatted dissimilarities of the given object (the list-format
     * loader {@code PosaEnMemoria_CF} is the unused alternative).
     *
     * @param DMD
     *            source of the dissimilarity rows
     * @throws Exception
     *             a {@link FitxerIncompatible} when there are no rows or the
     *             rows are malformed; any other exception raised by the
     *             parsers is propagated unchanged
     */
    public ReadTXT(DissimilarityMatrixData DMD) throws Exception {
        this.EC = null;
        nomfitx = "";
        lstdades = this.PosaEnMemoria_CF_2(DMD);
        lst = this.detectFormatAndParse(true);
    }

    /**
     * Returns the distances parsed by the constructor.
     *
     * @return the parsed pairs; {@code null} when the first row had fewer
     *         than three columns, which only the in-memory constructors let
     *         through
     */
    public LinkedList<StructIn<String>> read() {
        return lst;
    }

    /**
     * Decides whether the loaded rows are a matrix or a list of pairs and
     * runs the matching parser.
     *
     * @param allowTwoNodeMatrix
     *            when {@code true}, 3 columns x 2 rows is read as a matrix
     *            (the 2-node case); when {@code false} it is read as a list
     * @return the parsed pairs, or {@code null} when the first row has fewer
     *         than three columns
     * @throws Exception
     *             whatever the selected parser throws, or
     *             {@link FitxerIncompatible} when there are no rows or when
     *             neither format fits an ambiguous 3x3 input
     */
    private LinkedList<StructIn<String>> detectFormatAndParse(
            final boolean allowTwoNodeMatrix) throws Exception {
        if (lstdades.isEmpty()) {
            // Label 103 is the one the file loader uses for an empty file.
            throw new FitxerIncompatible(Language.getLabel(103));
        }
        final int nc = lstdades.get(0).length;
        final int nl = lstdades.size();

        final boolean twoNodeMatrix = allowTwoNodeMatrix && (nc == 3)
                && (nl == 2);
        if ((nc > 3) || ((nc == 3) && (nl == 4)) || twoNodeMatrix) {
            return this.llegeixMatriu();
        }
        if (nc != 3) {
            // Fewer than three columns: no parser applies.
            return null;
        }
        if (nl == 3) {
            return this.parseAmbiguous();
        }
        return this.llegeixAparellat();
    }

    /**
     * Handles the 3x3 input, which may be either format: both parsers are
     * tried and the list interpretation wins when both succeed (after
     * warning the user).
     *
     * @return the pairs of whichever interpretation was kept
     * @throws FitxerIncompatible
     *             when neither parser accepts the rows
     */
    private LinkedList<StructIn<String>> parseAmbiguous()
            throws FitxerIncompatible {
        LinkedList<StructIn<String>> lstA = null;
        int numElementsA = 0;
        String[] nomsA = null;
        try {
            lstA = this.llegeixAparellat();
            // Remember the list metadata: the matrix attempt below
            // overwrites both fields when it succeeds.
            numElementsA = numElements;
            nomsA = TaulaNoms;
        } catch (final Exception ignored) {
            // The parse is used as a format probe; failure means "not a list".
            lstA = null;
        }

        LinkedList<StructIn<String>> lstM = null;
        try {
            lstM = this.llegeixMatriu();
        } catch (final Exception ignored) {
            // Same probe: failure means "not a matrix".
            lstM = null;
        }

        if (lstA != null) {
            if (lstM != null) {
                // Unable to determine format
                final String msg = Language.getLabel(10);
                JOptionPane.showMessageDialog(null, msg, "Warning",
                        JOptionPane.WARNING_MESSAGE);
                // Keep names and count consistent with the returned pairs.
                numElements = numElementsA;
                TaulaNoms = nomsA;
            }
            return lstA;
        }
        if (lstM != null) {
            return lstM;
        }
        throw new FitxerIncompatible(Language.getLabel(11));
    }

    /**
     * Parses the rows as a list of pairs: {@code name name distance}.
     * Names are numbered in order of first appearance. On success the
     * element count and the name table of this reader are updated.
     *
     * @return the accepted pairs, each with the earlier-seen name first
     * @throws Exception
     *             a {@link FitxerIncompatible} when the third column of a
     *             row is not a number or when some pair of names has no
     *             distance
     */
    private LinkedList<StructIn<String>> llegeixAparellat() throws Exception {
        final Hashtable<String, Integer> ht = new Hashtable<String, Integer>();
        final LinkedList<StructIn<String>> lstTmp = new LinkedList<StructIn<String>>();
        int numLinia = 1;
        int ind = 0;

        for (final String[] s : lstdades) {
            final String a = s[0];
            final String b = s[1];
            double v;
            try {
                v = Double.parseDouble(s[2]);
            } catch (final NumberFormatException e) {
                // Type error in third column
                throw new FitxerIncompatible(Language.getLabel(13) + numLinia
                        + Language.getLabel(14));
            }
            if (!ht.containsKey(a)) {
                ht.put(a, ind++);
            }
            if (!ht.containsKey(b)) {
                ht.put(b, ind++);
            }
            lstTmp.add(new StructIn<String>(a, b, v));
            numLinia++;
        }

        // Invert the name -> index table.
        final int ncols = ht.size();
        final String[] noms = new String[ncols];
        final Enumeration<String> keys = ht.keys();
        while (keys.hasMoreElements()) {
            final String snom = keys.nextElement();
            noms[ht.get(snom)] = snom;
        }

        // Initialize matrix (this loop never touches the last diagonal cell)
        final double[][] dades = new double[ncols][ncols];
        for (int r = 0; r < ncols - 1; r++) {
            for (int c = r; c < ncols; c++) {
                dades[r][c] = ReadTXT.NULL;
                dades[c][r] = ReadTXT.NULL;
            }
        }

        final LinkedList<StructIn<String>> lstd = new LinkedList<StructIn<String>>();
        for (final StructIn<String> s : lstTmp) {
            int i = ht.get(s.getC1());
            int ii = ht.get(s.getC2());
            final double val = s.getVal();
            if (i > ii) {
                // Only the upper triangle is used.
                final int iaux = i;
                i = ii;
                ii = iaux;
            }
            // A pair is accepted when it is still unassigned or repeats the
            // stored value; a conflicting repetition is silently dropped.
            final double current = dades[i][ii];
            if ((current == ReadTXT.NULL) || (current == val)) {
                dades[i][ii] = val;
                lstd.add(new StructIn<String>(noms[i], noms[ii], val));
            }
        }

        for (int r = 0; r < ncols - 1; r++) {
            for (int c = r + 1; c < ncols; c++) {
                // Unassigned distances error
                if (dades[r][c] == ReadTXT.NULL) {
                    throw new FitxerIncompatible(Language.getLabel(15));
                }
            }
        }

        numElements = ncols;
        TaulaNoms = noms;
        return lstd;
    }

    /**
     * Parses the rows as a square, symmetric matrix with a zero diagonal.
     * Three layouts are told apart by comparing the number of rows
     * ({@code nl}) with the number of columns of the first row ({@code nc}):
     * more rows than columns means the first row holds the names; equal
     * means no names (elements are named "1".."nc"); fewer rows than columns
     * means the first column of every row holds its name. On success the
     * element count and the name table of this reader are updated.
     *
     * @return one entry per pair above the diagonal
     * @throws Exception
     *             a {@link FitxerIncompatible} when there are too many rows,
     *             a cell is not a number, the diagonal is not zero or the
     *             matrix is not symmetric
     */
    private LinkedList<StructIn<String>> llegeixMatriu() throws Exception {
        final int nl = lstdades.size();
        final int nc = lstdades.get(0).length;
        final double[][] dades = new double[nc][nc];
        final Iterator<String[]> it = lstdades.iterator();
        final String[] noms;
        final int side;
        int numLinia = 1;
        int row = 0;

        if (nl >= nc) {
            if (nl > nc) {
                // Header row with the names.
                noms = it.next();
            } else {
                noms = new String[nc];
                for (int n = 0; n < nc; n++) {
                    noms[n] = Integer.toString(n + 1);
                }
                numLinia--;
            }
            while (it.hasNext()) {
                numLinia++;
                final String[] tmp = it.next();
                if (row >= nc) {
                    throw new FitxerIncompatible(Language.getLabel(100));
                }
                for (int col = 0; col < nc; col++) {
                    final double value = parseCell(tmp[col]);
                    if (col != row) {
                        dades[row][col] = value;
                    } else if (value != 0) {
                        throw new FitxerIncompatible(Language.getLabel(101)
                                + numLinia + ")");
                    } else {
                        dades[row][col] = ReadTXT.NULL;
                    }
                }
                row++;
            }
            side = nc;
        } else {
            // Row-labelled layout: column 0 is the name, the matrix follows.
            noms = new String[nl];
            while (it.hasNext()) {
                final String[] tmp = it.next();
                noms[row] = tmp[0];
                for (int col = 1; col < nc; col++) {
                    final double value = parseCell(tmp[col]);
                    if (col != row + 1) {
                        dades[row][col - 1] = value;
                    } else if (value != 0) {
                        throw new FitxerIncompatible(Language.getLabel(101)
                                + numLinia + ")");
                    } else {
                        dades[row][col - 1] = ReadTXT.NULL;
                    }
                }
                numLinia++;
                row++;
            }
            side = nl;
        }

        final LinkedList<StructIn<String>> lstd = new LinkedList<StructIn<String>>();
        for (int r = 0; r < side - 1; r++) {
            for (int c = r + 1; c < side; c++) {
                if (dades[r][c] != dades[c][r]) {
                    // Non-symmetric matrix error
                    throw new FitxerIncompatible(Language.getLabel(12));
                }
                lstd.add(new StructIn<String>(noms[r], noms[c], dades[r][c]));
            }
        }

        // NOTE(review): in the row-labelled layout this is the column count
        // (names + 1 label column), not noms.length -- kept as it was;
        // confirm against the callers of getNumElements().
        numElements = nc;
        TaulaNoms = noms;
        return lstd;
    }

    /**
     * Converts one matrix cell to a number.
     *
     * @param token
     *            text of the cell
     * @return the numeric value
     * @throws FitxerIncompatible
     *             when the text is not a valid number
     */
    private static double parseCell(final String token)
            throws FitxerIncompatible {
        try {
            return Double.parseDouble(token);
        } catch (final NumberFormatException e) {
            throw new FitxerIncompatible(Language.getLabel(102));
        }
    }

    /**
     * @return the value stored by the last successful parse (0 before any)
     */
    public int getNumElements() {
        return numElements;
    }

    /**
     * @return the name table stored by the last successful parse
     *         ({@code null} before any)
     */
    public String[] getTaulaNoms() {
        return TaulaNoms;
    }

    /**
     * Splits one row into its tokens.
     *
     * @param linia
     *            text of the row
     * @return the tokens, in order; empty when the row holds only delimiters
     */
    private static String[] tokenize(final String linia) {
        final StringTokenizer st = new StringTokenizer(linia, DELIMS);
        final String[] tokens = new String[st.countTokens()];
        for (int c = 0; c < tokens.length; c++) {
            tokens[c] = st.nextToken();
        }
        return tokens;
    }

    /**
     * Loads the file into memory, one token array per line, checking that
     * the first line has at least three columns and that every other line
     * has as many columns as the first.
     *
     * @return the tokenized lines; never empty
     * @throws FitxerIncompatible
     *             when the file is empty, cannot be read or has a wrong
     *             number of columns in some line
     */
    private LinkedList<String[]> PosaEnMemoria() throws FitxerIncompatible {
        final LinkedList<String[]> lstDades = new LinkedList<String[]>();
        final File fichero = new File(nomfitx);
        BufferedReader buff = null;
        try {
            buff = new BufferedReader(new FileReader(fichero));

            // Reading headers
            String linia = buff.readLine();
            if (linia == null) {
                // Empty file
                throw new FitxerIncompatible(Language.getLabel(103) + " '"
                        + fichero.getName() + "'");
            }
            final String[] first = tokenize(linia);
            final int numCols = first.length;
            if (numCols < 3) {
                throw new FitxerIncompatible(Language.getLabel(104) + " 1"
                        + Language.getLabel(105) + " '" + fichero.getName()
                        + "'");
            }
            lstDades.add(first);

            int numLinia = 1;
            while ((linia = buff.readLine()) != null) {
                numLinia++;
                final String[] dadesLinia = tokenize(linia);
                if (dadesLinia.length != numCols) {
                    // Number of columns error
                    throw new FitxerIncompatible(Language.getLabel(104) + " "
                            + numLinia + Language.getLabel(105) + " '"
                            + fichero.getName() + "'");
                }
                lstDades.add(dadesLinia);
            }
        } catch (final IOException e) {
            // Report the failure instead of returning a partial/empty list.
            final FitxerIncompatible ex = new FitxerIncompatible(
                    "Unable to read file '" + fichero.getName() + "': "
                            + e.getMessage());
            ex.initCause(e);
            throw ex;
        } finally {
            if (buff != null) {
                try {
                    buff.close();
                } catch (final IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return lstDades;
    }

    /**
     * Tokenizes the matrix-formatted dissimilarities of the
     * {@link ExtendedCRON} given to the constructor. No column-count check
     * is made.
     *
     * @return one token array per row
     */
    private LinkedList<String[]> PosaEnMemoria_noFile_2() {
        final LinkedList<String[]> lstDades = new LinkedList<String[]>();
        // reading through all lines
        for (String S : EC.getDissimilaritiesAsMatrix()) {
            lstDades.add(tokenize(S));
        }
        return lstDades;
    }

    /**
     * Tokenizes the list-formatted dissimilarities of the
     * {@link ExtendedCRON} given to the constructor. Not called within this
     * class; kept as the list-format alternative to
     * {@link #PosaEnMemoria_noFile_2()}.
     *
     * @return one token array per row
     */
    private LinkedList<String[]> PosaEnMemoria_noFile() {
        final LinkedList<String[]> lstDades = new LinkedList<String[]>();
        for (int i = 0; i < EC.getDissimilarities().size(); i++) {
            lstDades.add(tokenize(EC.getDissimilarities().get(i)));
        }
        return lstDades;
    }

    /**
     * Tokenizes the list-formatted dissimilarities of the given data,
     * printing progress to standard output when there are 250 rows or more.
     * Not called within this class; kept as the list-format alternative to
     * {@link #PosaEnMemoria_CF_2(DissimilarityMatrixData)}.
     *
     * @param DMD
     *            source of the dissimilarity rows
     * @return one token array per row
     */
    private LinkedList<String[]> PosaEnMemoria_CF(DissimilarityMatrixData DMD) {
        final LinkedList<String[]> lstDades = new LinkedList<String[]>();

        // only print status for large matrices.
        boolean PrintStatus = false;
        if (DMD.getFormattedDissimilarities().size() >= 250) {
            PrintStatus = true;
            System.out.println("Large Tree: Data loading status will be displayed.");
        }

        int Counter = 0;
        for (int i = 0; i < DMD.getFormattedDissimilarities().size(); i++) {
            lstDades.add(tokenize(DMD.getFormattedDissimilarities().get(i)));

            // display status messages, if appropriate.
            if (PrintStatus) {
                Counter++;
                if (Counter % 5000 == 0) {
                    System.out.println("Read " + Counter + "/"
                            + DMD.getFormattedDissimilarities().size()
                            + " Dissimilarity Relations.");
                }
            }
        }

        if (PrintStatus) {
            System.out.println("All dissimilarities successfully read!");
        }
        return lstDades;
    }

    /**
     * Tokenizes the matrix-formatted dissimilarities of the given data,
     * announcing the transfer on standard output. No column-count check is
     * made.
     *
     * @param DMD
     *            source of the dissimilarity rows
     * @return one token array per row
     */
    private LinkedList<String[]> PosaEnMemoria_CF_2(DissimilarityMatrixData DMD) {
        final LinkedList<String[]> lstDades = new LinkedList<String[]>();
        System.out.println("Transferring matrix data.");
        // reading through all lines
        for (String S : DMD.getMatrixFormattedDissimilarities()) {
            lstDades.add(tokenize(S));
        }
        return lstDades;
    }

    /**
     * Forwards "progress" events to the progress bar, when there is one.
     * Currently nonfunctional: no code in this class ever sets the bar.
     *
     * @param evt
     *            the event; only an Integer value of the "progress" property
     *            is used
     */
    @Override
    public void propertyChange(PropertyChangeEvent evt) {
        if (progressBar == null) {
            return;
        }
        // Compare by content: == on strings only tests object identity.
        if ("progress".equals(evt.getPropertyName())
                && (evt.getNewValue() instanceof Integer)) {
            final int progress = (Integer) evt.getNewValue();
            progressBar.setValue(progress);
        }
    }
}