/*******************************************************************************
 * GenPlay, Einstein Genome Analyzer
 * Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 * Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 * Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 * Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.filter.utils;

import java.util.List;

import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.AltFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.FilterFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.FlagIDFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.GenotypeIDFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.IDFilterInterface;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.NumberIDFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.NumberIDFilterInterface;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.QualFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.StringIDFilter;
import edu.yu.einstein.genplay.core.multiGenome.filter.VCFID.StringIDFilterInterface;
import edu.yu.einstein.genplay.core.multiGenome.utils.FormattedMultiGenomeName;
import edu.yu.einstein.genplay.core.multiGenome.utils.VCFLineUtility;
import edu.yu.einstein.genplay.dataStructure.enums.VCFColumnName;
import edu.yu.einstein.genplay.util.Utils;


/**
 * Shared logic for testing a {@link VCFLine} against an {@link IDFilterInterface}
 * and for building a human readable description of a filter.
 * Subclasses supply the actual value test ({@link #passTest(IDFilterInterface, String)})
 * as well as the equality and validation logic.
 *
 * @author Nicolas Fourel
 * @version 0.1
 */
public abstract class FilterUtility {

	/**
	 * The method to use to test a VCF line with a specific filter.
	 * For a filter on the FORMAT column, the value of every genome of the filter is read
	 * and the values are combined using the operator of the filter (AND, OR, SUM, MEAN)
	 * when there is more than one genome.
	 * For any other column the value of the column is tested directly.
	 * @param filter	the filter
	 * @param line		the VCF line (map String/Object)
	 * @return			true if the line verifies the filter, false otherwise
	 * (also false when a FORMAT filter has no genome, no usable operator, or when the single value is missing)
	 */
	public boolean isValid(IDFilterInterface filter, VCFLine line) {
		if (filter.getColumnName() != VCFColumnName.FORMAT) {
			String value = line.getValueFromColumn(filter.getColumnName());
			return passTest(filter, value);
		}

		List<String> genomeNames = filter.getGenomeNames();
		if ((genomeNames == null) || genomeNames.isEmpty()) {
			// Without any genome there is no FORMAT value to read, the line cannot verify the filter.
			return false;
		}

		String[] results = new String[genomeNames.size()];
		for (int i = 0; i < results.length; i++) {
			String rawName = FormattedMultiGenomeName.getRawName(genomeNames.get(i));
			results[i] = VCFLineUtility.getValue(line, filter.getHeaderType(), rawName);
		}

		if (results.length == 1) {
			// If results[0] equal null, the ID field has probably not been found.
			return (results[0] != null) && passTest(filter, results[0]);
		}

		FormatFilterOperatorType operator = filter.getOperator();
		if (operator == FormatFilterOperatorType.AND) {
			return passANDTest(filter, results);
		} else if (operator == FormatFilterOperatorType.OR) {
			return passORTest(filter, results);
		} else if (operator == FormatFilterOperatorType.SUM) {
			return passSUMTest(filter, results);
		} else if (operator == FormatFilterOperatorType.MEAN) {
			return passMEANTest(filter, results);
		}
		System.err.println("FilterUtility.isValid() no operator found");
		return false;
	}


	/**
	 * Algorithm for AND operator: every result must pass the test.
	 * @param filter	the filter
	 * @param results	the array of results
	 * @return			true if all the results verify the filter, false otherwise
	 */
	private boolean passANDTest (IDFilterInterface filter, String[] results) {
		for (String result: results) {
			if (!passTest(filter, result)) {
				return false;
			}
		}
		return true;
	}


	/**
	 * Algorithm for OR operator: at least one result must pass the test.
	 * @param filter	the filter
	 * @param results	the array of results
	 * @return			true if at least one result verifies the filter, false otherwise
	 */
	private boolean passORTest (IDFilterInterface filter, String[] results) {
		for (String result: results) {
			if (passTest(filter, result)) {
				return true;
			}
		}
		return false;
	}


	/**
	 * Algorithm for SUM operator: the sum of the results is tested.
	 * Results that cannot be converted with {@link #toFloat(String)} are ignored.
	 * @param filter	the filter
	 * @param results	the array of results
	 * @return			true if the sum of the results verifies the filter, false otherwise
	 */
	private boolean passSUMTest (IDFilterInterface filter, String[] results) {
		float sum = 0.0f;
		for (String result: results) {
			Float value = toFloat(result);
			if (value != null) {
				sum += value;
			}
		}
		return passTest(filter, Float.toString(sum));
	}


	/**
	 * Algorithm for MEAN operator: the mean of the results is tested.
	 * Results that cannot be converted with {@link #toFloat(String)} add nothing to the total
	 * but are still counted in the divisor (the total is divided by the number of results).
	 * @param filter	the filter
	 * @param results	the array of results
	 * @return			true if the mean of the results verifies the filter, false otherwise
	 */
	private boolean passMEANTest (IDFilterInterface filter, String[] results) {
		float mean = 0.0f;
		for (String result: results) {
			Float value = toFloat(result);
			if (value != null) {
				mean += value;
			}
		}
		mean /= results.length;
		return passTest(filter, Float.toString(mean));
	}


	/**
	 * Parses a string to a float.
	 * A string containing commas is handled as a list of floats (see {@link #toComplexFloat(String)}).
	 * @param s the string (can be null)
	 * @return	the float, null if the string is null or cannot be parsed
	 */
	protected Float toFloat (String s) {
		if (s == null) {
			return null;
		}
		if (s.indexOf(',') != -1) {
			return toComplexFloat(s);
		}
		try {
			return Float.parseFloat(s);
		} catch (NumberFormatException e) {
			// Not a number: the contract of this method is to return null in that case.
			return null;
		}
	}


	/**
	 * A complex float is a string that contains several float comma separated.
	 * It will return the minimum float found AND different than 0.0, whatever the position of the zeros in the list.
	 * If all the floats are equal to zero, zero is returned.
	 * @param s the string
	 * @return	the minimum float value found (different than 0.0 if there is one), null if one of the elements is not a valid float
	 */
	private Float toComplexFloat (String s) {
		String[] stringArray = Utils.split(s, ',');
		if (stringArray.length == 1) {
			return toFloat(stringArray[0]);
		}

		Float minNonZero = null;	// smallest value different than zero seen so far
		Float firstZero = null;		// first zero seen, only used when there is no other value
		for (String element: stringArray) {
			Float parsed = toFloat(element);
			if (parsed == null) {
				// One unreadable element makes the whole value unreadable.
				return null;
			}
			float current = parsed.floatValue();
			if (current == 0.0f) {
				if (firstZero == null) {
					firstZero = parsed;
				}
			} else if ((minNonZero == null) || (current < minNonZero.floatValue())) {
				minNonZero = parsed;
			}
		}
		return (minNonZero != null) ? minNonZero : firstZero;
	}


	/**
	 * Creates a description of the filter.
	 * The first part of the description depends on the kind of filter, it is followed by the
	 * genome(s) of the filter (and the operator when there are several genomes) if there are any.
	 * @param filter	the filter
	 * @return			the description as a String, an empty string if the kind of filter is not handled
	 */
	public String toStringForDisplay (IDFilterInterface filter) {
		StringBuilder text = new StringBuilder();

		if ((filter instanceof AltFilter) || (filter instanceof FilterFilter) || (filter instanceof StringIDFilter)) {
			StringIDFilterInterface current = (StringIDFilterInterface) filter;
			text.append("Must ");
			text.append(current.isRequired() ? "contains " : "not contains ");
			text.append(current.getValue());
		} else if ((filter instanceof QualFilter) || (filter instanceof NumberIDFilter)) {
			NumberIDFilterInterface current = (NumberIDFilterInterface) filter;
			text.append("x " + current.getInequation01() + " " + current.getValue01());
			if ((current.getInequation02() != null) && (current.getValue02() != null)) {
				text.append(current.isCumulative() ? " AND " : " OR ");
				text.append("x " + current.getInequation02() + " " + current.getValue02());
			}
		} else if (filter instanceof FlagIDFilter) {
			FlagIDFilter current = (FlagIDFilter) filter;
			text.append("Must be ");
			text.append(current.isRequired() ? "present" : "absent");
		} else if (filter instanceof GenotypeIDFilter) {
			GenotypeIDFilter current = (GenotypeIDFilter) filter;
			text.append("Must be ");
			if (current.getOption() == GenotypeIDFilter.HETEROZYGOTE_OPTION) {
				text.append("heterozygote");
			} else if (current.getOption() == GenotypeIDFilter.HOMOZYGOTE_OPTION) {
				text.append("homozygote");
			}
			text.append(" (");
			boolean phased = current.canBePhased();
			boolean unphased = current.canBeUnPhased();
			if (phased && unphased) {
				text.append("phased & unphased");
			} else if (phased) {
				text.append("phased only");
			} else if (unphased) {
				text.append("unphased only");
			}
			text.append(")");
		}

		if (text.length() > 0) {
			List<String> genomeNames = filter.getGenomeNames();
			FormatFilterOperatorType operator = filter.getOperator();
			if (genomeNames != null) {
				if (genomeNames.size() == 1) {
					text.append(" for ").append(genomeNames.get(0));
				} else if (genomeNames.size() > 1) {
					// Locale.ROOT keeps the upper-casing independent of the default locale of the JVM.
					text.append(" - ").append(operator.toString().toUpperCase(java.util.Locale.ROOT)).append(" of ");
					for (int i = 0; i < genomeNames.size(); i++) {
						if (i > 0) {
							text.append(", ");
						}
						text.append(genomeNames.get(i));
					}
				}
			}
			text.append(".");
		}

		return text.toString();
	}


	/**
	 * @param filter	the filter to use
	 * @param value		the value to test
	 * @return			true if the value passes the test, false otherwise
	 */
	protected abstract boolean passTest (IDFilterInterface filter, String value);


	/**
	 * Tests if both object are equals
	 * @param filter	the filter
	 * @param obj		the other filter
	 * @return			true if both filter are equals
	 */
	public abstract boolean equals(IDFilterInterface filter, Object obj);


	/**
	 * Looks for the error within the filter.
	 * Each error is separated with a new line.
	 * @param filter	the filter
	 * @return			the error(s), an empty string otherwise
	 */
	public abstract String getErrors(IDFilterInterface filter);
}