/*******************************************************************************
 * GenPlay, Einstein Genome Analyzer
 * Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 *          Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 *          Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 * Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.parser.genomeWindowParser;

import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import edu.yu.einstein.genplay.util.NumberFormats;


/**
 * The {@link PositionParser} parses a string in order to retrieve its text and number values.
 * Every parser is based on grammar rules; here are the ones for this parser:
 * <ul>
 * <li>a text value is composed of capital and small letters (code 65 to 122)</li>
 * <li>a number value is composed of the digits 0 to 9 (code 48 to 57)</li>
 * <li>the delimiters are:
 *   <ul>
 *   <li>"tab" (code 9)</li>
 *   <li>"-" (code 45) (if not preceded by a delimiter, otherwise it is considered as a minus sign)</li>
 *   <li>":" (code 58)</li>
 *   <li>";" (code 59)</li>
 *   </ul>
 * </li>
 * <li>a text followed by a number (eg: "hello 5") is considered as a text value</li>
 * <li>a number followed by a text (eg: "5 hello") is considered as a number value</li>
 * <li>a number value is an integer: every value outside of the integer bounds
 * (-2,147,483,648 to 2,147,483,647 inclusive) is excluded</li>
 * <li>a number value can contain commas and a dot (as decimal delimiter) in its text format</li>
 * </ul>
 * The classification of each character is delegated to a {@link CharacterHandler}.
 *
 * @author Nicolas Fourel
 */
public class PositionParser {

	private NumberFormat		format;				// The number format used to convert the position strings.
	private List<String>		textElements;		// The list of text elements.
	private List<String>		positionElements;	// The list of positions (still as strings).
	private List<Integer>		numberElements;		// The list of positions (as integers), sorted in ascending order.
	private CharacterHandler	currentCharacter;	// The handler of the character currently processed.


	/**
	 * Constructor of {@link PositionParser}
	 */
	public PositionParser () {
		initialize();
	}


	/**
	 * Builds a multi-line description of a list, one "index: element" entry per line (indexes start at 1).
	 * @param list a list
	 * @return the description of the list, or "The list is empty." if the list has no element
	 */
	private String getListDescription (List<?> list) {
		int size = list.size();
		if (size == 0) {
			return "The list is empty.";
		}
		StringBuilder description = new StringBuilder();
		for (int i = 0; i < size; i++) {
			if (i > 0) {
				description.append("\n");
			}
			description.append(i + 1).append(": ").append(list.get(i).toString());
		}
		return description.toString();
	}


	/**
	 * @return the numberElements (the parsed positions as integers, sorted in ascending order)
	 */
	public List<Integer> getNumberElements() {
		return numberElements;
	}


	/**
	 * @return the textElements (in the order they were found in the parsed string)
	 */
	public List<String> getTextElements() {
		return textElements;
	}


	/**
	 * Initializes the parser: retrieves the position format and creates fresh, empty lists
	 * and a fresh character handler.
	 */
	private void initialize () {
		format = NumberFormats.getPositionFormat();
		textElements = new ArrayList<String>();
		positionElements = new ArrayList<String>();
		numberElements = new ArrayList<Integer>();
		currentCharacter = new CharacterHandler();
	}


	/**
	 * Integer has a minimum value of -2,147,483,648 and a maximum value of 2,147,483,647 (inclusive).
	 * The test is performed on the parsed {@link Number} and not on an int: once a value has been
	 * narrowed to an int it is always within the integer bounds, which would make the test useless.
	 * The fractional part, if any, is ignored (truncated toward zero) before the comparison.
	 * @param number a number
	 * @return true if the number is within the integer bounds, false otherwise (including NaN)
	 */
	private static boolean isIntBound (Number number) {
		double value = number.doubleValue();
		if (Double.isNaN(value)) {
			return false;
		}
		// The cast truncates toward zero and saturates at the long bounds, which are far outside of the int range.
		long truncated = (long) value;
		return (truncated >= Integer.MIN_VALUE) && (truncated <= Integer.MAX_VALUE);
	}


	/**
	 * Transforms the list of positions (still in text format) into a sorted list of integers.
	 * Positions that cannot be parsed or that are outside of the integer bounds are skipped.
	 */
	private void optimizeIntegerList () {
		numberElements = new ArrayList<Integer>();
		for (String position: positionElements) {
			Number current = null;
			try {
				current = format.parse(position);
			} catch (Exception e) {
				// Best effort on purpose: an element that is not a valid number is simply not a position.
			}
			// The bounds must be checked BEFORE narrowing to int, otherwise out of range values
			// would be silently wrapped or saturated instead of being excluded.
			if ((current != null) && isIntBound(current)) {
				numberElements.add(current.intValue());
			}
		}
		Collections.sort(numberElements);
	}


	/**
	 * Parses a string and fills the text and number lists with the elements found.
	 * The result of any previous parsing is discarded.
	 * @param s the string to parse, a null string is handled as an empty one
	 */
	public void parse (String s) {
		// Initialize the parser (reset the lists).
		initialize();

		// Nothing to extract from a null string.
		if (s == null) {
			return;
		}

		// Initialize local parser attributes
		int size = s.length();
		Boolean isText = null;					// null: type of the current element unknown yet, true: text element, false: position element.
		String currentBuffer = "";				// The current element in process.
		CharacterHandler previousChar = null;	// The handler of the previous character (null for the first character).

		// Parse
		for (int i = 0; i < size; i++) {
			currentCharacter.initialize(s.charAt(i), previousChar);
			if (currentCharacter.isDelimiter()) {
				// A delimiter closes the current element, but only if its type has been determined.
				if (isText != null) {
					storeElement(isText, currentBuffer);
					isText = null;
					currentBuffer = "";
				}
			} else {
				currentBuffer += currentCharacter.getCharacter();
				// The first letter or number character met decides the type of the whole element.
				if (currentCharacter.isLetter()) {
					if (isText == null) {
						isText = true;
					}
				} else if (currentCharacter.isInteger() || currentCharacter.isIntegerPart()) {
					if (isText == null) {
						isText = false;
					}
				}
			}
			// NOTE(review): from the second iteration on, previousChar and currentCharacter are the same
			// instance; whether CharacterHandler.initialize copes with receiving itself cannot be
			// verified from this file -- confirm against CharacterHandler.
			previousChar = currentCharacter;
		}

		// Add the last element
		if (isText != null) {
			storeElement(isText, currentBuffer);
		}

		// Transform positions into numbers
		optimizeIntegerList();
	}


	/**
	 * Shows the content of the parser on the standard output.
	 */
	public void show () {
		StringBuilder info = new StringBuilder();
		info.append("List of text elements:\n");
		info.append(getListDescription(textElements)).append("\n");
		info.append("List of position elements:\n");
		info.append(getListDescription(positionElements)).append("\n");
		info.append("List of number elements:\n");
		info.append(getListDescription(numberElements));
		System.out.println(info.toString());
	}


	/**
	 * Stores an element in the list matching its type.
	 * @param isText	true if the element is a text element, false if it is a position element
	 * @param element	the element to store
	 */
	private void storeElement (boolean isText, String element) {
		if (isText) {
			textElements.add(element);
		} else {
			positionElements.add(element);
		}
	}
}