package aliview.sequences;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import aliview.AliView;
import aliview.AminoAcid;
import aliview.NucleotideUtilities;
import aliview.sequencelist.AlignmentListModel;
import aliview.sequencelist.Interval;
import aliview.utils.ArrayUtilities;
// todo can save memory by changing data implementation into byte instead of char
public class BasicSequence implements Sequence, Comparable<Sequence> {
private static final Logger logger = Logger.getLogger(BasicSequence.class);
private boolean simpleName = false;
// TODO what is this selection offset?
public int selectionOffset = 0;
protected Bases bases;
// TranslatedBases has to be volatile so no problems araise with the double lock in the lazy creation below
// see: http://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html
protected TranslatedBases translatedBases;
protected SequenceSelectionModel selectionModel;
private AlignmentListModel alignmentModel;
protected String name;
protected int id;
public BasicSequence(){
this.id = SequenceUtils.createID();
selectionModel = new DefaultSequenceSelectionModel();
}
public BasicSequence(Bases bases) {
this();
this.bases = bases;
}
public BasicSequence(BasicSequence template) {
this.name = template.name;
this.id = template.id;
this.bases = template.getNonTranslatedBases().getCopy();
this.alignmentModel = template.alignmentModel;
this.selectionModel = createNewSelectionModel();
}
public Sequence getCopy() {
return new BasicSequence(this);
}
public String getName(){
return name;
}
public void setName(String name){
this.name = name;
}
public String getSimpleName(){
return name;
}
public boolean isTranslated() {
if(getAlignmentModel() != null){
return getAlignmentModel().isTranslated();
}
return false;
}
public int getLength() {
return getBases().getLength();
}
public int getNonTranslatedLength() {
return getNonTranslatedBases().getLength();
}
public byte[] getGapPaddedCodonInTranslatedPos(int pos) {
return getTranslatedBases().getGapPaddedCodonInTranslatedPos(pos);
}
public boolean isCodonSecondPos(int pos) {
return getTranslatedBases().isCodonSecondPos(pos);
}
protected Bases getBases(){
if(isTranslated()){
return getTranslatedBases();
}
return this.bases;
}
protected Bases getNonTranslatedBases(){
return this.bases;
}
private TranslatedBases getTranslatedBases(){
if(translatedBases == null){
// this is double locked to avoid synchronized block after the lazy initialization of TranslatedBases object
// TranslatedBases has to be declared volatile above
// see: http://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html and http://en.wikipedia.org/wiki/Double-checked_locking
synchronized(this){
if(translatedBases == null){
translatedBases = new TranslatedBases(this.bases, this);
}
}
}
return translatedBases;
}
public SequenceSelectionModel createNewSelectionModel(){
return new DefaultSequenceSelectionModel();
}
public int countStopCodon(){
return getTranslatedBases().countStopCodon();
}
public AminoAcid getTranslatedAminoAcidAtNucleotidePos(int x) {
return getTranslatedBases().getAminoAcidAtNucleotidePos(x);
}
public AminoAcidAndPosition getNoGapAminoAcidAtNucleotidePos(int target){
return getTranslatedBases().getNoGapAminoAcidAtNucleotidePos(target);
}
public byte getBaseAtPos(int n){
return getBases().get(n);
}
public char getCharAtPos(int n) {
return (char) getBaseAtPos(n);
}
// TODO
public boolean isBaseSelected(int n){
return selectionModel.isSelected(n);
}
public void clearAllSelection(){
selectionModel.clearAll();
}
public void selectAllBases(){
selectionModel.selectAll();
}
public long countSelectedPositions(int startIndex, int endIndex) {
return selectionModel.countSelectedPositions(startIndex, endIndex);
}
public String getSelectedBasesAsString(){
StringBuilder selection = new StringBuilder();
if(selectionModel.hasSelection()){
//logger.info("hasSel");
for(int n = 0;n < getBases().getLength();n++){
if(selectionModel.isSelected(n) == true){
selection.append( getBases().charAt(n) );
}
}
}
return selection.toString();
}
public byte[] getSelectedBasesAsByte(){
byte[] bases = null;
bases = getSelectedBasesAsString().toString().getBytes();
return bases;
}
public String getBasesAsString(){
String baseString = "";
baseString = bases.toString();
return baseString;
}
public void writeBases(OutputStream out) throws IOException{
int length = getBases().getLength();
for(int n = 0; n < length; n++){
out.write(getBases().get(n));
}
}
public void writeBases(Writer out) throws IOException{
int length = getBases().getLength();
for(int n = 0; n < length; n++){
out.write(getBases().get(n));
}
}
public void writeBasesBetween(int start, int end, Writer out) throws IOException {
for(int n = start; n <= end; n++){
out.write( getBases().charAt(n));
}
}
public void toggleSimpleName(){
this.simpleName = !simpleName;
}
public String toString(){
if(simpleName == true){
return getSimpleName();
}else{
return getName();
}
}
public Interval find(Pattern pattern, int startPos){
// Allocate a Matcher object from the compiled regex pattern,
// and provide the input to the Matcher
String basesAsString = getBases().toString();
Matcher matcher = pattern.matcher(basesAsString);
Interval foundInterval = null;
boolean wasFound = matcher.find(startPos);
if(wasFound){
int foundStart = matcher.start();
int foundEnd = matcher.end() - 1;
foundInterval = new Interval(foundStart, foundEnd);
}
else{
//logger.info("not found");
}
return foundInterval;
}
public int find(byte find, int startPos){
for(int n = startPos; n < getBases().getLength(); n++){
if(find == getBases().get(n)){
return n;
}
}
return -1;
}
public int getFirstSelectedPosition() {
return selectionModel.getFirstSelectedPosition();
}
public int getLastSelectedPosition() {
return selectionModel.getLastSelectedPosition(this.getLength());
}
/*
*
* TODO could skip
*
*/
public void replaceSelectedBasesWithGap(){
replaceSelectedBasesWithChar((char)SequenceUtils.GAP_SYMBOL);
}
public void replaceSelectedBasesWithChar(char newChar) {
byte newBase = (byte) newChar;
if(hasSelection()){
// loop through all bases and see if it is selected - this is just as
// fast as trying to separate the selected ones first and then only doing them
for(int n = 0;n < getBases().getLength();n++){
if(isBaseSelected(n) == true){
getBases().set(n, newBase);
}
}
}
}
public int[] getSequenceAsBaseVals() {
int[] baseVals = new int[getBases().getLength()];
for(int n = 0;n < getBases().getLength() ;n++){
baseVals[n] = NucleotideUtilities.baseValFromChar((char) getBases().get(n));
}
return baseVals;
}
public void insertGapLeftOfSelectedBase(){
// get first selected position
int position = getFirstSelectedPosition();
if(rangeCheck(position)){
insertGapAt(position);
}
}
public void insertGapRightOfSelectedBase(){
// get first selected position
int position = getLastSelectedPosition();
if(rangeCheck(position+1)){
insertGapAt(position+1);
}
}
public boolean isGapRightOfSelection(){
return isGapRightOfSelection(1);
}
public boolean isEndRightOfSelection(){
int rightSelected = getLastSelectedPosition();
if(rightSelected + 1 == getLength()){
return true;
}
return false;
}
public boolean isGapOrEndRightOfSelection(){
if(isEndRightOfSelection()){
return true;
}
else{
return isGapRightOfSelection();
}
}
public boolean isGapLeftOfSelection(){
return isGapLeftOfSelection(1);
}
public boolean isGapRightOfSelection(int offset){
boolean isGap = false;
int rightSelected = getLastSelectedPosition();
if(rangeCheck(rightSelected) && rangeCheck(rightSelected+offset)){
if(NucleotideUtilities.isGap(getBaseAtPos(rightSelected + offset))){
isGap = true;
}
}
return isGap;
}
public boolean isGapLeftOfSelection(int offset){
boolean isGap = false;
int leftSelected = getFirstSelectedPosition();
if(rangeCheck(leftSelected) && rangeCheck(leftSelected-offset)){
if(NucleotideUtilities.isGap(getBaseAtPos(leftSelected - offset))){
isGap = true;
}
}
return isGap;
}
public void deleteGapLeftOfSelection(){
// get first selected position
int leftPosition = getFirstSelectedPosition();
if(rangeCheck(leftPosition-1)){
// only if gap is left of selection
if(NucleotideUtilities.isGap(getBaseAtPos(leftPosition-1))){
deleteBase(leftPosition-1);
}
}
}
public void deleteGapRightOfSelection() {
// get first selected position
int rightPosition = getLastSelectedPosition();
if(rangeCheck(rightPosition+1)){
// only if gap is left of selection
if(NucleotideUtilities.isGap(getBaseAtPos(rightPosition+1))){
deleteBase(rightPosition+1);
}
}
}
public void moveSelectionRightIfGapOrEndIsPresent(int steps) {
for(int m = 0; m < steps; m++){
// get first selected position
int leftPosition = getFirstSelectedPosition();
int rightPosition = getLastSelectedPosition();
//if(rangeCheck(leftPosition) && rangeCheck(rightPosition+1)){
// only if gap is right of selection
if(isGapOrEndRightOfSelection()){
// move bases one step at the time from right to left
for(int n = rightPosition; n >= leftPosition; n--){
// move residue
getBases().moveBaseRight(n);
//getBases().set(n + 1, getBases().get(n));
// move selection
// move selection
if(isBaseSelected(n)){
setSelectionAt(n+1);
}else{
clearSelectionAt(n+1);
}
}
// and finally put the gap at the left side
getBases().set(leftPosition,'-');
clearSelectionAt(leftPosition);
}
//}
}
}
public void moveSelectionLeftIfGapIsPresent(int steps) {
for(int m = 0; m < steps; m++){
// get first selected position
int leftPosition = getFirstSelectedPosition();
int rightPosition = getLastSelectedPosition();
if(rangeCheck(leftPosition-1) && rangeCheck(rightPosition)){
// only if gap is left of selection
if(isGapLeftOfSelection()){
for(int n = leftPosition; n <= rightPosition; n++){
// move residue
getBases().moveBaseLeft(n);
//getBases().set(n - 1, getBases().get(n));
// move selection
if(isBaseSelected(n)){
setSelectionAt(n-1);
}else{
clearSelectionAt(n-1);
}
}
// and finally put the gap at the right side
getBases().set(rightPosition, '-');
clearSelectionAt(rightPosition);
}
}
}
}
public void moveSelectedResiduesRightIfGapOrEndIsPresent(){
moveSelectionRightIfGapOrEndIsPresent(1);
}
public void moveSelectedResiduesLeftIfGapIsPresent(){
moveSelectionLeftIfGapIsPresent(1);
}
public void insertGapAt(int n){
getBases().insertAt(n, SequenceUtils.GAP_SYMBOL);
// do the same with selmodel
selectionModel.insertNewPosAt(n);
}
public int[] getSelectedPositions() {
return selectionModel.getSelectedPositions(0, this.getLength() - 1);
}
public void replaceBases(int startReplaceIndex, int stopReplaceIndex, byte[] insertBases) {
getBases().replace(startReplaceIndex, stopReplaceIndex, insertBases);
}
public void setSelectionAt(int i){
selectionModel.setSelectionAt(i);
}
public void clearSelectionAt(int i){
selectionModel.clearSelectionAt(i);
}
public void setSelection(int startIndex, int endIndex, boolean clearFirst){
startIndex = Math.max(0, startIndex);
endIndex = Math.min(this.getLength() - 1, endIndex);
selectionModel.setSelection(startIndex, endIndex, clearFirst);
}
private boolean rangeCheck(int pos) {
if(bases != null &&pos >= 0 && pos < bases.getLength()){
return true;
}
return false;
}
public void deleteSelectedBases(){
int[] toDelete = selectionModel.getSelectedPositions(0, this.getLength() - 1);
getBases().delete(toDelete);
createNewSelectionModel();
}
public void deleteBase(int index){
getBases().delete(index);
selectionModel.removePosition(index);
}
public void reverseComplement() {
reverse();
complement();
}
public void complement() {
getBases().complement();
}
public void reverse(){
getBases().reverse();
}
public void rightPadSequenceWithGaps(int finalLength) {
int addCount = finalLength - getBases().getLength();
if(addCount > 0){
byte[] additional = new byte[addCount];
Arrays.fill(additional, SequenceUtils.GAP_SYMBOL);
getBases().append(additional);
}
}
public void leftPadSequenceWithGaps(int finalLength) {
int addCount = finalLength - getBases().getLength();
if(addCount > 0){
byte[] additional = new byte[addCount];
Arrays.fill(additional, SequenceUtils.GAP_SYMBOL);
getBases().insertAt(0,additional);
}
}
public String getCitatedName() {
String name = getName();
name = StringUtils.remove(name, '\'');
logger.info(name);
name = StringUtils.remove(name, '\"');
name = StringUtils.remove(name, '>');
name = "'" + name + "'";
return name;
}
public String getBasesAtThesePosAsString(ArrayList<Integer> allWantedPos) {
StringBuilder allPos = new StringBuilder();
for(Integer aPos: allWantedPos){
allPos.append((char)getBaseAtPos(aPos.intValue()));
}
return allPos.toString();
}
public void deleteBasesFromMask(boolean[] mask){
int nTruePos = ArrayUtilities.count(mask, true);
int[] toDelete = new int[nTruePos];
int deleteCount = 0;
for(int n = 0; n < getBases().getLength() && n < mask.length ; n++){
if(mask[n] == true){
toDelete[deleteCount] = n;
deleteCount ++;
}
}
getBases().delete(toDelete);
// and do same for sel-model
for(int n = mask.length-1; n >= 0; n--){
if(mask[n] == true){
selectionModel.removePosition(n);
}
}
}
public void append(String moreInterleavedsequence) {
getBases().append(moreInterleavedsequence.getBytes());
}
public boolean hasSelection() {
return selectionModel.hasSelection();
}
public void clearBase(int pos) {
getBases().set(pos, SequenceUtils.GAP_SYMBOL);
}
public byte[] getAllBasesAsByteArray(){
return getBases().toByteArray();
}
public byte[] getBasesBetween(int startIndexInclusive, int endIndexInclusive){
return getBases().toByteArray(startIndexInclusive, endIndexInclusive);
}
public boolean isEmpty() {
// TODO Auto-generated method stub
boolean isEmpty = true;
for(int n = 0; n < getBases().getLength(); n++){
if(getBaseAtPos(n) == '-' || getBaseAtPos(n) =='?'){
}else{
isEmpty = false;
break;
}
}
return isEmpty;
}
public int getUngapedLength() {
return getUngapedPos(this.getLength());
}
public int compareTo(Sequence other) {
return getName().compareTo(other.getName());
}
public int getUngapedPos(int position){
// TODO this is a problem with large sequences
if(position > 1000000){
return -1;
}
int posCount = 0;
int gapCount = 0;
for(int n = 0; n <= position; n++){
if(NucleotideUtilities.isGap(getBaseAtPos(n))){
gapCount ++;
}else{
posCount ++;
}
}
return posCount;
}
public String getUngapedSequence() {
StringBuilder ungapedSeq = new StringBuilder(getLength());
for(int n = 0; n < getLength(); n++){
byte base = getBaseAtPos(n);
if(NucleotideUtilities.isGap(base)){
// skip this one
}else{
ungapedSeq.append((char)base);
}
}
return ungapedSeq.toString();
}
public void selectAllBasesUntilGap(int x) {
// loop right until gap
for(int n = x; n < getLength(); n++){
if(NucleotideUtilities.isGap(getBaseAtPos(n))){
break;
}
else{
setSelectionAt(n);
}
}
// and then left
for(int n = x; n >=0; n--){
if(NucleotideUtilities.isGap(getBaseAtPos(n))){
break;
}
else{
setSelectionAt(n);
}
}
}
public void selectionExtendRight() {
if(hasSelection()){
int lastSelectedPos = getLastSelectedPosition();
int seqEndPos = getLength() - 1;
setSelection(lastSelectedPos, seqEndPos, true);
}
}
public void selectionExtendLeft() {
if(hasSelection()){
int firstSelectedPos = getLastSelectedPosition();
setSelection(0, firstSelectedPos, true);
}
}
public void invertSelection(){
selectionModel.invertSelection(getLength());
}
public void deleteAllGaps(){
// no matter if translated or not - always remove all gaps from backend sequence
getNonTranslatedBases().deleteAll(SequenceUtils.GAP_SYMBOL);
//getUnBases().deleteAll(SequenceUtils.GAP_SYMBOL);
createNewSelectionModel();
}
public int getID() {
return id;
}
public int getPosOfSelectedIndex(int posInSeq) {
return selectionModel.countPositionsUntilSelectedCount(posInSeq);
}
public boolean isAllSelected() {
return selectionModel.isAllSelected();
}
public int countChar(char targetChar) {
int count = 0;
for(int n = 0; n < getBases().getLength(); n++){
if(getCharAtPos(n) == targetChar){
count ++;
}
}
return count;
}
public boolean contains(char testChar) {
boolean contains = false;
for(int n = 0; n < getBases().getLength(); n++){
if(getCharAtPos(n) == testChar){
contains = true;
break;
}
}
return contains;
}
public int indexOf(char testChar) {
for(int n = 0; n < getBases().getLength(); n++){
if(getCharAtPos(n) == testChar){
return n;
}
}
return -1;
}
public int countChar(char targetChar, int startpos, int endpos) {
int count = 0;
for(int n = startpos; n < endpos && n < getBases().getLength(); n++){
if(targetChar == getBases().charAt(n)){
count ++;
}
}
return count;
}
public void setAlignmentModel(AlignmentListModel model){
this.alignmentModel = model;
}
public AlignmentListModel getAlignmentModel() {
return alignmentModel;
}
}