/*
 * Copyright 2015 MiLaboratory.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.milaboratory.core.motif;

import com.milaboratory.core.sequence.Alphabet;
import com.milaboratory.core.sequence.Sequence;
import com.milaboratory.core.sequence.Wildcard;
import com.milaboratory.util.BitArray;

import java.util.Arrays;

/**
 * Data structure for efficient exact and fuzzy matching/searching of sequences (wildcard-aware).
 *
 * <p>A motif is stored as a flat bit matrix: bit {@code code * size + position} is set when the
 * letter with the given code is allowed at the given position of the motif.</p>
 *
 * @param <S> base sequence type
 */
public final class Motif<S extends Sequence<S>> implements java.io.Serializable {
    private final Alphabet<S> alphabet;
    private final int size;
    /**
     * data.get(code * size + position)
     */
    final BitArray data;
    /**
     * Pre-computed bitap masks; {@code null} when the motif has 64 or more positions.
     */
    final BitapPattern bitapPattern;

    /**
     * Creates a motif directly from its bit matrix.
     *
     * @param alphabet alphabet of the sequences this motif is matched against
     * @param size     number of positions in the motif
     * @param data     bit matrix addressed as {@code code * size + position}; stored as is (not copied)
     * @throws IllegalArgumentException if some position has no allowed letter at all
     */
    Motif(Alphabet<S> alphabet, int size, BitArray data) {
        if (!dataConsistent(data, size)) {
            throw new IllegalArgumentException("Inconsistent data. Some positions in motif has no possible values.");
        }
        this.alphabet = alphabet;
        this.size = size;
        this.data = data;
        this.bitapPattern = toBitapPattern();
    }

    /**
     * Creates motif from sequence.
     *
     * <p>Every letter of the sequence is converted to its wildcard, and all codes matched by that
     * wildcard are marked as allowed at the corresponding position.</p>
     *
     * @param sequence sequence
     */
    public Motif(S sequence) {
        this.alphabet = sequence.getAlphabet();
        this.size = sequence.size();
        this.data = new BitArray(alphabet.size() * size);
        for (int position = 0; position < size; ++position) {
            Wildcard wildcard = alphabet.codeToWildcard(sequence.codeAt(position));
            for (int k = 0; k < wildcard.size(); k++) {
                data.set(wildcard.getMatchingCode(k) * size + position);
            }
        }
        this.bitapPattern = toBitapPattern();
    }

    /**
     * Returns per-position or of two motifs.
     *
     * <p>e.g. ATGC or TTCC = WTSC</p>
     *
     * @param other motif to combine with this one; must have the same size as this motif
     * @return new motif whose bit matrix is the bitwise OR of both motifs' matrices
     * @throws IllegalArgumentException if the sizes of the two motifs differ
     */
    public Motif<S> or(Motif<S> other) {
        if (other.size != size) {
            throw new IllegalArgumentException("Supports only motifs with the same size as this.");
        }
        BitArray combined = data.clone();
        combined.or(other.data);
        return new Motif<>(alphabet, size, combined);
    }

    /**
     * Returns the pre-computed bitap pattern of this motif.
     *
     * @return bitap pattern built from this motif
     * @throws RuntimeException if this motif has 64 or more positions
     */
    public BitapPattern getBitapPattern() {
        if (size >= 64) {
            throw new RuntimeException("Supports motifs with length less then 64.");
        }
        return bitapPattern;
    }

    /**
     * Builds per-letter bit masks for the bitap pattern.
     *
     * <p>For each letter code a mask of all ones is created, then bit {@code j} is cleared in the
     * forward mask (and bit {@code size - j - 1} in the reverse mask) for every position {@code j}
     * at which that letter is allowed.</p>
     *
     * @return the pattern, or {@code null} if the motif has 64 or more positions
     */
    private BitapPattern toBitapPattern() {
        if (size >= 64) {
            return null;
        }
        int alphabetSize = alphabet.size();
        long[] patternMask = new long[alphabetSize];
        long[] reversePatternMask = new long[alphabetSize];
        Arrays.fill(patternMask, ~0L);
        Arrays.fill(reversePatternMask, ~0L);
        for (int code = 0; code < alphabetSize; ++code) {
            for (int position = 0; position < size; ++position) {
                if (data.get(code * size + position)) {
                    patternMask[code] &= ~(1L << position);
                    reversePatternMask[code] &= ~(1L << (size - position - 1));
                }
            }
        }
        return new BitapPattern(size, patternMask, reversePatternMask);
    }

    /**
     * Returns the number of positions in this motif.
     *
     * @return motif length
     */
    public int size() {
        return size;
    }

    /**
     * Tells whether the letter with the given code is allowed at the given motif position.
     *
     * @param code     letter code
     * @param position zero-based position inside the motif
     * @return {@code true} if the letter is allowed at this position
     * @throws IndexOutOfBoundsException if {@code position} is negative or not less than {@link #size()}
     */
    public boolean allows(byte code, int position) {
        // Without this check an out-of-range position would silently address
        // a bit that belongs to a different letter code in the flat matrix.
        if (position < 0 || position >= size) {
            throw new IndexOutOfBoundsException("position=" + position + ", motif size=" + size);
        }
        return data.get(code * size + position);
    }

    /**
     * Checks whether the region of {@code sequence} starting at {@code from} and spanning
     * {@link #size()} letters is matched by this motif.
     *
     * @param sequence sequence to test
     * @param from     index of the first letter of the tested region
     * @return {@code true} if every letter of the region is allowed at its motif position
     * @throws IndexOutOfBoundsException if the region does not fit inside the sequence
     */
    public boolean matches(S sequence, int from) {
        // Compare by subtraction: "from + size" could overflow int for a huge "from".
        if (from < 0 || from > sequence.size() - size) {
            throw new IndexOutOfBoundsException(
                    "from=" + from + ", motif size=" + size + ", sequence size=" + sequence.size());
        }
        for (int i = 0; i < size; ++i) {
            if (!data.get(sequence.codeAt(from + i) * size + i)) {
                return false;
            }
        }
        return true;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        Motif<?> that = (Motif<?>) o;
        return size == that.size
                && alphabet.equals(that.alphabet)
                && data.equals(that.data);
    }

    @Override
    public int hashCode() {
        int result = alphabet.hashCode();
        result = 31 * result + size;
        result = 31 * result + data.hashCode();
        return result;
    }

    /**
     * Checks that every motif position has at least one set bit, i.e. at least one allowed letter.
     *
     * @param data bit matrix addressed as {@code code * size + position}
     * @param size number of positions in the motif
     * @return {@code true} if no position is left without allowed letters
     */
    private static boolean dataConsistent(BitArray data, int size) {
        for (int position = 0; position < size; position++) {
            boolean hasAllowedLetter = false;
            // Walk the column of this position: one bit per letter code, stride = size.
            for (int bit = position; bit < data.size(); bit += size) {
                if (data.get(bit)) {
                    hasAllowedLetter = true;
                    break;
                }
            }
            if (!hasAllowedLetter) {
                return false;
            }
        }
        return true;
    }
}