/**
*
*/
package context.core.textnets;
import context.core.textnets.Corpus.LABELTYPES;
import context.core.textnets.Corpus.UNITOFANALYSIS;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
* @author Shubhanshu
*
*/
public abstract class TextStream {
private ArrayList<Token> streamText;
private String fileName;
private boolean isLower;
/**
*
* @param fileName
*/
public TextStream(String fileName) {
// TODO Auto-generated constructor stub
this.fileName = fileName;
this.streamText = new ArrayList<Token>();
this.isLower = true;
}
/**
*
* @param t
*/
public void addToken(Token t){
this.streamText.add(t);
}
/**
*
* @param isLower
*/
public void setLowerCase(boolean isLower){
this.isLower = isLower;
}
/**
*
* @return
*/
public boolean getIsLowerCase(){
return this.isLower;
}
/**
*
* @param net
* @param windowSize
* @param l
* @param unitOfAnalysis
*/
public void makeNetwork(Network net, int windowSize, LABELTYPES l, UNITOFANALYSIS unitOfAnalysis){
//System.out.println("Window Size: "+windowSize);
//System.out.println("Stream Size: "+streamText.size());
//System.out.println("Unit of Analysis: "+unitOfAnalysis);
Integer wordIndex = null;
String word = "";
String label = null;
Token t = null;
/**
* Start with an element in the words vector and seek ahead till the
* window size. If you find an element then create an edge else move
* ahead. Once all elements in windowSize are finished increment to the
* next element in the words vector and repeat. TODO - Add feature for
* skipping the seperator.
*/
for (int i = 0; i < streamText.size(); i++) {
t = streamText.get(i);
word = t.getText();
label = t.getLabel(l);
//System.out.println("Source: "+word);
if (word == null || word.equals(".") || word.matches("\\p{Punct}")) {
continue;
}
word = word.trim();
if(getIsLowerCase()){
word = word.toLowerCase();
}
if (label!=null) {
wordIndex = i;
} else {
wordIndex = null;
continue;
}
int dist = 0;
for (int j = 1; j + i < streamText.size() && dist <= windowSize; j++) {
Integer targetIndex = i + j;
Token t_target = streamText.get(targetIndex);
String target = t_target.getText();
String t_label = t_target.getLabel(l);
//System.out.println("Target: "+target);
if (!unitOfAnalysis.equals(UNITOFANALYSIS.PARAGRAPH) && target == null) {
continue;
}
if (unitOfAnalysis.equals(UNITOFANALYSIS.PARAGRAPH) && target == null) {
break;
}
target = target.trim();
if (!unitOfAnalysis.equals(UNITOFANALYSIS.SENTENCE) && target.matches("\\p{Punct}")) {
continue;
} else if (unitOfAnalysis.equals(UNITOFANALYSIS.SENTENCE) &&
(target.equals(".") || target.matches("!\\?\\."))) {
break;
} else if (unitOfAnalysis.equals(UNITOFANALYSIS.SENTENCE) && target.matches("\\p{Punct}")){
continue;
}
dist++;
if(getIsLowerCase()){
target = target.toLowerCase();
}
if (t_label == null) {
continue;
}
if(word.equals(target)){
continue;
}
if(word.length() > 0 && target.length() > 0){
WordNode n1 = new WordNode(word, label);
WordNode n2 = new WordNode(target, t_label);
net.addEdge(n1, n2);
}
}
}
System.out.println("Finished Generating Network now off to printing files. ");
}
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
}
}