package com.maalaang.omtwitter.uima.reader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.Progress;
import com.maalaang.omtwitter.io.OMTwitterCorpusFileReader;
import com.maalaang.omtwitter.io.OMTwitterReader;
import com.maalaang.omtwitter.model.OMTweet;
import com.maalaang.omtwitter.uima.type.TweetAnnotation;
/**
 * UIMA collection reader that pulls tweets from an {@link OMTwitterReader}
 * (an {@link OMTwitterCorpusFileReader} created in {@link #initialize()}) and
 * turns each one into a CAS: the tweet text becomes the document text and a
 * single {@link TweetAnnotation} spanning the whole text carries the tweet's
 * id, author, date, query and polarity.
 *
 * @author Sangwon Park
 */
public class TwitterCorpusReader extends CollectionReader_ImplBase {

    /** Configuration parameter whose value is passed as the first constructor argument of the corpus file reader. */
    public static final String PARAM_TWITTER_CORPUS_FILE = "twitterCorpusFile";

    /** Configuration parameter holding the whitespace-separated field names of the corpus. */
    public static final String PARAM_FIELDS = "fields";

    /** Configuration parameter whose value is passed as the delimiter argument of the corpus file reader. */
    public static final String PARAM_FIELDS_DELIM = "fieldsDelimiter";

    /** Underlying tweet source; {@code null} before initialization and after {@link #close()}. */
    private OMTwitterReader reader = null;

    private Logger logger = null;

    /**
     * Reads the configuration parameters, translates the field names into
     * field ids and opens the corpus file reader.
     *
     * @throws ResourceInitializationException if the {@link #PARAM_FIELDS}
     *         parameter is missing or blank, or if opening the corpus reader
     *         fails with an encoding or file-not-found error
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        super.initialize();
        logger = getLogger();

        String fieldsNameStr = (String) getConfigParameterValue(PARAM_FIELDS);
        // Fail with a descriptive initialization error instead of a bare
        // NullPointerException when the field list is not configured.
        if (fieldsNameStr == null || fieldsNameStr.trim().length() == 0) {
            String message = "Configuration parameter '" + PARAM_FIELDS + "' is missing or empty";
            logger.log(Level.SEVERE, message);
            throw new ResourceInitializationException(new IllegalArgumentException(message));
        }

        try {
            String fieldsDelim = (String) getConfigParameterValue(PARAM_FIELDS_DELIM);
            String corpusFile = (String) getConfigParameterValue(PARAM_TWITTER_CORPUS_FILE);

            // Trim first: String.split keeps an empty leading token when the
            // input starts with a separator, which would be looked up as a
            // field name.
            String[] fieldNames = fieldsNameStr.trim().split("\\s+");
            int[] fields = new int[fieldNames.length];
            for (int i = 0; i < fieldNames.length; i++) {
                fields[i] = OMTwitterCorpusFileReader.fieldNameToId(fieldNames[i]);
            }

            reader = new OMTwitterCorpusFileReader(corpusFile, fieldsDelim, fields);
        } catch (UnsupportedEncodingException e) {
            logger.log(Level.SEVERE, e.getMessage());
            throw new ResourceInitializationException(e);
        } catch (FileNotFoundException e) {
            logger.log(Level.SEVERE, e.getMessage());
            throw new ResourceInitializationException(e);
        }

        logger.log(Level.INFO, this.getClass().getSimpleName() + " initialized");
    }

    /**
     * Fills the given CAS with the next tweet from the reader.
     *
     * @param cas the CAS to populate
     * @throws IOException declared by the interface
     * @throws CollectionException if the JCas view of {@code cas} cannot be obtained
     */
    @Override
    public void getNext(CAS cas) throws IOException, CollectionException {
        JCas jcas;
        try {
            jcas = cas.getJCas();
        } catch (CASException e) {
            logger.log(Level.SEVERE, e.getMessage());
            throw new CollectionException(e);
        }

        OMTweet tweet = reader.next();
        String text = tweet.getText();
        jcas.setDocumentText(text);

        // One annotation covering the whole document carries the tweet metadata.
        TweetAnnotation tweetAnn = new TweetAnnotation(jcas);
        tweetAnn.setBegin(0);
        tweetAnn.setEnd(text.length());
        tweetAnn.setId(tweet.getId());
        tweetAnn.setAuthor(tweet.getAuthor());
        tweetAnn.setDate(tweet.getDateString());
        tweetAnn.setQuery(tweet.getQuery());
        tweetAnn.setPolarity(tweet.getPolarityString());
        tweetAnn.addToIndexes();

        logger.log(Level.FINE, tweet.toString());
    }

    /**
     * Closes the underlying reader. Does nothing when no reader is open
     * (initialization failed or the reader was already closed).
     *
     * @throws IOException declared by the interface
     */
    @Override
    public void close() throws IOException {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } finally {
            // Drop the reference even if closing fails so a repeated close()
            // does not touch the same reader again.
            reader = null;
        }
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return an empty array, so callers can iterate the result without a null check
     */
    @Override
    public Progress[] getProgress() {
        return new Progress[0];
    }

    /**
     * @return {@code true} if a reader is open and it reports another tweet;
     *         {@code false} when no reader is open
     * @throws IOException declared by the interface
     * @throws CollectionException declared by the interface
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return reader != null && reader.hasNext();
    }
}