/*
* #%L
* gitools-obo
* %%
* Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.gitools.datasources.obo;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.util.LinkedList;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Pull-style reader that turns the lines of an OBO document into a sequence
 * of {@link OBOEvent}s.
 * <p>
 * Events are produced on demand by {@link #nextEvent()}. A single input line
 * may yield several events (for example {@code HEADER_END} followed by
 * {@code STANZA_START}); these are buffered internally and handed out one at
 * a time.
 */
public class OBOStreamReader implements OBOEventTypes, AutoCloseable {

    /** Matches a stanza header line such as {@code [Term]}, optionally followed by a {@code !} comment. */
    private static final Pattern STANZA_NAME_PATTERN = Pattern.compile("^\\[(.*)\\][ \\t]*(?:!(.*))?$");

    /** Matches a line whose first non-blank character is {@code !}. */
    private static final Pattern LINE_COMMENT_PATTERN = Pattern.compile("^\\s*!(.*)$");

    // NOTE(review): not referenced anywhere in this class; as written it only
    // matches a single-character tag name.
    private static final Pattern TAG_NAME_PATTERN = Pattern.compile("^[0-9a-zA-Z_]$");

    private final OBOStream stream;

    // Not populated by this class yet; see the TODO in close().
    private final Stack<OBOStream> streamStack;

    /** Events that have been generated but not yet returned by {@link #nextEvent()}. */
    private final LinkedList<OBOEvent> tokens;

    private boolean headerStarted;
    private boolean headerEnded;
    private boolean documentEnded;

    /** Name of the stanza currently being read, or {@code null} before the first stanza. */
    private String stanzaName;

    /** Name of the most recently parsed tag. */
    private String tagName;

    /**
     * Creates a reader over the given character source.
     *
     * @param reader source of the OBO text; it is wrapped in a {@link BufferedReader}
     */
    public OBOStreamReader(Reader reader) {
        this(new OBOStream(new BufferedReader(reader)));
    }

    /**
     * Creates a reader over the document located at the given URL.
     *
     * @param baseUrl location of the OBO document
     * @throws IOException propagated from the {@code OBOStream} constructor
     */
    public OBOStreamReader(URL baseUrl) throws IOException {
        this(new OBOStream(baseUrl));
    }

    private OBOStreamReader(OBOStream stream) {
        this.stream = stream;
        streamStack = new Stack<>();
        tokens = new LinkedList<>();
        // The first event handed out is always DOCUMENT_START.
        tokens.offer(new OBOEvent(DOCUMENT_START, 0));
        headerStarted = false;
        headerEnded = false;
        documentEnded = false;
        stanzaName = null;
    }

    /**
     * Returns the next event of the document.
     * <p>
     * Buffered events are returned first. Otherwise one more line is read from
     * the stream and classified as a stanza header, a comment line or a tag
     * line. When the stream has no more lines a single {@code DOCUMENT_END}
     * event is returned; every call after that returns {@code null}.
     *
     * @return the next event, or {@code null} once {@code DOCUMENT_END} has been delivered
     * @throws IOException if reading from the underlying stream fails
     */
    public OBOEvent nextEvent() throws IOException {
        while (tokens.isEmpty()) {
            String line = stream.nextLine();
            int pos = stream.getLinePos();

            if (line == null) {
                if (documentEnded) {
                    return null;
                }
                documentEnded = true;
                tokens.offer(new OBOEvent(DOCUMENT_END, pos));
                break;
            }

            Matcher stanzaMatcher = STANZA_NAME_PATTERN.matcher(line);
            if (stanzaMatcher.matches()) {
                // The first stanza header closes the header section.
                if (stanzaName == null && !headerEnded) {
                    tokens.offer(new OBOEvent(HEADER_END, pos));
                    headerEnded = true;
                }
                String stzName = stanzaMatcher.group(1);
                tokens.offer(new OBOEvent(STANZA_START, pos, stzName));
                stanzaName = stzName;
            } else if (LINE_COMMENT_PATTERN.matcher(line).matches()) {
                tokens.offer(new OBOEvent(COMMENT, pos));
            } else {
                // The first non-comment line seen before any stanza opens the header section.
                if (stanzaName == null && !headerStarted) {
                    tokens.offer(new OBOEvent(HEADER_START, pos));
                    headerStarted = true;
                }
                nextTag(line, pos);
            }
        }
        return tokens.poll();
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        stream.close();
        // TODO streamStack
    }

    /**
     * Parses a {@code name: value} line and queues a {@code TAG_START} /
     * {@code TAG_END} pair for it. A line without a colon is queued as a
     * single {@code UNKNOWN} event.
     *
     * @param line    the raw line
     * @param linepos position of the line as reported by the stream
     */
    private void nextTag(String line, int linepos) {
        int colon = line.indexOf(':');
        if (colon < 0) {
            tokens.offer(new OBOEvent(UNKNOWN, linepos));
            return;
        }

        tagName = line.substring(0, colon);

        //TODO parse contents and generate events
        StringBuilder sb = new StringBuilder();
        escapeCharsAndRemoveComments(line.substring(colon + 1), sb);
        String content = sb.toString().trim();

        tokens.offer(new OBOEvent(TAG_START, linepos, stanzaName, tagName, content));
        tokens.offer(new OBOEvent(TAG_END, linepos, stanzaName, tagName, content));
    }

    /**
     * Replaces escape sequences and removes a trailing comment.
     * <p>
     * Copies {@code line} into {@code sb} up to (not including) the first
     * unescaped {@code !}. A backslash followed by {@code n}, {@code W} or
     * {@code t} is replaced by a newline, a space or a tab respectively; a
     * backslash followed by any other character yields that character. A
     * backslash that is the very last character of the line has nothing to
     * escape and is copied literally.
     *
     * @param line text to process
     * @param sb   buffer that receives the processed text
     */
    private void escapeCharsAndRemoveComments(String line, StringBuilder sb) {
        int len = line.length();
        int pos = 0;
        while (pos < len) {
            char c = line.charAt(pos++);
            if (c == '!') {
                // Unescaped '!' starts a comment that runs to the end of the line.
                break;
            }
            // pos already points past c, so "pos == len" means c was the last
            // character: a trailing backslash is kept as-is instead of reading
            // beyond the end of the line.
            if (c != '\\' || pos == len) {
                sb.append(c);
                continue;
            }
            char escaped = line.charAt(pos++);
            switch (escaped) {
                case 'n':
                    sb.append('\n');
                    break;
                case 'W':
                    sb.append(' ');
                    break;
                case 't':
                    sb.append('\t');
                    break;
                default:
                    // \: \, \" \\ \( \) \[ \] \{ \} and any other escaped
                    // character stand for the character itself.
                    sb.append(escaped);
                    break;
            }
        }
    }
}