package com.yahoo.glimmer.util;
/*
* Copyright (c) 2012 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
* See accompanying LICENSE file.
*/
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
public class BySubjectRecord {
private static final Charset CHARSET = Charset.forName("UTF-8");
public static final char RECORD_DELIMITER = '\n';
public static final char FIELD_DELIMITER = '\t';
private static final int MAX_RELATIONS = 10000;
private long id;
/**
* Because the doc id's have to line up with what is in the 'all resources'
* hash(to avoid having a separate 'subjects' hash), the id's don't run
* consecutively. This is a problem when we split the bySubjects file for
* the collection builder. We can't assume the first record in the split is
* 0, as it may be preceded by empty docs. The previousId is used to keep
* the doc ids consistent when setting the first doc id in a split.
*/
private long previousId = -1;
private String subject;
private final ArrayList<String> relations = new ArrayList<String>();
private transient StringBuilder sb;
public static class BySubjectRecordParseException extends Exception {
private static final long serialVersionUID = 421747997614595011L;
public BySubjectRecordParseException(String message) {
super(message);
}
public BySubjectRecordParseException(NumberFormatException e) {
super(e);
}
}
public void readFrom(final byte[] bytes, final int start, final int end) throws BySubjectRecordException {
readFrom(bytes, start, end, CHARSET);
}
public void readFrom(final byte[] bytes, final int start, final int end, final Charset charset) throws BySubjectRecordException {
ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes, start, end);
try {
readFrom(new InputStreamReader(inputStream, charset));
} catch (IOException e) {
// This shouldn't happen reading from a ByteArrayInputStream.
throw new RuntimeException(e);
}
}
/**
*
* @param reader
* @param sb
* @return false on failed to parse.
* @throws IOException
* @throws ParseException
* on invalid input.
*/
public void readFrom(Reader reader) throws IOException, BySubjectRecordException {
if (sb == null) {
sb = new StringBuilder();
}
readFrom(reader, sb);
}
private void readFrom(Reader reader, StringBuilder sb) throws IOException, BySubjectRecordException {
readField(reader, sb);
try {
id = Long.parseLong(sb.toString());
} catch (NumberFormatException e) {
throw new BySubjectRecordException("Reading id", e);
}
if (id < 0) {
throw new BySubjectRecordException("Negative doc ID:" + id);
}
readField(reader, sb);
try {
previousId = Long.parseLong(sb.toString());
} catch (NumberFormatException e) {
throw new BySubjectRecordException("Reading previousId", e);
}
if (previousId < -1) {
throw new BySubjectRecordException("Negative doc previousId:" + previousId);
}
if (previousId >= id) {
throw new BySubjectRecordException("Id:" + id + " is not bigger than previousId:" + previousId);
}
readField(reader, sb);
subject = sb.toString();
relations.clear();
while (readField(reader, sb)) {
if (sb.length() > 0)
relations.add(sb.toString());
}
}
public static class BySubjectRecordException extends Exception {
private static final long serialVersionUID = 2571219720786672147L;
public BySubjectRecordException(String string) {
super(string);
}
public BySubjectRecordException(String string, Exception e) {
super(string, e);
}
}
private static boolean readField(final Reader reader, final StringBuilder sb) throws IOException {
sb.setLength(0);
int c;
while ((c = reader.read()) != -1) {
if (c == FIELD_DELIMITER) {
return true;
}
if (c == RECORD_DELIMITER) {
break;
}
sb.append((char) c);
}
return false;
}
public long getId() {
return id;
}
public void setId(long id) {
if (id < 0) {
throw new IllegalArgumentException("setId() given negative value:" + id);
}
this.id = id;
}
public long getPreviousId() {
return previousId;
}
public void setPreviousId(long previousId) {
if (previousId < 0) {
throw new IllegalArgumentException("setPreviousId() given negative value:" + id);
}
this.previousId = previousId;
}
public String getSubject() {
return subject;
}
public void setSubject(String subject) {
this.subject = subject;
}
public String getRelation(int index) {
return relations.get(index);
}
public Iterable<String> getRelations() {
return relations;
}
public Reader getRelationsReader() {
return new Reader() {
private int relationsIndex;
private int relationIndex;
@Override
public void close() throws IOException {
}
@Override
public int read(final char[] buffer, final int startIndex, final int len) throws IOException {
if (len == 0) {
return 0;
}
int bufferIndex = startIndex;
int bufferEndIndex = startIndex + len;
for (;;) {
if (relationsIndex >= relations.size()) {
if (bufferIndex == startIndex) {
return -1;
} else {
return bufferIndex - startIndex;
}
}
String relationString = relations.get(relationsIndex);
if (relationIndex == relationString.length()) {
// case where in the last call the last char returned
// was the last char of the current relation.
buffer[bufferIndex++] = '\t';
relationsIndex++;
relationIndex = 0;
} else {
while (relationIndex < relationString.length() && bufferIndex < bufferEndIndex) {
buffer[bufferIndex++] = relationString.charAt(relationIndex++);
}
if (bufferIndex == bufferEndIndex) {
return len;
}
relationsIndex++;
relationIndex = 0;
buffer[bufferIndex++] = '\t';
if (bufferIndex == bufferEndIndex) {
return len;
}
}
}
}
};
}
public int getRelationsCount() {
return relations.size();
}
public boolean hasRelations() {
return !relations.isEmpty();
}
public boolean addRelation(String relation) {
if (relations.size() > MAX_RELATIONS) {
return false;
}
relations.add(relation);
return true;
}
public void clearRelations() {
relations.clear();
}
public void writeTo(Writer writer) throws IOException {
writer.write(Long.toString(id));
writer.write(FIELD_DELIMITER);
writer.write(Long.toString(previousId));
writer.write(FIELD_DELIMITER);
if (subject != null) {
writer.write(subject);
}
writer.write(FIELD_DELIMITER);
for (String relation : relations) {
writer.write(relation);
writer.write(FIELD_DELIMITER);
}
}
@Override
public String toString() {
StringWriter stringWriter = new StringWriter(4096);
try {
writeTo(stringWriter);
} catch (IOException e) {
e.printStackTrace();
}
return stringWriter.toString();
}
@Override
public boolean equals(Object object) {
if (object instanceof BySubjectRecord) {
BySubjectRecord that = (BySubjectRecord) object;
return id == that.id && previousId == that.previousId && (subject == null ? that.subject == null : subject.equals(that.subject))
&& relations.equals(that.relations);
}
return false;
}
}