TextDocument.java example

Explorer
Cloud9-master
- src
/*
 * Cloud9: A MapReduce Library for Hadoop
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package edu.umd.cloud9.collection.line;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableUtils;

import edu.umd.cloud9.collection.Indexable;

/**
 * Object representing a simple document. Document is encoded as docid followed
 * by tab followed by document contents. Document contents cannot contain
 * embedded tabs or newlines. The entire document is encoded on a single line.
 * 
 * @author Jimmy Lin
 */
public class TextDocument extends Indexable {

	private String mContents;
	private String mDocid;

	/**
	 * Creates an empty <code>TrecDocument</code> object.
	 */
	public TextDocument() {
	}

	/**
	 * Deserializes this object.
	 */
	public void write(DataOutput out) throws IOException {
		out.writeUTF(mDocid);
		byte[] bytes = mContents.getBytes();
		WritableUtils.writeVInt(out, bytes.length);
		out.write(bytes, 0, bytes.length);
	}

	/**
	 * Serializes this object.
	 */
	public void readFields(DataInput in) throws IOException {
		mDocid = in.readUTF();
		int length = WritableUtils.readVInt(in);
		byte[] bytes = new byte[length];
		in.readFully(bytes, 0, length);
		mContents = new String(bytes);
	}

	public String getDocid() {
		return mDocid;
	}

  public void setDocid(String docid) {
    mDocid = docid;
  }

  public String getContent() {
		return mContents;
	}

	public void setContent(String contents) {
	  mContents = contents;
	}
	
	public static void readDocument(TextDocument doc, String s) {
		if (s == null) {
			throw new RuntimeException("Error, can't read null string!");
		}

		String[] arr = s.split("\\t");

		doc.mContents = arr[1];
		doc.mDocid = arr[0];
	}
}