/*******************************************************************************
 * Copyright 2013
 * TU Darmstadt, FG Sprachtechnologie
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.dkpro.bigdata.io.hadoop;

import java.util.Date;

/**
 * Stores (raw!) data and meta data of a crawler record that's relevant to us.
 *
 * <p>
 * This is a plain mutable value holder: every property starts out unset
 * (<code>null</code>, or <code>false</code> for the HTML flag) and is populated
 * through its setter. The setters do not validate their arguments; the
 * "must not be null" notes below describe the intended contract, not an
 * enforced one.
 *
 * @author Johannes Simon
 *
 */
public class CrawlerRecord
{
    /**
     * URL of record. Must not be null.
     */
    private String url;

    /**
     * Original text content (potentially with HTML markup etc.) of record. Must not be null.
     */
    private String content;

    /**
     * Specifies whether <code>content</code> contains any markup, e.g. HTML, or is plain text.
     */
    private boolean isHTML;

    /**
     * Original encoding of record. May be null.
     */
    private String origEncoding;

    /**
     * Original language of record. May be null.
     */
    private String origLanguage;

    /**
     * Name of machine that crawled this record. May be null.
     */
    private String user;

    /**
     * Date this record was crawled. May be null. Always a private copy that is never shared
     * with callers, because {@link Date} is mutable.
     */
    private Date date;

    /**
     * Creates an empty record; all properties are unset until their setters are called.
     */
    public CrawlerRecord()
    {
    }

    /**
     * @param url
     *            URL of record; intended to be non-null (not checked here)
     */
    public void setURL(String url)
    {
        this.url = url;
    }

    /**
     * @return URL of record, or <code>null</code> if it has not been set
     */
    public String getURL()
    {
        return url;
    }

    /**
     * @param content
     *            original text content of record, potentially with HTML markup etc.; intended
     *            to be non-null (not checked here)
     */
    public void setContent(String content)
    {
        this.content = content;
    }

    /**
     * @return original text content of record, or <code>null</code> if it has not been set
     */
    public String getContent()
    {
        return content;
    }

    /**
     * @param isHTML
     *            whether the content contains any markup, e.g. HTML, or is plain text
     */
    public void setIsHTML(boolean isHTML)
    {
        this.isHTML = isHTML;
    }

    /**
     * @return <code>true</code> if the content was flagged as containing markup;
     *         <code>false</code> by default
     */
    public boolean isHTML()
    {
        return isHTML;
    }

    /**
     * @param origEncoding
     *            original encoding of record; may be null
     */
    public void setOriginalEncoding(String origEncoding)
    {
        this.origEncoding = origEncoding;
    }

    /**
     * @return original encoding of record; may be null
     */
    public String getOriginalEncoding()
    {
        return origEncoding;
    }

    /**
     * @param origLanguage
     *            original language of record; may be null
     */
    public void setOriginalLanguage(String origLanguage)
    {
        this.origLanguage = origLanguage;
    }

    /**
     * @return original language of record; may be null
     */
    public String getOriginalLanguage()
    {
        return origLanguage;
    }

    /**
     * @param user
     *            name of machine that crawled this record; may be null
     */
    public void setUser(String user)
    {
        this.user = user;
    }

    /**
     * @return name of machine that crawled this record; may be null
     */
    public String getUser()
    {
        return user;
    }

    /**
     * Sets the date this record was crawled.
     *
     * <p>
     * The argument is copied, so modifying it after this call does not affect the record.
     *
     * @param date
     *            crawl date; may be null
     */
    public void setDate(Date date)
    {
        this.date = copyOf(date);
    }

    /**
     * Returns the date this record was crawled.
     *
     * <p>
     * A fresh copy is returned on every call, so modifying the result does not affect the
     * record.
     *
     * @return crawl date, or <code>null</code> if none has been set
     */
    public Date getDate()
    {
        return copyOf(date);
    }

    /**
     * Null-safe defensive copy of a {@link Date}; the copy is a plain <code>Date</code> with
     * the same millisecond timestamp.
     */
    private static Date copyOf(Date source)
    {
        return source == null ? null : new Date(source.getTime());
    }
}