/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.jwpl;
import java.io.IOException;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import de.tudarmstadt.ukp.dkpro.core.io.jwpl.type.DBConfig;
import de.tudarmstadt.ukp.wikipedia.api.DatabaseConfiguration;
import de.tudarmstadt.ukp.wikipedia.api.WikiConstants.Language;
import de.tudarmstadt.ukp.wikipedia.api.Wikipedia;
import de.tudarmstadt.ukp.wikipedia.api.exception.WikiInitializationException;
/**
 * Abstract base class for all Wikipedia readers.
 *
 * <p>Declares the configuration parameters describing the database connection
 * (host, database, user, password, language), builds a
 * {@link DatabaseConfiguration} and a {@link Wikipedia} instance from them during
 * {@link #initialize(UimaContext)}, and can optionally record the connection
 * settings in each CAS as a {@link DBConfig} annotation.</p>
 */
public abstract class WikipediaReaderBase extends JCasCollectionReader_ImplBase
{
    /** The host server. */
    public static final String PARAM_HOST = "Host";
    @ConfigurationParameter(name = PARAM_HOST, mandatory = true)
    private String host;

    /** The name of the database. */
    public static final String PARAM_DB = "Database";
    @ConfigurationParameter(name = PARAM_DB, mandatory = true)
    private String db;

    /** The username of the database account. */
    public static final String PARAM_USER = "User";
    @ConfigurationParameter(name = PARAM_USER, mandatory = true)
    private String user;

    /** The password of the database account. */
    public static final String PARAM_PASSWORD = "Password";
    @ConfigurationParameter(name = PARAM_PASSWORD, mandatory = true)
    private String password;

    /** The language of the Wikipedia that should be connected to. */
    public static final String PARAM_LANGUAGE = "Language";
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = true)
    private Language language;

    /**
     * Sets whether the database configuration should be stored in the CAS,
     * so that annotators down the pipeline can access additional data.
     */
    public static final String PARAM_CREATE_DATABASE_CONFIG_ANNOTATION = "CreateDBAnno";
    @ConfigurationParameter(
            name = PARAM_CREATE_DATABASE_CONFIG_ANNOTATION,
            mandatory = true,
            defaultValue = "false")
    private boolean createDbAnno;

    /** Database configuration assembled from the reader parameters in {@link #initialize(UimaContext)}. */
    protected DatabaseConfiguration dbconfig;

    /** Wikipedia instance created from {@link #dbconfig} in {@link #initialize(UimaContext)}. */
    protected Wikipedia wiki;

    /**
     * Initializes the reader: delegates to the superclass, then builds the
     * database configuration from the configured parameters and creates the
     * {@link Wikipedia} instance from it.
     *
     * @param context the UIMA context handed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails,
     *         or wrapping the {@link WikiInitializationException} raised while
     *         creating the {@link Wikipedia} instance
     */
    @Override
    public void initialize(UimaContext context)
        throws ResourceInitializationException
    {
        super.initialize(context);

        // The configuration is stored before the Wikipedia instance is created,
        // so it stays set even if that creation fails.
        this.dbconfig = new DatabaseConfiguration(host, db, user, password, language);

        try {
            this.wiki = new Wikipedia(this.dbconfig);
        }
        catch (WikiInitializationException cause) {
            throw new ResourceInitializationException(cause);
        }
    }

    /**
     * When the {@value #PARAM_CREATE_DATABASE_CONFIG_ANNOTATION} parameter is enabled,
     * creates a {@link DBConfig} annotation holding host, database, user, password
     * and language, and adds it to the indexes of the given CAS. Otherwise does nothing.
     *
     * @param jcas the CAS that receives the annotation
     * @throws IOException declared by the overridden method; not thrown by this implementation itself
     * @throws CollectionException declared by the overridden method; not thrown by this implementation itself
     */
    @Override
    public void getNext(JCas jcas) throws IOException, CollectionException
    {
        if (!createDbAnno) {
            return;
        }

        DBConfig connectionInfo = new DBConfig(jcas);
        connectionInfo.setHost(host);
        connectionInfo.setDB(db);
        connectionInfo.setUser(user);
        connectionInfo.setPassword(password);
        connectionInfo.setLanguage(language.toString());
        connectionInfo.addToIndexes();
    }

    /**
     * Re-declared as abstract so that every concrete reader has to supply its own
     * progress reporting.
     *
     * @return the progress information provided by the concrete reader
     */
    @Override
    public abstract Progress[] getProgress();
}