/* Open Data Service
Copyright (C) 2013 Tsysin Konstantin, Reischl Patrick
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.jvalue.ods.processor.adapter.domain;
import com.codahale.metrics.MetricRegistry;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.inject.assistedinject.Assisted;
import org.apache.commons.lang3.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jvalue.ods.api.sources.DataSource;
import org.jvalue.ods.processor.adapter.AbstractSourceAdapter;
import org.jvalue.ods.processor.adapter.SourceAdapterException;
import org.jvalue.ods.processor.adapter.SourceIterator;
import org.jvalue.ods.utils.HttpUtils;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.inject.Inject;
/**
 * Source adapter that downloads an HTML page, selects the rows of the table
 * with id {@code pegeltab} and emits one JSON object per table row.
 *
 * <p>Only the table columns listed in {@link #COLUMN_INDEX_BY_KEY} are read
 * (columns 5 to 7 are not mapped). The value type of every mapped column is
 * given by {@link #TYPE_BY_KEY}.
 */
public final class PegelPortalMvSourceAdapter extends AbstractSourceAdapter {

    private static final String
            KEY_STATION = "station",
            KEY_WATER = "water",
            KEY_TIMESTAMP = "timestamp",
            KEY_LEVEL = "level",
            KEY_LEVEL_UNIT = "levelUnit",
            KEY_EFFLUENT = "effluent",
            KEY_EFFLUENT_UNIT = "effluentUnit",
            KEY_AGENCY = "agency";

    /** Maps a JSON key to the zero-based index of the table cell it is read from. */
    private static final Map<String, Integer> COLUMN_INDEX_BY_KEY;

    static {
        // A plain HashMap is kept as backing map so that the iteration order
        // (and therefore the field order of the produced objects) is unchanged.
        Map<String, Integer> columns = new HashMap<>();
        columns.put(KEY_STATION, 0);
        columns.put(KEY_WATER, 1);
        columns.put(KEY_TIMESTAMP, 2);
        columns.put(KEY_LEVEL, 3);
        columns.put(KEY_EFFLUENT, 4);
        columns.put(KEY_AGENCY, 8);
        COLUMN_INDEX_BY_KEY = Collections.unmodifiableMap(columns);
    }

    /** Maps a JSON key to the Java type its value is converted to. */
    private static final Map<String, Class<?>> TYPE_BY_KEY;

    static {
        Map<String, Class<?>> types = new HashMap<>();
        types.put(KEY_STATION, String.class);
        types.put(KEY_WATER, String.class);
        types.put(KEY_TIMESTAMP, String.class);
        types.put(KEY_LEVEL, Number.class);
        types.put(KEY_LEVEL_UNIT, String.class);
        types.put(KEY_EFFLUENT, Number.class);
        types.put(KEY_EFFLUENT_UNIT, String.class);
        types.put(KEY_AGENCY, String.class);
        TYPE_BY_KEY = Collections.unmodifiableMap(types);
    }

    /**
     * Creates the adapter; all arguments are forwarded to the super class.
     *
     * @param source    data source description
     * @param sourceUrl URL of the page to read, as a string
     * @param registry  metric registry handed to the super class
     */
    @Inject
    PegelPortalMvSourceAdapter(
            @Assisted DataSource source,
            @Assisted String sourceUrl,
            MetricRegistry registry) {

        super(source, sourceUrl, registry);
    }

    /**
     * Creates the iterator that fetches and parses the page.
     *
     * @return a new {@link PegelPortalMvIterator} for the given source
     */
    @Override
    protected SourceIterator doCreateIterator(DataSource source, URL sourceUrl, MetricRegistry registry) {
        return new PegelPortalMvIterator(source, sourceUrl, registry);
    }

    /**
     * Iterates over the rows selected by {@code #pegeltab tbody tr} and converts
     * each of them into an {@link ObjectNode}.
     */
    private static final class PegelPortalMvIterator extends SourceIterator {

        private final Iterator<Element> rowIterator;
        /** Running index of the next row; used as the id of the produced object. */
        private int currentRowIdx = 0;

        /**
         * Downloads the page as UTF-8 and prepares the row iterator.
         *
         * @throws SourceAdapterException if reading the URL fails with an {@link IOException}
         */
        public PegelPortalMvIterator(DataSource source, URL sourceUrl, MetricRegistry registry) {
            super(source, sourceUrl, registry);
            try {
                String httpContent = HttpUtils.readUrl(sourceUrl, "UTF-8");
                this.rowIterator = Jsoup.parse(httpContent).select("#pegeltab tbody tr").iterator();
            } catch (IOException ioe) {
                throw new SourceAdapterException(ioe);
            }
        }

        /** @return {@code true} while there are unread table rows */
        @Override
        protected boolean doHasNext() {
            return rowIterator.hasNext();
        }

        /**
         * Converts the next table row into a JSON object.
         *
         * <p>Cells whose extracted text is empty are omitted. String columns are
         * HTML-unescaped, number columns are parsed as {@code double}. The two unit
         * fields and the {@code id} field are always added.
         *
         * @return the JSON object for the next row
         * @throws SourceAdapterException if a mapped key has a type other than
         *                                {@code String} or {@code Number}
         * @throws NumberFormatException  if a non-empty number cell cannot be parsed
         */
        @Override
        protected ObjectNode doNext() {
            ObjectNode objectNode = new ObjectNode(JsonNodeFactory.instance);
            Element row = rowIterator.next();

            for (Map.Entry<String, Integer> entry : COLUMN_INDEX_BY_KEY.entrySet()) {
                String key = entry.getKey();
                int colIdx = entry.getValue();
                Class<?> type = TYPE_BY_KEY.get(key);

                String value = extractText(row.child(colIdx));
                if (value.isEmpty()) {
                    continue;
                }

                if (type.equals(String.class)) {
                    objectNode.put(key, StringEscapeUtils.unescapeHtml4(value));
                } else if (type.equals(Number.class)) {
                    // Double.valueOf replaces the deprecated Double(String) constructor
                    // and parses the text the same way.
                    objectNode.put(key, Double.valueOf(value));
                } else {
                    throw new SourceAdapterException("Unknown type " + type.getName());
                }
            }

            // The units are not read from the page; they are fixed values.
            objectNode.put(KEY_LEVEL_UNIT, "cm ü PNP");
            objectNode.put(KEY_EFFLUENT_UNIT, "m³/s");
            // According to the original author, no single column of the website holds
            // unique values, so the running row index is used as id instead.
            objectNode.put("id", String.valueOf(currentRowIdx++));
            return objectNode;
        }

        /**
         * Recursively concatenates the string form of all text nodes below
         * {@code element}, in document order. Nodes that are neither text nodes
         * nor elements are ignored.
         *
         * @param element element whose text content is collected
         * @return the concatenated text, possibly empty
         */
        private String extractText(Element element) {
            StringBuilder builder = new StringBuilder();
            for (Node node : element.childNodes()) {
                if (node instanceof TextNode) {
                    // NOTE(review): toString() is presumably the node's HTML form, which
                    // would explain the later unescapeHtml4 call for string columns.
                    builder.append(node.toString());
                } else if (node instanceof Element) {
                    builder.append(extractText((Element) node));
                }
            }
            return builder.toString();
        }
    }
}