/**
* This file is part of Alfresco/Apache Storm demo project.
*
* Alfresco/Apache Storm demo project is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco/Apache Storm demo project is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Alfresco/Apache Storm demo project. If not, see <http://www.gnu.org/licenses/>.
*/
package com.zaizi.alfresco.bolt;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import com.digitalpebble.storm.crawler.Constants;
import com.digitalpebble.storm.crawler.Metadata;
import com.digitalpebble.storm.crawler.persistence.Status;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Storm bolt that logs every field of each incoming tuple and, for fields
 * whose value parses as a JSON document, copies the JSON entries into a
 * {@link Metadata} instance and emits {@code (url, metadata, Status.FETCHED)}
 * on the storm-crawler status stream. Every input tuple is acked.
 */
public class ProcessNodes extends BaseRichBolt {

    // Bolts are serialized when the topology is submitted.
    private static final long serialVersionUID = 1L;

    // Collector used to emit and ack tuples; assigned in prepare().
    OutputCollector _collector;

    @Override
    public void prepare(Map conf, TopologyContext context,
            OutputCollector collector) {
        _collector = collector;
    }

    /**
     * Inspects every field of the tuple:
     * <ul>
     *   <li>binary fields are logged with their size and decoded as UTF-8 text;</li>
     *   <li>{@link Metadata} fields are logged via their prefixed string form;</li>
     *   <li>any other field is logged and, when its string form parses as JSON,
     *       converted to a {@link Metadata} object and emitted on the status
     *       stream with {@link Status#FETCHED}.</li>
     * </ul>
     * The input tuple is always acked, whether or not anything was emitted.
     *
     * @param tuple the incoming tuple to process
     */
    @Override
    public void execute(Tuple tuple) {
        // Fields is Iterable<String>, so an enhanced for-loop replaces the
        // manual Iterator of the original implementation.
        for (String fieldName : tuple.getFields()) {
            Object obj = tuple.getValueByField(fieldName);
            if (obj instanceof byte[]) {
                byte[] bytes = (byte[]) obj;
                System.out.println(fieldName + "\t"
                        + bytes.length + " bytes");
                System.out.println(fieldName + "\t"
                        + tuple.getValueByField(fieldName) + " something");
                // BUG FIX: byte[].toString() only prints the array identity
                // ("[B@1a2b3c"), never the payload. Decode the bytes instead;
                // UTF-8 is assumed — TODO confirm against the upstream spout.
                System.out.println(new String(bytes, StandardCharsets.UTF_8));
            } else if (obj instanceof Metadata) {
                Metadata md = (Metadata) obj;
                // Metadata.toString(prefix) prints each entry as "<prefix>key: value".
                System.out.println(md.toString(fieldName + "."));
            } else {
                String value = tuple.getValueByField(fieldName).toString();
                System.out.println(fieldName + " : " + trimValue(value));
                try {
                    JSONObject object = (JSONObject) new JSONParser().parse(value);
                    Metadata metadata = new Metadata();
                    String url = "";
                    for (Object key : object.keySet()) {
                        System.out.println(key.toString() + " : " + object.get(key));
                        metadata.setValue(key.toString(), object.get(key).toString());
                        // The node's own URL travels in the "propertiesUrl" entry.
                        if (key.toString().equals("propertiesUrl")) {
                            url = object.get(key).toString();
                        }
                    }
                    metadata.setValue("status", "processed");
                    _collector.emit(Constants.StatusStreamName,
                            tuple, new Values(url, metadata, Status.FETCHED));
                    System.out.println("status : fetched");
                    System.out.println("bolt : " + ProcessNodes.class.getName());
                    System.out.println();
                } catch (Exception ignored) {
                    // Best effort by design: non-JSON field values are expected
                    // and are simply skipped; nothing is emitted for them.
                }
            }
        }
        _collector.ack(tuple);
    }

    /**
     * Shortens very long values for logging.
     *
     * @param value the raw value to log
     * @return the value itself when it is 100 characters or fewer, otherwise
     *         a placeholder of the form {@code "<length> chars"}
     */
    private String trimValue(String value) {
        if (value.length() > 100) {
            return value.length() + " chars";
        }
        return value;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("url", "content", "metadata"));
        declarer.declareStream(Constants.StatusStreamName, new Fields("url",
                "metadata", "status"));
    }
}