/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.clustering.carrot2;
import java.util.Iterator;
import java.util.List;
import org.apache.nutch.clustering.HitsCluster;
import org.apache.nutch.searcher.HitDetails;
import org.carrot2.core.clustering.RawCluster;
import org.carrot2.core.clustering.RawDocument;
/**
* An adapter of Carrot2's {@link RawCluster} interface to the
* {@link HitsCluster} interface.
*/
public class HitsClusterAdapter implements HitsCluster {
  /** The Carrot2 cluster this adapter delegates to. */
  private final RawCluster rawCluster;

  /**
   * Hit details handed to the constructor. {@link #getHits()} indexes into
   * this array using the id of each raw document, and the same array is
   * passed on to adapters created for subclusters.
   */
  private final HitDetails[] hits;

  /**
   * Lazily initialized subclusters array. Remains <code>null</code> while the
   * raw cluster reports no subclusters.
   */
  private HitsCluster[] subclusters;

  /**
   * Lazily initialized documents array.
   */
  private HitDetails[] documents;

  /**
   * Creates a new adapter.
   *
   * @param rawCluster the Carrot2 cluster to wrap
   * @param hits the hit details array that raw document ids index into
   */
  public HitsClusterAdapter(RawCluster rawCluster, HitDetails[] hits) {
    this.rawCluster = rawCluster;
    this.hits = hits;
  }

  /**
   * Returns adapters for the subclusters of the wrapped raw cluster. The
   * array is built on the first call that finds subclusters and cached
   * afterwards; the cached array itself is returned, not a copy.
   *
   * @return one adapter per raw subcluster, or <code>null</code> when the raw
   *         cluster returns a <code>null</code> or empty subcluster list
   * @see org.apache.nutch.clustering.HitsCluster#getSubclusters()
   */
  public HitsCluster[] getSubclusters() {
    if (this.subclusters == null) {
      final List rawSubclusters = rawCluster.getSubclusters();
      if (rawSubclusters != null && rawSubclusters.size() > 0) {
        // Fill a local array and publish it only once complete, so that an
        // exception thrown part-way through (e.g. a failed cast) never leaves
        // a half-populated array cached for later calls.
        final HitsCluster[] adapted = new HitsCluster[rawSubclusters.size()];
        int j = 0;
        for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
          final RawCluster c = (RawCluster) i.next();
          adapted[j] = new HitsClusterAdapter(c, hits);
        }
        this.subclusters = adapted;
      }
    }
    return this.subclusters;
  }

  /**
   * Returns the hit details of the documents in the wrapped raw cluster. Each
   * raw document's id is cast to {@link Integer} and used as an index into the
   * hits array given to the constructor. The result is built on the first
   * call and cached; the cached array itself is returned, not a copy.
   *
   * @return the hit details for this cluster's documents, in the iteration
   *         order of the raw cluster's document list
   * @see org.apache.nutch.clustering.HitsCluster#getHits()
   */
  public HitDetails[] getHits() {
    if (this.documents == null) {
      final List rawDocuments = this.rawCluster.getDocuments();
      // Same publish-when-complete approach as in getSubclusters(): a bad id
      // (wrong type or out of range) must not leave a partially filled,
      // non-null cache behind.
      final HitDetails[] resolved = new HitDetails[rawDocuments.size()];
      int j = 0;
      for (Iterator i = rawDocuments.iterator(); i.hasNext(); j++) {
        final RawDocument doc = (RawDocument) i.next();
        final Integer offset = (Integer) doc.getId();
        resolved[j] = this.hits[offset.intValue()];
      }
      this.documents = resolved;
    }
    return this.documents;
  }

  /**
   * Returns the description of the wrapped raw cluster as a string array.
   * A new array is created on every call.
   *
   * @return the elements of the raw cluster's description list, in list order
   * @see org.apache.nutch.clustering.HitsCluster#getDescriptionLabels()
   */
  public String[] getDescriptionLabels() {
    final List phrases = this.rawCluster.getClusterDescription();
    return (String[]) phrases.toArray(new String[phrases.size()]);
  }

  /**
   * Tells whether the wrapped raw cluster is flagged as a junk cluster.
   *
   * @return <code>true</code> when the raw cluster has a non-null value for
   *         {@link RawCluster#PROPERTY_JUNK_CLUSTER}
   * @see org.apache.nutch.clustering.HitsCluster#isJunkCluster()
   */
  public boolean isJunkCluster() {
    return rawCluster.getProperty(RawCluster.PROPERTY_JUNK_CLUSTER) != null;
  }
}