/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.smrf.model.constrained;
import ivory.smrf.model.Clique;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
/**
* @author Lidan Wang
*
*/
public class ConstraintModel {
/**
 * Greedily selects cliques under a cost budget, using separate weight
 * thresholds for concepts that have and have not already been selected.
 *
 * <p>Candidates are visited in the order produced by
 * {@link #orderByProfitDensity(List)}. A candidate is considered only if
 * adding its analytical cost keeps the running total less than 7% of
 * {@code binConstraint} above {@code binConstraint}, and its weight is at
 * least 0.00001. A considered candidate is selected when its weight exceeds
 * the applicable threshold: the "redund" threshold if its concept was already
 * selected, otherwise the "add" threshold; the unigram variant applies to
 * {@code Clique.Type.Term} cliques and the bigram variant to all others.
 *
 * <p>A candidate that fails its threshold on the first pass but whose weight
 * exceeds {@code beta} is deferred. Deferred candidates are revisited once the
 * first pass is exhausted and are then selected whenever they still satisfy
 * the cost and minimum-weight check.
 *
 * @param cliques candidate cliques
 * @param binConstraint cost budget
 * @param unigramAddThreshold weight threshold for term cliques with a new concept
 * @param bigramAddThreshold weight threshold for non-term cliques with a new concept
 * @param unigramRedundThreshold weight threshold for term cliques with an already selected concept
 * @param bigramRedundThreshold weight threshold for non-term cliques with an already selected concept
 * @param beta minimum weight (exclusive) for a rejected candidate to be deferred
 * @return the selected cliques, in selection order
 */
public static List<ConstrainedClique> greedyJoint (List<ConstrainedClique> cliques, double binConstraint, double unigramAddThreshold, double bigramAddThreshold, double unigramRedundThreshold, double bigramRedundThreshold, double beta){
  Deque<ConstrainedClique> primaryQueue = orderByProfitDensity(cliques);
  Deque<ConstrainedClique> retryQueue = new LinkedList<ConstrainedClique>();
  List<ConstrainedClique> chosen = new ArrayList<ConstrainedClique>();
  Set<String> chosenConcepts = new HashSet<String>();
  double spent = 0.0;

  while (!primaryQueue.isEmpty() || !retryQueue.isEmpty()) {
    // The retry queue is only drained once the primary queue is exhausted.
    final boolean retrying = primaryQueue.isEmpty();
    final ConstrainedClique candidate = retrying ? retryQueue.removeFirst() : primaryQueue.removeFirst();

    final double projected = spent + candidate.getAnalyticalCost();
    final double weight = candidate.getWeight();
    final boolean withinBudget = (projected - binConstraint) < binConstraint * 0.07;
    if (!(withinBudget && weight >= 0.00001)) {
      continue;
    }

    final String concept = candidate.getConcept();
    final Clique.Type kind = candidate.getType();
    final boolean seenBefore = chosenConcepts.contains(concept);
    final boolean isTerm = kind.equals(Clique.Type.Term);

    // Already-selected concepts face the redundancy threshold, new ones the add threshold.
    final double threshold;
    if (seenBefore) {
      threshold = isTerm ? unigramRedundThreshold : bigramRedundThreshold;
    } else {
      threshold = isTerm ? unigramAddThreshold : bigramAddThreshold;
    }

    if (weight > threshold || retrying) {
      spent = projected;
      chosen.add(candidate);
      chosenConcepts.add(concept);
    } else if ((weight - beta) > 0) {
      retryQueue.add(candidate);
    }
  }
  return chosen;
}
/**
 * Greedily selects cliques under a cost budget, treating every clique
 * independently of the concepts already selected.
 *
 * <p>Candidates are visited in the order produced by
 * {@link #orderByProfitDensity(List)}. A candidate is selected when adding its
 * analytical cost keeps the running total less than 7% of
 * {@code binConstraint} above {@code binConstraint}, its weight is at least
 * 0.00001, and its weight exceeds {@code unigramAddThreshold} (for
 * {@code Clique.Type.Term} cliques) or {@code bigramAddThreshold} (for all
 * other cliques).
 *
 * @param cliques candidate cliques
 * @param binConstraint cost budget
 * @param unigramAddThreshold weight threshold for term cliques
 * @param bigramAddThreshold weight threshold for non-term cliques
 * @return the selected cliques, in selection order
 */
public static List<ConstrainedClique> greedyKnapsack (List<ConstrainedClique> cliques, double binConstraint, double unigramAddThreshold, double bigramAddThreshold) {
  List<ConstrainedClique> picked = new ArrayList<ConstrainedClique>();
  double spent = 0.0;

  for (ConstrainedClique candidate : orderByProfitDensity(cliques)) {
    Clique.Type kind = candidate.getType();
    double weight = candidate.getWeight();
    double projected = spent + candidate.getAnalyticalCost();

    boolean withinBudget = (projected - binConstraint) < binConstraint * 0.07;
    if (!(withinBudget && weight >= 0.00001)) {
      continue;
    }

    double threshold = kind.equals(Clique.Type.Term) ? unigramAddThreshold : bigramAddThreshold;
    if (weight > threshold) {
      spent = projected;
      picked.add(candidate);
    }
  }
  return picked;
}
/**
 * Groups cliques by concept and returns them with the groups arranged by
 * descending profit density.
 *
 * <p>Each distinct concept is ranked by the profit density of the first clique
 * seen for it. Cliques that share a concept stay adjacent in the result, in
 * their original relative order.
 *
 * @param cliques cliques to order
 * @return a new deque holding every input clique, grouped by concept
 */
public static Deque<ConstrainedClique> orderByProfitDensity (List<ConstrainedClique> cliques){
  // Distinct concepts in first-seen order, with their sort keys in a parallel list.
  List<String> concepts = new ArrayList<String>();
  List<Double> sortKeys = new ArrayList<Double>();
  HashMap<String, List<ConstrainedClique>> cliquesByConcept = new HashMap<String, List<ConstrainedClique>>();

  for (ConstrainedClique c : cliques) {
    String concept = c.getConcept();
    List<ConstrainedClique> group = cliquesByConcept.get(concept);
    if (group == null) {
      group = new ArrayList<ConstrainedClique>();
      cliquesByConcept.put(concept, group);
      concepts.add(concept);
      // Negated so that the ascending sort below yields descending profit density.
      double key = 0 - c.getProfitDensity();
      sortKeys.add(key);
    }
    group.add(c);
  }

  double[] values = new double[sortKeys.size()];
  for (int i = 0; i < values.length; i++) {
    values[i] = sortKeys.get(i);
  }

  List<String> orderedConcepts = orderCliques(concepts, values);

  Deque<ConstrainedClique> sortedCliques = new LinkedList<ConstrainedClique>();
  for (String concept : orderedConcepts) {
    sortedCliques.addAll(cliquesByConcept.get(concept));
  }
  return sortedCliques;
}
/**
 * Returns the entries of {@code cliques} rearranged so that their associated
 * {@code values} are in ascending order.
 *
 * <p>{@code values[i]} is the sort key of {@code cliques.get(i)}. The
 * {@code values} array is sorted in place as a side effect.
 *
 * @param cliques entries to reorder
 * @param values sort key of each entry; reordered by this call
 * @return a new list with one entry per element of {@code values}
 */
public static List<String> orderCliques (List<String> cliques, double[] values){
  final int count = values.length;

  // Start from the identity permutation; Quicksort permutes it alongside the keys.
  int[] permutation = new int[count];
  int k = 0;
  while (k < count) {
    permutation[k] = k;
    k++;
  }
  Quicksort(values, permutation, 0, count - 1);

  List<String> ordered = new ArrayList<String>();
  for (int position : permutation) {
    ordered.add(cliques.get(position));
  }
  return ordered;
}
/**
 * Sorts {@code vec[loBound..hiBound]} (both bounds inclusive) into ascending
 * order in place, applying the same swaps to {@code order} so that it records
 * the permutation.
 *
 * <p>NaN values are ordered after every other value. Non-NaN values are
 * compared with the ordinary numeric operators. The sort is not stable.
 *
 * <p>PRE: {@code vec} and {@code order} are valid at every index in
 * {@code loBound..hiBound}.<br>
 * POST: {@code vec[loBound..hiBound]} contains the same values as at
 * invocation, in ascending order; {@code order} has been permuted identically.
 *
 * @param vec values to sort
 * @param order companion array permuted in lock-step with {@code vec}
 * @param loBound first index of the range to sort
 * @param hiBound last index of the range to sort
 */
public static void Quicksort( double vec[], int order [], int loBound, int hiBound )
{
  // Recurse into the smaller partition and loop on the larger one, which
  // keeps the recursion depth logarithmic even when many keys are equal.
  while (true) {
    if (loBound >= hiBound) { // Zero or one item to sort
      return;
    }
    if (hiBound - loBound == 1) { // Just two items to sort
      if (sortsAfter(vec[loBound], vec[hiBound])) {
        swapEntries(vec, order, loBound, hiBound);
      }
      return;
    }

    // 3 or more items: use the middle element as pivot and park it at loBound.
    // The midpoint is written so that loBound + hiBound cannot overflow.
    int mid = loBound + (hiBound - loBound) / 2;
    swapEntries(vec, order, mid, loBound);
    double pivot = vec[loBound];

    int loSwap = loBound + 1;
    int hiSwap = hiBound;
    do { // the partitioning
      while (loSwap <= hiSwap && !sortsAfter(vec[loSwap], pivot)) {
        loSwap++;
      }
      // Stops at loBound at the latest, because the pivot never sorts after itself.
      while (sortsAfter(vec[hiSwap], pivot)) {
        hiSwap--;
      }
      if (loSwap < hiSwap) {
        swapEntries(vec, order, loSwap, hiSwap);
      }
    } while (loSwap < hiSwap);

    // Put the pivot back in its final position.
    swapEntries(vec, order, loBound, hiSwap);

    int pivotIndex = hiSwap;
    if (pivotIndex - loBound < hiBound - pivotIndex) {
      Quicksort(vec, order, loBound, pivotIndex - 1);
      loBound = pivotIndex + 1;
    } else {
      Quicksort(vec, order, pivotIndex + 1, hiBound);
      hiBound = pivotIndex - 1;
    }
  }
}

// True when a must be placed after b: numeric a > b, with NaN after every non-NaN value.
private static boolean sortsAfter(double a, double b) {
  if (Double.isNaN(b)) {
    return false;
  }
  return Double.isNaN(a) || a > b;
}

// Swaps positions i and j in both the value array and its companion index array.
private static void swapEntries(double[] vec, int[] order, int i, int j) {
  double value = vec[i];
  vec[i] = vec[j];
  vec[j] = value;
  int index = order[i];
  order[i] = order[j];
  order[j] = index;
}
}