/**
*
*/
package ecologylab.bigsemantics.collecting;
// import java.awt.Color;
import java.util.HashMap;
import ecologylab.bigsemantics.platformspecifics.SemanticsPlatformSpecifics;
import ecologylab.concurrent.BasicSite;
import ecologylab.generic.Colors;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.annotations.simpl_inherit;
/**
 * A {@link BasicSite} extended to maintain semantics-related bookkeeping for one site (domain):
 * counters for candidate text/images, article bodies, index/content pages, surrogates and
 * containers; the site's favicon location; and the stroke hue / font index used to render
 * elements that originate from the site.
 * <p>
 * Apart from {@link #hasQueuedDownloadables()} and the static stroke-hue allocator, the members
 * of this class are not synchronized.
 *
 * @author damaraju
 */
@simpl_inherit
public class SemanticsSite extends BasicSite implements Colors
{
  /**
   * Global switch: when true, {@link #isDownloadingConstrained()} reports false for every site,
   * regardless of its minimum download interval.
   */
  public static boolean disableDownloadInterval = false;

  /**
   * true if any Container from this Site is a Seed.
   * <p>
   * NOTE(review): nothing in this class assigns this field, so it keeps its default of false
   * unless it is populated reflectively (e.g. during deserialization) -- confirm.
   */
  private boolean isSeed;

  protected static final int MAX_GENERATIONS = 15;

  /** Surrogate count at which the surrogate term of {@link #weightingFactor()} saturates at 1. */
  static final double MAX_SURROGATES_FROM_SITE = 25.0;

  /** Multiplier applied to {@link #weightingFactor()} for sites that are not seeds. */
  static final double NON_SEED_FACTOR = .25;

  int numElementsInComposition;

  private int numCandidateText = 0;

  private int numCandidateImages = 0;

  /** Candidate text and image elements (combined) that were found inside an article body. */
  private int numCandidatesInArticleBody;

  private int numArticleBodies;

  /**
   * counts of index pages and content pages initialized to 1 so indexContentRatio is always well
   * defined.
   */
  private int indexPages = 1;

  /**
   * initialized to 1 so indexContentRatio is always well defined.
   */
  private int contentPages = 1;

  private int numSurrogatesFromSite = 1;

  private int numContainers;

  /** Location of this site's favicon, or null until {@link #setFaviconPath} succeeds. */
  ParsedURL pathToFavicon = null;

  /**
   * Maps a site's domain to the favicon location most recently set for it.
   * <p>
   * Left as a raw, plain {@link HashMap} so existing callers keep compiling; it is shared by all
   * instances and is not synchronized.
   */
  static public HashMap faviPathHash = new HashMap();

  /**
   * Creates the site for the given domain, assigning it the next stroke hue in the rotation and
   * a font index obtained from the supplied scope.
   *
   * @param domain
   *          domain name identifying this site.
   * @param infoCollector
   *          scope that is asked for the font index to use for this site; must not be null.
   */
  public SemanticsSite(String domain, SemanticsGlobalScope infoCollector)
  {
    this.domain = domain;
    strokeHue = nextStrokeHue();
    fontIndex = infoCollector.getAppropriateFontIndex();
  }

  /**
   * DO NOT USE. meant only for XML Exception
   */
  public SemanticsSite()
  {
  }

  /**
   * Resolves and records the favicon location for this site. Resolution is attempted in order:
   * <ol>
   * <li>{@code path} interpreted as an absolute URL;</li>
   * <li>{@code path} interpreted relative to {@code site};</li>
   * <li>the conventional {@code /favicon.ico} at the root of {@code site}'s domain.</li>
   * </ol>
   * If one of these yields a URL, it is stored on this instance and in {@link #faviPathHash}
   * under this site's domain. If none does, nothing is changed.
   *
   * @param path
   *          favicon location as found in the page; absolute or relative.
   * @param site
   *          URL used to resolve a relative {@code path} and to build the root fallback. When
   *          null, only the absolute interpretation of {@code path} is attempted.
   */
  public void setFaviconPath(String path, ParsedURL site)
  {
    ParsedURL favPath = ParsedURL.getAbsolute(path, "Malformed favicon path. Try with relative");
    // The fallbacks need a base URL; without one there is nothing more to try.
    if (favPath == null && site != null)
    {
      favPath = site.getRelative(path, "Still bad URL. Use root method.");
      if (favPath == null)
      {
        // Last resort: the conventional location at the root of the site's domain.
        favPath = ParsedURL.getAbsolute("http://" + site.domain() + "/favicon.ico");
      }
    }
    if (favPath != null)
    {
      pathToFavicon = favPath;
      faviPathHash.put(domain, pathToFavicon);
    }
  }

  /**
   * @return the favicon location recorded by {@link #setFaviconPath}, or null if none was set.
   */
  public ParsedURL getFaviconPath()
  {
    return pathToFavicon;
  }

  /**
   * Records one more candidate image from this site.
   *
   * @param inArticleBody
   *          true if the image was found inside an article body.
   */
  public void newCandidateImage(boolean inArticleBody)
  {
    numCandidateImages++;
    if (inArticleBody)
      numCandidatesInArticleBody++;
  }

  /**
   * Records one more candidate text element from this site.
   *
   * @param inArticleBody
   *          true if the text was found inside an article body.
   */
  public void newCandidateText(boolean inArticleBody)
  {
    numCandidateText++;
    if (inArticleBody)
      numCandidatesInArticleBody++;
  }

  /**
   * Adds 1 to the article body count
   */
  public void newArticleBody()
  {
    this.numArticleBodies++;
  }

  /**
   * Adds 1 to the index page count
   */
  public void newIndexPage()
  {
    indexPages++;
  }

  /**
   * Adds 1 to the content page count
   */
  public void newContentPage()
  {
    contentPages++;
  }

  /**
   * Make crawler work better by taking into account our efficiency in retrieving documents and
   * extracting content from this site.
   * <p>
   * The result is the product of the inherited timeouts factor, the fraction of
   * {@link #MAX_SURROGATES_FROM_SITE} reached so far (capped at 1), and the ratio of content
   * pages to containers + 1; it is further scaled by {@link #NON_SEED_FACTOR} when this site is
   * not a seed.
   *
   * @return the weighting factor. Intended range is 0 &lt; weightingFactor &lt;= 1; that holds
   *         only if the timeouts factor is in that range and the content page count does not
   *         exceed the container count + 1, neither of which is enforced here.
   */
  public double weightingFactor()
  {
    // Saturates at 1 once the site has produced more than the maximum number of surrogates.
    double surrogateFactor = numSurrogatesFromSite > MAX_SURROGATES_FROM_SITE
        ? 1
        : numSurrogatesFromSite / MAX_SURROGATES_FROM_SITE;
    // +1 keeps the denominator non-zero before any container has been counted.
    double contentFactor = (double) contentPages / (double) (numContainers + 1);

    double result = timeoutsFactor() * surrogateFactor * contentFactor;
    if (!isSeed)
      result *= NON_SEED_FACTOR;
    return result;
  }

  /**
   * Adds 1 to the count of surrogates obtained from this site.
   */
  public void incrementNumSurrogatesFrom()
  {
    numSurrogatesFromSite++;
  }

  /**
   * Adds 1 to the count of containers for this site.
   */
  public void incrementNumContainers()
  {
    numContainers++;
  }

  /**
   *
   * @return true when this site has downloadables in the download monitor
   */
  public synchronized boolean hasQueuedDownloadables()
  {
    return downloadsQueuedOrInProgress > 0;
  }

  /**
   * Gets the ratio of index pages to content pages
   *
   * @return number of index pages divided by the number of content pages
   */
  public double getIndexContentRatio()
  {
    return indexPages / (double) contentPages;
  }

  /**
   * @return true if any Container from this Site is a Seed.
   */
  public boolean isSeed()
  {
    return isSeed;
  }

  /**
   * @return true when this site has a positive minimum download interval and the interval has
   *         not been globally disabled through {@link #disableDownloadInterval}.
   */
  @Override
  public boolean isDownloadingConstrained()
  {
    return !disableDownloadInterval && minDownloadInterval > 0;
  }

  // /////////////////////////////////////// text color stuff
  // //////////////////////////////////////////////////////

  /** Hue assigned to this site, taken from {@link #strokeHues}. */
  private float strokeHue;

  private int fontIndex;

  /**
   * Per-site array handed to the platform-specific stroke color lookup. Typed as Object[]
   * rather than a concrete color class (it used to be java.awt.Color[]).
   */
  final Object[] strokeColors = new Object[MAX_GENERATIONS];

  static final int MAX_INTENSITY = 1024;

  /** Hues handed out to new sites in round-robin order. */
  static float strokeHues[] =
  { YELLOW, GREEN, BLUE, RED, ORANGE, MAGENTA, YELLOW_GREEN,
      RED_ORANGE, BLUE_MAGENTA, CYAN, YELLOW_ORANGE, RED_MAGENTA };

  /** Index into {@link #strokeHues} of the hue the next new site will receive. */
  static int nextStrokeHue = 0;

  /** Generation passed to the platform-specific lookup by {@link #getStrokeColor()}. */
  static final int DND_GENERATION = 3;

  /**
   * Returns the next hue in the {@link #strokeHues} rotation and advances the rotation, wrapping
   * to the start after the last entry.
   * <p>
   * Synchronized because the counter is shared by every site: without the lock, two threads
   * constructing sites at the same time could observe the counter after it was incremented but
   * before it was wrapped, and index past the end of the array.
   *
   * @return the hue for the site being constructed.
   */
  private static synchronized float nextStrokeHue()
  {
    float result = strokeHues[nextStrokeHue];
    nextStrokeHue = (nextStrokeHue + 1) % strokeHues.length;
    return result;
  }

  /**
   * Obtains this site's stroke color from the platform-specific implementation, using
   * {@link #DND_GENERATION} as the generation.
   *
   * @return the platform-specific color object.
   */
  public Object getStrokeColor()
  {
    return SemanticsPlatformSpecifics.get().getStrokeColor(DND_GENERATION, MAX_GENERATIONS,
        strokeColors, strokeHue);
  }

  /**
   * @return the font index assigned to this site.
   */
  public int fontIndex()
  {
    return fontIndex;
  }

  /**
   * @return the stroke hue assigned to this site.
   */
  public float strokeHue()
  {
    return strokeHue;
  }

  /**
   * For use only by cm.state for restoring saved values.
   */
  public void setStrokeHue(float strokeHue)
  {
    this.strokeHue = strokeHue;
  }

  /**
   * For use only by cm.state for restoring saved values.
   */
  public void setFontIndex(int fontIndex)
  {
    this.fontIndex = fontIndex;
  }

  /**
   * @return "SemanticsSite: " followed by this site's domain.
   */
  @Override
  public String toString()
  {
    return "SemanticsSite: " + domain;
  }
}