/* ############################################################################ ## ## Copyright (C) 2006-2009 University of Utah. All rights reserved. ## ## This file is part of DeepPeep. ## ## This file may be used under the terms of the GNU General Public ## License version 2.0 as published by the Free Software Foundation ## and appearing in the file LICENSE.GPL included in the packaging of ## this file. Please review the following to ensure GNU General Public ## Licensing requirements will be met: ## http://www.opensource.org/licenses/gpl-license.php ## ## If you are unsure which license is appropriate for your use (for ## instance, you are interested in developing a commercial derivative ## of DeepPeep), please contact us at deeppeep@sci.utah.edu. ## ## This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE ## WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ## ############################################################################ */ package focusedCrawler.util.parser; import java.io.Serializable; import java.net.MalformedURLException; import java.net.URL; @SuppressWarnings("serial") public class LinkNeighborhood implements Serializable { private URL link; private String[] anchor = new String[0]; private String[] around = new String[0]; private String imgSource; private String[] imgAlt; private int aroundPosition; private int numOfWordsAnchor; private boolean sameSite = false; public LinkNeighborhood(URL link) { this.link = link; } public void setURL(URL url) { this.link = url; } public void setAnchor(String[] anchor) { this.anchor = anchor; } public void setAround(String[] around) { this.around = around; } public void setAroundPosition(int pos) { this.aroundPosition = pos; } public void setNumberOfWordsAnchor(int num) { this.numOfWordsAnchor = num; } public void setImgSource(String source) { this.imgSource = source; } public void setImgAlt(String[] alt) { this.imgAlt = alt; } public void setSameSite(boolean sameSite) { this.sameSite = sameSite; } public URL getLink() { return this.link; } public String getDomainName() { String domain = link.getHost(); return domain.startsWith("www.") ? domain.substring(4) : domain; } public int getAroundPosition() { return this.aroundPosition; } public int getNumWordsAnchor() { return this.numOfWordsAnchor; } public String[] getAnchor() { return this.anchor; } public String getAltString() { StringBuffer buffer = new StringBuffer(); String[] alts = getImgAlt(); for (int j = 0; alts != null && j < alts.length; j++) { buffer.append(alts[j]); buffer.append(" "); } return buffer.toString(); } public String getAnchorString() { StringBuffer buffer = new StringBuffer(); String[] anchors = getAnchor(); for (int j = 0; j < anchors.length; j++) { buffer.append(anchors[j]); buffer.append(" "); } return buffer.toString(); } public String getAroundString() { StringBuffer buffer = new StringBuffer(); String[] arounds = getAround(); for (int j = 0; j < arounds.length; j++) { buffer.append(arounds[j]); buffer.append(" "); } return buffer.toString(); } public String[] getAround() { return this.around; } public String[] getImgAlt() { return this.imgAlt; } public String getImgSrc() { return this.imgSource; } public boolean getSameSite() { return this.sameSite; } public static LinkNeighborhood createLN(String strFormat) throws MalformedURLException { String[] parts = strFormat.split("::"); LinkNeighborhood ln = new LinkNeighborhood(new URL(parts[0])); if (parts.length > 2) { if (parts[1].contains(",")) { String[] anchorWords = parts[1].split(","); ln.setAnchor(anchorWords); } if (parts[2].contains(",")) { String[] aroundWords = parts[2].split(","); ln.setAround(aroundWords); } } return ln; } public String toString() { StringBuffer buffer = new StringBuffer(); for (int i = 0; anchor != null && i < anchor.length; i++) { if (i != 0) { buffer.append(","); } buffer.append(anchor[i]); } buffer.append("::"); for (int i = 0; around != null && i < around.length; i++) { if (i != 0) { buffer.append(","); } buffer.append(around[i]); } return buffer.toString(); } public LinkNeighborhood clone() { LinkNeighborhood ln = new LinkNeighborhood(link); ln.setAnchor(anchor); ln.setAround(around); ln.setNumberOfWordsAnchor(numOfWordsAnchor); ln.setAroundPosition(aroundPosition); ln.setImgAlt(imgAlt); ln.setImgSource(imgSource); return ln; } }