package models;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.JoinColumn;
import javax.persistence.JoinTable;
import javax.persistence.ManyToMany;
import javax.persistence.ManyToOne;
import javax.persistence.OneToMany;
import javax.persistence.OneToOne;
import javax.persistence.OrderBy;
import javax.persistence.PrePersist;
import javax.persistence.PreUpdate;
import javax.persistence.Table;
import javax.persistence.Transient;
import javax.persistence.Version;
import models.License.LicenseStatus;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import com.github.kevinsawicki.timeago.TimeAgo;
import play.Logger;
import play.data.validation.Constraints;
import play.data.validation.Constraints.Required;
import play.db.ebean.Model;
import scala.NotImplementedError;
import uk.bl.Const;
import uk.bl.api.OverallLicenseStatus;
import uk.bl.api.Utils;
import uk.bl.api.models.FieldModel;
import uk.bl.exception.ActException;
import uk.bl.exception.WhoisException;
import uk.bl.scope.Scope;
import com.avaje.ebean.Ebean;
import com.avaje.ebean.Expr;
import com.avaje.ebean.ExpressionList;
import com.avaje.ebean.Page;
import com.avaje.ebean.Query;
import com.avaje.ebean.annotation.Transactional;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* Target entity managed by Ebean
*/
@Entity
@Table(name = "target")
public class Target extends Model {
/**
*
*/
private static final long serialVersionUID = -8283372689443804260L;
@Id
public Long id;
@JsonProperty
@Column(unique=true)
public String url;
public Date createdAt;
@Version
public Timestamp updatedAt;
/**
 * Persists this target, deriving a default URL from the database ID when no
 * URL has been supplied.
 * <p>
 * The cached overall license status is discarded on every call. The creation
 * date is stamped before the first write so that it is stored by that write.
 * A second write is issued only when the URL had to be generated, because the
 * generated URL needs the ID assigned by the first write.
 */
@Override
@Transactional
public void save() {
    // Clear cached info:
    Logger.info("Clearing cached license status result...");
    this.overallLicenseStatus = null;
    if (this.createdAt == null) {
        this.createdAt = new Date();
    }
    // need to save to get the ID
    super.save();
    if (StringUtils.isEmpty(this.url)) {
        this.url = Const.ACT_URL + this.id;
        // persist the URL that was derived from the freshly assigned ID
        super.save();
    }
}
/**
 * Formats the creation date as text.
 *
 * @return {@code createdAt} as rendered by {@code Utils.INSTANCE.convertToDateString}
 */
public String toCreatedAtString() {
    final Date created = this.createdAt;
    return Utils.INSTANCE.convertToDateString(created);
}
/**
 * Formats the last-update timestamp as text.
 *
 * @return {@code updatedAt} as rendered by {@code Utils.INSTANCE.convertToDateTime}
 */
public String toUpdatedAtString() {
    final Timestamp updated = this.updatedAt;
    return Utils.INSTANCE.convertToDateTime(updated);
}
@Constraints.Required(message="Title Required")
public String title;
public String language;
public String secondLanguage;
@Column(name="web_form_info")
public String webFormInfo;
@Column(name="web_form_date")
public Date webFormDate;
@Column(columnDefinition = "text")
public String revision;
@JsonProperty
public String edit_url;
@JsonProperty
@ManyToOne(cascade = CascadeType.REFRESH)
@JoinColumn(name = "qaissue_id")
public QaIssue qaIssue;
@JsonProperty
@Column(columnDefinition = "text")
public String notes;
@JsonProperty
public String format;
/**
 * Accessor for the {@code edit_url} field.
 *
 * @return the current edit URL, possibly {@code null}
 */
public String getEdit_url() {
    return this.edit_url;
}
/**
 * Mutator for the {@code edit_url} field.
 *
 * @param edit_url the new edit URL; stored as given
 */
public void setEdit_url(String edit_url) {
    final String newEditUrl = edit_url;
    this.edit_url = newEditUrl;
}
@JsonProperty("originating_organisation")
@Column(columnDefinition = "text")
public String originatingOrganisation;
//@JsonProperty("crawl_permissions")
@OneToMany(mappedBy = "target", cascade = CascadeType.ALL)
// @OrderBy("createdAt DESC")
public List<CrawlPermission> crawlPermissions;
@JsonIgnore
@OneToMany(mappedBy = "target", cascade = CascadeType.ALL)
public List<Instance> instances;
@ManyToMany(cascade = CascadeType.ALL, fetch=FetchType.EAGER)
@JoinTable(name = "license_target", joinColumns = { @JoinColumn(name = "target_id", referencedColumnName="id") },
inverseJoinColumns = { @JoinColumn(name = "license_id", referencedColumnName="id") })
public List<License> licenses;
@JsonIgnore
@ManyToMany(cascade = CascadeType.ALL)
@JoinTable(name = "subject_target", joinColumns = { @JoinColumn(name = "target_id", referencedColumnName="id") },
inverseJoinColumns = { @JoinColumn(name = "subject_id", referencedColumnName="id") })
public List<Subject> subjects;
/**
 * Collects the IDs of all subjects attached to this target.
 *
 * @return a new list with one entry per subject, in iteration order; empty
 *         when {@code subjects} is {@code null} or has no elements
 */
@JsonProperty
@Transient
public List<Long> getSubjectIds() {
    final List<Long> ids = new ArrayList<Long>();
    if (this.subjects == null) {
        return ids;
    }
    for (Iterator<Subject> it = this.subjects.iterator(); it.hasNext();) {
        ids.add(it.next().id);
    }
    return ids;
}
@JsonIgnore
@ManyToMany(cascade = CascadeType.ALL)
@JoinTable(name = "collection_target", joinColumns = { @JoinColumn(name = "target_id", referencedColumnName="id") },
inverseJoinColumns = { @JoinColumn(name = "collection_id", referencedColumnName="id") })
public List<Collection> collections;
@ManyToMany(cascade = CascadeType.ALL)
@JoinTable(name = "tag_target", joinColumns = { @JoinColumn(name = "target_id", referencedColumnName="id") },
inverseJoinColumns = { @JoinColumn(name = "tag_id", referencedColumnName="id") })
public List<Tag> tags;
@ManyToMany(cascade = CascadeType.ALL)
@JoinTable(name = "flag_target", joinColumns = { @JoinColumn(name = "target_id", referencedColumnName="id") },
inverseJoinColumns = { @JoinColumn(name = "flag_id", referencedColumnName="id") })
public List<Flag> flags;
@JsonIgnore
@OneToMany(mappedBy = "target", cascade = CascadeType.ALL)
public List<LookupEntry> lookupEntries;
@OneToMany(mappedBy = "target", cascade = CascadeType.ALL, fetch=FetchType.EAGER)
@JoinColumn(name = "target_id")
@OrderBy("position ASC")
public List<FieldUrl> fieldUrls;
@Column(name="target_start_date")
protected Timestamp targetStartDate;
@Column(name="target_end_date")
protected Timestamp targetEndDate;
@Column(columnDefinition = "text")
public String description;
@JsonIgnore
public Boolean isInScopeIp;
@JsonIgnore
public Boolean isInScopeIpWithoutLicense;
public Boolean active; // flag for the latest version of the target among
// targets with the same URL
@ManyToOne(cascade = CascadeType.REFRESH)
@JoinColumn(name = "author_id")
@Required(message="Author Required")
public User authorUser;
@ManyToOne(cascade = CascadeType.REFRESH)
@JoinColumn(name = "document_owner_id")
public User documentOwner;
@Column(columnDefinition = "text")
public String flagNotes;
/**
* This field comprises the current tab name for view and edit pages.
*/
/* This field is apparently the viaCorrespondenceExp field! */
@Column(columnDefinition = "text")
@JsonProperty
public String value;
@Column(columnDefinition = "text")
public String summary;
@JsonProperty("field_special_dispensation")
public Boolean specialDispensation = Boolean.FALSE;
@Column(columnDefinition = "text")
@JsonProperty("field_special_dispensation_reaso")
public String specialDispensationReason;
@JsonProperty("field_uk_hosting")
public Boolean isUkHosting;
public Boolean isTopLevelDomain;
public Boolean isUkRegistration;
@JsonProperty("field_live_site_status")
public String liveSiteStatus;
@JsonProperty("field_hidden")
public Boolean hidden;
@JsonProperty("field_key_site")
public Boolean keySite;
@JsonProperty("field_wct_id")
public Long wctId;
@JsonProperty("field_spt_id")
public Long sptId;
@Column(columnDefinition = "text")
public String keywords;
@Column(columnDefinition = "text")
public String synonyms;
//@JsonIgnore
@JsonProperty("nominating_organisation")
@ManyToOne(cascade = CascadeType.REFRESH)
@JoinColumn(name = "organisation_id")
public Organisation organisation;
@JsonIgnore
@Column(columnDefinition = "text")
public String authors;
public Date dateOfPublication;
@Column(columnDefinition = "text")
public String justification;
@Required(message="Selection Type Required")
public String selectionType;
@Column(columnDefinition = "text")
public String selectorNotes;
@Column(columnDefinition = "text")
public String archivistNotes;
public Long legacySiteId;
@JsonProperty("field_uk_postal_address")
public Boolean ukPostalAddress;
@Column(columnDefinition = "text")
@JsonProperty("uk_postal_address_url")
public String ukPostalAddressUrl;
@JsonProperty("field_via_correspondence")
public Boolean viaCorrespondence;
/*
@JsonProperty("field_via_correspondence_exp")
public String viaCorrespondenceExp;
*/
@JsonProperty("field_professional_judgement")
public Boolean professionalJudgement;
@Column(columnDefinition = "text")
@JsonProperty("field_professional_judgement_exp")
public String professionalJudgementExp;
@JsonProperty("field_no_ld_criteria_met")
public Boolean noLdCriteriaMet;
@JsonProperty("field_scope")
public String scope;
@JsonProperty("field_depth")
public String depth;
@JsonProperty("field_ignore_robots_txt")
public Boolean ignoreRobotsTxt;
@JsonProperty("field_crawl_frequency")
public String crawlFrequency;
@JsonIgnore
public Date crawlStartDate;
@JsonIgnore
public Date crawlEndDate;
public String whiteList; // regex for white list URLs
public String blackList; // regex for black list URLs
public String licenseStatus;
@Transient
public String tabStatus;
@Transient
@JsonProperty
private String field_uk_domain;
@Transient
@JsonProperty
private String field_uk_geoip;
@Transient
@JsonProperty
private String field_crawl_permission;
@SuppressWarnings("rawtypes")
@JsonProperty
private List<Map> field_url;
@Transient
@JsonProperty
private FieldModel field_subject;
@Transient
@JsonProperty
private Object field_description;
@Transient
@JsonProperty
private Object field_uk_postal_address_url;
@Transient
@JsonProperty
private FieldModel field_nominating_organisation;
@Transient
@JsonProperty
private List<FieldModel> field_suggested_collections;
@Transient
@JsonProperty
private List<FieldModel> field_collections;
@Transient
@JsonProperty
private Long field_crawl_start_date;
@Transient
@JsonIgnore
@JsonProperty
private Long field_crawl_end_date;
@Transient
@JsonProperty
private List<FieldModel> field_license;
@Transient
@JsonProperty
private Object field_instances;
@Transient
@JsonProperty
private List<FieldModel> field_collection_categories;
@Transient
@JsonProperty
private FieldModel field_qa_status;
@Transient
@JsonProperty
private List<FieldModel> field_snapshots;
@Transient
@JsonProperty
private Object field_notes;
@Transient
@Required(message="Url(s) Required")
public String formUrl;
@Transient
public String dateOfPublicationText;
@Transient
public String crawlStartDateText;
@Transient
public String crawlEndDateText;
@Transient
public String crawlStartDateISO;
@Transient
public String crawlEndDateISO;
@Transient
public String webFormDateText;
@Transient
public String subjectSelect;
@Transient
public String collectionSelect;
@Transient
public String authorIdText;
@Transient
@JsonProperty
public String fieldUrl;
@Transient
@JsonProperty
private List<String> field_urls;
@Transient
@JsonProperty
private List<String> field_subjects;
@Transient
@JsonProperty
private String field_nominating_org;
@Transient
@JsonProperty
private List<String> field_collection_cats;
@Transient
@JsonProperty
private String selector;
@OneToOne(mappedBy="target", cascade={CascadeType.REFRESH,CascadeType.REMOVE})
public WatchedTarget watchedTarget;
@Transient
private OverallLicenseStatus overallLicenseStatus;
public String loginPageUrl;
public String logoutUrl;
public Integer secretId;
/**
 * Tells whether a {@code WatchedTarget} is attached to this target.
 *
 * @return {@code true} when {@code watchedTarget} is not {@code null}
 */
@JsonProperty
public boolean isWatched() {
    if (this.watchedTarget == null) {
        return false;
    }
    return true;
}
/**
 * No-op setter: the argument is ignored and no state is changed. The watched
 * state reported by {@link #isWatched()} is derived from {@code watchedTarget}.
 * NOTE(review): presumably kept so the bean exposes a setter matching the
 * "watched" property - confirm before removing.
 *
 * @param watched ignored
 */
@JsonIgnore
public void setWatched( boolean watched ) {
}
/**
 * Tells whether this target is watched and its watched-target record holds at
 * least one document.
 *
 * @return {@code true} only when a watched target exists and its
 *         {@code documents} collection is non-null and non-empty
 */
@JsonIgnore
public boolean hasDocuments() {
    if (!isWatched()) {
        return false;
    }
    // documents may not have been initialised on a freshly created WatchedTarget
    return watchedTarget.documents != null && !watchedTarget.documents.isEmpty();
}
// "title": "Your Thurrock" - fine
// "field_subject": ["24"],
// "field_crawl_frequency": "monthly" - fine
// "field_nominating_organisation": "101",
// "field_url": ["http://yourthurrock.com"],
// "field_collection_categories": ["297"],
// "field_crawl_start_date": "1417255200"
// "body":[],
// "field_scope":"root",
// "field_url":[
// {"url":"http:\/\/www.13baseraf.co.uk\/","attributes":[]},
// {"url":"http:\/\/www.northlincsweb.net\/13Base\/","attributes":[]}],
// "field_depth":"capped",
// "field_via_correspondence":false,
// "field_uk_postal_address":false,
// "field_uk_hosting":false,
// "field_description":[],
// "field_uk_postal_address_url":[],
// "field_nominating_organisation":{
// "uri":"http:\/\/webarchive.org.uk\/act\/node\/101","id":"101","resource":"node"
// },
// "field_crawl_frequency":"annual",
// "field_suggested_collections":[],
// "field_collections":[],
// "field_crawl_start_date":"1395968400",
// "field_crawl_end_date":"1401580800",
// "field_uk_domain":"No",
// "field_license":[
// {"uri":"http:\/\/webarchive.org.uk\/act\/taxonomy_term\/168","id":"168","resource":"taxonomy_term"}],
// "field_crawl_permission":"",
// "field_collection_categories":[],
// "field_special_dispensation":false,
// "field_special_dispensation_reaso":null,
// "field_live_site_status":null,
// "field_notes":[],
// "field_wct_id":"235438128",
// "field_spt_id":"169073",
// "field_snapshots":[],
// "field_no_ld_criteria_met":false,
// "field_key_site":false,
// "field_uk_geoip":"Yes",
// "field_professional_judgement":false,
// "field_professional_judgement_exp":null,
// "field_ignore_robots_txt":false,
// "field_instances":[],
// "nid":"14171",
// "vid":"28185",
// "is_new":false,
// "type":"url",
// "title":"13 Base - RAF",
// "language":"en",
// "url":"http:\/\/webarchive.org.uk\/act\/node\/14171",
// "edit_url":"http:\/\/webarchive.org.uk\/act\/node\/14171\/edit",
// "status":"1","promote":"0","sticky":"0","created":"1395767857","changed":"1404815005",
// "author":{
// "uri":"http:\/\/webarchive.org.uk\/act\/user\/191","id":"191","resource":"user"
// },"log":"",
// "revision":null,"comment":"2","comments":[],"comment_count":"0","comment_count_new":"0","feed_nid":null
/**
 * Creates an empty target. Nothing is initialised here beyond the field
 * initialisers; call {@link #setDefaultValues()} to apply the default
 * scope, depth, status and flag values.
 */
public Target() {
}
/**
 * Resets the crawl configuration and the boolean scope/visibility flags of
 * this target to their default values. Only the fields assigned below are
 * touched.
 */
public void setDefaultValues() {
    final Boolean off = Boolean.FALSE;

    // Enumerated settings, stored by enum constant name.
    this.crawlFrequency = Const.CrawlFrequency.DOMAINCRAWL.name();
    this.liveSiteStatus = Const.SiteStatus.LIVE.name();
    this.depth = Const.DepthType.CAPPED.name();
    this.scope = Const.ScopeType.root.name();

    // Automatically determined scope flags.
    this.isUkRegistration = off;
    this.isTopLevelDomain = off;
    this.isUkHosting = off;

    // Manually set scope flags.
    this.professionalJudgement = off;
    this.viaCorrespondence = off;
    this.ukPostalAddress = off;

    // Remaining flags.
    this.ignoreRobotsTxt = off;
    this.keySite = off;
    this.hidden = off;
}
/** Ebean finder for {@code Target} rows keyed by {@code Long} id; used by the static query helpers below. */
public static Model.Finder<Long, Target> find = new Model.Finder<>(Long.class, Target.class);
/**
 * Loads every target, without any filtering.
 *
 * @return all targets returned by the finder
 */
public static List<Target> findAll() {
    final List<Target> everything = find.all();
    return everything;
}
/**
 * Counts active targets by their UK-registration flag.
 *
 * @param value
 *            {@code true} to count active targets whose
 *            {@code isUkRegistration} is true; {@code false} to count
 *            active targets where it is false or not set
 * @return the number of matching rows
 */
public static int findWhoIsCount(boolean value) {
    final ExpressionList<Target> activeTargets = find.where().eq(Const.ACTIVE, true);
    if (value) {
        return activeTargets.eq("isUkRegistration", true).findRowCount();
    }
    // "not registered" covers both an explicit false and a missing value
    return activeTargets
            .add(Expr.or(
                    Expr.eq("isUkRegistration", false),
                    Expr.isNull("isUkRegistration")))
            .findRowCount();
}
/**
 * Finds targets having at least one field URL that contains the given text,
 * ignoring case. Active and inactive targets are both included.
 *
 * @param url
 *            the text to look for inside the field URLs
 * @return the matching targets
 */
public static List<Target> filterUrl(String url) {
    return find.fetch("fieldUrls")
            .where()
            .icontains("fieldUrls.url", url)
            .findList();
}
/**
 * Finds active targets whose field URL or title contains the given text,
 * ignoring case.
 *
 * @param url
 *            the text to look for in the field URLs and in the title
 * @return the matching active targets
 */
public static List<Target> filterActiveUrl(String url) {
    // Reference the title by property name rather than by the generated
    // SQL alias ("t0"), as the paging queries in this class do.
    return find.fetch("fieldUrls").where()
            .eq(Const.ACTIVE, true)
            .add(Expr.or(Expr.icontains("fieldUrls.url", url), Expr.icontains("title", url)))
            .findList();
}
/**
 * Retrieves the active target that owns exactly the given field URL.
 *
 * @param target
 *            a single field URL; {@code null} or a value containing
 *            {@code Const.COMMA} is not looked up
 * @return the matching active target, or {@code null} when the argument is
 *         {@code null}, contains a comma, or nothing matches
 */
public static Target findByTarget(String target) {
    Logger.debug("findByTarget() target url: " + target);
    // Guard against null in the same way findRevisions() tolerates it.
    if (target == null || target.contains(Const.COMMA)) {
        return null;
    }
    return find.fetch("fieldUrls").where().eq("active", true).eq("fieldUrls.url", target).findUnique();
}
/**
 * Returns every target that carries exactly the given field URL. The query
 * does not filter on the active flag, so active and inactive targets are
 * both returned.
 *
 * @param url
 *            the field URL to match; {@code null} or empty yields an empty
 *            list without querying
 * @return list of associated targets, never {@code null} for an empty input
 */
public static List<Target> findRevisions(String url) {
    Logger.debug("findRevisions() target url: " + url);
    if (StringUtils.isEmpty(url)) {
        return new ArrayList<Target>();
    }
    return find.fetch("fieldUrls").where().eq("fieldUrls.url", url).findList();
}
/**
 * Loads a target by primary key together with its field URLs and licenses,
 * and fills the transient {@code formUrl} from {@code fieldUrl()}.
 *
 * @param id
 *            the target ID
 * @return the target, or {@code null} when no row has that ID
 */
public static Target findById(Long id) {
    final Target found = find.fetch("fieldUrls").fetch("licenses").where().eq("id", id).findUnique();
    if (found == null) {
        return null;
    }
    found.formUrl = found.fieldUrl();
    return found;
}
/**
 * Looks up the active target whose {@code Const.URL} property equals the
 * given value.
 *
 * @param url
 *            the value to match
 * @return the single matching active target, or {@code null} if none
 */
public static Target findByUrl(String url) {
    final ExpressionList<Target> criteria = find.where()
            .eq(Const.URL, url)
            .eq(Const.ACTIVE, true);
    return criteria.findUnique();
}
/**
 * Looks up the active target whose {@code edit_url} equals the given value.
 *
 * @param url
 *            the value compared against {@code edit_url}
 * @return the single matching active target, or {@code null} if none
 */
public static Target findByWct(String url) {
    final ExpressionList<Target> criteria = find.where()
            .eq("edit_url", url)
            .eq(Const.ACTIVE, true);
    return criteria.findUnique();
}
/**
 * Renders the scope check of this target as the text {@code "true"} or
 * {@code "false"} for the GUI. The two arguments are only tested for being
 * non-empty; the scope decision itself comes from
 * {@code isInScopeAllOrInheritedWithoutLicense()}.
 *
 * @param fieldUrl
 *            The field URL; must be non-empty for a "true" result
 * @param url
 *            The identification URL; must be non-empty for a "true" result
 * @return {@code "true"} when both arguments are non-empty and the target is
 *         in scope, otherwise {@code "false"}
 */
public String checkScopeStr(String fieldUrl, String url) {
    if (StringUtils.isEmpty(fieldUrl) || StringUtils.isEmpty(url)) {
        return "false";
    }
    return this.isInScopeAllOrInheritedWithoutLicense() ? "true" : "false";
}
/**
 * Checks whether every field URL of this target passes
 * {@code Scope.INSTANCE.check}. Evaluation stops at the first URL that fails.
 *
 * @param includedByPermission
 *            passed through unchanged to the scope check
 * @return {@code true} when all field URLs are in scope (also when there are
 *         none), {@code false} as soon as one is not
 */
@JsonIgnore
public boolean isInScope( boolean includedByPermission ) {
    final Iterator<FieldUrl> urls = this.fieldUrls.iterator();
    while (urls.hasNext()) {
        final FieldUrl candidate = urls.next();
        if (!Scope.INSTANCE.check(candidate.url, this, includedByPermission)) {
            return false;
        }
    }
    return true;
}
/**
 * Reports whether any of the manual scope settings is switched on for this
 * target: professional judgement, UK postal address, or via correspondence.
 * A {@code null} flag counts as not set. When a setting is on, the three
 * values are written to the debug log.
 *
 * @return true if one of the manual settings is true
 */
public boolean checkManualScope() {
    final boolean manuallyScoped = BooleanUtils.isTrue(this.professionalJudgement)
            || BooleanUtils.isTrue(this.ukPostalAddress)
            || BooleanUtils.isTrue(this.viaCorrespondence);
    if (!manuallyScoped) {
        return false;
    }
    Logger.debug("checkManualScope(): " + this.ukPostalAddress + ", " + this.viaCorrespondence + ", "+ this.professionalJudgement);
    return true;
}
/**
 * Reports whether at least one license is attached to this target.
 *
 * @return true if {@code licenses} is non-null and non-empty
 */
public boolean checkLicense() {
    return this.licenses != null && !this.licenses.isEmpty();
}
/**
 * Checks a single URL against the scope rules for this target by delegating
 * to {@code Scope.INSTANCE.check} with the permission flag set to
 * {@code false}.
 *
 * @param url
 *            The URL to check
 * @return the result of the scope check
 */
public boolean isInScope(String url) {
    final boolean includedByPermission = false;
    return Scope.INSTANCE.check(url, this, includedByPermission);
}
/**
 * Returns all active targets, ordered by {@code Const.UPDATED_AT} in
 * {@code Const.DESC} order.
 *
 * @return the active targets
 */
public static List<Target> findAllActive() {
    final String ordering = Const.UPDATED_AT + " " + Const.DESC;
    return find.where().eq(Const.ACTIVE, true).orderBy(ordering).findList();
}
/**
 * Returns at most {@code number} active targets, ordered by
 * {@code Const.UPDATED_AT} in {@code Const.DESC} order.
 *
 * @param number
 *            The maximum number of rows to return
 * @return the first {@code number} active targets in that order
 */
public static List<Target> findLastActive(int number) {
    final String ordering = Const.UPDATED_AT + " " + Const.DESC;
    return find.where()
            .eq(Const.ACTIVE, true)
            .orderBy(ordering)
            .setMaxRows(number)
            .findList();
}
/**
 * Finds all active targets having a field URL whose domain equals the given
 * domain, ignoring case.
 *
 * @param domain
 *            the domain to match
 * @return the matching active targets
 */
public static List<Target> findAllTargetsForDomain(String domain) {
    return find.where()
            .eq(Const.ACTIVE, true)
            .ieq("fieldUrls.domain", domain)
            .findList();
}
/**
 * Finds all active targets having a field URL whose domain matches the given
 * LIKE pattern.
 *
 * @param domainLike
 *            the LIKE pattern, passed to the query unchanged
 * @return the matching active targets
 */
public static List<Target> findAllTargetsForDomainLike(String domainLike) {
    Logger.debug("Looking for domains like:"+domainLike);
    return find.where()
            .eq(Const.ACTIVE, true)
            .like("fieldUrls.domain", domainLike)
            .findList();
}
/**
 * Finds active targets having a field URL that ends with the parent domain,
 * or with the parent domain followed by one of the given paths. Each
 * candidate is tried both without and with a trailing slash, and the
 * resulting LIKE patterns are OR-ed together.
 *
 * @param parentDomain
 *            the domain every pattern is built around
 * @param parentPaths
 *            paths appended to the domain to form additional patterns
 * @return the matching active targets
 */
public static List<Target> findAllTargetsForParentUrls(String parentDomain,
        List<String> parentPaths) {
    Logger.debug("Looking for URLs like: %"+parentDomain+" + "+ parentPaths);
    // Every candidate is matched as-is and with a trailing slash.
    final String[] endings = { "", "/" };
    ExpressionList<Target> anyOf = find.where()
            .eq(Const.ACTIVE, true).disjunction();
    // The bare domain first:
    for (String ending : endings) {
        final String pattern = "%" + parentDomain + ending;
        Logger.debug("Adding LIKE " + pattern);
        anyOf = anyOf.like("fieldUrls.url", pattern);
    }
    // Then the domain followed by each parent path:
    for (String parentPath : parentPaths) {
        for (String ending : endings) {
            final String pattern = "%" + parentDomain + parentPath + ending;
            Logger.debug("Adding LIKE " + pattern);
            anyOf = anyOf.like("fieldUrls.url", pattern);
        }
    }
    return anyOf.findList();
}
/**
 * Not implemented: this method always throws {@link NotImplementedError}.
 *
 * @param subject
 *            ignored
 * @return never returns normally
 */
public boolean hasSubject(String subject) {
// The former implementation, Utils.hasElementInList(subject, fieldSubject),
// is disabled.
throw new NotImplementedError();
}
/**
 * Not implemented: this method always throws {@link NotImplementedError}.
 *
 * @param subject
 *            ignored
 * @return never returns normally
 */
public boolean hasSubSubject(String subject) {
// The former implementation, Utils.hasElementInList(subject, fieldSubSubject),
// is disabled.
throw new NotImplementedError();
}
/**
 * Delegates to {@code Utils.INSTANCE.hasElementInList}, passing
 * {@code subject} as the first argument and {@code subsubject} as the second.
 *
 * @param subsubject
 *            second argument of the delegate call
 * @param subject
 *            first argument of the delegate call
 * @return the delegate's result
 */
public static boolean hasSubSubject(String subsubject, String subject) {
    return Utils.INSTANCE.hasElementInList(subject, subsubject);
}
/**
 * Not implemented: this method always throws {@link NotImplementedError}.
 *
 * @param license
 *            ignored
 * @return never returns normally
 */
public boolean hasLicense(String license) {
// The former implementation, Utils.hasElementInList(license, fieldLicense),
// is disabled.
throw new NotImplementedError();
}
/**
 * Checks the given contact person against the URL of this target's author
 * by delegating to {@code Utils.INSTANCE.hasElementInList}.
 *
 * @param curContactPerson
 *            the contact person value to look for
 * @return the delegate's result, or {@code false} when this target has no
 *         author
 */
public boolean hasContactPerson(String curContactPerson) {
    // Without an author there is nothing to compare against.
    if (this.authorUser == null) {
        return false;
    }
    return Utils.INSTANCE.hasElementInList(curContactPerson, this.authorUser.url);
}
/**
 * Return a page of active Targets for the QA view.
 *
 * @param page
 *            Page to display
 * @param pageSize
 *            Number of targets per page
 * @param sortBy
 *            Target property used for sorting
 * @param order
 *            Sort order (either asc or desc)
 * @param filter
 *            Text that must occur in a field URL or in the title; ignored
 *            when empty
 * @param collections
 *            Collection IDs separated by ", " (double quotes are stripped);
 *            {@code null} or empty means no collection filter
 * @param qaIssueId
 *            QA issue to filter on; {@code null} or 0 means no QA filter
 * @return the requested page
 */
public static Page<Target> pageQa(int page, int pageSize, String sortBy,
        String order, String filter, String collections, Long qaIssueId) {
    Logger.debug("pageQa() collection: " + collections + ", qaStatus: " + qaIssueId + ", filter: " + filter);
    ExpressionList<Target> results = Target.find.fetch("fieldUrls").fetch("collections").fetch("instances").where();
    if (StringUtils.isNotEmpty(filter)) {
        results = results.add(Expr.or(
                Expr.icontains("fieldUrls.url", filter),
                Expr.icontains("title", filter))
        );
    }
    Logger.debug("qaIssueId: " + qaIssueId);
    if (qaIssueId == null || qaIssueId == 0) {
        Logger.debug("nothing selected: " + qaIssueId);
    } else {
        results = results.eq("qaIssue_id", qaIssueId);
    }
    // A missing selection is treated like an empty one instead of failing
    // with a NullPointerException.
    String collectionSelect = (collections == null) ? "" : collections.replace("\"", "");
    Logger.debug("collectionSelect: " + collectionSelect);
    if (StringUtils.isNotEmpty(collectionSelect)) {
        List<Collection> selectedCollections = new ArrayList<Collection>();
        for (String c : collectionSelect.split(", ")) {
            selectedCollections.add(Collection.findById(Long.valueOf(c)));
        }
        results = results.in("collections", selectedCollections);
    }
    results = results.eq("active", true);
    Page<Target> res = results.query().orderBy(sortBy + " " + order).findPagingList(pageSize).setFetchAhead(false).getPage(page);
    Logger.debug("results: " + res.getList().size());
    return res;
}
/**
 * Return a page of active Target objects matching the given filters.
 *
 * @param page
 *            Current page number
 * @param pageSize
 *            The number of Target entries on the page
 * @param sortBy
 *            Column to be sorted
 * @param order
 *            Sort order (either asc or desc)
 * @param filterUrl
 *            Text that must occur in a field URL or in the title
 * @param curatorId
 *            Author of the target; 0 disables this filter
 * @param organisationId
 *            The nominating organisation; 0 disables this filter
 * @param subjectSelect
 *            Subject IDs separated by ", "; empty disables this filter
 * @param crawlFrequencyName
 *            The crawl frequency; empty disables this filter
 * @param depthName
 *            The crawl depth; empty disables this filter
 * @param collectionSelect
 *            Collection IDs separated by ", "; empty disables this filter
 * @param licenseId
 *            The license; 0 disables this filter
 * @param flagId
 *            The flag assigned by user; 0 disables this filter
 * @return the requested page
 */
public static Page<Target> pageTargets(int page, int pageSize,
        String sortBy, String order, String filterUrl, Long curatorId,
        Long organisationId, String subjectSelect, String crawlFrequencyName,
        String depthName, String collectionSelect, Long licenseId,
        Long flagId) {
    // Base criteria: active targets whose URL or title contains the filter text.
    ExpressionList<Target> criteria = Target.find.fetch("fieldUrls").where()
            .eq(Const.ACTIVE, true)
            .add(Expr.or(
                    Expr.icontains("fieldUrls.url", filterUrl),
                    Expr.icontains("title", filterUrl)));

    // Optional single-value filters.
    if (curatorId != 0) {
        criteria = criteria.eq("authorUser.id", curatorId);
    }
    if (organisationId != 0) {
        criteria = criteria.eq("organisation.id", organisationId);
    }
    if (StringUtils.isNotEmpty(crawlFrequencyName)) {
        criteria = criteria.eq("crawlFrequency", crawlFrequencyName);
    }
    if (StringUtils.isNotEmpty(depthName)) {
        criteria = criteria.eq("depth", depthName);
    }
    if (licenseId != 0) {
        criteria = criteria.eq("licenses.id", licenseId);
    }
    if (flagId != 0) {
        criteria = criteria.eq("flags.id", flagId);
    }

    // Optional multi-value filters, given as ", "-separated ID lists.
    if (StringUtils.isNotEmpty(subjectSelect)) {
        final String[] tokens = subjectSelect.split(", ");
        final List<Long> subjectIds = new ArrayList<Long>();
        for (int i = 0; i < tokens.length; i++) {
            subjectIds.add(Long.valueOf(tokens[i]));
        }
        criteria = criteria.in("subjects.id", subjectIds);
    }
    if (StringUtils.isNotEmpty(collectionSelect)) {
        final String[] tokens = collectionSelect.split(", ");
        final List<Collection> selected = new ArrayList<Collection>();
        for (int i = 0; i < tokens.length; i++) {
            selected.add(Collection.findById(Long.valueOf(tokens[i])));
        }
        criteria = criteria.in("collections", selected);
    }

    final Page<Target> result = criteria.query()
            .orderBy(sortBy + " " + order)
            .findPagingList(pageSize)
            .setFetchAhead(false)
            .getPage(page);
    Logger.debug("Expression list size: " + result.getTotalRowCount());
    return result;
}
/**
 * Return a page of active Target objects for the QA reports.
 * <p>
 * Results are ordered by the requested property first and by field-URL
 * domain second. Both are put into a single ORDER BY clause because a second
 * {@code orderBy(String)} call on the query replaces the first one.
 *
 * @param page
 *            Page to display
 * @param pageSize
 *            Number of targets per page
 * @param sortBy
 *            Target property used for sorting
 * @param order
 *            Sort order (either asc or desc)
 * @param status
 *            The type of report QA; currently not used by the query
 * @param curatorId
 *            Author of the target; -1 disables this filter
 * @param organisationId
 *            The nominating organisation; -1 disables this filter
 * @param startDate
 *            The start date for filtering; currently not used by the query
 * @param endDate
 *            The end date for filtering; currently not used by the query
 * @param collectionId
 *            The collection; -1 disables this filter
 * @return the requested page
 */
public static Page<Target> pageReportsQa(int page, int pageSize,
        String sortBy, String order, String status, Long curatorId,
        Long organisationId, String startDate, String endDate,
        Long collectionId) {
    ExpressionList<Target> exp = Target.find.fetch("collections").fetch("fieldUrls").where();
    exp = exp.eq(Const.ACTIVE, true);
    if (curatorId != -1) {
        exp = exp.eq("authorUser.id", curatorId);
    }
    if (organisationId != -1) {
        exp = exp.eq("organisation.id", organisationId);
    }
    if (collectionId != -1) {
        exp = exp.eq("collections.id", collectionId);
    }
    // One combined clause: chaining two orderBy(String) calls would discard
    // the caller's sort and keep only the domain ordering.
    final String ordering = sortBy + " " + order + ", fieldUrls.domain";
    return exp.query().orderBy(ordering).findPagingList(pageSize).setFetchAhead(false).getPage(page);
}
/**
* Return a page of Target objects.
*
* @param page
* Page to display
* @param pageSize
* Number of targets per page
* @param sortBy
* Target property used for sorting
* @param order
* Sort order (either or asc or desc)
* @param curatorUrl
* @param organisationUrl
* @param startDate
* The start date for filtering
* @param endDate
* The end date for filtering
* @param npld
* The selection of NPLD scope rule for filtering
* @param crawlFrequency
* The crawl frequency value for filtering
* @param tld
* The top level domain setting for filtering
* @return
* @throws ParseException
*/
public static Page<Target> pageReportsCreation(int page, int pageSize,
String sortBy, String order, Long curatorId,
Long organisationId, String startDate, String endDate, String npld, String crawlFrequencyName, String tld) throws ActException {
ExpressionList<Target> exp = Target.find.fetch("fieldUrls").fetch("flags").fetch("licenses").fetch("subjects").fetch("collections").where();
Page<Target> res = null;
exp = exp.eq(Const.ACTIVE, true);
Logger.debug("" + curatorId + ", " + organisationId + ", " + startDate + ", " + npld + ", " + crawlFrequencyName + ", " + tld);
if (curatorId != -1) {
exp = exp.eq("authorUser.id", curatorId);
}
if (organisationId != -1) {
exp = exp.eq("organisation.id", organisationId);
}
if (StringUtils.isNotEmpty(crawlFrequencyName)) {
exp = exp.eq("crawlFrequency", crawlFrequencyName);
}
try {
if (StringUtils.isNotBlank(startDate)) {
Date date = Utils.INSTANCE.convertDate(startDate);
exp = exp.ge("createdAt", date);
}
if (StringUtils.isNotEmpty(endDate)) {
Date date = Utils.INSTANCE.convertDate(endDate);
exp = exp.le("createdAt", date);
}
} catch (ParseException e) {
throw new ActException(e);
}
// Create raw expr for matching domains:
String notdomexp = "";
String domexp = "";
Iterator<String> tlds = Scope.DOMAINS.iterator();
while( tlds.hasNext() ) {
String tnext = tlds.next();
notdomexp += "fieldUrls.domain NOT like '%"+ tnext + "'";
domexp += "fieldUrls.domain like '%"+ tnext + "'";
if( tlds.hasNext() ){
notdomexp += " and ";
domexp += " or ";
}
}
// new stuff
if (npld.equals(Const.NpldType.UK_POSTAL_ADDRESS.name())) {
exp = exp.eq("ukPostalAddress", true);
} else if (npld.equals(Const.NpldType.VIA_CORRESPONDENCE.name())) {
exp = exp.eq("viaCorrespondence", true);
} else if (npld.equals(Const.NpldType.NO_LD_CRITERIA_MET.name())) {
exp = exp.eq("noLdCriteriaMet", true);
} else if (npld.equals(Const.NpldType.PROFESSIONAL_JUDGEMENT.name())) {
exp = exp.eq("professionalJudgement", true);
} else if (npld.equals(Const.NpldType.NONE.name())) {
exp = exp.eq("ukPostalAddress", false);
exp = exp.eq("viaCorrespondence", false);
exp = exp.eq("noLdCriteriaMet", false);
exp = exp.eq("professionalJudgement", false);
exp = exp.eq("isUkHosting", false);
exp = exp.eq("isUkRegistration", false);
exp = exp.add(Expr.raw(notdomexp));
} else if (npld.equals(Const.NpldType.UK_TOP_LEVEL_DOMAIN.name())) {
exp = exp.add(Expr.raw(domexp));
} else if (npld.equals(Const.NpldType.UK_HOSTING.name())) {
// uk hosting
exp = exp.eq("isUkHosting", true);
} else if (npld.equals(Const.NpldType.UK_REGISTRATION.name())) {
// uk registration address
exp = exp.eq("isUkRegistration", true);
}
if (tld.equals("no")) {
// not a UK top level domain
exp = exp.eq("isTopLevelDomain", false);
}
if (tld.equals("yes") || npld.equals(Const.NpldType.UK_TOP_LEVEL_DOMAIN.name())) {
// UK top level domain
exp = exp.eq("isTopLevelDomain", true);
}
if (tld.equals(Const.EITHER)) {
// not a UK top level domain
// expressionList.eq("isTopLevelDomain", false);
// expressionList.eq("isTopLevelDomain", true);
}
// TODO: NONE SELECTED???
Logger.debug("pageReportsCreation() NPLD: " + npld);
/**
* Apply NPLD filters
*/
// if (!tld.equals(Const.EITHER)) {
// Logger.debug("pageReportsCreation() Apply NPLD filters");
// List<String> targetUrlCollection = new ArrayList<String>();
// Page<Target> tmp = exp.query()
// .orderBy(sortBy + " " + order)
// .findPagingList(pageSize)
// .setFetchAhead(false)
// .getPage(page);
// TODO: do we really need to query first?
// List<Target> tmp = expressionList.query()
// .orderBy(sortBy + " " + order)
// .findList();
//
//
// Logger.debug("pageReportsCreation() tmp list size: " + tmp.size());
// Iterator<Target> itr = tmp.iterator();
// while (itr.hasNext()) {
// Target target = itr.next();
// if (target != null
// && target.field_url != null
// && target.field_url.length() > 0
// && !target.field_url.toLowerCase().contains(Const.NONE)) {
//
// // target.isInScopeDomainValue =
// Target.isInScopeDomain(target.field_url, target.url);
// // // do a contains on target.field_url??? (url.contains(UK_DOMAIN)
// || url.contains(LONDON_DOMAIN) || url.contains(SCOT_DOMAIN))
// //
// // target.isUkHostingValue = Target.checkUkHosting(target.field_url);
// // target.isInScopeUkRegistrationValue =
// Target.isInScopeUkRegistration(target.field_url);
// }
//
// Logger.debug("pageReportsCreation() targetUrlCollection size: " +
// targetUrlCollection.size());
// expressionList = expressionList.in(Const.URL, targetUrlCollection);
// }
Query<Target> query = exp.query();
res = query.orderBy(sortBy + " " + order).findPagingList(pageSize)
.setFetchAhead(false).getPage(page);
Logger.debug("Expression list for targets created size: "
+ res.getTotalRowCount());
return res;
}
/**
 * Fetches one page of the Targets linked to the given collection.
 *
 * @param page
 *            Page index handed to the paging list
 * @param pageSize
 *            Number of targets per page
 * @param sortBy
 *            Target property used for sorting
 * @param order
 *            Sort direction appended after {@code sortBy} (asc or desc)
 * @param filter
 *            Accepted for signature compatibility; not used by this query
 * @param collectionId
 *            Id matched against {@code collections.id}
 * @return the requested page of targets
 */
public static Page<Target> pageCollectionTargets(int page, int pageSize,
        String sortBy, String order, String filter, Long collectionId) {
    ExpressionList<Target> inCollection = find.where().eq("collections.id", collectionId);
    String ordering = sortBy + " " + order;
    return inCollection.orderBy(ordering)
            .findPagingList(pageSize)
            .setFetchAhead(false)
            .getPage(page);
}
/**
 * Returns every Target linked to the given collection, without paging.
 *
 * @param collectionId
 *            Id matched against {@code collections.id}
 * @return all matching targets
 */
public static List<Target> allCollectionTargets(Long collectionId) {
    ExpressionList<Target> inCollection = find.where().eq("collections.id", collectionId);
    return inCollection.findList();
}
/**
 * Fetches one page of the Targets linked to the given subject.
 *
 * @param page
 *            Page index handed to the paging list
 * @param pageSize
 *            Number of targets per page
 * @param sortBy
 *            Target property used for sorting
 * @param order
 *            Sort direction appended after {@code sortBy} (asc or desc)
 * @param filter
 *            Accepted for signature compatibility; not used by this query
 * @param subjectId
 *            Id matched against {@code subjects.id}
 * @return the requested page of targets
 */
public static Page<Target> pageSubjectTargets(int page, int pageSize,
        String sortBy, String order, String filter, Long subjectId) {
    ExpressionList<Target> withSubject = find.where().eq("subjects.id", subjectId);
    String ordering = sortBy + " " + order;
    return withSubject.orderBy(ordering)
            .findPagingList(pageSize)
            .setFetchAhead(false)
            .getPage(page);
}
/**
 * Fetches one page of the active Targets owned by an organisation whose
 * URL or title contains the filter text (case-insensitive).
 *
 * @param page
 *            Page index handed to the paging list
 * @param pageSize
 *            Number of targets per page
 * @param sortBy
 *            Target property used for sorting
 * @param order
 *            Sort direction appended after {@code sortBy} (asc or desc)
 * @param filter
 *            Text matched against {@code fieldUrls.url} or {@code title}
 * @param organisationId
 *            Id matched against {@code organisation.id}
 * @return the requested page of targets
 */
public static Page<Target> pageOrganisationTargets(int page, int pageSize,
        String sortBy, String order, String filter, Long organisationId) {
    ExpressionList<Target> criteria = find.fetch("fieldUrls").where();
    criteria = criteria.eq("active", true);
    criteria = criteria.add(Expr.or(Expr.icontains("fieldUrls.url", filter),Expr.icontains("title", filter)));
    criteria = criteria.add(Expr.eq("organisation.id", organisationId));
    String ordering = sortBy + " " + order;
    return criteria.orderBy(ordering)
            .findPagingList(pageSize)
            .setFetchAhead(false)
            .getPage(page);
}
/**
 * Fetches one page of the active Targets matching the optional user,
 * subject and collection restrictions, whose URL or title contains the
 * filter text (case-insensitive).
 *
 * @param page
 *            Page index handed to the paging list
 * @param pageSize
 *            Number of targets per page
 * @param sortBy
 *            Target property used for sorting
 * @param order
 *            Sort direction appended after {@code sortBy} (asc or desc)
 * @param filter
 *            Text matched against {@code fieldUrls.url} or {@code title}
 * @param userId
 *            Author user id; no author restriction when {@code null}
 * @param subjectId
 *            Subject id; no subject restriction when {@code null} or 0
 * @param collectionId
 *            Collection id; no collection restriction when {@code null} or 0
 * @return the requested page of targets
 */
public static Page<Target> pageUserTargets(int page, int pageSize,
        String sortBy, String order, String filter, Long userId,
        Long subjectId, Long collectionId) {
    Logger.debug("pageUserTargets " + userId + ", " + subjectId + ", " + collectionId);
    ExpressionList<Target> exp = find.fetch("fieldUrls").fetch("collections").fetch("subjects").fetch("authorUser").where();
    exp = exp.eq(Const.ACTIVE, true);
    if (userId != null) {
        exp = exp.eq("authorUser.id", userId);
    }
    // Null ids are treated like 0 ("no restriction") instead of failing on unboxing.
    if (subjectId != null && subjectId.longValue() != 0L) {
        exp = exp.eq("subjects.id", subjectId);
    }
    if (collectionId != null && collectionId.longValue() != 0L) {
        exp = exp.eq("collections.id", collectionId);
    }
    exp = exp.add(Expr.or(Expr.icontains("fieldUrls.url", filter), Expr.icontains("title", filter)));
    // Query.orderBy(String) replaces any earlier order-by clause, so chaining two
    // calls dropped the requested sort. Build one clause: requested sort first,
    // domain as the tie-breaker.
    String ordering = "fieldUrls.domain";
    if (StringUtils.isNotBlank(sortBy)) {
        ordering = sortBy + " " + order + ", " + ordering;
    }
    return exp.query().orderBy(ordering).findPagingList(pageSize).setFetchAhead(false).getPage(page);
}
/**
 * Checks every field URL of the target with {@code Scope.INSTANCE.check}
 * and stops at the first URL that fails.
 *
 * @param target
 *            Target whose {@code fieldUrls} are checked
 * @return {@code true} when no URL fails the check
 * @throws WhoisException
 *             propagated from the scope check
 */
public static boolean isInScope(Target target) throws WhoisException {
    boolean allInScope = true;
    Iterator<FieldUrl> urls = target.fieldUrls.iterator();
    while (allInScope && urls.hasNext()) {
        FieldUrl candidate = urls.next();
        allInScope = Scope.INSTANCE.check(candidate.url, target, false);
    }
    return allInScope;
}
/**
 * Delegates to {@code Scope.isTopLevelDomain} for the given target.
 *
 * @param target
 *            Target to check
 * @return the result of the scope check
 * @throws ActException
 *             declared for callers; propagated if the delegate raises it
 */
public static boolean isInScopeDomain(Target target) throws ActException {
    boolean topLevel = Scope.isTopLevelDomain(target);
    return topLevel;
}
/**
 * Gets the active Targets for a crawl frequency. Two special values are
 * recognised (case-insensitive): 'all' excludes only NEVERCRAWL, and
 * 'frequent' excludes both NEVERCRAWL and DOMAINCRAWL. Any other value is
 * matched case-insensitively against {@code crawlFrequency}.
 *
 * @param frequency
 *            Frequency name or one of the special values
 * @return the matching active targets
 */
public static List<Target> getByFrequency(String frequency) {
    ExpressionList<Target> active = find.fetch("fieldUrls").where().eq(Const.ACTIVE, true);
    boolean wantAll = frequency.equalsIgnoreCase("all");
    boolean wantFrequent = !wantAll && frequency.equalsIgnoreCase("frequent");
    if (wantAll || wantFrequent) {
        // Both special values omit NEVERCRAWL:
        active = active.ne("crawlFrequency", Const.CrawlFrequency.NEVERCRAWL.name());
        if (wantFrequent) {
            // 'frequent' additionally omits DOMAINCRAWL:
            active = active.ne("crawlFrequency", Const.CrawlFrequency.DOMAINCRAWL.name());
        }
    } else {
        active = active.ieq("crawlFrequency", frequency);
    }
    return active.findList();
}
// Tolerance either side of the current time for export feeds. The export
// methods pass this value to Calendar.add with Calendar.HOUR, so it is in hours:
private static int EXPORT_TOLERANCE = 2;
/**
 * Provides the NPLD crawl export for a crawl frequency: targets returned by
 * {@code getByFrequency} whose crawl window overlaps "now" (widened by
 * EXPORT_TOLERANCE hours each side) and which are in legal-deposit scope,
 * directly or by inheritance.
 *
 * @param frequency
 *            The crawl frequency e.g. 'daily'
 * @return list of Target objects
 */
public static List<Target> exportLdFrequency(String frequency) {
    // Window around the current time:
    Calendar windowStart = Calendar.getInstance();
    windowStart.add(Calendar.HOUR, -EXPORT_TOLERANCE);
    Calendar windowEnd = Calendar.getInstance();
    windowEnd.add(Calendar.HOUR, EXPORT_TOLERANCE);
    Date earliestEnd = windowStart.getTime();
    Date latestStart = windowEnd.getTime();
    List<Target> selected = new ArrayList<Target>();
    Iterator<Target> candidates = getByFrequency(frequency).iterator();
    while (candidates.hasNext()) {
        Target candidate = candidates.next();
        // Skip targets whose crawl period has already ended:
        if (candidate.crawlEndDate != null && !candidate.crawlEndDate.after(earliestEnd)) {
            continue;
        }
        // Skip targets with no start date or one that has not been reached yet:
        if (candidate.crawlStartDate == null || !candidate.crawlStartDate.before(latestStart)) {
            continue;
        }
        // Keep only those in LD scope:
        if (candidate.isInScopeAllOrInheritedWithoutLicense()) {
            selected.add(candidate);
        }
    }
    Logger.debug("exportLdFrequency() resulting list size: " + selected.size());
    return selected;
}
/**
 * Provides the by-permission crawl export for a crawl frequency: targets
 * returned by {@code getByFrequency} whose crawl window overlaps "now"
 * (widened by EXPORT_TOLERANCE hours each side), which have a direct or
 * inherited licence and are NOT in legal-deposit scope.
 *
 * FIXME Current inheritance check is very slow.
 *
 * @param frequency
 *            The crawl frequency e.g. 'daily'
 * @return list of Target objects
 */
public static List<Target> exportByFrequency(String frequency) {
    // Window around the current time:
    Calendar windowStart = Calendar.getInstance();
    windowStart.add(Calendar.HOUR, -EXPORT_TOLERANCE);
    Calendar windowEnd = Calendar.getInstance();
    windowEnd.add(Calendar.HOUR, EXPORT_TOLERANCE);
    Date earliestEnd = windowStart.getTime();
    Date latestStart = windowEnd.getTime();
    List<Target> selected = new ArrayList<Target>();
    for (Target candidate : getByFrequency(frequency)) {
        boolean notEnded = candidate.crawlEndDate == null || candidate.crawlEndDate.after(earliestEnd);
        boolean started = candidate.crawlStartDate != null && candidate.crawlStartDate.before(latestStart);
        if (!notEnded || !started) {
            continue;
        }
        // This just includes the stuff that is Non-NPLD:
        if (candidate.indicateLicenses() && !candidate.isInScopeAllOrInheritedWithoutLicense()) {
            selected.add(candidate);
        }
    }
    Logger.debug("exportByFrequency() resulting list size: " + selected.size());
    return selected;
}
/**
 * Provides the open-access crawl export for a crawl frequency: targets
 * returned by {@code getByFrequency} that carry at least one direct licence.
 *
 * NOTE Does not include inherited licenses, as the purpose is to define a SURT scope (and so the license will be inherited upon access).
 *
 * @param frequency
 *            The crawl frequency e.g. 'daily'
 * @return list of Target objects
 */
public static List<Target> exportOAFrequency(String frequency) {
    List<Target> licensed = new ArrayList<Target>();
    for (Target candidate : getByFrequency(frequency)) {
        // All licenced material:
        if (candidate.hasLicenses()) {
            licensed.add(candidate);
        }
    }
    Logger.debug("exportOAFrequency() resulting list size: " + licensed.size());
    return licensed;
}
/**
 * Finds the most recently created target in an unsorted list.
 * Entries that are {@code null} or have no {@code createdAt} are ignored.
 * When several targets share the latest timestamp, the last one in list
 * order is returned.
 *
 * @param unsorted
 *            The unsorted list; may be {@code null} or empty
 * @return the target with the greatest {@code createdAt}, or {@code null}
 *         when there is no candidate
 */
public static Target getLatestCreatedTarget(List<Target> unsorted) {
    Target res = null;
    if (unsorted != null) {
        long latest = Long.MIN_VALUE;
        for (Target target : unsorted) {
            if (target == null || target.createdAt == null) {
                continue;
            }
            long created = target.createdAt.getTime();
            // The original never compared timestamps, so it simply kept the
            // last dated entry; only replace when this one is at least as new.
            if (res == null || created >= latest) {
                latest = created;
                res = target;
            }
        }
    }
    Logger.debug("getLatestCreatedTarget() res: " + res);
    return res;
}
/** Returns the current value of the {@code isUkHosting} field (may be {@code null}). */
public Boolean getIsUkHosting() {
    return this.isUkHosting;
}
/** Sets the {@code isUkHosting} field to the given value. */
public void setIsUkHosting(Boolean isUkHosting) {
this.isUkHosting = isUkHosting;
}
/** Returns the current value of the {@code field_url} field. */
@SuppressWarnings("rawtypes")
public List<Map> getField_url() {
    return this.field_url;
}
/** Sets the {@code field_url} field to the given value. */
@SuppressWarnings("rawtypes")
public void setField_url(List<Map> field_url) {
this.field_url = field_url;
}
/** Returns the current value of the {@code field_subject} field. */
public FieldModel getField_subject() {
    return this.field_subject;
}
/** Sets the {@code field_subject} field to the given value. */
public void setField_subject(FieldModel field_subject) {
this.field_subject = field_subject;
}
/** Returns the current value of the {@code depth} field. */
public String getDepth() {
    return this.depth;
}
/** Sets the {@code depth} field to the given value. */
public void setDepth(String depth) {
this.depth = depth;
}
/** Returns the current value of the {@code viaCorrespondence} field. */
public Boolean getViaCorrespondence() {
    return this.viaCorrespondence;
}
/** Sets the {@code viaCorrespondence} field to the given value. */
public void setViaCorrespondence(Boolean viaCorrespondence) {
this.viaCorrespondence = viaCorrespondence;
}
/** Returns the current value of the {@code ukPostalAddress} field. */
public Boolean getUkPostalAddress() {
    return this.ukPostalAddress;
}
/** Sets the {@code ukPostalAddress} field to the given value. */
public void setUkPostalAddress(Boolean ukPostalAddress) {
this.ukPostalAddress = ukPostalAddress;
}
/** Returns the current value of the {@code field_description} field. */
public Object getField_description() {
    return this.field_description;
}
/** Sets the {@code field_description} field to the given value. */
public void setField_description(Object field_description) {
this.field_description = field_description;
}
/** Returns the current value of the {@code field_uk_postal_address_url} field. */
public Object getField_uk_postal_address_url() {
    return this.field_uk_postal_address_url;
}
/** Sets the {@code field_uk_postal_address_url} field to the given value. */
public void setField_uk_postal_address_url(
Object field_uk_postal_address_url) {
this.field_uk_postal_address_url = field_uk_postal_address_url;
}
/** Returns the current value of the {@code field_nominating_organisation} field. */
public FieldModel getField_nominating_organisation() {
    return this.field_nominating_organisation;
}
/** Sets the {@code field_nominating_organisation} field to the given value. */
public void setField_nominating_organisation(
FieldModel field_nominating_organisation) {
this.field_nominating_organisation = field_nominating_organisation;
}
/** Returns the current value of the {@code field_suggested_collections} field. */
public List<FieldModel> getField_suggested_collections() {
    return this.field_suggested_collections;
}
/** Sets the {@code field_suggested_collections} field to the given value. */
public void setField_suggested_collections(
List<FieldModel> field_suggested_collections) {
this.field_suggested_collections = field_suggested_collections;
}
/** Returns the current value of the {@code field_collections} field. */
public List<FieldModel> getField_collections() {
    return this.field_collections;
}
/** Sets the {@code field_collections} field to the given value. */
public void setField_collections(List<FieldModel> field_collections) {
this.field_collections = field_collections;
}
/** Returns the current value of the {@code field_crawl_start_date} field. */
public Long getField_crawl_start_date() {
    return this.field_crawl_start_date;
}
/** Sets the {@code field_crawl_start_date} field to the given value. */
public void setField_crawl_start_date(Long field_crawl_start_date) {
this.field_crawl_start_date = field_crawl_start_date;
}
/** Returns the current value of the {@code field_crawl_end_date} field. */
public Long getField_crawl_end_date() {
    return this.field_crawl_end_date;
}
/** Sets the {@code field_crawl_end_date} field to the given value. */
public void setField_crawl_end_date(Long field_crawl_end_date) {
this.field_crawl_end_date = field_crawl_end_date;
}
/** Returns the current value of the {@code field_uk_domain} field. */
public String getField_uk_domain() {
    return this.field_uk_domain;
}
/** Sets the {@code field_uk_domain} field to the given value. */
public void setField_uk_domain(String field_uk_domain) {
this.field_uk_domain = field_uk_domain;
}
/** Returns the current value of the {@code field_license} field. */
public List<FieldModel> getField_license() {
    return this.field_license;
}
/** Sets the {@code field_license} field to the given value. */
public void setField_license(List<FieldModel> field_license) {
this.field_license = field_license;
}
/** Returns the current value of the {@code field_crawl_permission} field. */
public String getField_crawl_permission() {
    return this.field_crawl_permission;
}
/** Sets the {@code field_crawl_permission} field to the given value. */
public void setField_crawl_permission(String field_crawl_permission) {
this.field_crawl_permission = field_crawl_permission;
}
/** Returns the current value of the {@code field_collection_categories} field. */
public List<FieldModel> getField_collection_categories() {
    return this.field_collection_categories;
}
/** Sets the {@code field_collection_categories} field to the given value. */
public void setField_collection_categories(
List<FieldModel> field_collection_categories) {
this.field_collection_categories = field_collection_categories;
}
/** Returns the current value of the {@code specialDispensation} field. */
public Boolean getSpecialDispensation() {
    return this.specialDispensation;
}
/** Sets the {@code specialDispensation} field to the given value. */
public void setSpecialDispensation(Boolean specialDispensation) {
this.specialDispensation = specialDispensation;
}
/** Returns the current value of the {@code specialDispensationReason} field. */
public String getSpecialDispensationReason() {
    return this.specialDispensationReason;
}
/** Sets the {@code specialDispensationReason} field to the given value. */
public void setSpecialDispensationReason(String specialDispensationReason) {
this.specialDispensationReason = specialDispensationReason;
}
/** Returns the current value of the {@code field_qa_status} field. */
public FieldModel getField_qa_status() {
    return this.field_qa_status;
}
/** Sets the {@code field_qa_status} field to the given value. */
public void setField_qa_status(FieldModel field_qa_status) {
this.field_qa_status = field_qa_status;
}
/** Returns the current value of the {@code liveSiteStatus} field. */
public String getLiveSiteStatus() {
    return this.liveSiteStatus;
}
/** Sets the {@code liveSiteStatus} field to the given value. */
public void setLiveSiteStatus(String liveSiteStatus) {
this.liveSiteStatus = liveSiteStatus;
}
/** Returns the current value of the {@code field_notes} field. */
public Object getField_notes() {
    return this.field_notes;
}
/** Sets the {@code field_notes} field to the given value. */
public void setField_notes(Object field_notes) {
this.field_notes = field_notes;
}
/** Returns the current value of the {@code wctId} field. */
public Long getWctId() {
    return this.wctId;
}
/** Sets the {@code wctId} field to the given value. */
public void setWctId(Long wctId) {
this.wctId = wctId;
}
/** Returns the current value of the {@code sptId} field. */
public Long getSptId() {
    return this.sptId;
}
/** Sets the {@code sptId} field to the given value. */
public void setSptId(Long sptId) {
this.sptId = sptId;
}
/** Returns the current value of the {@code field_snapshots} field. */
public List<FieldModel> getField_snapshots() {
    return this.field_snapshots;
}
/** Sets the {@code field_snapshots} field to the given value. */
public void setField_snapshots(List<FieldModel> field_snapshots) {
this.field_snapshots = field_snapshots;
}
/** Returns the current value of the {@code noLdCriteriaMet} field. */
public Boolean getNoLdCriteriaMet() {
    return this.noLdCriteriaMet;
}
/** Sets the {@code noLdCriteriaMet} field to the given value. */
public void setNoLdCriteriaMet(Boolean noLdCriteriaMet) {
this.noLdCriteriaMet = noLdCriteriaMet;
}
/** Returns the current value of the {@code keySite} field. */
public Boolean getKeySite() {
    return this.keySite;
}
/** Sets the {@code keySite} field to the given value. */
public void setKeySite(Boolean keySite) {
this.keySite = keySite;
}
/** Returns the current value of the {@code hidden} field. */
public Boolean getHidden() {
    return this.hidden;
}
/** Sets the {@code hidden} field to the given value. */
public void setHidden(Boolean hidden) {
this.hidden = hidden;
}
/** Returns the current value of the {@code field_uk_geoip} field. */
public String getField_uk_geoip() {
    return this.field_uk_geoip;
}
/** Sets the {@code field_uk_geoip} field to the given value. */
public void setField_uk_geoip(String field_uk_geoip) {
this.field_uk_geoip = field_uk_geoip;
}
/** Returns the current value of the {@code professionalJudgement} field. */
public Boolean getProfessionalJudgement() {
    return this.professionalJudgement;
}
/** Sets the {@code professionalJudgement} field to the given value. */
public void setProfessionalJudgement(Boolean professionalJudgement) {
this.professionalJudgement = professionalJudgement;
}
/** Returns the current value of the {@code professionalJudgementExp} field. */
public String getProfessionalJudgementExp() {
    return this.professionalJudgementExp;
}
/** Sets the {@code professionalJudgementExp} field to the given value. */
public void setProfessionalJudgementExp(String professionalJudgementExp) {
this.professionalJudgementExp = professionalJudgementExp;
}
/** Returns the current value of the {@code ignoreRobotsTxt} field. */
public Boolean getIgnoreRobotsTxt() {
    return this.ignoreRobotsTxt;
}
/** Sets the {@code ignoreRobotsTxt} field to the given value. */
public void setIgnoreRobotsTxt(Boolean ignoreRobotsTxt) {
this.ignoreRobotsTxt = ignoreRobotsTxt;
}
/** Returns the current value of the {@code field_instances} field. */
public Object getField_instances() {
    return this.field_instances;
}
/** Sets the {@code field_instances} field to the given value. */
public void setField_instances(Object field_instances) {
this.field_instances = field_instances;
}
/** Returns the current value of the {@code format} field. */
public String getFormat() {
    return this.format;
}
/** Sets the {@code format} field to the given value. */
public void setFormat(String format) {
this.format = format;
}
/**
 * Joins the URLs of all {@code fieldUrls} into one string separated by
 * ", ". URLs appear in the list's current order; no sorting by position is
 * applied here (an earlier position sort is disabled). Each URL is also
 * logged at info level.
 *
 * @return the comma-separated URL string
 */
@JsonIgnore
@Transient
public String fieldUrl() {
    List<String> collected = new ArrayList<String>();
    Iterator<FieldUrl> entries = fieldUrls.iterator();
    while (entries.hasNext()) {
        FieldUrl fieldUrl = entries.next();
        Logger.info("Adding URL to string: "+fieldUrl.id+":"+fieldUrl.url+" "+fieldUrl.position);
        collected.add(fieldUrl.url);
    }
    return StringUtils.join(collected, ", ");
}
/**
 * Returns the URL of the first entry in {@code fieldUrls}.
 *
 * @return the first URL, or {@code null} when the list is null or empty
 */
@JsonIgnore
@Transient
public String primaryUrl() {
    if (this.fieldUrls == null || this.fieldUrls.isEmpty()) {
        return null;
    }
    return this.fieldUrls.get(0).url;
}
/** Returns the ids from {@code subjectIds()} joined with ", ". */
@JsonIgnore
@Transient
public String subjectIdsAsString() {
    List<Long> ids = this.subjectIds();
    return StringUtils.join(ids, ", ");
}
/** Returns a new list holding the id of every entry in {@code subjects}, in list order. */
@JsonIgnore
@Transient
public List<Long> subjectIds() {
    List<Long> collected = new ArrayList<Long>();
    for (Subject entry : this.subjects) {
        collected.add(entry.id);
    }
    return collected;
}
/** Returns the names of all entries in {@code subjects} joined with ", ". */
@JsonIgnore
@Transient
public String subjectsAsString() {
    List<String> labels = new ArrayList<String>();
    for (Subject entry : this.subjects) {
        labels.add(entry.name);
    }
    return StringUtils.join(labels, ", ");
}
/** Returns a new list holding the id of every entry in {@code collections}, in list order. */
@JsonIgnore
@Transient
public List<Long> collectionIds() {
    List<Long> collected = new ArrayList<Long>();
    for (Collection entry : this.collections) {
        collected.add(entry.id);
    }
    return collected;
}
/** Returns the ids from {@code collectionIds()} joined with ", ". */
@JsonIgnore
@Transient
public String collectionIdsAsString() {
    List<Long> ids = collectionIds();
    return StringUtils.join(ids, ", ");
}
/** Returns the names of all entries in {@code collections} joined with ", ". */
@JsonIgnore
@Transient
public String collectionsAsString() {
    List<String> labels = new ArrayList<String>();
    for (Collection entry : this.collections) {
        labels.add(entry.name);
    }
    return StringUtils.join(labels, ", ");
}
/**
 * Returns the names of all entries in {@code licenses} joined with ", ".
 * The method name and the collected names are written to the debug log.
 */
@JsonIgnore
@Transient
public String licensesAsString() {
    Logger.debug("licensesAsString");
    List<String> labels = new ArrayList<String>();
    for (License entry : this.licenses) {
        labels.add(entry.name);
    }
    Logger.debug(String.valueOf(labels));
    return StringUtils.join(labels, ", ");
}
/** Returns the {@code isUkHosting} flag as a primitive, treating {@code null} as {@code false}. */
@JsonIgnore
public boolean isUkHosting() {
    return Boolean.TRUE.equals(this.isUkHosting);
}
/** Returns the {@code isTopLevelDomain} flag as a primitive, treating {@code null} as {@code false}. */
@JsonIgnore
public boolean isTopLevelDomain() {
    return Boolean.TRUE.equals(this.isTopLevelDomain);
}
/**
 * Returns the {@code isUkRegistration} flag as a primitive, treating
 * {@code null} as {@code false}.
 *
 * @throws WhoisException
 *             declared in the signature; this body only reads the stored flag
 */
@JsonIgnore
public boolean isUkRegistration() throws WhoisException {
    return Boolean.TRUE.equals(this.isUkRegistration);
}
/** Returns the names of all entries in {@code tags} joined with ", ". */
@JsonIgnore
public String tagsAsString() {
    List<String> labels = new ArrayList<String>();
    for (Tag entry : this.tags) {
        labels.add(entry.name);
    }
    return StringUtils.join(labels, ", ");
}
/** Returns the names of all entries in {@code flags} joined with ", ". */
@JsonIgnore
public String flagsAsString() {
    List<String> labels = new ArrayList<String>();
    for (Flag entry : this.flags) {
        labels.add(entry.name);
    }
    return StringUtils.join(labels, ", ");
}
/**
 * Returns the publication date as text. When {@code dateOfPublication} is
 * set, it is formatted as dd-MM-yyyy in UTC and stored in
 * {@code dateOfPublicationText} first; otherwise the text already stored
 * is returned unchanged.
 */
public String getDateOfPublicationText() {
    if (dateOfPublication == null) {
        return dateOfPublicationText;
    }
    DateFormat utcDay = new SimpleDateFormat("dd-MM-yyyy");
    utcDay.setTimeZone(TimeZone.getTimeZone("UTC"));
    dateOfPublicationText = utcDay.format(dateOfPublication);
    return dateOfPublicationText;
}
/**
 * Refreshes {@code webFormDateText} from {@code webFormDate} (empty string
 * when the date is null) and returns it.
 */
public String getWebFormDateText() {
    webFormDateText = (webFormDate == null)
            ? ""
            : Utils.INSTANCE.convertToDateString(webFormDate);
    return webFormDateText;
}
/**
 * Refreshes {@code crawlStartDateText} from {@code crawlStartDate} (empty
 * string when the date is null) and returns it.
 */
public String getCrawlStartDateText() {
    crawlStartDateText = (crawlStartDate == null)
            ? ""
            : Utils.INSTANCE.convertToDateTime(crawlStartDate);
    return crawlStartDateText;
}
/**
 * Refreshes {@code crawlEndDateText} from {@code crawlEndDate} (empty
 * string when the date is null) and returns it.
 */
public String getCrawlEndDateText() {
    crawlEndDateText = (crawlEndDate == null)
            ? ""
            : Utils.INSTANCE.convertToDateTime(crawlEndDate);
    return crawlEndDateText;
}
/**
 * Returns {@code crawlStartDateISO}, refreshing it from
 * {@code crawlStartDate} first when that date is set. With no date, the
 * previously stored text is returned as-is.
 */
public String getCrawlStartDateISO() {
    if (crawlStartDate == null) {
        return crawlStartDateISO;
    }
    crawlStartDateISO = Utils.INSTANCE.convertToDateTimeISO(crawlStartDate);
    return crawlStartDateISO;
}
/**
 * Returns {@code crawlEndDateISO}, refreshing it from {@code crawlEndDate}
 * first when that date is set. With no date, the previously stored text is
 * returned as-is.
 */
public String getCrawlEndDateISO() {
    if (crawlEndDate == null) {
        return crawlEndDateISO;
    }
    crawlEndDateISO = Utils.INSTANCE.convertToDateTimeISO(crawlEndDate);
    return crawlEndDateISO;
}
/**
 * Tells whether a crawl is scheduled for this target. It is not scheduled
 * when the crawl end date has passed, when there is no crawl start date, or
 * when the crawl frequency is NEVERCRAWL or DOMAINCRAWL.
 *
 * @return {@code true} when none of those exclusions applies
 */
@JsonIgnore
public boolean isScheduledToCrawl() {
    Date rightNow = Calendar.getInstance().getTime();
    boolean expired = this.crawlEndDate != null && rightNow.after(this.crawlEndDate);
    if (expired) {
        return false;
    }
    boolean neverCrawl = Const.CrawlFrequency.NEVERCRAWL.name().equals(crawlFrequency);
    boolean domainCrawl = Const.CrawlFrequency.DOMAINCRAWL.name().equals(crawlFrequency);
    return this.crawlStartDate != null && !neverCrawl && !domainCrawl;
}
/**
 * Works out the next crawl launch date by stepping forward from the crawl
 * start date in units of the crawl frequency until the current time is
 * reached or passed.
 *
 * @return the next launch date, or {@code null} when no crawl is scheduled
 * @throws Exception
 *             when stepping is needed and the crawl frequency is not one of
 *             DAILY, WEEKLY, MONTHLY, QUARTERLY, SIXMONTHLY or ANNUAL
 */
@JsonIgnore
public Date getNextScheduledCrawlDate() throws Exception {
    if (!this.isScheduledToCrawl()) {
        return null;
    }
    Calendar now = Calendar.getInstance();
    Calendar scheduled = Calendar.getInstance();
    scheduled.setTime(crawlStartDate);
    // Pull very old start dates up to last year so the stepping loop stays short:
    int yearsBehind = now.get(Calendar.YEAR) - scheduled.get(Calendar.YEAR);
    if (yearsBehind > 1) {
        scheduled.set(Calendar.YEAR, now.get(Calendar.YEAR) - 1);
    }
    // Translate the frequency into a Calendar field and amount; -1 marks "unknown".
    int stepField = -1;
    int stepAmount = 0;
    if (Const.CrawlFrequency.DAILY.name().equals(crawlFrequency)) {
        stepField = Calendar.DAY_OF_YEAR;
        stepAmount = 1;
    } else if (Const.CrawlFrequency.WEEKLY.name().equals(crawlFrequency)) {
        stepField = Calendar.WEEK_OF_YEAR;
        stepAmount = 1;
    } else if (Const.CrawlFrequency.MONTHLY.name().equals(crawlFrequency)) {
        stepField = Calendar.MONTH;
        stepAmount = 1;
    } else if (Const.CrawlFrequency.QUARTERLY.name().equals(crawlFrequency)) {
        stepField = Calendar.MONTH;
        stepAmount = 3;
    } else if (Const.CrawlFrequency.SIXMONTHLY.name().equals(crawlFrequency)) {
        stepField = Calendar.MONTH;
        stepAmount = 6;
    } else if (Const.CrawlFrequency.ANNUAL.name().equals(crawlFrequency)) {
        stepField = Calendar.YEAR;
        stepAmount = 1;
    }
    while (scheduled.before(now)) {
        // Only complain about an unknown frequency when a step is actually required:
        if (stepField < 0) {
            throw new Exception("Unknown crawl frequency "+this.crawlFrequency);
        }
        scheduled.add(stepField, stepAmount);
    }
    return scheduled.getTime();
}
/**
 * Describes the next scheduled crawl date as relative text using
 * {@code TimeAgo.timeUntil}.
 *
 * @return the relative text, or "unscheduled" when there is no next date
 * @throws Exception
 *             propagated from {@code getNextScheduledCrawlDate()}
 */
@JsonIgnore
public String getNextScheduledCrawlDateAsString() throws Exception {
    long current = System.currentTimeMillis();
    Date next = getNextScheduledCrawlDate();
    if (next == null) {
        return "unscheduled";
    }
    long difference = next.getTime() - current;
    // The utility is handed the date mirrored into the past (now minus the
    // distance to the future date); per the original note this is what makes
    // it report a future 'time until' value.
    long mirrored = current - difference;
    TimeAgo formatter = new TimeAgo();
    return formatter.timeUntil(mirrored);
}
/**
 * Checks whether this target is in NPLD scope on its own merits, without
 * considering licences: either the manual scope check passes, or one of
 * the cached flags (top-level domain, UK hosting, UK registration) is true.
 *
 * @return result as a flag
 */
@JsonIgnore
public boolean isInScopeAllWithoutLicense() {
    Logger.debug("isInScopeAllWithoutLicense()");
    // Manual scope is evaluated first; the cached flags are only read if it fails.
    return this.checkManualScope()
            || Boolean.TRUE.equals(this.isTopLevelDomain)
            || Boolean.TRUE.equals(this.isUkHosting)
            || Boolean.TRUE.equals(this.isUkRegistration);
}
/** JSON property exposing the result of {@code isInScopeAllOrInheritedWithoutLicense()}. */
@JsonProperty
@Transient
public boolean getInScopeForLegalDeposit() {
    boolean inScope = this.isInScopeAllOrInheritedWithoutLicense();
    return inScope;
}
/**
 * No-op setter: the argument is ignored and no state is changed.
 * NOTE(review): presumably kept so bean/JSON binding finds a setter that
 * pairs with {@code getInScopeForLegalDeposit()} — confirm before removing.
 */
@JsonIgnore
public void setInScopeForLegalDeposit(boolean dummy) {
}
/**
 * Checks whether this target is in NPLD scope either on its own merits
 * ({@code isInScopeAllWithoutLicense()}) or through inherited scope
 * ({@code hasInheritedNpldScope()}). The inherited check only runs when the
 * direct check fails.
 */
@JsonIgnore
public boolean isInScopeAllOrInheritedWithoutLicense() {
    Logger.debug("isInScopeAllOrInheritedWithoutLicense()");
    return this.isInScopeAllWithoutLicense() || this.hasInheritedNpldScope();
}
/**
 * Returns the object describing the direct and inherited licence state of
 * this target. It is built on first use and then reused until
 * {@code clearOverallLicenseStatusCache()} resets it.
 *
 * @return the cached or newly created status object
 */
@JsonIgnore
@Transient
public OverallLicenseStatus getOverallLicenseStatus() {
    if (this.overallLicenseStatus != null) {
        return this.overallLicenseStatus;
    }
    this.overallLicenseStatus = new OverallLicenseStatus(this);
    return this.overallLicenseStatus;
}
/**
 * Drops the cached overall licence status so the next call to
 * {@code getOverallLicenseStatus()} builds a fresh one.
 */
@JsonIgnore
@Transient
public void clearOverallLicenseStatusCache() {
this.overallLicenseStatus = null;
}
/**
 * Reports the {@code inheritedLicense} flag of the overall licence status.
 *
 * @return the flag value
 */
@JsonIgnore
public boolean hasInheritedLicense() {
    return getOverallLicenseStatus().inheritedLicense;
}
/** Reports the {@code inheritedNPLDScope} flag of the overall licence status. */
@JsonIgnore
public boolean hasInheritedNpldScope() {
    return getOverallLicenseStatus().inheritedNPLDScope;
}
/** Alias for {@code hasInheritedNpldScope()}. */
@JsonIgnore
public boolean indicateNpldStatus() {
    boolean inherited = hasInheritedNpldScope();
    return inherited;
}
/** Returns the {@code NPLDParents} set of the overall licence status. */
@JsonIgnore
public Set<Target> getNpldStatusList() {
    return getOverallLicenseStatus().NPLDParents;
}
/** Reports the {@code licensedOrPending} flag of the overall licence status. */
public boolean indicateUkwaLicenceStatus() {
    return getOverallLicenseStatus().licensedOrPending;
}
/**
 * Returns the {@code licenseParents} set of the overall licence status.
 * Per the original description, this is meant to be the Target records at
 * a higher level in the domain that have an Open UKWA Licence request in
 * progress (any request status except None, i.e. Queued, Pending, Refused
 * or Granted).
 *
 * @return target set
 */
@JsonIgnore
public Set<Target> getUkwaLicenceStatusList() {
    return getOverallLicenseStatus().licenseParents;
}
/** Tells whether {@code licenses} is non-null and holds at least one entry. */
@JsonIgnore
public boolean hasLicenses() {
    if (this.licenses == null) {
        return false;
    }
    return !this.licenses.isEmpty();
}
/** JSON property exposing the result of {@code indicateLicenses()}. */
@JsonProperty
@Transient
public boolean getHasOpenAccessLicense() {
    boolean licensed = this.indicateLicenses();
    return licensed;
}
/**
 * No-op setter: the argument is ignored and no state is changed.
 * NOTE(review): presumably kept so bean/JSON binding finds a setter that
 * pairs with {@code getHasOpenAccessLicense()} — confirm before removing.
 */
@JsonIgnore
public void setHasOpenAccessLicense(boolean dummy) {
}
/**
 * Tells whether this target has a licence, either directly or inherited.
 * The inherited check only runs when there is no direct licence.
 */
@JsonIgnore
public boolean indicateLicenses() {
    if (hasLicenses()) {
        return true;
    }
    return hasInheritedLicense();
}
/**
 * Tells whether licence creation is enabled: the licence status is blank
 * or equals NOT_INITIATED.
 */
public boolean enableLicenseCreation() {
    if (StringUtils.isBlank(this.licenseStatus)) {
        return true;
    }
    return this.isNotInitiated();
}
/**
 * Tells whether the licence status is REFUSED, EMAIL_REJECTED or SUPERSEDED.
 * Original note: only sys admin and archivist can create.
 */
@JsonIgnore
public boolean hasInvalidLicenses() {
    if (this.isRefused()) {
        return true;
    }
    if (this.isEmailRejected()) {
        return true;
    }
    return this.isSuperseded();
}
/**
 * Tells whether this target's licence status is non-empty and equal to the
 * given status name.
 *
 * @param licenseStatus
 *            Status name to compare against
 * @return {@code true} on an exact match
 */
@JsonIgnore
public boolean hasStatus(String licenseStatus) {
    if (StringUtils.isEmpty(this.licenseStatus)) {
        return false;
    }
    return this.licenseStatus.equals(licenseStatus);
}
/** Tells whether the licence status equals {@code LicenseStatus.GRANTED}. */
@JsonIgnore
public boolean isGranted() {
    String wanted = LicenseStatus.GRANTED.name();
    return hasStatus(wanted);
}
/** Tells whether the licence status equals {@code LicenseStatus.NOT_INITIATED}. */
@JsonIgnore
public boolean isNotInitiated() {
    String wanted = LicenseStatus.NOT_INITIATED.name();
    return hasStatus(wanted);
}
/** Tells whether the licence status equals {@code LicenseStatus.QUEUED}. */
@JsonIgnore
public boolean isQueued() {
    String wanted = LicenseStatus.QUEUED.name();
    return hasStatus(wanted);
}
/** Tells whether the licence status equals {@code LicenseStatus.PENDING}. */
@JsonIgnore
public boolean isPending() {
    String wanted = LicenseStatus.PENDING.name();
    return hasStatus(wanted);
}
/** Tells whether the licence status equals {@code LicenseStatus.REFUSED}. */
@JsonIgnore
public boolean isRefused() {
    String wanted = LicenseStatus.REFUSED.name();
    return hasStatus(wanted);
}
/** Tells whether the licence status equals {@code LicenseStatus.EMAIL_REJECTED}. */
@JsonIgnore
public boolean isEmailRejected() {
    String wanted = LicenseStatus.EMAIL_REJECTED.name();
    return hasStatus(wanted);
}
/** Tells whether the licence status equals {@code LicenseStatus.SUPERSEDED}. */
@JsonIgnore
public boolean isSuperseded() {
    String wanted = LicenseStatus.SUPERSEDED.name();
    return hasStatus(wanted);
}
/**
 * Logs the call and returns the result of {@code hasLicenses()}; no licence
 * status is inspected here. Original note: to helper.
 */
@JsonIgnore
public boolean hasGrantedLicense() {
    Logger.debug("hasGrantedLicense");
    boolean anyLicense = this.hasLicenses();
    return anyLicense;
}
/** Tells whether this target has a {@code qaIssue} set. */
public boolean hasQaIssue() {
    boolean flagged = this.qaIssue != null;
    return flagged;
}
/**
 * Looks up a target by id and tells whether it has a {@code qaIssue} set.
 *
 * @param targetId
 *            Id passed to {@code Target.findById}
 * @return {@code true} when the target exists and has a QA issue;
 *         {@code false} when the lookup yields no target
 */
public static boolean hasQaIssue(Long targetId) {
    Target target = Target.findById(targetId);
    // Guard against an unknown id: previously a null lookup result caused an NPE.
    return target != null && target.qaIssue != null;
}
/**
 * Returns the last entry of {@code crawlPermissions}.
 *
 * @return the last element, or {@code null} when the list is null or empty
 */
@JsonIgnore
public CrawlPermission getLatestCrawlPermission() {
    if (crawlPermissions == null || crawlPermissions.isEmpty()) {
        return null;
    }
    int lastIndex = crawlPermissions.size() - 1;
    return crawlPermissions.get(lastIndex);
}
/**
 * Returns the last entry of {@code instances}.
 *
 * @return the last element, or {@code null} when the list is null or empty
 */
@JsonIgnore
public Instance getLatestInstance() {
    if (instances == null || instances.isEmpty()) {
        return null;
    }
    int lastIndex = instances.size() - 1;
    return instances.get(lastIndex);
}
/** Returns the result of {@code Instance.findLastInstanceByTarget} for this target's id. */
@JsonIgnore
public Instance findLastInstance() {
    return Instance.findLastInstanceByTarget(this.id);
}
/** Tells whether {@code instances} is non-null and holds at least one entry. */
@JsonIgnore
public boolean checkInstance() {
    return this.instances != null && !this.instances.isEmpty();
}
/**
 * Tells whether this target may be deleted: {@code checkLicense()} and
 * {@code checkInstance()} must both be false and {@code collections} must
 * be null or empty. The sizes of the three lists are written to the debug
 * log first.
 *
 * @return {@code true} when nothing blocks deletion
 */
@JsonIgnore
public boolean isDeletable() {
    // The decision below tolerates null lists, so the diagnostics must too;
    // the unguarded size() calls could throw before the check was reached.
    Logger.debug("collections size...." + (this.collections == null ? "null" : String.valueOf(this.collections.size())));
    Logger.debug("licenses size...." + (this.licenses == null ? "null" : String.valueOf(this.licenses.size())));
    Logger.debug("instances size...." + (this.instances == null ? "null" : String.valueOf(this.instances.size())));
    return (!this.checkLicense() && !this.checkInstance() && CollectionUtils.isEmpty(this.collections));
}
/**
 * Persistence callback registered for both insert and update: runs
 * {@code runChecks()}, with a debug log line before and after the call.
 */
@PreUpdate
@PrePersist
public void preSaveChecks() {
    Logger.debug("before persist");
    this.runChecks();
    Logger.debug("after persist");
}
/**
 * Recomputes the three cached scope flags from {@code Scope}, in the order
 * UK hosting, top-level domain, UK registration, then logs the results.
 */
public void runChecks() {
    this.isUkHosting = Scope.INSTANCE.isUkHosting(this);
    this.isTopLevelDomain = Scope.isTopLevelDomain(this);
    this.isUkRegistration = Scope.INSTANCE.isUkRegistration(this);
    StringBuilder summary = new StringBuilder("runChecks done: ");
    summary.append(this.isUkHosting).append(" ");
    summary.append(this.isTopLevelDomain).append(" ");
    summary.append(this.isUkRegistration);
    Logger.debug(summary.toString());
}
/**
 * Returns the entries of {@code collections} that have no parent, in list
 * order. The total and filtered counts are written to the debug log.
 */
@JsonIgnore
@Transient
public List<Collection> getCollectionCategories() {
    List<Collection> topLevel = new ArrayList<Collection>();
    Iterator<Collection> all = this.collections.iterator();
    while (all.hasNext()) {
        Collection candidate = all.next();
        if (candidate.parent != null) {
            continue;
        }
        topLevel.add(candidate);
    }
    Logger.debug(this.collections.size() + " - " + topLevel.size());
    return topLevel;
}
/** JSON property: a new list holding the id of every entry in {@code collections}, in list order. */
@JsonProperty
@Transient
public List<Long> getCollectionIds() {
    List<Long> collected = new ArrayList<Long>();
    Iterator<Collection> all = collections.iterator();
    while (all.hasNext()) {
        collected.add(all.next().id);
    }
    return collected;
}
/**
 * JSON property setter: replaces {@code collections} with the Collection
 * records looked up for the given ids, in the order given.
 *
 * @param ids
 *            Collection ids; {@code null} is treated as an empty list. Ids
 *            for which the lookup yields nothing are skipped with a warning.
 */
@JsonProperty
@Transient
public void setCollectionIds(List<Long> ids) {
    List<Collection> newList = new ArrayList<Collection>();
    if (ids != null) {
        for (Long newid : ids) {
            Collection found = Collection.findById(newid);
            // Guard in case the lookup returns null for an unknown id, so that
            // no null element ends up in the association list.
            if (found == null) {
                Logger.warn("setCollectionIds() no Collection found for id " + newid + "; skipping");
                continue;
            }
            newList.add(found);
        }
    }
    this.collections = newList;
}
/** Returns the current value of the {@code field_urls} field. */
public List<String> getField_urls() {
    return this.field_urls;
}
/** Sets the {@code field_urls} field to the given value. */
public void setField_urls(List<String> field_urls) {
this.field_urls = field_urls;
}
/** Returns the current value of the {@code field_subjects} field. */
public List<String> getField_subjects() {
    return this.field_subjects;
}
/** Sets the {@code field_subjects} field to the given value. */
public void setField_subjects(List<String> field_subjects) {
this.field_subjects = field_subjects;
}
/** Returns the current value of the {@code field_nominating_org} field. */
public String getField_nominating_org() {
    return this.field_nominating_org;
}
/** Sets the {@code field_nominating_org} field to the given value. */
public void setField_nominating_org(String field_nominating_org) {
this.field_nominating_org = field_nominating_org;
}
/** Returns the current value of the {@code field_collection_cats} field. */
public List<String> getField_collection_cats() {
    return this.field_collection_cats;
}
/** Sets the {@code field_collection_cats} field to the given value. */
public void setField_collection_cats(List<String> field_collection_cats) {
this.field_collection_cats = field_collection_cats;
}
/** Returns the current value of the {@code selector} field; excluded from JSON output. */
@JsonIgnore
public String getSelector() {
    return this.selector;
}
/** Sets the {@code selector} field to the given value. */
public void setSelector(String selector) {
this.selector = selector;
}
/** Returns the current value of the {@code ukPostalAddressUrl} field. */
public String getUkPostalAddressUrl() {
    return this.ukPostalAddressUrl;
}
/** Sets the {@code ukPostalAddressUrl} field to the given value. */
public void setUkPostalAddressUrl(String ukPostalAddressUrl) {
this.ukPostalAddressUrl = ukPostalAddressUrl;
}
/**
 * Renders this target as a single-line {@code "Target [name=value, ...]"}
 * string, listing the fields below in a fixed order. The {@code watched}
 * entry is obtained by calling {@code isWatched()}.
 *
 * @return the textual dump of this target's fields
 */
@Override
public String toString() {
    // One label/value pair per line; order and label text must stay stable.
    // NOTE: the "keySite" label has no space after its comma; this is kept
    // as-is so the produced text does not change.
    return "Target [qaIssue=" + qaIssue
            + ", authorUser=" + authorUser
            + ", authors=" + authors
            + ", organisation=" + organisation
            + ", crawlPermissions=" + crawlPermissions
            + ", instances=" + instances
            + ", licenses=" + licenses
            + ", subjects=" + subjects
            + ", collections=" + collections
            + ", tags=" + tags
            + ", flags=" + flags
            + ", fieldUrls=" + fieldUrls
            + ", isUkHosting=" + isUkHosting
            + ", isTopLevelDomain=" + isTopLevelDomain
            + ", isUkRegistration=" + isUkRegistration
            + ", isInScopeIp=" + isInScopeIp
            + ", isInScopeIpWithoutLicense=" + isInScopeIpWithoutLicense
            + ", crawlStartDate=" + crawlStartDate
            + ", crawlEndDate=" + crawlEndDate
            + ", legacySiteId=" + legacySiteId
            + ", active=" + active
            + ", whiteList=" + whiteList
            + ", blackList=" + blackList
            + ", dateOfPublication=" + dateOfPublication
            + ", justification=" + justification
            + ", selectorNotes=" + selectorNotes
            + ", archivistNotes=" + archivistNotes
            + ", selectionType=" + selectionType
            + ", flagNotes=" + flagNotes
            + ", tabStatus=" + tabStatus
            + ", description=" + description
            + ", ukPostalAddressUrl=" + ukPostalAddressUrl
            + ", keywords=" + keywords
            + ", synonyms=" + synonyms
            + ", value=" + value
            + ", summary=" + summary
            + ", scope=" + scope
            + ", depth=" + depth
            + ", viaCorrespondence=" + viaCorrespondence
            + ", ukPostalAddress=" + ukPostalAddress
            + ", crawlFrequency=" + crawlFrequency
            + ", specialDispensation=" + specialDispensation
            + ", specialDispensationReason=" + specialDispensationReason
            + ", liveSiteStatus=" + liveSiteStatus
            + ", wctId=" + wctId
            + ", sptId=" + sptId
            + ", noLdCriteriaMet=" + noLdCriteriaMet
            + ", hidden=" + hidden
            + ",keySite=" + keySite
            + ", professionalJudgement=" + professionalJudgement
            + ", professionalJudgementExp=" + professionalJudgementExp
            + ", ignoreRobotsTxt=" + ignoreRobotsTxt
            + ", format=" + format
            + ", field_uk_domain=" + field_uk_domain
            + ", field_uk_geoip=" + field_uk_geoip
            + ", field_crawl_permission=" + field_crawl_permission
            + ", field_url=" + field_url
            + ", field_subject=" + field_subject
            + ", field_description=" + field_description
            + ", field_uk_postal_address_url=" + field_uk_postal_address_url
            + ", field_nominating_organisation=" + field_nominating_organisation
            + ", field_suggested_collections=" + field_suggested_collections
            + ", field_collections=" + field_collections
            + ", field_crawl_start_date=" + field_crawl_start_date
            + ", field_crawl_end_date=" + field_crawl_end_date
            + ", field_license=" + field_license
            + ", field_instances=" + field_instances
            + ", field_collection_categories=" + field_collection_categories
            + ", field_qa_status=" + field_qa_status
            + ", field_snapshots=" + field_snapshots
            + ", field_notes=" + field_notes
            + ", title=" + title
            + ", edit_url=" + edit_url
            + ", language=" + language
            + ", revision=" + revision
            + ", id=" + id
            + ", url=" + url
            + ", loginPageUrl=" + loginPageUrl
            + ", logoutUrl=" + logoutUrl
            + ", secretId=" + secretId
            + ", watched=" + isWatched()
            + ", watchedTarget=" + watchedTarget
            + "]";
}
}