package org.entermedia.connectors.lucene; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.openedit.Data; import org.openedit.data.PropertyDetail; import org.openedit.data.PropertyDetails; import org.openedit.data.lucene.LuceneIndexer; import org.openedit.entermedia.Asset; import org.openedit.entermedia.Category; import org.openedit.entermedia.MediaArchive; import org.openedit.entermedia.search.AssetSecurityArchive; import com.openedit.OpenEditException; public class LuceneAssetIndexer extends LuceneIndexer { static final Log log = LogFactory.getLog(LuceneAssetIndexer.class); protected Analyzer fieldAnalyzer; protected boolean usesSearchSecurity = false; protected MediaArchive fieldMediaArchive; protected File fieldRootDirectory; protected AssetSecurityArchive fieldAssetSecurityArchive; public LuceneAssetIndexer() { } public File getRootDirectory() { return fieldRootDirectory; } @Override protected List getStandardProperties() { if (fieldStandardProperties == null) { fieldStandardProperties = Arrays.asList("name","description","primaryfile","id","category","viewasset","editasset","ordering","assettype","fileformat","sourcepath"); } return fieldStandardProperties; } public void setRootDirectory(File inRootDirectory) { fieldRootDirectory = inRootDirectory; } public boolean usesSearchSecurity() { return usesSearchSecurity; } public void setUsesSearchSecurity(boolean inUsesSearchSecurity) { usesSearchSecurity = inUsesSearchSecurity; } public Analyzer getAnalyzer() { return fieldAnalyzer; } public void setAnalyzer(Analyzer inAnalyzer) { fieldAnalyzer = inAnalyzer; } /** * Builds a set of all categories (including parents) that include this asset * @param inAsset * @return */ protected Set buildCategorySet(Asset inAsset) { HashSet allCatalogs = new HashSet(); allCatalogs.add(getMediaArchive().getCategoryArchive().getRootCategory()); Collection catalogs = inAsset.getCategories(); if( catalogs.size() > 0 ) { allCatalogs.addAll(catalogs); for (Iterator iter = catalogs.iterator(); iter.hasNext();) { Category catalog = (Category) iter.next(); buildCategorySet(catalog, allCatalogs); } } return allCatalogs; } /** * Builds a set of all parent categories * @param inCatalog * @param inCatalogSet */ protected void buildCategorySet(Category inCatalog, Set inCatalogSet) { inCatalogSet.add(inCatalog); Category parent = inCatalog.getParentCategory(); if (parent != null) { buildCategorySet(parent, inCatalogSet); } } public Document createAssetDoc(Asset asset, PropertyDetails inDetails) { Document doc = new Document(); updateIndex(asset, doc, inDetails); return doc; } /* protected void addAttachment(Document inDoc, Asset inAsset, String inType, String orig,String origtype) { String value = inAsset.getAttachmentByType(inType); if( value == null && origtype != null && inType.startsWith(origtype)) { value = orig; } if( value != null) { Field path = new Field(inType, value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); inDoc.add(path); } } */ public Document populateAsset(IndexWriter writer, Asset asset, boolean add, PropertyDetails inDetails) throws OpenEditException { Document doc = createAssetDoc(asset, inDetails); return doc; } protected void populatePermission(Document inDoc, List inAccessList, String inPermission) { //TODO Not needed any more if (inAccessList.size() == 0) { inAccessList.add("true"); } // permission is "viewasset" for earch security StringBuffer buffer = new StringBuffer(); for (Iterator iterator = inAccessList.iterator(); iterator.hasNext();) { String allow = (String) iterator.next(); buffer.append(" | "); buffer.append(allow); } inDoc.add(new Field(inPermission, buffer.toString(), INTERNAL_FIELD_TYPE)); } // protected void populatePermission(Document inDoc, Page inPage, String inPermission, Asset inAsset) throws OpenEditException // { // List add = getAssetSecurityArchive().getAccessList(getMediaArchive(), inPage, inAsset); // populatePermission(inDoc, add, inPermission); // } protected void populatePermission(Document inDoc, Asset inAsset, String inPermission) throws OpenEditException { if(getAssetSecurityArchive() == null) { return; } List add = getAssetSecurityArchive().getAccessList(getMediaArchive(), inAsset); //add the zone if the asset has one, this is necessary for matt // if(inAsset.get("zone") != null) // { // add.add("zone" + inAsset.get("zone")); // } populatePermission(inDoc, add, inPermission); } @Override protected void readStandardProperties(PropertyDetails inDetails, Data inData, StringBuffer inKeywords, Document doc) { super.readStandardProperties(inDetails, inData, inKeywords, doc); Asset asset = (Asset)inData; /** String datatype = asset.getProperty("datatype"); if (datatype == null) { datatype = "original"; //What is this for? } doc.add(new Field("datatype", datatype, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); */ // if(asset.getId() != null) // { // Field id = new Field("id", asset.getId(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS); // doc.add(id); // Why is this tokenized? Guess so we can find lower // // case versions // } Field path = new Field("sourcepath", asset.getSourcePath(), ID_FIELD_TYPE ); doc.add(path); String primaryfile = asset.getPrimaryFile(); if (primaryfile != null) { Field imagename = new Field("primaryfile", primaryfile, ID_FIELD_TYPE); doc.add(imagename); } String fileformat = asset.getFileFormat(); if(fileformat != null) { Field format = new Field("fileformat", fileformat, ID_FIELD_TYPE); doc.add(format); } String assettype = asset.get("assettype"); if(assettype == null) { assettype = "none"; } PropertyDetail detail = inDetails.getDetail("assettype"); docAdd(detail, doc, "assettype", assettype); if (asset.getCatalogId() == null) { asset.setCatalogId(getMediaArchive().getCatalogId()); } Field catalogid = new Field("catalogid", asset.getCatalogId(),ID_FIELD_TYPE); doc.add(catalogid); // this may be invalid field of -1 but we still need to add it for // the search to work /* if (asset.getOrdering() != -1) { doc.add(new Field("ordering", Integer.toString(asset.getOrdering()), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } */ String tagString = asset.get("keywords"); // if( tagString != null ) // { // Field keys = new Field("keywords", tagString, ); //make tokens // doc.add(keys); // } Set catalogs = buildCategorySet(asset); populateDescription(doc, asset, inKeywords, catalogs, tagString); /* * 'category' contains all categories, including parents */ populateJoinData("category", doc, catalogs, "id", true); for (Iterator iterator = asset.getLibraries().iterator(); iterator.hasNext();) { String libraryid = (String) iterator.next(); Data library = getMediaArchive().getLibrary(libraryid); if(library == null){ continue; } inKeywords.append(library.getName()); inKeywords.append(' '); } // Searcher searcher = getSearcherManager().getSearcher(asset.getCatalogId(),"assetalbums"); // SearchQuery query = searcher.createSearchQuery(); // query.addMatches("assetid", asset.getId()); // HitTracker tracker = searcher.search(query); //populateJoinData("album", doc, tracker, "albumid", true); // populateSecurity(doc, asset, catalogs); if (usesSearchSecurity()) { populatePermission(doc, asset, "viewasset"); } /* * 'exact-category' only contains categories that we immediately belong to */ if( inDetails.getDetail("category-exact") != null) { populateExactCategory(doc, asset); } } protected void populateExactCategory(Document doc, Asset item) { // the idea here is to add a field that allows you to search for // assets in a category WITHOUT sub category assets showing. StringBuffer buffer = new StringBuffer(); for (Iterator iter = item.getCategories().iterator(); iter.hasNext();) { Category catalog = (Category) iter.next(); buffer.append(catalog.getId()); buffer.append(" "); } if (buffer.length() > 0) { doc.add(new Field("category-exact", buffer.toString(), INTERNAL_FIELD_TYPE )); } /* * Not used any more if ( item.getDepartment() != null) { doc.add( new * Field("department", item.getDepartment(), Field.Store.YES, * Field.Index.ANALYZED_NO_NORMS)); } */ } protected void populateDescription(Document doc, Asset asset, StringBuffer fullDesc, Set inCategories, String inTagString) { // if (asset.getName() != null) // { // // This cannot be used in sorts since it is TOKENIZED. For sorts use // doc.add(new Field("name", asset.getName(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); // doc.add(new Field("name_sortable", asset.getName().toLowerCase(), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); // } // Low level reading in of text fullDesc.append(asset.getName()); fullDesc.append(' '); fullDesc.append(asset.getFileFormat()); fullDesc.append(' '); fullDesc.append(asset.getId()); fullDesc.append(' '); if( inTagString != null ) { inTagString = inTagString.replace(" | "," "); //remove junk fullDesc.append(inTagString); } //populateKeywords(fullDesc, asset, inDetails); // add a bunch of stuff to the full text field //never need this anymore /* File descriptionFile = new File(getRootDirectory(), "/" + getCatalogId() + "/assets/" + htmlPath); if (descriptionFile.exists() || descriptionFile.length() > 0) { FileReader descread = null; try { descread = new FileReader(descriptionFile); StringWriter out = new StringWriter(); new OutputFiller().fill(descread, out); fullDesc.append(out.toString()); } catch (IOException ex) { throw new OpenEditException(ex); } finally { FileUtils.safeClose(descread); } } */ fullDesc.append(' '); for (Iterator iter = inCategories.iterator(); iter.hasNext();) { Category cat = (Category) iter.next(); fullDesc.append(cat.getName()); fullDesc.append(' '); } String[] dirs = asset.getSourcePath().split("/"); for (int i = 0; i < dirs.length; i++) { fullDesc.append(dirs[i]); fullDesc.append(' '); } // try // { // String result = fullDesc.toString();//fixInvalidCharacters(fullDesc.toString()); // doc.add(new Field("description", result, Field.Store.NO, Field.Index.ANALYZED)); // } // catch (IOException ex) // { // throw new OpenEditException(ex); // } } /** * This is here to help the stemmer handle weird cases of words For example: * century21 should contain both centuri and century21 in the search index * * @param inString * @return * @throws IOException protected String fixInvalidCharacters(String inString) throws IOException { StringBuffer fixed = new StringBuffer(inString.length() + 20); RecordLookUpAnalyzer analyser = new RecordLookUpAnalyzer(); TokenStream stream = analyser.tokenStream("description", new StringReader(inString)); boolean hasnext = stream.incrementToken(); while (hasnext) { char[] text = stream. token.termBuffer(); if (text.length > 3) { // loop over all the words until we find an invalid one for (int i = 0; i < text.length; i++) { fixed.append(text[i]); // Checking for Y in the middle of words: harleydavidson // will now // index as harley davidson. if (text[i] == 'y') { break; } } fixed.append(' '); } // Always put the original back in there fixed.append(text); fixed.append(' '); token = stream.next(); } return fixed.toString(); } */ /* protected void populateKeywords(StringBuffer inFullDesc, Asset inAsset, PropertyDetails inDetails) { for (Iterator iter = inDetails.getDetails().iterator(); iter.hasNext();) { PropertyDetail det = (PropertyDetail) iter.next(); if (det.isKeyword()) { String prop = inAsset.getProperty(det.getId()); if (prop != null) { inFullDesc.append(prop); inFullDesc.append(' '); } } } } */ // protected String getTagString(Asset inAsset) // { // StringBuffer buffer = new StringBuffer(); // if (inAsset.hasKeywords()) // { // for (Iterator iter = inAsset.getKeywords().iterator(); iter.hasNext();) // { // String desc = (String) iter.next(); // desc = desc.replace('/', ' '); // Is this needed? // desc = desc.replace('\\', ' '); // buffer.append(desc); // if( iter.hasNext() ) // { // buffer.append(" | "); // } // } // } // return buffer.toString(); // } // public String pad(String inValue) // { // // // return getDecimalFormatter().format(inShortprice); // // String all = "0000000000000" + inValue; // String cut = all.substring(all.length() - 10); // 10 is the max width // // of integers // return cut; // } public void writeDoc(IndexWriter writer, String inId, Document doc, boolean add) { try { if (add) { writer.addDocument(doc, getAnalyzer()); } else { Term term = new Term("id", inId); writer.updateDocument(term, doc, getAnalyzer()); } } catch (IOException ex) { throw new OpenEditException(ex); } } public MediaArchive getMediaArchive() { return fieldMediaArchive; } public void setMediaArchive(MediaArchive inMediaArchive) { fieldMediaArchive = inMediaArchive; } protected String getCatalogId() { return getMediaArchive().getCatalogId(); } public AssetSecurityArchive getAssetSecurityArchive() { return fieldAssetSecurityArchive; } public void setAssetSecurityArchive(AssetSecurityArchive inAssetSecurityArchive) { fieldAssetSecurityArchive = inAssetSecurityArchive; } }