/* * Copyright (C) 2003-2007 eXo Platform SAS. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * as published by the Free Software Foundation; either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see<http://www.gnu.org/licenses/>. */ package org.exoplatform.services.jcr.impl.core.query; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.exoplatform.services.document.DocumentReader; import org.exoplatform.services.document.DocumentReaderService; import org.exoplatform.services.document.impl.MSExcelDocumentReader; import org.exoplatform.services.document.impl.tika.TikaDocumentReader; import org.exoplatform.services.jcr.impl.core.NodeImpl; import org.exoplatform.services.jcr.impl.core.query.lucene.FieldNames; import org.exoplatform.services.jcr.impl.core.query.lucene.Util; import java.io.FileInputStream; import java.net.URL; import java.util.Calendar; /** * Created by The eXo Platform SAS Author : Sergey Karpenko <sergey.karpenko@exoplatform.com.ua> * * @version $Id: $ */ public class TestExcelFileSearch extends BaseQueryTest { public void testFindFileContent() throws Exception { URL url = TestExcelFileSearch.class.getResource("/test.xls"); assertNotNull("test.xls not found", url); FileInputStream fis = new FileInputStream(url.getFile()); NodeImpl node = (NodeImpl)root.addNode("excelFile", "nt:file"); NodeImpl cont = (NodeImpl)node.addNode("jcr:content", "nt:resource"); cont.setProperty("jcr:mimeType", "application/excel"); cont.setProperty("jcr:lastModified", Calendar.getInstance()); // cont.setProperty("jcr:encoding","UTF-8"); cont.setProperty("jcr:data", fis); root.save(); fis.close(); fis = new FileInputStream(url.getFile()); DocumentReaderService extr = (DocumentReaderService)session.getContainer().getComponentInstanceOfType(DocumentReaderService.class); DocumentReader dreader = extr.getDocumentReader("application/excel"); assertNotNull(dreader); if (dreader instanceof MSExcelDocumentReader) { // OK } else if (dreader instanceof TikaDocumentReader) { String[] mimetypes = ((TikaDocumentReader)dreader).getMimeTypes(); assertEquals("application/excel", mimetypes[0]); } else { fail("Wrong document reader"); } // String text = dreader.getContentAsText(fis); // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> \n"+text + // "\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"); // Arabic word String word = "eric"; // Check is node indexed ScoreDoc doc = getDocument(cont.getInternalIdentifier(), false); assertNotNull("Node is not indexed", doc); IndexReader reader = defaultSearchIndex.getIndexReader(); IndexSearcher is = new IndexSearcher(reader); TermQuery query = new TermQuery(new Term(FieldNames.FULLTEXT, word)); TopDocs topDocs = is.search(query, null, Integer.MAX_VALUE); assertEquals(1, topDocs.totalHits); is.close(); Util.closeOrRelease(reader); } }