Java Examples for org.apache.lucene.document.Field.Index
The following Java examples illustrate the usage of org.apache.lucene.document.Field.Index. The source code samples are taken from different open source projects.
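Before the project samples, here is a minimal sketch of what the Field.Index constants control, assuming the pre-4.0 Lucene API where the Field(String, String, Store, Index) constructor is still available; the class, field names, and values below are illustrative only. ANALYZED runs the value through the analyzer, NOT_ANALYZED indexes it as a single token, the *_NO_NORMS variants (including ANALYZED_NO_NORMS) additionally omit norms, and NO leaves the field unindexed.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
public class FieldIndexSketch {
public static Document buildSketchDocument() {
Document doc = new Document();
// ANALYZED: the value is run through the analyzer and tokenized (full-text search)
doc.add(new Field("body", "some free text to tokenize", Field.Store.NO, Field.Index.ANALYZED));
// NOT_ANALYZED: the whole value is indexed as a single token (ids, exact-match keys)
doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
// NOT_ANALYZED_NO_NORMS: single token, no norms (saves index space when length/boost scoring is not needed)
doc.add(new Field("type", "article", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
// NO: the value is stored but not indexed, so it is retrievable but not searchable
doc.add(new Field("payload", "opaque stored data", Field.Store.YES, Field.Index.NO));
return doc;
}
}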
Example 1
Project: hibernate-search-master File: DocumentBuilderIndexedEntity.java
/**
* Builds the Lucene {@code Document} for a given entity instance and its id.
*
* @param tenantId the identifier of the tenant or null if there isn't one
* @param instance The entity for which to build the matching Lucene {@code Document}
* @param id the entity id.
* @param fieldToAnalyzerMap this map gets populated while generating the {@code Document}.
* It allows specifying, for any document field, a named analyzer to use. This parameter cannot be {@code null}.
* @param objectInitializer used to ensure that all objects are initialized
* @param conversionContext a {@link org.hibernate.search.bridge.spi.ConversionContext} object.
* @param includedFieldNames list of field names to consider. Others can be excluded. Null if all fields are considered.
*
* @return The Lucene {@code Document} for the specified entity.
*/
public Document getDocument(String tenantId, Object instance, Serializable id, Map<String, String> fieldToAnalyzerMap, InstanceInitializer objectInitializer, ConversionContext conversionContext, String[] includedFieldNames) {
if (fieldToAnalyzerMap == null) {
throw new IllegalArgumentException("fieldToAnalyzerMap cannot be null");
}
//sensible default for outside callers
if (objectInitializer == null) {
objectInitializer = getInstanceInitializer();
}
Document doc = new Document();
FacetHandling faceting = new FacetHandling();
Class<?> entityType = objectInitializer.getClass(instance);
float documentLevelBoost = getMetadata().getClassBoost(instance);
// add the class name of the entity to the document
if (containsFieldName(ProjectionConstants.OBJECT_CLASS, includedFieldNames)) {
@SuppressWarnings("deprecation") Field classField = new Field(ProjectionConstants.OBJECT_CLASS, entityType.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
doc.add(classField);
}
addTenantIdIfRequired(tenantId, doc);
// now add the entity id to the document
if (containsFieldName(idFieldName, includedFieldNames)) {
DocumentFieldMetadata idFieldMetaData = idPropertyMetadata.getFieldMetadata(idFieldName);
LuceneOptions luceneOptions = new LuceneOptionsImpl(idFieldMetaData, idFieldMetaData.getBoost(), documentLevelBoost);
final FieldBridge contextualizedBridge = conversionContext.oneWayConversionContext(getIdBridge());
conversionContext.setClass(entityType);
if (idPropertyName != null) {
conversionContext.pushProperty(idPropertyName);
}
try {
contextualizedBridge.set(idFieldMetaData.getAbsoluteName(), id, doc, luceneOptions);
addSortFieldDocValues(doc, idPropertyMetadata, documentLevelBoost, id);
} finally {
if (idPropertyName != null) {
conversionContext.popProperty();
}
}
}
// finally add all other document fields
Set<String> processedFieldNames = new HashSet<>();
buildDocumentFields(instance, doc, faceting, getMetadata(), fieldToAnalyzerMap, processedFieldNames, conversionContext, objectInitializer, documentLevelBoost, false, nestingContextFactory.createNestingContext(getTypeMetadata().getType()));
doc = faceting.build(doc);
return doc;
}
Example 2
Project: zoie-master File: TweetInterpreter.java
@Override
public ZoieIndexable convertAndInterpret(String tweet) {
try {
final String text = tweet;
final long uid = id++;
return new AbstractZoieIndexable() {
@Override
public IndexingReq[] buildIndexingReqs() {
Document doc = new Document();
doc.add(new Field("contents", text, Store.NO, Index.ANALYZED));
return new IndexingReq[] { new IndexingReq(doc) };
}
@Override
public long getUID() {
return uid;
}
@Override
public boolean isDeleted() {
return false;
}
@Override
public boolean isSkip() {
return false;
}
};
} catch (Exception e) {
return new AbstractZoieIndexable() {
@Override
public IndexingReq[] buildIndexingReqs() {
return null;
}
@Override
public long getUID() {
return 0;
}
@Override
public boolean isDeleted() {
return false;
}
@Override
public boolean isSkip() {
return true;
}
};
}
}
Example 3
Project: cloud-zoie-master File: ThrottledLuceneNRTDataConsumer.java
public void consume(Collection<proj.zoie.api.DataConsumer.DataEvent<V>> events) throws ZoieException {
if (_writer == null) {
throw new ZoieException("Internal IndexWriter null, perhaps not started?");
}
if (events.size() > 0) {
for (DataEvent<V> event : events) {
ZoieIndexable indexable = _interpreter.convertAndInterpret(event.getData());
if (indexable.isSkip())
continue;
try {
_writer.deleteDocuments(new Term(DOCUMENT_ID_FIELD, String.valueOf(indexable.getUID())));
} catch (IOException e) {
throw new ZoieException(e.getMessage(), e);
}
IndexingReq[] reqs = indexable.buildIndexingReqs();
for (IndexingReq req : reqs) {
Analyzer localAnalyzer = req.getAnalyzer();
Document doc = req.getDocument();
Field uidField = new Field(DOCUMENT_ID_FIELD, String.valueOf(indexable.getUID()), Store.NO, Index.NOT_ANALYZED_NO_NORMS);
uidField.setOmitTermFreqAndPositions(true);
doc.add(uidField);
if (localAnalyzer == null)
localAnalyzer = _analyzer;
try {
_writer.addDocument(doc, localAnalyzer);
} catch (IOException e) {
throw new ZoieException(e.getMessage(), e);
}
}
}
int numdocs;
try {
// for realtime, commit is not needed per the lucene mailing list
//_writer.commit();
numdocs = _writer.numDocs();
} catch (IOException e) {
throw new ZoieException(e.getMessage(), e);
}
logger.info("flushed " + events.size() + " events to index, index now contains " + numdocs + " docs.");
}
}
Example 4
Project: Hibernate-Search-on-action-master File: PersonPkBridge.java
public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
PersonPK id = (PersonPK) value;
Store store = luceneOptions.getStore();
Index index = luceneOptions.getIndex();
TermVector termVector = luceneOptions.getTermVector();
Float boost = luceneOptions.getBoost();
//store each sub property in a field
Field field = new Field(name + ".firstName", id.getFirstName(), store, index, termVector);
field.setBoost(boost);
document.add(field);
field = new Field(name + ".lastName", id.getLastName(), store, index, termVector);
field.setBoost(boost);
document.add(field);
//store unique representation in named field
field = new Field(name, objectToString(id), store, index, termVector);
field.setBoost(boost);
document.add(field);
}
Example 5
Project: luja-master File: FieldAnnotated.java
public Document toDocument() {
Document document = new Document();
document.add(new NumericField("intNumber", Store.YES, true).setIntValue(intNumber));
document.add(new org.apache.lucene.document.Field("date", DateTools.timeToString(date.toDateTimeAtStartOfDay(DateTimeZone.UTC).getMillis(), Resolution.DAY.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
document.add(new org.apache.lucene.document.Field("time", DateTools.timeToString(time.getMillis(), Resolution.MILLISECOND.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
document.add(new org.apache.lucene.document.Field("localTime", DateTools.timeToString(localTime.toDateTime(DateTimeZone.UTC).getMillis(), Resolution.MILLISECOND.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
document.add(new org.apache.lucene.document.Field("javaDate", DateTools.timeToString(javaDate.getTime(), Resolution.DAY.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
document.add(new org.apache.lucene.document.Field("code", code, Store.YES, Index.NOT_ANALYZED));
document.add(new org.apache.lucene.document.Field("name", name, Store.YES, Index.NOT_ANALYZED));
document.add(new org.apache.lucene.document.Field("tokenized", tokenized, Store.YES, Index.ANALYZED));
document.add(new org.apache.lucene.document.Field("locale", locale.toString(), Store.YES, Index.NOT_ANALYZED));
return document;
}
Example 6
Project: thrudb-java-master File: ThrudexLuceneHandler.java
/**
* Add/Replace a document
*/
public void put(Document d) throws ThrudexException, TException {
// make sure index is valid
if (!isValidIndex(d.index))
throw new ThrudexExceptionImpl("No Index Found: " + d.index);
// make sure document has a key
if (!d.isSetKey() || d.key.trim().equals(""))
throw new ThrudexExceptionImpl("No Document key found");
// Start new lucene document
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
luceneDocument.add(new org.apache.lucene.document.Field(LuceneIndex.DOCUMENT_KEY, d.key, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED));
// Start analyzer
Analyzer defaultAnalyzer = getAnalyzer(org.thrudb.thrudex.Analyzer.STANDARD);
PerFieldAnalyzerWrapper qAnalyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
// Add fields
for (Field field : d.fields) {
if (!field.isSetKey())
throw new ThrudexExceptionImpl("Field key not set");
// Convert Field store type to Lucene type
org.apache.lucene.document.Field.Store fieldStoreType;
if (field.isStore())
fieldStoreType = org.apache.lucene.document.Field.Store.YES;
else
fieldStoreType = org.apache.lucene.document.Field.Store.NO;
// Create Lucene Field
org.apache.lucene.document.Field luceneField = new org.apache.lucene.document.Field(field.key, field.value, fieldStoreType, org.apache.lucene.document.Field.Index.ANALYZED);
if (field.isSetWeight())
luceneField.setBoost(field.weight);
luceneDocument.add(luceneField);
// Create sortable field?
if (field.isSetSortable() && field.sortable) {
luceneDocument.add(new org.apache.lucene.document.Field(field.key + "_sort", field.value, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED));
}
// Add field specific analyzer to qAnalyzer
qAnalyzer.addAnalyzer(field.key, getAnalyzer(field.getAnalyzer()));
}
// Add payload
if (d.isSetPayload()) {
luceneDocument.add(new org.apache.lucene.document.Field(LuceneIndex.PAYLOAD_KEY, d.payload, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED));
}
// Document is now ready to put into the index
indexMap.get(d.index).put(d.key, luceneDocument, qAnalyzer);
}
Example 7
Project: wesearch-master File: IndexerCreator.java
/**
* This method has to create indexes for classes of the ontology
* @return A list of indexes
*/
public static List<IndexLucene> createIndexerForClasses() {
List<IndexLucene> indexers = new LinkedList<IndexLucene>();
indexers.add(new IndexLucene("label", "label", Index.ANALYZED, TermVector.YES, true));
indexers.add(new IndexLucene("comment", "comment", Index.ANALYZED, TermVector.YES, true));
indexers.add(0, new IndexLucene("class", "class", Index.ANALYZED, TermVector.YES, true));
return indexers;
}
Example 8
Project: graphdb-traversal-context-master File: IndexType.java
Fieldable instantiateField(String key, Object value, Index analyzed) {
Fieldable field = null;
if (value instanceof Number) {
Number number = (Number) value;
NumericField numberField = new NumericField(key, Store.YES, true);
if (value instanceof Long) {
numberField.setLongValue(number.longValue());
} else if (value instanceof Float) {
numberField.setFloatValue(number.floatValue());
} else if (value instanceof Double) {
numberField.setDoubleValue(number.doubleValue());
} else {
numberField.setIntValue(number.intValue());
}
field = numberField;
} else {
field = new Field(key, value.toString(), Store.YES, analyzed);
}
return field;
}
Example 9
Project: neo4j-lucene4-index-master File: IndexType.java
IndexableField instantiateField(String key, Object value, Index analyzed) {
IndexableField field = null;
if (value instanceof Number) {
Number number = (Number) value;
final IndexableField numberField;
if (value instanceof Long) {
numberField = new LongField(key, number.longValue(), Store.YES);
} else if (value instanceof Float) {
numberField = new FloatField(key, number.floatValue(), Store.YES);
} else if (value instanceof Double) {
numberField = new DoubleField(key, number.doubleValue(), Store.YES);
} else {
numberField = new IntField(key, number.intValue(), Store.YES);
}
field = numberField;
} else {
field = new Field(key, value.toString(), Store.YES, analyzed);
}
return field;
}
Example 10
Project: neo4j-mobile-android-master File: IndexType.java
Fieldable instantiateField(String key, Object value, Index analyzed) {
Fieldable field = null;
if (value instanceof Number) {
Number number = (Number) value;
NumericField numberField = new NumericField(key, Store.YES, true);
if (value instanceof Long) {
numberField.setLongValue(number.longValue());
} else if (value instanceof Float) {
numberField.setFloatValue(number.floatValue());
} else if (value instanceof Double) {
numberField.setDoubleValue(number.doubleValue());
} else {
numberField.setIntValue(number.intValue());
}
field = numberField;
} else {
field = new Field(key, value.toString(), Store.YES, analyzed);
}
return field;
}
Example 11
Project: cloudtm-data-platform-master File: DocumentBuilderIndexedEntity.java
protected void checkDocumentId(XProperty member, PropertiesMetadata propertiesMetadata, boolean isRoot, String prefix, ConfigContext context, PathsContext pathsContext) { Annotation idAnnotation = getIdAnnotation(member, context); NumericField numericFieldAnn = member.getAnnotation(NumericField.class); if (idAnnotation != null) { String attributeName = getIdAttributeName(member, idAnnotation); if (pathsContext != null) { pathsContext.markEncounteredPath(prefix + attributeName); } if (isRoot) { if (explicitDocumentId) { if (idAnnotation instanceof DocumentId) { throw log.duplicateDocumentIdFound(getBeanClass().getName()); } else { //If it's not a DocumentId it's a JPA @Id: ignore it as we already have a @DocumentId return; } } if (idAnnotation instanceof DocumentId) { explicitDocumentId = true; } idKeywordName = prefix + attributeName; FieldBridge fieldBridge = BridgeFactory.guessType(null, numericFieldAnn, member, reflectionManager); if (fieldBridge instanceof TwoWayFieldBridge) { idBridge = (TwoWayFieldBridge) fieldBridge; } else { throw new SearchException("Bridge for document id does not implement TwoWayFieldBridge: " + member.getName()); } Float boost = AnnotationProcessingHelper.getBoost(member, null); if (boost != null) { idBoost = boost.floatValue(); } ReflectionHelper.setAccessible(member); idGetter = member; } else { //component should index their document id ReflectionHelper.setAccessible(member); propertiesMetadata.fieldGetters.add(member); propertiesMetadata.fieldGetterNames.add(member.getName()); String fieldName = prefix + attributeName; propertiesMetadata.fieldNames.add(fieldName); propertiesMetadata.fieldStore.add(Store.YES); Field.Index index = AnnotationProcessingHelper.getIndex(Index.YES, Analyze.NO, Norms.YES); propertiesMetadata.fieldIndex.add(index); propertiesMetadata.fieldTermVectors.add(AnnotationProcessingHelper.getTermVector(TermVector.NO)); propertiesMetadata.fieldNullTokens.add(null); propertiesMetadata.fieldBridges.add(BridgeFactory.guessType(null, null, member, reflectionManager)); propertiesMetadata.fieldBoosts.add(AnnotationProcessingHelper.getBoost(member, null)); propertiesMetadata.precisionSteps.add(getPrecisionStep(null)); propertiesMetadata.dynamicFieldBoosts.add(AnnotationProcessingHelper.getDynamicBoost(member)); // property > entity analyzer (no field analyzer) Analyzer analyzer = AnnotationProcessingHelper.getAnalyzer(member.getAnnotation(org.hibernate.search.annotations.Analyzer.class), context); if (analyzer == null) { analyzer = propertiesMetadata.analyzer; } if (analyzer == null) { throw new AssertionFailure("Analyzer should not be undefined"); } addToScopedAnalyzer(fieldName, analyzer, index); } } }
Example 12
Project: PartyDJ-master File: HighlighterTest.java
public void testMultiSearcher() throws Exception { // setup index 1 RAMDirectory ramDir1 = new RAMDirectory(); IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED); Document d = new Document(); Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED); d.add(f); writer1.addDocument(d); writer1.optimize(); writer1.close(); IndexReader reader1 = IndexReader.open(ramDir1, true); // setup index 2 RAMDirectory ramDir2 = new RAMDirectory(); IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED); d = new Document(); f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED); d.add(f); writer2.addDocument(d); writer2.optimize(); writer2.close(); IndexReader reader2 = IndexReader.open(ramDir2, true); IndexSearcher searchers[] = new IndexSearcher[2]; searchers[0] = new IndexSearcher(ramDir1, true); searchers[1] = new IndexSearcher(ramDir2, true); MultiSearcher multiSearcher = new MultiSearcher(searchers); QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, new StandardAnalyzer(TEST_VERSION)); parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = parser.parse("multi*"); System.out.println("Searching for: " + query.toString(FIELD_NAME)); // at this point the multisearcher calls combine(query[]) hits = multiSearcher.search(query, null, 1000); // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer(TEST_VERSION)); Query expandedQueries[] = new Query[2]; expandedQueries[0] = query.rewrite(reader1); expandedQueries[1] = query.rewrite(reader2); query = query.combine(expandedQueries); // create an instance of the highlighter with the tags used to surround // highlighted text Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query)); for (int i = 0; i < hits.totalHits; i++) { String text = multiSearcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); String highlightedText = highlighter.getBestFragment(tokenStream, text); System.out.println(highlightedText); } assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2); }
Example 13
Project: PersonalityExtraction-master File: CreateLuceneIndex.java
public static void main(String[] args) throws Exception {
if (args.length != 1) {
System.out.println("Args: index-dir");
System.exit(-1);
}
File indexDir = new File(args[0]);
if (indexDir.exists()) {
System.out.println("Index directory already exists: " + indexDir.getAbsolutePath());
System.exit(-2);
}
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), analyzer, true, MaxFieldLength.UNLIMITED);
for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING) {
final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
/*
* We will create Lucene documents with searchable "fullContent" field and "title",
* "url" and "snippet" fields for clustering.
*/
doc.add(new Field("fullContent", d.getSummary(), Store.NO, Index.ANALYZED));
doc.add(new Field("title", d.getTitle(), Store.YES, Index.NO));
doc.add(new Field("snippet", d.getSummary(), Store.YES, Index.NO));
doc.add(new Field("url", d.getContentUrl(), Store.YES, Index.NO));
writer.addDocument(doc);
}
writer.close();
}
Example 14
Project: pylucene-master File: TestIndexWriter.java
/** * Make sure we skip wicked long terms. */ public void testWickedLongTerm() throws IOException { MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))); char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE - 1]; Arrays.fill(chars, 'x'); Document doc = new Document(); final String bigTerm = new String(chars); // Max length term is 16383, so this contents produces // a too-long term: String contents = "abc xyz x" + bigTerm + " another term"; doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); // Make sure we can add another normal document doc = new Document(); doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(dir, true); // Make sure all terms < max size were indexed assertEquals(2, reader.docFreq(new Term("content", "abc"))); assertEquals(1, reader.docFreq(new Term("content", "bbb"))); assertEquals(1, reader.docFreq(new Term("content", "term"))); assertEquals(1, reader.docFreq(new Term("content", "another"))); // Make sure position is still incremented when // massive term is skipped: TermPositions tps = reader.termPositions(new Term("content", "another")); assertTrue(tps.next()); assertEquals(1, tps.freq()); assertEquals(3, tps.nextPosition()); // Make sure the doc that has the massive term is in // the index: assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); reader.close(); // Make sure we can add a document with exactly the // maximum length term, and search on that term: doc = new Document(); doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED)); StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT); sa.setMaxTokenLength(100000); writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)); writer.addDocument(doc); writer.close(); reader = IndexReader.open(dir, true); assertEquals(1, reader.docFreq(new Term("content", bigTerm))); reader.close(); dir.close(); }
Example 15
Project: Solbase-master File: DocumentLoader.java
public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException { Document document = new Document(); Get documentGet = new Get(SolbaseUtil.randomize(docNum)); if (fieldNames == null || fieldNames.size() == 0) { // get all columns ( except this skips meta info ) documentGet.addFamily(Bytes.toBytes("field")); } else { for (byte[] fieldName : fieldNames) { documentGet.addColumn(Bytes.toBytes("field"), fieldName); } } Result documentResult = null; // if docTable is set up, reuse instance, otherwise create brand new one and close after done if (this.docTable == null) { HTableInterface docTable = null; try { docTable = SolbaseUtil.getDocTable(); documentResult = docTable.get(documentGet); } finally { SolbaseUtil.releaseTable(docTable); } } else { documentResult = this.docTable.get(documentGet); } if (documentResult == null || documentResult.isEmpty()) { return null; } // TODO, get from result Long versionIdentifier = 0l; NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(Bytes.toBytes("field")); for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) { Field field = null; String fieldName = Bytes.toString(fieldColumn.getKey()); byte[] value; ByteBuffer v = ByteBuffer.wrap(fieldColumn.getValue()); int vlimit = v.limit() + v.arrayOffset(); if (v.array()[vlimit - 1] != Byte.MAX_VALUE && v.array()[vlimit - 1] != Byte.MIN_VALUE) { throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")"); } else if (// Binary v.array()[vlimit - 1] == Byte.MAX_VALUE) { value = new byte[vlimit - 1]; System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1); field = new Field(fieldName, value, Store.YES); document.add(field); } else if (// String v.array()[vlimit - 1] == Byte.MIN_VALUE) { value = new byte[vlimit - 1]; System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1); // Check for multi-fields String fieldString = new String(value, "UTF-8"); if (fieldString.indexOf(Bytes.toString(SolbaseUtil.delimiter)) >= 0) { StringTokenizer tok = new StringTokenizer(fieldString, Bytes.toString(SolbaseUtil.delimiter)); while (tok.hasMoreTokens()) { // update logic if (schema != null) { SchemaField sfield = schema.getFieldOrNull(fieldName); if (sfield.getType() instanceof EmbeddedIndexedIntField) { EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType(); EmbeddedSortField sf = new EmbeddedSortField(fieldName, tok.nextToken(), Field.Store.YES, Field.Index.NO, eiif.getFieldNumber()); document.add(sf); } else { Field f = sfield.createField(tok.nextToken(), 1.0f); // null fields are not added if (f != null) { document.add(f); } } } else { field = new Field(fieldName, tok.nextToken(), Store.YES, Index.ANALYZED); document.add(field); } } } else { // update logic if (schema != null) { SchemaField sfield = schema.getFieldOrNull(fieldName); if (sfield.getType() instanceof EmbeddedIndexedIntField) { EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType(); EmbeddedSortField sf = new EmbeddedSortField(fieldName, fieldString, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber()); document.add(sf); } else { Field f = sfield.createField(fieldString, 1.0f); // null fields are not added if (f != null) { document.add(f); } } } else { field = new Field(fieldName, fieldString, Store.YES, Index.ANALYZED); document.add(field); } } } } return new CachedObjectWrapper<Document, 
Long>(document, versionIdentifier, System.currentTimeMillis()); }
Example 16
Project: solrcene-master File: TestIndexWriter.java
public void testOptimizeMaxNumSegments() throws IOException {
MockDirectoryWrapper dir = newDirectory(random);
final Document doc = new Document();
doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));
for (int numDocs = 38; numDocs < 500; numDocs += 38) {
LogDocMergePolicy ldmp = new LogDocMergePolicy();
ldmp.setMinMergeDocs(1);
ldmp.setMergeFactor(5);
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy(ldmp));
for (int j = 0; j < numDocs; j++) writer.addDocument(doc);
writer.close();
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
final int segCount = sis.size();
ldmp = new LogDocMergePolicy();
ldmp.setMergeFactor(5);
writer = new IndexWriter(dir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(ldmp));
writer.optimize(3);
writer.close();
sis = new SegmentInfos();
sis.read(dir);
final int optSegCount = sis.size();
if (segCount < 3)
assertEquals(segCount, optSegCount);
else
assertEquals(3, optSegCount);
}
dir.close();
}
Example 17
Project: ansj_seg-master File: NearTest.java
public static void createIndex() throws Exception {
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, new AnsjAnalysis());
Directory directory = FSDirectory.open(new File("c:/index"));
IndexWriter writer = new IndexWriter(directory, conf);
String str = "文化人;文化人谈文化";
String[] values = str.split(";");
for (String value : values) {
Document doc = new Document();
Field field = new Field("test", value, Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
// field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
doc.add(field);
writer.addDocument(doc);
writer.commit();
}
writer.close();
}
Example 18
Project: datahotel-master File: IndexBean.java
@SuppressWarnings("rawtypes") public void update(Metadata metadata) { MetadataLogger logger = metadata.getLogger(); Timestamp ts = new Timestamp(FOLDER_CACHE_INDEX, metadata.getLocation(), "timestamp"); if (metadata.getUpdated() == ts.getTimestamp()) { logger.info("Index up to date."); return; } logger.info("Building index."); long i = 0; try { File filename = Filesystem.getFile(FOLDER_SLAVE, metadata.getLocation(), FILE_DATASET); Directory dir = FSDirectory.open(Filesystem.getFolder(FOLDER_CACHE_INDEX, metadata.getLocation())); StandardAnalyzer analyzer = new StandardAnalyzer(version, new HashSet()); IndexWriterConfig writerConfig = new IndexWriterConfig(version, analyzer); IndexWriter writer = new IndexWriter(dir, writerConfig); writer.deleteAll(); CSVReader csv = csvReaderFactory.open(filename); while (csv.hasNext()) { try { i++; Map<String, String> line = csv.getNextLine(); Document doc = new Document(); String searchable = ""; for (FieldLight f : fieldBean.getFields(metadata)) { String value = line.get(f.getShortName()); if (value == null) logger.info("Field not found: " + f.getShortName()); // TODO if (f.getGroupable()) if (value.matches("[0-9.,]+")) doc.add(new Field(f.getShortName(), value, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); else doc.add(new Field(f.getShortName(), value, Store.YES, Index.ANALYZED)); if (f.getSearchable()) searchable += " " + line.get(f.getShortName()); } if (!searchable.trim().isEmpty()) doc.add(new Field("searchable", searchable.trim(), Store.NO, Index.ANALYZED)); writer.addDocument(doc); } catch (Exception e) { logger.info("[" + e.getClass().getSimpleName() + (e.getStackTrace().length > 0 ? "][" + e.getStackTrace()[0].getFileName() + ":" + e.getStackTrace()[0].getLineNumber() : "") + "] Unable to index line " + i + ". (" + String.valueOf(e.getMessage()) + ")"); } if (i % 10000 == 0) logger.info("Document " + i); } writer.optimize(); writer.commit(); writer.close(); dir.close(); ts.setTimestamp(metadata.getUpdated()); ts.save(); } catch (Exception e) { logger.log(Level.WARNING, e.getMessage(), e); } }
Example 19
Project: Europeana-Creative-master File: ExtendedLireIndexer.java
protected Document buildDocument(LireObject s, String id) throws IOException, CorruptIndexException, BoFException {
Document doc = new Document();
// access private field by reflection
// MPEG-7
getM_sfaALL().addFieldToDoc(doc, s, getM_toppivs());
// ID
getM_sfaALL().AddIDField(doc, id);
System.out.println("id " + id);
// add URL to doc
doc.add(new org.apache.lucene.document.Field("THMBURL", s.getThmbURL(), org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED, org.apache.lucene.document.Field.TermVector.NO));
return doc;
}
Example 20
Project: Genoogle-master File: LuceneIndexer.java
public static void main(String[] args) throws IOException, IllegalSymbolException, NoSuchElementException, ParseException { Directory indexDir = FSDirectory.open(new File("./index")); if (new File("./index").exists()) { IndexSearcher is = new IndexSearcher(indexDir); Query q = new TermQuery(new Term("header", "100")); TopDocs search = is.search(q, 20); System.out.println(search.totalHits); System.out.println(search.scoreDocs[0]); } else { final boolean forceFormatting = true; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_31, new StandardAnalyzer(Version.LUCENE_31)); IndexWriter indexWriter = new IndexWriter(indexDir, indexWriterConfig); BufferedReader is = new BufferedReader(new FileReader("/Users/albrecht/genoogle/files/fasta/ecoli.nt")); RichSequenceStreamReader readFastaDNA = IOTools.readFasta(is, DNAAlphabet.SINGLETON); while (readFastaDNA.hasNext()) { RichSequence s; try { s = readFastaDNA.nextRichSequence(); } catch (IllegalSymbolException e) { if (forceFormatting) { continue; } else { throw e; } } int id = getNextSequenceId(); String gi = s.getGi(); String name = s.getName(); String type = s.getType(); String accession = s.getAccession(); String description = s.getDescription(); String header = s.getHeader(); System.out.println(id); System.out.println(gi); System.out.println(name); System.out.println(type); System.out.println(accession); System.out.println(description); Document doc = new Document(); doc.add(new Field("header", header, Store.YES, Index.ANALYZED)); doc.add(new Field("gi", gi, Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("name", name, Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("type", type, Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("accession", accession, Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("description", description, Store.YES, Index.ANALYZED)); doc.add(new Field("id", Integer.toString(id), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("file", "ecoli.nt", Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("db", "ECOLI_DB", Store.YES, Index.NOT_ANALYZED)); indexWriter.addDocument(doc); } indexWriter.optimize(); indexWriter.close(); } }
Example 21
Project: high-scale-lucene-master File: TestIndexWriterReader.java
public void testUpdateDocument() throws Exception { boolean optimize = true; Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); // create the index createIndexNoClose(!optimize, "index1", writer); // writer.flush(false, true, true); // get a reader IndexReader r1 = writer.getReader(); assertTrue(r1.isCurrent()); String id10 = r1.document(10).getField("id").stringValue(); Document newDoc = r1.document(10); newDoc.removeField("id"); newDoc.add(new Field("id", Integer.toString(8000), Store.YES, Index.NOT_ANALYZED)); writer.updateDocument(new Term("id", id10), newDoc); assertFalse(r1.isCurrent()); IndexReader r2 = writer.getReader(); assertTrue(r2.isCurrent()); assertEquals(0, count(new Term("id", id10), r2)); assertEquals(1, count(new Term("id", Integer.toString(8000)), r2)); r1.close(); writer.close(); assertTrue(r2.isCurrent()); IndexReader r3 = IndexReader.open(dir1, true); assertTrue(r3.isCurrent()); assertTrue(r2.isCurrent()); assertEquals(0, count(new Term("id", id10), r3)); assertEquals(1, count(new Term("id", Integer.toString(8000)), r3)); writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); assertTrue(r2.isCurrent()); assertTrue(r3.isCurrent()); writer.close(); assertFalse(r2.isCurrent()); assertTrue(!r3.isCurrent()); r2.close(); r3.close(); dir1.close(); }
Example 22
Project: lucene-Korean-Analyzer-master File: SynonymDictionaryIndex.java
public synchronized void indexingDictionary(List<String> synonyms) {
try {
indexWriter.deleteAll();
indexWriter.commit();
int recordCnt = 0;
for (String syn : synonyms) {
String[] synonymWords = syn.split(",");
Document doc = new Document();
for (int i = 0, size = synonymWords.length; i < size; i++) {
String fieldValue = synonymWords[i];
Field field = new Field("syn", fieldValue, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
doc.add(field);
recordCnt++;
//end inner for
}
indexWriter.addDocument(doc);
}
//end outer for
indexWriter.commit();
logger.info("Number of indexed synonym words: {}", recordCnt);
} catch (Exception e) {
throw new IllegalStateException();
}
}
Example 23
Project: lucene-korean-master File: SynonymDictionaryIndex.java
public synchronized void indexingDictionary(List<String> synonyms) {
try {
indexWriter.deleteAll();
indexWriter.commit();
int recordCnt = 0;
for (String syn : synonyms) {
String[] synonymWords = syn.split(",");
Document doc = new Document();
for (int i = 0, size = synonymWords.length; i < size; i++) {
String fieldValue = synonymWords[i];
Field field = new Field("syn", fieldValue, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
doc.add(field);
recordCnt++;
//end inner for
}
indexWriter.addDocument(doc);
}
//end outer for
indexWriter.commit();
logger.info("Number of indexed synonym words: {}", recordCnt);
} catch (Exception e) {
throw new IllegalStateException();
}
}
Example 24
Project: NLP-master File: NearTest.java
public static void createIndex() throws Exception {
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, new AnsjAnalysis());
Directory directory = FSDirectory.open(new File("c:/index"));
IndexWriter writer = new IndexWriter(directory, conf);
String str = "文化人;文化人谈文化";
String[] values = str.split(";");
for (String value : values) {
Document doc = new Document();
Field field = new Field("test", value, Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
// field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
doc.add(field);
writer.addDocument(doc);
writer.commit();
}
writer.close();
}
Example 25
Project: Solbase-Lucene-master File: HighlighterTest.java
public void testMultiSearcher() throws Exception { // setup index 1 RAMDirectory ramDir1 = new RAMDirectory(); IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true); Document d = new Document(); Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED); d.add(f); writer1.addDocument(d); writer1.optimize(); writer1.close(); IndexReader reader1 = IndexReader.open(ramDir1); // setup index 2 RAMDirectory ramDir2 = new RAMDirectory(); IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true); d = new Document(); f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED); d.add(f); writer2.addDocument(d); writer2.optimize(); writer2.close(); IndexReader reader2 = IndexReader.open(ramDir2); IndexSearcher searchers[] = new IndexSearcher[2]; searchers[0] = new IndexSearcher(ramDir1); searchers[1] = new IndexSearcher(ramDir2); MultiSearcher multiSearcher = new MultiSearcher(searchers); QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = parser.parse("multi*"); System.out.println("Searching for: " + query.toString(FIELD_NAME)); // at this point the multisearcher calls combine(query[]) hits = multiSearcher.search(query); // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer()); Query expandedQueries[] = new Query[2]; expandedQueries[0] = query.rewrite(reader1); expandedQueries[1] = query.rewrite(reader2); query = query.combine(expandedQueries); // create an instance of the highlighter with the tags used to surround // highlighted text Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query)); for (int i = 0; i < hits.length(); i++) { String text = hits.doc(i).get(FIELD_NAME); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); String highlightedText = highlighter.getBestFragment(tokenStream, text); System.out.println(highlightedText); } assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2); }
Example 26
Project: tika-master File: MetadataAwareLuceneIndexer.java
public void indexContentSpecificMet(File file) throws Exception {
Metadata met = new Metadata();
try (InputStream is = new FileInputStream(file)) {
tika.parse(is, met);
Document document = new Document();
for (String key : met.names()) {
String[] values = met.getValues(key);
for (String val : values) {
document.add(new Field(key, val, Store.YES, Index.ANALYZED));
}
}
writer.addDocument(document);
}
}
Example 27
Project: yarep-master File: DateIndexerSearcherImplV1.java
/**
* Get revision from index file
* @param path2 Absolute path of index file
* @return TODO
*/
private Revision getRevisionFromIndexFile(String path2) throws Exception, IndexOutOfSyncException {
if (path2 != null) {
if (new File(path2).isFile()) {
String revisionName = getRevisionName(path2);
if (revisionName != null) {
try {
//log.debug("Get revision name from index file '" + path2 + "' for node '" + nodePath + "'.");
return new VirtualFileSystemRevision(repo, nodePath, revisionName);
} catch (NoSuchRevisionException e) {
log.warn("No revision for revision name '" + revisionName + "' of index file: " + path2);
throw new IndexOutOfSyncException(path2);
}
} else {
log.warn("Index file '" + path2 + "' does not seem to contain a revision name!");
return null;
}
} else {
log.warn("No such index file: " + path2);
return null;
}
} else {
//log.debug("No path.");
return null;
}
}
Example 28
Project: apache-nutch-fork-master File: TestIndexSorter.java
protected void setUp() throws Exception { if (conf == null) conf = NutchConfiguration.create(); // create test index testDir = new File("indexSorter-test-" + System.currentTimeMillis()); if (!testDir.mkdirs()) { throw new Exception("Can't create test dir " + testDir.toString()); } LOG.info("Creating test index: " + testDir.getAbsolutePath()); File plain = new File(testDir, INDEX_PLAIN); Directory dir = FSDirectory.open(plain); IndexWriter writer = new IndexWriter(dir, new NutchDocumentAnalyzer(conf), true, MaxFieldLength.UNLIMITED); // create test documents for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); for (int k = 0; k < fieldNames.length; k++) { Field f; Store s; Index ix; String val = null; if (fieldNames[k].equals("id")) { s = Store.YES; ix = Index.NOT_ANALYZED; val = String.valueOf(i); } else if (fieldNames[k].equals("host")) { s = Store.YES; ix = Index.NOT_ANALYZED; val = "www.example" + i + ".com"; } else if (fieldNames[k].equals("site")) { s = Store.NO; ix = Index.NOT_ANALYZED; val = "www.example" + i + ".com"; } else if (fieldNames[k].equals("content")) { s = Store.NO; ix = Index.ANALYZED; val = "This is the content of the " + i + "-th document."; } else if (fieldNames[k].equals("boost")) { s = Store.YES; ix = Index.NO; // XXX note that this way we ensure different values of encoded boost // XXX note also that for this reason we can't reliably test more than // XXX 255 documents. float boost = Similarity.decodeNorm((byte) (i + 1)); val = String.valueOf(boost); doc.setBoost(boost); } else { s = Store.YES; ix = Index.ANALYZED; if (fieldNames[k].equals("anchor")) { val = "anchors to " + i + "-th page."; } else if (fieldNames[k].equals("url")) { val = "http://www.example" + i + ".com/" + i + ".html"; } } f = new Field(fieldNames[k], val, s, ix); doc.add(f); } writer.addDocument(doc); } writer.optimize(); writer.close(); }
Example 29
Project: capedwarf-blue-master File: DocumentFieldBridge.java
@SuppressWarnings("unchecked")
public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
com.google.appengine.api.search.Document googleDocument = (com.google.appengine.api.search.Document) value;
document.add(new org.apache.lucene.document.Field(CacheValue.MATCH_ALL_DOCS_FIELD_NAME, CacheValue.MATCH_ALL_DOCS_FIELD_VALUE, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
for (Field field : googleDocument.getFields()) {
if (field.getType() == null) {
throw new IllegalStateException("Field " + field.getName() + " of document " + googleDocument.getId() + " has null type!");
}
String prefixedFieldName = fieldNamePrefixer.getPrefixedFieldName(field.getName(), field.getType());
String prefixedAllFieldName = fieldNamePrefixer.getPrefixedFieldName(CacheValue.ALL_FIELD_NAME, field.getType());
if (field.getType() == Field.FieldType.NUMBER) {
luceneOptions.addNumericFieldToDocument(prefixedFieldName, field.getNumber(), document);
luceneOptions.addNumericFieldToDocument(prefixedAllFieldName, field.getNumber(), document);
} else if (field.getType() == Field.FieldType.GEO_POINT) {
spatialFieldBridgeByGrid.set(prefixedFieldName, Point.fromDegrees(field.getGeoPoint().getLatitude(), field.getGeoPoint().getLongitude()), document, luceneOptions);
document.getFields();
} else {
luceneOptions.addFieldToDocument(prefixedFieldName, convertToString(field), document);
luceneOptions.addFieldToDocument(prefixedAllFieldName, convertToString(field), document);
}
}
}
Example 30
Project: hsearch-obsolete-master File: LuceneIndexManager.java
public void insert(HDocument hdoc) throws Exception {
Document doc = new Document();
for (com.bizosys.hsearch.common.Field fld : hdoc.fields) {
ByteField bf = fld.getByteField();
Store store = (fld.isStore()) ? Field.Store.YES : Field.Store.NO;
Index index = (fld.isAnalyze()) ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
doc.add(new Field(bf.name, bf.getValue().toString(), store, index));
}
doc.add(new Field("id", hdoc.getTenantDocumentKey(), Field.Store.YES, Field.Index.ANALYZED));
if (null != hdoc.docType)
doc.add(new Field("type", hdoc.docType, Field.Store.YES, Field.Index.ANALYZED));
if (null != hdoc.url)
doc.add(new Field("url", hdoc.url, Field.Store.YES, Field.Index.ANALYZED));
if (null != hdoc.title)
doc.add(new Field("title", hdoc.title, Field.Store.YES, Field.Index.ANALYZED));
if (null != hdoc.preview)
doc.add(new Field("preview", hdoc.preview, Field.Store.YES, Field.Index.ANALYZED));
if (null != hdoc.cacheText)
doc.add(new Field("cache", hdoc.cacheText, Field.Store.YES, Field.Index.ANALYZED));
iwriter.addDocument(doc);
}
Example 31
Project: jtrac-master File: Item.java
/**
* Lucene DocumentCreator implementation
*/
public Document createDocument() {
Document d = new Document();
d.add(new org.apache.lucene.document.Field("id", getId() + "", Store.YES, Index.NO));
d.add(new org.apache.lucene.document.Field("type", "item", Store.YES, Index.NO));
StringBuffer sb = new StringBuffer();
if (getSummary() != null) {
sb.append(getSummary());
}
if (getDetail() != null) {
if (sb.length() > 0) {
sb.append(" | ");
}
sb.append(getDetail());
}
d.add(new org.apache.lucene.document.Field("text", sb.toString(), Store.NO, Index.TOKENIZED));
return d;
}
Example 32
Project: katta-master File: LuceneClientTest.java
@Test
public void testGetBinaryDetails() throws Exception {
File index = _temporaryFolder.newFolder("indexWithBinaryData");
String textFieldName = "textField";
String binaryFieldName = "binaryField";
String textFieldContent = "sample text";
byte[] bytesFieldContent = new byte[] { 1, 2, 3 };
IndexWriter indexWriter = new IndexWriter(FSDirectory.open(index), createIndexWriter());
Document document = new Document();
document.add(new Field(binaryFieldName, bytesFieldContent));
document.add(new Field(textFieldName, textFieldContent, Store.NO, Index.ANALYZED));
indexWriter.addDocument(document);
indexWriter.close();
DeployClient deployClient = new DeployClient(_clusterRule.getCluster().getProtocol());
IndexState indexState = deployClient.addIndex(index.getName(), index.getParentFile().getAbsolutePath(), 1).joinDeployment();
assertEquals(IndexState.DEPLOYED, indexState);
ILuceneClient client = new LuceneClient(_clusterRule.getZkConfiguration());
final Query query = new QueryParser(Version.LUCENE_35, "", new KeywordAnalyzer()).parse(textFieldName + ": " + textFieldContent);
final Hits hits = client.search(query, new String[] { index.getName() }, 10);
assertNotNull(hits);
assertEquals(1, hits.getHits().size());
final Hit hit = hits.getHits().get(0);
final MapWritable details = client.getDetails(hit);
final Set<Writable> keySet = details.keySet();
assertEquals(1, keySet.size());
final Writable writable = details.get(new Text(binaryFieldName));
assertNotNull(writable);
assertThat(writable, instanceOf(BytesWritable.class));
BytesWritable bytesWritable = (BytesWritable) writable;
// getBytes() returns the full array
bytesWritable.setCapacity(bytesWritable.getLength());
assertArrayEquals(bytesFieldContent, bytesWritable.getBytes());
client.close();
}
Example 33
Project: l4ia-master File: FastVectorHighlighterSample.java
static void makeIndex() throws IOException {
IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.UNLIMITED);
for (String d : DOCS) {
Document doc = new Document();
doc.add(new Field(F, d, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
}
writer.close();
}
Example 34
Project: maven-indexer-master File: MinimalArtifactInfoIndexCreator.java
public void updateLegacyDocument(ArtifactInfo ai, Document doc) {
updateDocument(ai, doc);
// legacy!
if (ai.getPrefix() != null) {
doc.add(new Field(ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), Field.Store.YES, Field.Index.NOT_ANALYZED));
}
if (ai.getGoals() != null) {
doc.add(new Field(ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str(ai.getGoals()), Field.Store.YES, Field.Index.NO));
}
doc.removeField(ArtifactInfo.GROUP_ID);
doc.add(new Field(ArtifactInfo.GROUP_ID, ai.getGroupId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
}
Example 35
Project: nutchbase-master File: TestIndexSorter.java
protected void setUp() throws Exception { if (conf == null) conf = NutchConfiguration.create(); // create test index testDir = new File("indexSorter-test-" + System.currentTimeMillis()); if (!testDir.mkdirs()) { throw new Exception("Can't create test dir " + testDir.toString()); } LOG.info("Creating test index: " + testDir.getAbsolutePath()); File plain = new File(testDir, INDEX_PLAIN); Directory dir = FSDirectory.getDirectory(plain); IndexWriter writer = new IndexWriter(dir, new NutchDocumentAnalyzer(conf), true); // create test documents for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); for (int k = 0; k < fieldNames.length; k++) { Field f; Store s; Index ix; String val = null; if (fieldNames[k].equals("id")) { s = Store.YES; ix = Index.UN_TOKENIZED; val = String.valueOf(i); } else if (fieldNames[k].equals("host")) { s = Store.YES; ix = Index.UN_TOKENIZED; val = "www.example" + i + ".com"; } else if (fieldNames[k].equals("site")) { s = Store.NO; ix = Index.UN_TOKENIZED; val = "www.example" + i + ".com"; } else if (fieldNames[k].equals("content")) { s = Store.NO; ix = Index.TOKENIZED; val = "This is the content of the " + i + "-th document."; } else if (fieldNames[k].equals("boost")) { s = Store.YES; ix = Index.NO; // XXX note that this way we ensure different values of encoded boost // XXX note also that for this reason we can't reliably test more than // XXX 255 documents. float boost = Similarity.decodeNorm((byte) (i + 1)); val = String.valueOf(boost); doc.setBoost(boost); } else { s = Store.YES; ix = Index.TOKENIZED; if (fieldNames[k].equals("anchor")) { val = "anchors to " + i + "-th page."; } else if (fieldNames[k].equals("url")) { val = "http://www.example" + i + ".com/" + i + ".html"; } } f = new Field(fieldNames[k], val, s, ix); doc.add(f); } writer.addDocument(doc); } writer.optimize(); writer.close(); }
Example 36
Project: partake-master File: EventSearchService.java
/**
* create a lucene document from eventId and event.
*/
private Document makeDocument(Event event, List<EventTicket> tickets) {
StringBuilder builder = new StringBuilder();
builder.append(event.getTitle()).append(" ");
builder.append(event.getSummary()).append(" ");
builder.append(event.getAddress()).append(" ");
builder.append(event.getPlace()).append(" ");
builder.append(Util.removeTags(event.getDescription()));
long beginTime = event.getBeginDate().getTime();
long deadlineTime = event.acceptsSomeTicketsTill(tickets).getTime();
Document doc = new Document();
doc.add(new Field("ID", event.getId(), Store.YES, Index.NOT_ANALYZED));
doc.add(new Field("CATEGORY", event.getCategory(), Store.NO, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS));
doc.add(new Field("CREATED-AT", TimeUtil.getTimeString(event.getCreatedAt().getTime()), Store.NO, Index.NOT_ANALYZED));
doc.add(new Field("BEGIN-TIME", TimeUtil.getTimeString(beginTime), Store.NO, Index.NOT_ANALYZED));
doc.add(new Field("DEADLINE-TIME", TimeUtil.getTimeString(deadlineTime), Store.NO, Index.NOT_ANALYZED));
doc.add(new Field("TITLE", event.getTitle(), Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS));
doc.add(new Field("CONTENT", builder.toString(), Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS));
return doc;
}
Example 37
Project: RSSOwl-master File: SearchDocument.java
/**
* Creates a new <code>Field</code> from the given {@link IPerson}.
*
* @param fieldConstant the constant identifying the target field
* @param person the value of the field
* @param store one of the {@link Store} constants.
* @param index one of the {@link Index} constants.
* @return Field the {@link Field} that can be used for indexing.
*/
protected Field createPersonField(int fieldConstant, IPerson person, Store store, Index index) {
if (person == null)
return null;
/* Add Name and EMail */
if (person.getName() != null && person.getEmail() != null) {
return createStringField(fieldConstant, person.getName() + " " + person.getEmail().toString(), store, index); //$NON-NLS-1$
}
/* Add Name if present */
if (person.getName() != null)
return createStringField(fieldConstant, person.getName(), store, index);
/* Add EMail if present */
else if (person.getEmail() != null)
return createURIField(fieldConstant, person.getEmail().toString(), store, Index.UN_TOKENIZED);
return null;
}
Example 38
Project: sensei-master File: SenseiSchema.java
public static SenseiSchema build(JSONObject schemaObj) throws JSONException, ConfigurationException { SenseiSchema schema = new SenseiSchema(); schema.setSchemaObj(schemaObj); schema._fieldDefMap = new HashMap<String, FieldDefinition>(); JSONObject tableElem = schemaObj.optJSONObject("table"); if (tableElem == null) { throw new ConfigurationException("empty schema"); } schema._uidField = tableElem.getString("uid"); schema._deleteField = tableElem.optString("delete-field", ""); schema._skipField = tableElem.optString("skip-field", ""); schema._srcDataStore = tableElem.optString("src-data-store", ""); schema._srcDataField = tableElem.optString("src-data-field", "src_data"); schema._compressSrcData = tableElem.optBoolean("compress-src-data", true); JSONArray columns = tableElem.optJSONArray("columns"); int count = 0; if (columns != null) { count = columns.length(); } for (int i = 0; i < count; ++i) { JSONObject column = columns.getJSONObject(i); try { String n = column.getString("name"); String t = column.getString("type"); String frm = column.optString("from"); FieldDefinition fdef = new FieldDefinition(); fdef.formatter = null; fdef.fromField = frm.length() > 0 ? frm : n; fdef.isMeta = true; fdef.isMulti = column.optBoolean("multi"); fdef.isActivity = column.optBoolean("activity"); fdef.name = n; String delimString = column.optString("delimiter"); if (delimString != null && delimString.trim().length() > 0) { fdef.delim = delimString; } fdef.hasWildCards = column.optBoolean("wildcard"); if (fdef.hasWildCards) { Assert.isTrue(fdef.fromField.equals(fdef.name), "Cannot have a different \"from\" field with wildcards"); fdef.wildCardPattern = Pattern.compile(fdef.name); } schema._fieldDefMap.put(n, fdef); if (t.equals("int")) { MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(int.class); String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType); fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US)); fdef.type = int.class; } else if (t.equals("short")) { MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(short.class); String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType); fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US)); fdef.type = int.class; } else if (t.equals("long")) { MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(long.class); String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType); fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US)); fdef.type = long.class; } else if (t.equals("float")) { MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(float.class); String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType); fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US)); fdef.type = double.class; } else if (t.equals("double")) { MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(double.class); String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType); fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US)); fdef.type = double.class; } else if (t.equals("char")) { fdef.formatter = null; } else if (t.equals("string")) { fdef.formatter = null; } else if (t.equals("boolean")) { MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(boolean.class); String formatString = 
DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType); fdef.type = boolean.class; } else if (t.equals("date")) { String f = ""; try { f = column.optString("format"); } catch (Exception ex) { logger.error(ex.getMessage(), ex); } if (f.isEmpty()) throw new ConfigurationException("Date format cannot be empty."); fdef.formatter = new SimpleDateFormat(f); fdef.type = Date.class; } else if (t.equals("text")) { fdef.isMeta = false; String idxString = column.optString("index", null); String storeString = column.optString("store", null); String tvString = column.optString("termvector", null); Index idx = idxString == null ? Index.ANALYZED : DefaultSenseiInterpreter.INDEX_VAL_MAP.get(idxString.toUpperCase()); Store store = storeString == null ? Store.NO : DefaultSenseiInterpreter.STORE_VAL_MAP.get(storeString.toUpperCase()); TermVector tv = tvString == null ? TermVector.NO : DefaultSenseiInterpreter.TV_VAL_MAP.get(tvString.toUpperCase()); if (idx == null || store == null || tv == null) { throw new ConfigurationException("Invalid indexing parameter specification"); } IndexSpec indexingSpec = new IndexSpec(); indexingSpec.store = store; indexingSpec.index = idx; indexingSpec.tv = tv; fdef.textIndexSpec = indexingSpec; } } catch (Exception e) { throw new ConfigurationException("Error parsing schema: " + column, e); } } JSONArray facetsList = schemaObj.optJSONArray("facets"); if (facetsList != null) { for (int i = 0; i < facetsList.length(); i++) { JSONObject facet = facetsList.optJSONObject(i); if (facet != null) { schema.facets.add(FacetDefinition.valueOf(facet)); } } } return schema; }
Example 39
Project: Solandra-master File: IndexReader.java View source code |
public Document document(int docNum, FieldSelector selector) throws CorruptIndexException, IOException { Document doc = getDocumentCache().get(docNum); if (doc != null) { logger.debug("Found doc in cache"); return doc; } String docId = getDocIndexToDocId().get(docNum); if (docId == null) return null; Map<Integer, String> keyMap = new HashMap<Integer, String>(); keyMap.put(docNum, CassandraUtils.hashKey(indexName + CassandraUtils.delimeter + docId)); List<byte[]> fieldNames = null; // Parallel for Solr Performance if (selector != null && selector instanceof SolandraFieldSelector) { List<Integer> otherDocIds = ((SolandraFieldSelector) selector).getOtherDocsToCache(); fieldNames = ((SolandraFieldSelector) selector).getFieldNames(); logger.debug("Going to bulk load " + otherDocIds.size() + " documents"); for (Integer otherDocNum : otherDocIds) { if (otherDocNum == docNum) continue; if (getDocumentCache().containsKey(otherDocNum)) continue; String docKey = getDocIndexToDocId().get(otherDocNum); if (docKey == null) continue; keyMap.put(otherDocNum, CassandraUtils.hashKey(indexName + CassandraUtils.delimeter + docKey)); } } ColumnParent columnParent = new ColumnParent(); columnParent.setColumn_family(CassandraUtils.docColumnFamily); SlicePredicate slicePredicate = new SlicePredicate(); if (fieldNames == null || fieldNames.size() == 0) { // get all columns ( except this skips meta info ) slicePredicate.setSlice_range(new SliceRange(new byte[] {}, CassandraUtils.finalToken.getBytes("UTF-8"), false, 100)); } else { slicePredicate.setColumn_names(fieldNames); } long start = System.currentTimeMillis(); try { Map<String, List<ColumnOrSuperColumn>> docMap = client.multiget_slice(CassandraUtils.keySpace, Arrays.asList(keyMap.values().toArray(new String[] {})), columnParent, slicePredicate, ConsistencyLevel.ONE); for (Map.Entry<Integer, String> key : keyMap.entrySet()) { List<ColumnOrSuperColumn> cols = docMap.get(key.getValue()); if (cols == null) { logger.warn("Missing document in multiget_slice for: " + key.getValue()); continue; } Document cacheDoc = new Document(); for (ColumnOrSuperColumn col : cols) { Field field = null; String fieldName = new String(col.column.name); //Incase __META__ slips through if (Arrays.equals(col.column.name, CassandraUtils.documentMetaField.getBytes())) { logger.debug("Filtering out __META__ key"); continue; } byte[] value; if (col.column.value[col.column.value.length - 1] != Byte.MAX_VALUE && col.column.value[col.column.value.length - 1] != Byte.MIN_VALUE) { throw new CorruptIndexException("Lucandra field is not properly encoded: " + docId + "(" + fieldName + ")"); } else if (col.column.value[col.column.value.length - 1] == Byte.MAX_VALUE) { //Binary value = new byte[col.column.value.length - 1]; System.arraycopy(col.column.value, 0, value, 0, col.column.value.length - 1); field = new Field(fieldName, value, Store.YES); cacheDoc.add(field); } else if (col.column.value[col.column.value.length - 1] == Byte.MIN_VALUE) { //String value = new byte[col.column.value.length - 1]; System.arraycopy(col.column.value, 0, value, 0, col.column.value.length - 1); //Check for multi-fields String fieldString = new String(value, "UTF-8"); if (fieldString.indexOf(CassandraUtils.delimeter) >= 0) { StringTokenizer tok = new StringTokenizer(fieldString, CassandraUtils.delimeter); while (tok.hasMoreTokens()) { field = new Field(fieldName, tok.nextToken(), Store.YES, Index.ANALYZED); cacheDoc.add(field); } } else { field = new Field(fieldName, fieldString, Store.YES, Index.ANALYZED); 
cacheDoc.add(field); } } } //Mark the required doc if (key.getKey().equals(docNum)) doc = cacheDoc; getDocumentCache().put(key.getKey(), cacheDoc); } long end = System.currentTimeMillis(); logger.debug("Document read took: " + (end - start) + "ms"); return doc; } catch (Exception e) { throw new IOException(e.getLocalizedMessage()); } }
Example 40
Project: step-master File: FieldConfig.java View source code |
/**
* Gets a numerical field
*
* @param fieldValue the field value
* @return the field
*/
public Fieldable getField(final Number fieldValue) {
final NumericField field = new NumericField(this.name, this.store, this.index == Index.ANALYZED);
if (fieldValue instanceof Double) {
field.setDoubleValue((Double) fieldValue);
} else if (fieldValue instanceof Integer) {
field.setIntValue((Integer) fieldValue);
} else if (fieldValue instanceof Long) {
field.setLongValue((Long) fieldValue);
} else {
throw new StepInternalException("Unsupported type: " + fieldValue.getClass());
}
return field;
}
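
Note that Example 40 never passes a Field.Index value to the numeric field: NumericField only takes a boolean that says whether the number is indexed, which the code derives from the comparison this.index == Index.ANALYZED. The following minimal sketch (assuming Lucene 3.x; the class and field names are invented for illustration) shows the same split between a text field driven by the Index enum and a numeric field driven by that boolean.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericField;

public class NumericVsTextFieldSketch {

    // Builds a document with one text field and one numeric field.
    static Document build(String title, long modified, Index textIndex) {
        Document doc = new Document();
        // Text content: the Index enum controls whether the value is tokenized.
        doc.add(new Field("title", title, Store.YES, textIndex));
        // Numeric content: only a boolean "index or not", derived from the enum as in Example 40.
        NumericField modifiedField = new NumericField("modified", Store.YES, textIndex == Index.ANALYZED);
        modifiedField.setLongValue(modified);
        doc.add(modifiedField);
        return doc;
    }

    public static void main(String[] args) {
        System.out.println(build("release notes", System.currentTimeMillis(), Index.ANALYZED));
    }
}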
Example 41
Project: recommenders-master File: FileSnippetRepository.java View source code |
private void indexSnippet(IndexWriter writer, ISnippet snippet, String path) throws IOException { Document doc = new Document(); doc.add(new Field(F_PATH, path, Store.YES, Index.NO)); doc.add(new Field(F_UUID, snippet.getUuid().toString(), Store.NO, Index.NOT_ANALYZED)); String name = snippet.getName(); doc.add(new Field(F_NAME, name, Store.YES, Index.ANALYZED)); String description = snippet.getDescription(); doc.add(new Field(F_DESCRIPTION, description, Store.YES, Index.ANALYZED)); for (String tag : snippet.getTags()) { doc.add(new Field(F_TAG, tag, Store.YES, Index.ANALYZED_NO_NORMS)); } for (String extraSearchTerm : snippet.getExtraSearchTerms()) { doc.add(new Field(F_EXTRA_SEARCH_TERM, extraSearchTerm, Store.YES, Index.ANALYZED)); } for (Location location : expandLocation(snippet.getLocation())) { Field field = new Field(F_LOCATION, getIndexString(location), Store.NO, Index.NOT_ANALYZED); field.setBoost(0); doc.add(field); } for (ProjectCoordinate dependency : snippet.getNeededDependencies()) { doc.add(new Field(F_DEPENDENCY, getDependencyString(dependency), Store.YES, Index.ANALYZED)); } if (snippet.getLocation() == Location.FILE) { if (snippet.getFilenameRestrictions().isEmpty()) { doc.add(new Field(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION, Store.NO, Index.NOT_ANALYZED)); } for (String restriction : snippet.getFilenameRestrictions()) { doc.add(new Field(F_FILENAME_RESTRICTION, restriction.toLowerCase(), Store.NO, Index.NOT_ANALYZED)); } } else { doc.add(new Field(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION, Store.NO, Index.NOT_ANALYZED)); } writer.addDocument(doc); }
Example 42
Project: AdServing-master File: GeoIpIndex.java View source code |
public void importIPs(String path) {
    try {
        if (!path.endsWith("/")) {
            path += "/";
        }
        Directory directory = FSDirectory.open(new File(db, "geo"));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, new StandardAnalyzer(Version.LUCENE_31));
        IndexWriter writer = new IndexWriter(directory, config);
        List<String> cnames = new ArrayList<String>();
        BufferedReader br = new BufferedReader(new FileReader(path + "GeoLiteCity-Blocks.csv"));
        CSVReader reader = new CSVReader(br, ',', '\"', 2);
        // Scanner scanner = new Scanner(new FileReader(filename));
        // boolean firstLine = true;
        int count = 0;
        String[] values;
        Map<String, Map<String, String>> locations = getLocations(path);
        while ((values = reader.readNext()) != null) {
            String ipfrom = values[0];
            String ipto = values[1];
            String locid = values[2];
            Map<String, String> location = locations.get(locid);
            Document doc = new Document();
            doc.add(new Field("city", location.get("city"), Store.YES, Index.ANALYZED));
            doc.add(new Field("postalcode", location.get("postalcode"), Store.YES, Index.ANALYZED));
            doc.add(new Field("country", location.get("country"), Store.YES, Index.ANALYZED));
            doc.add(new Field("region", location.get("region"), Store.YES, Index.ANALYZED));
            doc.add(new Field("latitude", location.get("latitude"), Store.YES, Index.ANALYZED));
            doc.add(new Field("longitude", location.get("longitude"), Store.YES, Index.ANALYZED));
            NumericField ipfromField = new NumericField("ipfrom", 8, Store.YES, true);
            ipfromField.setLongValue(Long.parseLong(ipfrom.trim()));
            doc.add(ipfromField);
            NumericField iptoField = new NumericField("ipto", 8, Store.YES, true);
            iptoField.setLongValue(Long.parseLong(ipto.trim()));
            doc.add(iptoField);
            // doc.add(new NumericField("ipto", ipto, Store.YES, Index.ANALYZED));
            writer.addDocument(doc);
            count++;
            if (count % 100 == 0) {
                writer.commit();
            }
        }
        System.out.println(count + " Einträge importiert");
        writer.optimize();
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 43
Project: ClusterBasedRelevanceFeedback-master File: PersistentSnapshotDeletionPolicy.java View source code |
/**
 * Persists all snapshots information. If the given id and segment are not
 * null, it persists their information as well.
 */
private void persistSnapshotInfos(String id, String segment) throws IOException {
    writer.deleteAll();
    Document d = new Document();
    d.add(new Field(SNAPSHOTS_ID, "", Store.YES, Index.NO));
    for (Entry<String, String> e : super.getSnapshots().entrySet()) {
        d.add(new Field(e.getKey(), e.getValue(), Store.YES, Index.NO));
    }
    if (id != null) {
        d.add(new Field(id, segment, Store.YES, Index.NO));
    }
    writer.addDocument(d);
    writer.commit();
}
Example 44
Project: elasticsearch-server-master File: TypeParsers.java View source code |
public static Field.Index parseIndex(String fieldName, String index) throws MapperParsingException {
    index = Strings.toUnderscoreCase(index);
    if ("no".equals(index)) {
        return Field.Index.NO;
    } else if ("not_analyzed".equals(index)) {
        return Field.Index.NOT_ANALYZED;
    } else if ("analyzed".equals(index)) {
        return Field.Index.ANALYZED;
    } else {
        throw new MapperParsingException("Wrong value for index [" + index + "] for field [" + fieldName + "]");
    }
}
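
The value returned by a parser like the one in Example 44 is normally handed straight to a Field constructor. A short sketch of that hand-off (assuming Lucene 3.x; the field names and the simplified parse helper are illustrative, not Elasticsearch's actual mapper code):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class IndexModeSketch {

    // Simplified stand-in for Example 44's parseIndex.
    static Field.Index parse(String mode) {
        if ("no".equals(mode)) return Field.Index.NO;
        if ("not_analyzed".equals(mode)) return Field.Index.NOT_ANALYZED;
        if ("analyzed".equals(mode)) return Field.Index.ANALYZED;
        throw new IllegalArgumentException("Unknown index mode: " + mode);
    }

    public static void main(String[] args) {
        Document doc = new Document();
        // "analyzed": tokenized full text; "not_analyzed": a single exact term; "no": stored only.
        doc.add(new Field("body", "full text to tokenize", Field.Store.NO, parse("analyzed")));
        doc.add(new Field("status", "OPEN", Field.Store.YES, parse("not_analyzed")));
        doc.add(new Field("source", "{\"raw\":true}", Field.Store.YES, parse("no")));
        System.out.println(doc);
    }
}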
Example 45
Project: extension-aws-master File: RelatedKeywordLuceneSearcher.java View source code |
public void indexWord(String inWord, HitTracker inResults, Searcher inTypeSearcher) throws Exception { if (inWord == null || inWord.equals("")) { return; } HashSet<String> terms = new HashSet<String>(); int count = 0; for (Object o : inResults) { count++; if (count > 50) { //Dont look over the entire result set break; } String keywords = inResults.getValue(o, "keywords"); if (keywords != null) { for (String keyword : keywords.split(" ")) { keyword = keyword.trim(); if (keyword.length() > 1 && !keyword.equals(inWord)) { terms.add(keyword); } } } if (terms.size() > 9) break; } //Now check for categories? count = 0; if (terms.size() < 9) { for (Object o : inResults) { count++; if (count > 50) { //Dont look over the entire result set break; } String catalogid = inResults.getValue(o, "catalogid"); String categoryid = inResults.getValue(o, "category"); if (catalogid != null && categoryid != null && !"index".equals(categoryid)) { CategoryArchive archive = getMediaArchive(catalogid).getCategoryArchive(); for (String keyword : categoryid.split(" ")) { keyword = keyword.trim(); if (keyword.length() > 0 && !keyword.equals(inWord)) { Category cat = archive.getCategory(keyword); if (cat != null) { keyword = cat.getName(); terms.add(keyword); } } } } if (terms.size() > 9) break; } } Document doc = new Document(); StringBuffer saved = new StringBuffer(); StringBuffer savedenc = new StringBuffer(); //Find out how many asset hits exists for (String synonym : terms) { SearchQuery typeQuery = inTypeSearcher.createSearchQuery(); synonym = synonym.replaceAll("\\(.*?\\)", ""); synonym = synonym.replace("(", "").replace(")", "").replace("-", ""); typeQuery.addStartsWith("description", synonym); typeQuery.setHitsName("relatedkeywords"); try { int hits = inTypeSearcher.search(typeQuery).getTotal(); if (hits > 1) { saved.append(synonym); saved.append(" ("); saved.append(hits); saved.append(")"); saved.append(";"); synonym = synonym.replace(' ', '_').replace(";", " "); savedenc.append(synonym); savedenc.append(" "); } } catch (Exception ex) { log.error(ex); } } // into the index if (saved.length() > 0) { doc.add(new Field("synonyms", saved.toString(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); } doc.add(new Field("synonymsenc", savedenc.toString(), Store.NO, Index.ANALYZED_NO_NORMS)); doc.add(new Field("word", inWord.replace(" ", "_"), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); /* Timestamp */ String timestamp = DateTools.dateToString(new Date(), Resolution.SECOND); doc.add(new Field("timestamp", timestamp, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); getIndexWriter().addDocument(doc, getAnalyzer()); clearIndex(); }
Example 46
Project: guj.com.br-master File: LuceneIndexer.java View source code |
private Document createDocument(Post p) {
    Document d = new Document();
    d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p.getId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p.getForumId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p.getTopicId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p.getUserId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.DATE, this.settings.formatDateTime(p.getTime()), Store.YES, Index.UN_TOKENIZED));
    // We add the subject and message text together because, when searching, we only care about the
    // matches, not where it was performed. The real subject and contents will be fetched from the database
    d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject() + " " + p.getText(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 47
Project: hibernate-hql-parser-master File: ClassBasedLucenePropertyHelper.java View source code |
public boolean isAnalyzed(Class<?> type, String... propertyPath) { EntityIndexBinding entityIndexBinding = getIndexBinding(type); if (isIdentifierProperty(entityIndexBinding, propertyPath)) { return false; } TypeMetadata metadata = getLeafTypeMetadata(type, propertyPath); Index index = metadata.getPropertyMetadataForProperty(propertyPath[propertyPath.length - 1]).getFieldMetadata().iterator().next().getIndex(); return EnumSet.of(Field.Index.ANALYZED, Field.Index.ANALYZED_NO_NORMS).contains(index); }
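
Example 47 treats ANALYZED and ANALYZED_NO_NORMS as the two "analyzed" modes of Field.Index. That check can be isolated into a few lines (a sketch assuming Lucene 3.x):

import java.util.EnumSet;
import org.apache.lucene.document.Field.Index;

public class AnalyzedCheck {

    private static final EnumSet<Index> ANALYZED_MODES =
            EnumSet.of(Index.ANALYZED, Index.ANALYZED_NO_NORMS);

    // True if the given index mode runs the value through an analyzer.
    static boolean isAnalyzed(Index index) {
        return ANALYZED_MODES.contains(index);
    }

    public static void main(String[] args) {
        System.out.println(isAnalyzed(Index.ANALYZED));          // true
        System.out.println(isAnalyzed(Index.ANALYZED_NO_NORMS)); // true
        System.out.println(isAnalyzed(Index.NOT_ANALYZED));      // false
        System.out.println(isAnalyzed(Index.NO));                // false
    }
}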
Example 48
Project: hipergate-master File: Indexer.java View source code |
// rebuild public static void add(IndexWriter oIWrt, Map oKeywords, Map oTexts, Map oUnStored) throws ClassNotFoundException, IOException, IllegalArgumentException, NoSuchFieldException, IllegalAccessException, InstantiationException, NullPointerException { String sFieldName; Object oFieldValue; Document oDoc = new Document(); // ******************************************* // Index keywords as stored untokenized fields Iterator oKeys = oKeywords.keySet().iterator(); while (oKeys.hasNext()) { sFieldName = (String) oKeys.next(); oFieldValue = oKeywords.get(sFieldName); if (null == oFieldValue) oFieldValue = ""; if (oFieldValue.getClass().getName().equals("java.util.Date")) oDoc.add(new Field(sFieldName, DateTools.dateToString((Date) oFieldValue, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); else oDoc.add(new Field(sFieldName, (String) oFieldValue, Field.Store.YES, Field.Index.NOT_ANALYZED)); } // wend // ****************************************************** // Index titles, authors, etc. as stored tokenized fields Iterator oTxts = oTexts.keySet().iterator(); while (oTxts.hasNext()) { sFieldName = (String) oTxts.next(); oFieldValue = oTexts.get(sFieldName); if (null == oFieldValue) oFieldValue = ""; oDoc.add(new Field(sFieldName, (String) oFieldValue, Field.Store.YES, Field.Index.ANALYZED)); } // wend // ********************************************* // Index full texts as unstored tokenized fields Iterator oUnStor = oUnStored.keySet().iterator(); while (oUnStor.hasNext()) { sFieldName = (String) oUnStor.next(); oFieldValue = oUnStored.get(sFieldName); if (null == oFieldValue) oFieldValue = ""; oDoc.add(new Field(sFieldName, (String) oFieldValue, Field.Store.NO, Field.Index.ANALYZED)); } // wend oIWrt.addDocument(oDoc); }
Example 49
Project: IPAddressZipCodeStateCountryLuceneJavaSearch-master File: IndexIpAddressTask.java View source code |
// while loop continues /** * Adds IPSearch data to the index. * * @param bean * the bean it needs to index * @throws IOException */ public void addLocation(IpSearchCityBean bean) throws IOException { Document doc = new Document(); doc.add(new NumericField("ip_start", Field.Store.YES, true).setLongValue(bean.getIpStart())); doc.add(new NumericField("ip_end", Field.Store.YES, true).setLongValue(bean.getIpEnd())); doc.add(new NumericField("ip_start_a", Field.Store.NO, true).setLongValue((bean.getIpStart() / 16777216l) % 256)); doc.add(new NumericField("ip_start_b", Field.Store.NO, true).setLongValue((bean.getIpStart() / 65536) % 256)); doc.add(new NumericField("ip_start_c", Field.Store.NO, true).setLongValue((bean.getIpStart() / 256) % 256)); doc.add(new NumericField("ip_start_d", Field.Store.NO, true).setLongValue((bean.getIpStart()) % 256)); doc.add(new NumericField("ip_end_a", Field.Store.NO, true).setLongValue((bean.getIpEnd() / 16777216l) % 256)); doc.add(new NumericField("ip_end_b", Field.Store.NO, true).setLongValue((bean.getIpEnd() / 65536) % 256)); doc.add(new NumericField("ip_end_c", Field.Store.NO, true).setLongValue((bean.getIpEnd() / 256) % 256)); doc.add(new NumericField("ip_end_d", Field.Store.NO, true).setLongValue((bean.getIpEnd()) % 256)); doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(bean.getLat()), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(bean.getLon()), Field.Store.YES, Field.Index.NOT_ANALYZED)); // some of these fields have a chance of being null addToDoc(doc, "city", bean.getCity(), Field.Store.YES, Field.Index.ANALYZED); addToDoc(doc, "zip_code", bean.getZipCode(), Field.Store.YES, Field.Index.ANALYZED); addToDoc(doc, "country_code", bean.getCountryCode(), Field.Store.YES, Field.Index.ANALYZED); addToDoc(doc, "country_name", bean.getCountryName(), Field.Store.YES, Field.Index.ANALYZED); addToDoc(doc, "metro_code", bean.getMetroCode(), Field.Store.YES, Field.Index.ANALYZED); addToDoc(doc, "region_code", bean.getRegionCode(), Field.Store.YES, Field.Index.ANALYZED); addToDoc(doc, "region_name", bean.getRegionName(), Field.Store.YES, Field.Index.ANALYZED); IProjector projector = new SinusoidalProjector(); int startTier = 5; int endTier = 15; for (; startTier <= endTier; startTier++) { CartesianTierPlotter ctp; ctp = new CartesianTierPlotter(startTier, projector, tierPrefix); double boxId = ctp.getTierBoxId(bean.getLat(), bean.getLon()); doc.add(new Field(ctp.getTierFieldName(), NumericUtils.doubleToPrefixCoded(boxId), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); } writer.addDocument(doc); }
Example 50
Project: jabylon-master File: PropertyFileAnalyzer.java View source code |
public List<Document> createDocuments(PropertyFileDescriptor descriptor) { PropertyFile file = descriptor.loadProperties(); List<Document> documents = new ArrayList<Document>(file.getProperties().size()); Map<String, Property> masterProperties = Collections.emptyMap(); if (!descriptor.isMaster()) { PropertyFile masterFile = descriptor.getMaster().loadProperties(); masterProperties = masterFile.asMap(); } EList<Property> properties = file.getProperties(); for (Property property : properties) { Document doc = new Document(); ProjectLocale locale = descriptor.getProjectLocale(); ProjectVersion version = locale.getParent(); Project project = version.getParent(); Field projectField = new Field(QueryService.FIELD_PROJECT, project.getName(), Store.YES, Index.NOT_ANALYZED); doc.add(projectField); Field versionField = new Field(QueryService.FIELD_VERSION, version.getName(), Store.YES, Index.NOT_ANALYZED); doc.add(versionField); if (locale.isMaster()) { //mark the master files specifically Field localeField = new Field(QueryService.FIELD_LOCALE, QueryService.MASTER, Store.YES, Index.NOT_ANALYZED); doc.add(localeField); } else if (locale.getLocale() != null) { Field localeField = new Field(QueryService.FIELD_LOCALE, locale.getLocale().toString(), Store.YES, Index.NOT_ANALYZED); doc.add(localeField); //only add the master to a localized document if (masterProperties.get(property.getKey()) != null && masterProperties.get(property.getKey()).getValue() != null) { Field masterValueField = new Field(QueryService.FIELD_MASTER_VALUE, masterProperties.get(property.getKey()).getValue(), Store.YES, Index.ANALYZED); doc.add(masterValueField); } if (masterProperties.get(property.getKey()) != null && masterProperties.get(property.getKey()).getComment() != null) { Field masterCommentField = new Field(QueryService.FIELD_MASTER_COMMENT, masterProperties.get(property.getKey()).getComment(), Store.YES, Index.ANALYZED); doc.add(masterCommentField); } } Field uriField = new Field(QueryService.FIELD_URI, descriptor.getLocation().toString(), Store.YES, Index.NOT_ANALYZED); doc.add(uriField); Field pathField = new Field(QueryService.FIELD_FULL_PATH, descriptor.fullPath().toString(), Store.YES, Index.NOT_ANALYZED); doc.add(pathField); CDOID cdoID = descriptor.cdoID(); StringBuilder builder = new StringBuilder(); CDOIDUtil.write(builder, cdoID); Field idField = new Field(QueryService.FIELD_CDO_ID, builder.toString(), Store.YES, Index.NOT_ANALYZED); doc.add(idField); Field comment = new Field(QueryService.FIELD_COMMENT, nullSafe(property.getComment()), Store.YES, Index.ANALYZED); doc.add(comment); Field key = new Field(QueryService.FIELD_KEY, nullSafe(property.getKey()), Store.YES, Index.NOT_ANALYZED); doc.add(key); Field analyzedKey = new Field(QueryService.FIELD_KEY, nullSafe(property.getKey()), Store.YES, Index.ANALYZED); doc.add(analyzedKey); Field value = new Field(QueryService.FIELD_VALUE, nullSafe(property.getValue()), Store.YES, Index.ANALYZED); doc.add(value); String templateLocation = descriptor.getMaster() == null ? "" : descriptor.getMaster().getLocation().toString(); Field templateLoc = new Field(QueryService.FIELD_TEMPLATE_LOCATION, templateLocation, Store.YES, Index.NOT_ANALYZED); doc.add(templateLoc); documents.add(doc); } return documents; }
Example 51
Project: jcr-master File: TestChangesHolder.java View source code |
public void testSerNDeserializeDocs() throws Exception { //System.out.println("### testSerNDeserializeDocs ###"); Collection<Document> add = new ArrayList<Document>(3); Document doc = new Document(); doc.setBoost(2.0f); Field fieldFull = new Field("full", "full-value", Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS); fieldFull.setBoost(2.0f); fieldFull.setOmitTermFreqAndPositions(true); doc.add(fieldFull); Field fieldEmpty = new Field("empty", "empty-value", Store.NO, Index.NOT_ANALYZED, TermVector.NO); doc.add(fieldEmpty); add.add(doc); doc = new Document(); doc.add(fieldFull); add.add(doc); doc = new Document(); doc.add(fieldEmpty); add.add(doc); ByteArrayOutputStream baos = null; int total = 100000; long start; Collection<String> remove = Collections.emptyList(); Collection<Document> addResult = null; start = System.currentTimeMillis(); for (int i = 0; i < total; i++) { baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); oos.writeObject(new ChangesHolder(remove, add)); oos.close(); } //System.out.println("Custom serialization: total time = " + (System.currentTimeMillis() - start) + ", size = " + baos.size()); start = System.currentTimeMillis(); for (int i = 0; i < total; i++) { ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray())); addResult = ((ChangesHolder) ois.readObject()).getAdd(); ois.close(); } //System.out.println("Custom deserialization: total time = " + (System.currentTimeMillis() - start)); checkDocs(addResult); start = System.currentTimeMillis(); for (int i = 0; i < total; i++) { baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); oos.writeObject(add); oos.close(); } //System.out.println("Native serialization: total time = " + (System.currentTimeMillis() - start) + ", size = " + baos.size()); start = System.currentTimeMillis(); for (int i = 0; i < total; i++) { ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray())); addResult = (Collection<Document>) ois.readObject(); ois.close(); } //System.out.println("Native deserialization: total time = " + (System.currentTimeMillis() - start)); checkDocs(addResult); }
Example 52
Project: jforum2-master File: LuceneIndexer.java View source code |
private Document createDocument(Post p) {
    Document d = new Document();
    d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p.getId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p.getForumId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p.getTopicId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p.getUserId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.DATE, this.settings.formatDateTime(p.getTime()), Store.YES, Index.UN_TOKENIZED));
    // We add the subject and message text together because, when searching, we only care about the
    // matches, not where it was performed. The real subject and contents will be fetched from the database
    d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject() + " " + p.getText(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 53
Project: mdrill-master File: FieldTermStack.java View source code |
public static void main(String[] args) throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer);
    Query query = parser.parse("a x:b");
    FieldQuery fieldQuery = new FieldQuery(query, true, false);
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
    Document doc = new Document();
    doc.add(new Field("f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    doc.add(new Field("f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir, true);
    new FieldTermStack(reader, 0, "f", fieldQuery);
    reader.close();
}
Example 54
Project: neo4j-components-svn-master File: SimpleFulltextIndex.java View source code |
private void doIndex(IndexWriter writer, long nodeId, String predicate, Object literal) {
    try {
        Document doc = new Document();
        doc.add(new Field(KEY_ID, String.valueOf(nodeId), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX, getLiteralReader().read(literal), Store.YES, Index.ANALYZED));
        doc.add(new Field(KEY_PREDICATE, predicate, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX_SOURCE, literal.toString(), Store.YES, Index.NOT_ANALYZED));
        writer.addDocument(doc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 55
Project: ocms-master File: ContentTableManagerDBImpl.java View source code |
public void setIndexProp(ContentTable ct) {
    ct.setAllowIndex(1);
    Set<ContentField> fields = ct.getContentFieldsSet();
    if (fields != null) {
        for (ContentField field : fields) {
            String indexType = field.getIndexType();
            String fieldType = field.getFieldType();
            if (!StringUtils.hasText(indexType) || indexType.equalsIgnoreCase(Index.NO.toString())) {
                if (fieldType.equalsIgnoreCase("varchar") || fieldType.equalsIgnoreCase("text")) {
                    field.setIndexType(Index.TOKENIZED.toString());
                } else {
                    field.setIndexType(Index.UN_TOKENIZED.toString());
                }
            }
            field.setStoreType(Store.COMPRESS.toString());
            contentFieldDao.saveContentField(field);
        }
    }
    this.saveContentTable(ct);
}
Example 56
Project: opencms-core-master File: CmsSearchField.java View source code |
/** * Creates a Lucene field with the given name from the configuration and the provided content.<p> * * If no valid content is provided (that is the content is either <code>null</code> or * only whitespace), then no field is created and <code>null</code> is returned.<p> * * @param name the name of the field to create * @param content the content to create the field with * * @return a Lucene field with the given name from the configuration and the provided content */ public Field createField(String name, String content) { if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) { content = getDefaultValue(); } if (content != null) { Index index = Field.Index.NO; if (isIndexed()) { if (isTokenizedAndIndexed()) { index = Field.Index.ANALYZED; } else { index = Field.Index.NOT_ANALYZED; } } Field.Store store = Field.Store.NO; if (isStored() || isCompressed()) { store = Field.Store.YES; } Field result = new Field(name, content, store, index); if (getBoost() != BOOST_DEFAULT) { result.setBoost(getBoost()); } return result; } return null; }
Example 57
Project: opencms-master File: CmsSearchField.java View source code |
/** * Creates a Lucene field with the given name from the configuration and the provided content.<p> * * If no valid content is provided (that is the content is either <code>null</code> or * only whitespace), then no field is created and <code>null</code> is returned.<p> * * @param name the name of the field to create * @param content the content to create the field with * * @return a Lucene field with the given name from the configuration and the provided content */ public Field createField(String name, String content) { if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) { content = getDefaultValue(); } if (content != null) { Index index = Field.Index.NO; if (isIndexed()) { if (isTokenizedAndIndexed()) { index = Field.Index.ANALYZED; } else { index = Field.Index.NOT_ANALYZED; } } Field.Store store = Field.Store.NO; if (isStored() || isCompressed()) { store = Field.Store.YES; } Field result = new Field(name, content, store, index); if (getBoost() != BOOST_DEFAULT) { result.setBoost(getBoost()); } return result; } return null; }
Example 58
Project: querydsl-master File: LuceneQueryTest.java View source code |
@Test // FIXME @Ignore public void sorted_by_different_locales() throws Exception { Document d1 = new Document(); Document d2 = new Document(); Document d3 = new Document(); d1.add(new Field("sort", "aÄ", Store.YES, Index.NOT_ANALYZED)); d2.add(new Field("sort", "ab", Store.YES, Index.NOT_ANALYZED)); d3.add(new Field("sort", "aa", Store.YES, Index.NOT_ANALYZED)); writer = createWriter(idx); writer.addDocument(d1); writer.addDocument(d2); writer.addDocument(d3); writer.close(); IndexReader reader = IndexReader.open(idx); searcher = new IndexSearcher(reader); query = new LuceneQuery(new LuceneSerializer(true, true, Locale.ENGLISH), searcher); assertEquals(3, query.fetch().size()); List<Document> results = query.where(sort.startsWith("a")).orderBy(sort.asc()).fetch(); assertEquals(3, results.size()); assertEquals("aa", results.get(0).getField("sort").stringValue()); assertEquals("aÄ", results.get(1).getField("sort").stringValue()); assertEquals("ab", results.get(2).getField("sort").stringValue()); query = new LuceneQuery(new LuceneSerializer(true, true, new Locale("fi", "FI")), searcher); results = query.where(sort.startsWith("a")).orderBy(sort.asc()).fetch(); assertEquals("aa", results.get(0).getField("sort").stringValue()); assertEquals("ab", results.get(1).getField("sort").stringValue()); assertEquals("aÄ", results.get(2).getField("sort").stringValue()); }
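
Example 58 sorts on a field indexed with Index.NOT_ANALYZED, which gives Lucene exactly one untokenized term per document to order by. A self-contained sketch of the same idea (assuming Lucene 3.x; the class and field names are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class SortOnNotAnalyzedField {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
        for (String value : new String[] { "banana", "apple", "cherry" }) {
            Document doc = new Document();
            // One untokenized term per document is what field sorting expects.
            doc.add(new Field("sort", value, Store.YES, Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.close();

        IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir));
        Sort sort = new Sort(new SortField("sort", SortField.STRING));
        ScoreDoc[] hits = searcher.search(new MatchAllDocsQuery(), null, 10, sort).scoreDocs;
        for (ScoreDoc hit : hits) {
            System.out.println(searcher.doc(hit.doc).get("sort")); // apple, banana, cherry
        }
        searcher.close();
    }
}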
Example 59
Project: sakai-cle-master File: IndexUpdateTransactionImpl.java View source code |
/* * (non-Javadoc) * * @see org.sakaiproject.search.transaction.impl.IndexTransactionImpl#doBeforePrepare() */ @Override protected void doBeforePrepare() throws IndexTransactionException { try { transactionId = manager.getSequence().getNextId(); Document savepointMarker = new Document(); savepointMarker.add(new Field("_txid", String.valueOf(transactionId), Store.YES, Index.NOT_ANALYZED)); savepointMarker.add(new Field("_txts", String.valueOf(System.currentTimeMillis()), Store.YES, Index.NOT_ANALYZED)); savepointMarker.add(new Field("_worker", String.valueOf(Thread.currentThread().getName()), Store.YES, Index.NOT_ANALYZED)); getInternalIndexWriter(); indexWriter.addDocument(savepointMarker); indexWriter.close(); indexWriter = null; // save all items searchBuilderItemSerializer.saveTransactionList(tempIndex, getItems()); } catch (Exception ex) { throw new IndexTransactionException("Failed to prepare transaction", ex); } super.doBeforePrepare(); }
Example 60
Project: swf-all-master File: LuceneIndexer.java View source code |
private Document getDocument(Record r) throws IOException { if (!hasIndexedFields()) { return null; } Document doc = new Document(); boolean addedFields = false; for (String columnName : indexedColumns) { ModelReflector<?> reflector = Database.getTable(tableName).getReflector(); String fieldName = reflector.getFieldName(columnName); Object value = reflector.get(r, fieldName); if (!ObjectUtil.isVoid(value)) { TypeRef<?> ref = Database.getJdbcTypeHelper(reflector.getPool()).getTypeRef(reflector.getFieldGetter(fieldName).getReturnType()); TypeConverter<?> converter = ref.getTypeConverter(); if (!ref.isBLOB()) { addedFields = true; if (Reader.class.isAssignableFrom(ref.getJavaClass())) { doc.add(new Field(fieldName, converter.toString(value), Field.Store.NO, Index.ANALYZED)); } else { Class<? extends Model> referredModelClass = indexedReferenceColumns.get(columnName); String sValue = converter.toString(value); if (ref.isNumeric() && referredModelClass != null) { ModelReflector<?> referredModelReflector = ModelReflector.instance(referredModelClass); Model referred = Database.getTable(referredModelClass).get(((Number) converter.valueOf(value)).intValue()); if (referred != null) { doc.add(new Field(fieldName.substring(0, fieldName.length() - "_ID".length()), StringUtil.valueOf(referred.getRawRecord().get(referredModelReflector.getDescriptionField())), Field.Store.YES, Field.Index.ANALYZED)); } } doc.add(new Field(fieldName, sValue, Field.Store.YES, Field.Index.ANALYZED)); } } } else { addedFields = true; if (indexedReferenceColumns.containsKey(fieldName)) { doc.add(new Field(fieldName.substring(0, fieldName.length() - "_ID".length()), "NULL", Field.Store.YES, Field.Index.ANALYZED)); } doc.add(new Field(fieldName, "NULL", Field.Store.YES, Field.Index.ANALYZED)); } } if (addedFields) { doc.add(new Field("ID", StringUtil.valueOf(r.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED)); } else { doc = null; } return doc; }
Example 61
Project: trydone-master File: LuceneIndexer.java View source code |
private Document createDocument(Post p) {
    Document d = new Document();
    d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p.getId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p.getForumId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p.getTopicId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p.getUserId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.DATE, this.settings.formatDateTime(p.getTime()), Store.YES, Index.UN_TOKENIZED));
    // We add the subject and message text together because, when searching, we only care about the
    // matches, not where it was performed. The real subject and contents will be fetched from the database
    d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject() + " " + p.getText(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 62
Project: arastreju-master File: ArastrejuIndex.java View source code |
// ----------------------------------------------------
private Document createDocument(ResourceNode node) {
Document doc = new Document();
doc.add(new Field(IndexFields.QUALIFIED_NAME, node.toURI(), Store.YES, Index.ANALYZED));
Set<Statement> asserted = node.getAssociations();
Set<Statement> inferred = new HashSet<Statement>();
for (Statement stmt : asserted) {
for (Inferencer inferencer : inferencers) {
inferencer.addInferenced(stmt, inferred);
}
addFields(doc, stmt);
}
for (Statement stmt : inferred) {
addFields(doc, stmt);
}
return doc;
}
Example 63
Project: openmicroscopy-master File: FullTextBridge.java View source code |
/**
* Parses all ownership and time-based details to the index for the given
* object.
*
* @param name
* @param object
* @param document
* @param opts
*/
public void set_details(final String name, final IObject object, final Document document, final LuceneOptions opts) {
final LuceneOptions stored = new SimpleLuceneOptions(opts, Store.YES);
final LuceneOptions storedNotAnalyzed = new SimpleLuceneOptions(opts, Index.NOT_ANALYZED, Store.YES);
Details details = object.getDetails();
if (details != null) {
Experimenter e = details.getOwner();
if (e != null && e.isLoaded()) {
String omename = e.getOmeName();
String firstName = e.getFirstName();
String lastName = e.getLastName();
add(document, "details.owner.omeName", omename, stored);
add(document, "details.owner.firstName", firstName, opts);
add(document, "details.owner.lastName", lastName, opts);
}
ExperimenterGroup g = details.getGroup();
if (g != null && g.isLoaded()) {
String groupName = g.getName();
add(document, "details.group.name", groupName, stored);
}
Event creationEvent = details.getCreationEvent();
if (creationEvent != null) {
add(document, "details.creationEvent.id", creationEvent.getId().toString(), storedNotAnalyzed);
if (creationEvent.isLoaded()) {
String creation = DateBridge.DATE_SECOND.objectToString(creationEvent.getTime());
add(document, "details.creationEvent.time", creation, storedNotAnalyzed);
}
}
Event updateEvent = details.getUpdateEvent();
if (updateEvent != null) {
add(document, "details.updateEvent.id", updateEvent.getId().toString(), storedNotAnalyzed);
if (updateEvent.isLoaded()) {
String update = DateBridge.DATE_SECOND.objectToString(updateEvent.getTime());
add(document, "details.updateEvent.time", update, storedNotAnalyzed);
}
}
Permissions perms = details.getPermissions();
if (perms != null) {
add(document, "details.permissions", perms.toString(), stored);
}
}
}
Example 64
Project: mylyn.tasks-master File: TaskListIndex.java View source code |
/**
* call to wait until index maintenance has completed
*
* @throws InterruptedException
*/
public void waitUntilIdle() throws InterruptedException {
if (!Platform.isRunning() && reindexDelay != 0L) {
// job join() behaviour is not the same when platform is not running
Logger.getLogger(TaskListIndex.class.getName()).warning("Index job joining may not work properly when Eclipse platform is not running");
}
maintainIndexJob.join();
}
Example 65
Project: org.eclipse.mylyn.tasks-master File: TaskListIndex.java View source code |
/**
* call to wait until index maintenance has completed
*
* @throws InterruptedException
*/
public void waitUntilIdle() throws InterruptedException {
if (!Platform.isRunning() && reindexDelay != 0L) {
// job join() behaviour is not the same when platform is not running
Logger.getLogger(TaskListIndex.class.getName()).warning("Index job joining may not work properly when Eclipse platform is not running");
}
maintainIndexJob.join();
}
Example 66
Project: CMISBox-master File: Storage.java View source code |
private void index(StoredItem si) throws Exception {
    org.apache.lucene.document.Document ldoc = new org.apache.lucene.document.Document();
    ldoc.add(new Field(Storage.FIELD_PATH, si.getPath(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_TYPE, si.getType(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_ID, si.getId(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_VERSION, si.getVersion(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_LOCAL_MODIFIED, DateTools.timeToString(si.getLocalModified(), Resolution.MILLISECOND), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_REMOTE_MODIFIED, DateTools.timeToString(si.getRemoteModified(), Resolution.MILLISECOND), Store.YES, Index.NOT_ANALYZED));
    this.writer.addDocument(ldoc);
    this.log.debug(String.format("Indexed %s", ldoc));
}
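
Example 66 stores DateTools-encoded timestamps with Index.NOT_ANALYZED, which keeps each timestamp as one fixed-width, lexicographically ordered term, so plain term range queries work on them. A round-trip sketch of that idea (assuming Lucene 3.x; the class and field names are illustrative):

import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class DateTermRangeSketch {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
        Document doc = new Document();
        // DateTools renders dates as fixed-width strings, so NOT_ANALYZED keeps them range-queryable.
        String modified = DateTools.timeToString(System.currentTimeMillis(), Resolution.MILLISECOND);
        doc.add(new Field("modified", modified, Store.YES, Index.NOT_ANALYZED));
        writer.addDocument(doc);
        writer.close();

        String from = DateTools.dateToString(new Date(0L), Resolution.MILLISECOND);
        String to = DateTools.dateToString(new Date(), Resolution.MILLISECOND);
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir));
        TopDocs hits = searcher.search(new TermRangeQuery("modified", from, to, true, true), 10);
        System.out.println("matches: " + hits.totalHits); // 1
        searcher.close();
    }
}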
Example 67
Project: comm-master File: LuceneUtils.java View source code |
/**
 * Creates the index for a list of forum beans.
 *
 * @param revert
 * @return
 */
public boolean createrIndex(List<BbsBean> list) {
    Directory directory = null;
    IndexWriter indexWriter = null;
    try {
        // open the index directory
        directory = FSDirectory.open(new File(indexDir));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        indexWriter = new IndexWriter(directory, iwc);
        indexWriter.deleteAll();
        for (int i = 0; i < list.size(); i++) {
            Document doc = new Document();
            BbsBean bean = list.get(i);
            doc.add(new Field(LuceneType.ALL_TYPE, LuceneType.TYPE_TOPIC, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(LuceneType.ALL_ID, bean.getTopicId(), Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(LuceneType.ALL_TITLE, bean.getTitle(), Store.YES, Index.ANALYZED));
            doc.add(new Field(LuceneType.ALL_CONTENT, bean.getRevertContent().replaceAll("<\\S[^>]+>", "").replaceAll("<p>", ""), Store.YES, Index.ANALYZED));
            doc.add(new Field(LuceneType.ALL_URL, bean.getRevertUrl(), Store.YES, Index.NOT_ANALYZED));
            // add the document to the index
            indexWriter.addDocument(doc);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return true;
}
Example 68
Project: jucy-master File: TextIndexer.java View source code |
public synchronized Set<HashValue> search(Set<String> keys, Set<String> excludes, Collection<String> endings) {
if (//if inverted Index is empty .. -> no results..
presentHashes.isEmpty()) {
return Collections.<HashValue>emptySet();
}
BooleanQuery bq = new BooleanQuery();
for (String s : keys) {
if (s.contains(" ")) {
PhraseQuery pq = new PhraseQuery();
for (String subterm : s.split(" ")) {
pq.add(new Term(FIELD_CONTENT, subterm));
}
bq.add(pq, BooleanClause.Occur.MUST);
} else {
bq.add(new TermQuery(new Term(FIELD_CONTENT, s)), BooleanClause.Occur.MUST);
}
}
for (String s : excludes) {
if (s.contains(" ")) {
PhraseQuery pq = new PhraseQuery();
for (String subterm : s.split(" ")) {
pq.add(new Term(FIELD_CONTENT, subterm));
}
bq.add(pq, BooleanClause.Occur.MUST_NOT);
} else {
bq.add(new TermQuery(new Term(FIELD_CONTENT, s)), BooleanClause.Occur.MUST_NOT);
}
}
if (!endings.isEmpty()) {
BooleanQuery equery = new BooleanQuery();
for (String s : endings) {
equery.add(new TermQuery(new Term(FIELD_ENDING, s)), BooleanClause.Occur.SHOULD);
}
bq.add(equery, BooleanClause.Occur.MUST);
}
Set<HashValue> found = new HashSet<HashValue>();
try {
IndexSearcher searcher = new IndexSearcher(index, true);
// new TopDocCollector(10);
TopScoreDocCollector collector = TopScoreDocCollector.create(25, false);
searcher.search(bq, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
for (ScoreDoc sd : hits) {
int docId = sd.doc;
Document d = searcher.doc(docId);
found.add(HashValue.createHash(d.getBinaryValue(FIELD_HASH)));
}
searcher.close();
} catch (Exception e) {
logger.warn(e, e);
}
return found;
}
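
The term and phrase queries built in Example 68 only find what indexing produced: a field added with an analyzed Index mode is split into individual terms, while NOT_ANALYZED keeps the whole value as one term. A small round-trip sketch (assuming Lucene 3.x and a whitespace-tokenizing analyzer; field names are illustrative):

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class AnalyzedVsNotAnalyzed {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_36, new WhitespaceAnalyzer(Version.LUCENE_36)));
        Document doc = new Document();
        // Tokenized into the terms "quick" and "fox" ...
        doc.add(new Field("analyzed", "quick fox", Store.NO, Index.ANALYZED));
        // ... versus kept as the single term "quick fox".
        doc.add(new Field("exact", "quick fox", Store.NO, Index.NOT_ANALYZED));
        writer.addDocument(doc);
        writer.close();

        IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir));
        System.out.println(searcher.search(new TermQuery(new Term("analyzed", "fox")), 1).totalHits);    // 1
        System.out.println(searcher.search(new TermQuery(new Term("exact", "fox")), 1).totalHits);       // 0
        System.out.println(searcher.search(new TermQuery(new Term("exact", "quick fox")), 1).totalHits); // 1
        searcher.close();
    }
}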
Example 69
Project: xcmis-master File: LuceneIndexer.java View source code |
/** * * @see org.xcmis.search.content.ContentIndexer#createDocument(org.xcmis.search.content.ContentEntry) */ public Document createDocument(ContentEntry contentEntry) { final Document doc = new Document(); // UUID doc.add(new Field(FieldNames.UUID, contentEntry.getIdentifier(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); //root if (contentEntry.getParentIdentifiers().length == 0) { doc.add(new Field(FieldNames.PARENT, indexConfiguration.getRootParentUuid(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); doc.add(new Field(FieldNames.LABEL, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); } else { //parent uuids for (int i = 0; i < contentEntry.getParentIdentifiers().length; i++) { String parentIdetifier = contentEntry.getParentIdentifiers()[i]; doc.add(new Field(FieldNames.PARENT, parentIdetifier, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); doc.add(new Field(FieldNames.LABEL, contentEntry.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); } } //table names for (int i = 0; i < contentEntry.getTableNames().length; i++) { doc.add(new Field(FieldNames.TABLE_NAME, contentEntry.getTableNames()[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); } for (int i = 0; i < contentEntry.getProperties().length; i++) { Property property = contentEntry.getProperties()[i]; if (isIndexed(property.getName())) { addProperty(doc, property); } } return doc; }
Example 70
Project: Bee-Browser-master File: LuceneHandler.java View source code |
protected void write(Entity entity, Document doc) { String schema = entity.getSchema(); if (schema == null) schema = ""; String[] fields = schema.split("\\|"); for (int i = 0; i < fields.length && i + 1 < fields.length; i += 2) { String kind = fields[i]; String fname = fields[i + 1]; if (Entity.STRING.equalsIgnoreCase(kind)) { Field field = new Field(fname, entity.getString(fname), Store.YES, Index.NOT_ANALYZED_NO_NORMS); doc.add(field); } else if (Entity.DOUBLE.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setDoubleValue(entity.getDouble(fname)); doc.add(field); } else if (Entity.FLOAT.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setFloatValue(entity.getFloat(fname)); doc.add(field); } else if (Entity.INTEGER.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setIntValue(entity.getInteger(fname)); doc.add(field); } else if (Entity.LONG.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setLongValue(entity.getLong(fname)); doc.add(field); } else if (Entity.ANALYZED.equalsIgnoreCase(kind)) { Field field = new Field(fname, entity.getString(fname), Store.YES, Index.ANALYZED); doc.add(field); } } }
Example 71
Project: jedit-CtagsInterface-master File: TagIndex.java View source code |
public Origin getOrigin(OriginType type, String id, boolean createIfNotExists) { Origin origin = new Origin(type, id); if (!createIfNotExists) return origin; final boolean b[] = new boolean[1]; b[0] = false; String query = DOCTYPE_FLD + ":" + ORIGIN_DOC_TYPE + " AND " + TYPE_FLD + ":" + type.name + " AND " + ORIGIN_ID_FLD + ":" + escape(id); runQuery(query, 1, new DocHandler() { public void handle(Document doc) { b[0] = true; } }); if (!b[0]) { startActivity(); Document doc = new Document(); doc.add(new Field(DOCTYPE_FLD, ORIGIN_DOC_TYPE, Store.YES, Index.ANALYZED)); doc.add(new Field(TYPE_FLD, type.name, Store.YES, Index.ANALYZED)); doc.add(new Field(ORIGIN_ID_FLD, id, Store.YES, Index.ANALYZED)); try { writer.addDocument(doc); } catch (IOException e) { e.printStackTrace(); } endActivity(); } return origin; }
Example 72
Project: moxie-master File: LuceneExecutor.java View source code |
/** * This completely indexes the repository and will destroy any existing * index. * * @param repositoryName * @return IndexResult */ public IndexResult reindex(String repository) { IndexResult result = new IndexResult(); if (!deleteIndex(repository)) { return result; } try { MoxieCache moxieCache = config.getMoxieCache(); IMavenCache repositoryCache = config.getMavenCache(repository); Collection<File> files = repositoryCache.getFiles("." + org.moxie.Constants.POM); IndexWriter writer = getIndexWriter(repository); for (File pomFile : files) { try { Pom pom = PomReader.readPom(moxieCache, pomFile); String date = DateTools.timeToString(pomFile.lastModified(), Resolution.MINUTE); Document doc = new Document(); doc.add(new Field(FIELD_PACKAGING, pom.packaging, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field(FIELD_GROUPID, pom.groupId, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_ARTIFACTID, pom.artifactId, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_VERSION, pom.version, Store.YES, Index.ANALYZED)); if (!StringUtils.isEmpty(pom.name)) { doc.add(new Field(FIELD_NAME, pom.name, Store.YES, Index.ANALYZED)); } if (!StringUtils.isEmpty(pom.description)) { doc.add(new Field(FIELD_DESCRIPTION, pom.description, Store.YES, Index.ANALYZED)); } doc.add(new Field(FIELD_DATE, date, Store.YES, Index.ANALYZED)); // add the pom to the index writer.addDocument(doc); } catch (Exception e) { logger.log(Level.SEVERE, MessageFormat.format("Exception while reindexing {0} in {1}", pomFile, repository), e); } result.artifactCount++; } writer.commit(); resetIndexSearcher(repository); result.success(); } catch (Exception e) { logger.log(Level.SEVERE, "Exception while reindexing " + repository, e); } return result; }
Example 73
Project: neo4j-rdf-master File: SimpleFulltextIndex.java View source code |
private void doIndex(IndexWriter writer, long nodeId, String predicate, Object literal) {
    try {
        Document doc = new Document();
        doc.add(new Field(KEY_ID, String.valueOf(nodeId), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX, getLiteralReader().read(literal), Store.YES, Index.ANALYZED));
        doc.add(new Field(KEY_PREDICATE, predicate, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX_SOURCE, literal.toString(), Store.YES, Index.NOT_ANALYZED));
        writer.addDocument(doc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 74
Project: wonder-master File: ERLuceneAdaptorChannel.java View source code |
@Override
public void insertRow(NSDictionary row, EOEntity entity) {
try {
Document doc = new Document();
fillWithDictionary(doc, row, entity);
doc.add(new Field(EXTERNAL_NAME_KEY, entity.externalName(), Store.NO, Index.NOT_ANALYZED));
writer().addDocument(doc);
} catch (EOGeneralAdaptorException e) {
throw e;
} catch (Throwable e) {
throw new ERLuceneAdaptorException("Failed to insert '" + entity.name() + "' with row " + row + ": " + e.getMessage(), e);
}
}
Example 75
Project: zenoss-zep-master File: LuceneEventIndexMapper.java View source code |
public static Document fromEventSummary(EventSummary summary, Map<String, EventDetailItem> detailsConfig, boolean isArchive) throws ZepException {
    Document doc = new Document();
    // Archive events don't store serialized protobufs - see ZEN-2159
    if (!isArchive) {
        doc.add(new Field(FIELD_PROTOBUF, compressProtobuf(summary)));
    }
    // Store the UUID for more lightweight queries against the index
    doc.add(new Field(FIELD_UUID, summary.getUuid(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_CURRENT_USER_NAME, summary.getCurrentUserName(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new IntField(FIELD_STATUS, summary.getStatus().getNumber(), Store.YES));
    doc.add(new LongField(FIELD_COUNT, summary.getCount(), Store.YES));
    doc.add(new LongField(FIELD_LAST_SEEN_TIME, summary.getLastSeenTime(), Store.YES));
    doc.add(new LongField(FIELD_FIRST_SEEN_TIME, summary.getFirstSeenTime(), Store.NO));
    doc.add(new LongField(FIELD_STATUS_CHANGE_TIME, summary.getStatusChangeTime(), Store.NO));
    doc.add(new LongField(FIELD_UPDATE_TIME, summary.getUpdateTime(), Store.NO));
    Event event = summary.getOccurrence(0);
    doc.add(new Field(FIELD_FINGERPRINT, event.getFingerprint(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_SUMMARY, event.getSummary(), Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_SUMMARY_NOT_ANALYZED, event.getSummary().toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new IntField(FIELD_SEVERITY, event.getSeverity().getNumber(), Store.YES));
    doc.add(new Field(FIELD_EVENT_CLASS, event.getEventClass(), Store.NO, Index.ANALYZED_NO_NORMS));
    // Store with a trailing slash to make lookups simpler
    doc.add(new Field(FIELD_EVENT_CLASS_NOT_ANALYZED, event.getEventClass().toLowerCase() + "/", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_AGENT, event.getAgent(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_MONITOR, event.getMonitor(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_EVENT_KEY, event.getEventKey(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_EVENT_CLASS_KEY, event.getEventClassKey(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_EVENT_GROUP, event.getEventGroup(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_MESSAGE, event.getMessage(), Store.NO, Index.ANALYZED_NO_NORMS));
    for (EventTag tag : event.getTagsList()) {
        for (String tagUuid : tag.getUuidList()) {
            doc.add(new Field(FIELD_TAGS, tagUuid, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        }
    }
    EventActor actor = event.getActor();
    String uuid = actor.getElementUuid();
    if (uuid != null && !uuid.isEmpty()) {
        doc.add(new Field(FIELD_TAGS, uuid, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    }
    String id = actor.getElementIdentifier();
    doc.add(new Field(FIELD_ELEMENT_IDENTIFIER, id, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_IDENTIFIER_NOT_ANALYZED, id.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    String title = actor.getElementTitle();
    doc.add(new Field(FIELD_ELEMENT_TITLE, title, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_TITLE_NOT_ANALYZED, title.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    String subUuid = actor.getElementSubUuid();
    if (subUuid != null && !subUuid.isEmpty()) {
        doc.add(new Field(FIELD_TAGS, subUuid, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    }
    String subId = actor.getElementSubIdentifier();
    doc.add(new Field(FIELD_ELEMENT_SUB_IDENTIFIER, subId, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_SUB_IDENTIFIER_NOT_ANALYZED, subId.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    String subTitle = actor.getElementSubTitle();
    doc.add(new Field(FIELD_ELEMENT_SUB_TITLE, subTitle, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_SUB_TITLE_NOT_ANALYZED, subTitle.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    // find details for indexing
    List<EventDetail> evtDetails = event.getDetailsList();
    // Details with no value are indexed using a default value so we can search for None's.
    // The value used to index the null details depends on the type of the detail:
    //   - Null numeric details are indexed using the Java min Integer
    //   - Null text details are indexed using the bell character
    // The values defined in the zep facade for null details must match the above values
    Iterator<Map.Entry<String, EventDetailItem>> it = detailsConfig.entrySet().iterator();
    while (it.hasNext()) {
        boolean found = false;
        Map.Entry<String, EventDetailItem> entry = it.next();
        // make sure that entry doesn't exist in the regular document
        for (EventDetail eDetail : evtDetails) {
            String detailName = eDetail.getName();
            if (entry.getKey().equals(detailName)) {
                found = true;
                break;
            }
        }
        if (!found) {
            String detailKeyName = DETAIL_INDEX_PREFIX + entry.getKey();
            EventDetailItem detailDefn = detailsConfig.get(entry.getKey());
            switch (detailDefn.getType()) {
                case INTEGER:
                    doc.add(new IntField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                case FLOAT:
                    doc.add(new FloatField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                case LONG:
                    doc.add(new LongField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                case DOUBLE:
                    doc.add(new DoubleField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                default:
                    doc.add(new Field(detailKeyName, Character.toString((char) 07), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
                    break;
            }
        }
    }
    for (EventDetail eDetail : evtDetails) {
        String detailName = eDetail.getName();
        EventDetailItem detailDefn = detailsConfig.get(detailName);
        if (detailDefn != null) {
            String detailKeyName = DETAIL_INDEX_PREFIX + detailDefn.getKey();
            for (String detailValue : eDetail.getValueList()) {
                switch (detailDefn.getType()) {
                    case STRING:
                        doc.add(new Field(detailKeyName, detailValue, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
                        break;
                    case INTEGER:
                        try {
                            int intValue = Integer.parseInt(detailValue);
                            doc.add(new IntField(detailKeyName, intValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(int) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case FLOAT:
                        try {
                            float floatValue = Float.parseFloat(detailValue);
                            doc.add(new FloatField(detailKeyName, floatValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(float) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case LONG:
                        try {
                            long longValue = Long.parseLong(detailValue);
                            doc.add(new LongField(detailKeyName, longValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(long) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case DOUBLE:
                        try {
                            double doubleValue = Double.parseDouble(detailValue);
                            doc.add(new DoubleField(detailKeyName, doubleValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(double) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case IP_ADDRESS:
                        try {
                            if (!detailValue.isEmpty()) {
                                final InetAddress addr = IpUtils.parseAddress(detailValue);
                                createIpAddressFields(doc, detailKeyName, addr);
                            }
                        } catch (Exception e) {
                            logger.warn("Invalid IP address data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case PATH:
                        createPathFields(doc, detailKeyName, detailValue);
                        break;
                    default:
                        logger.warn("Configured detail {} uses unknown data type: {}, skipping", detailName, detailDefn.getType());
                        break;
                }
            }
        }
    }
    return doc;
}
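A pattern worth noting in this example is that the same value is often indexed twice: an analyzed copy (e.g. FIELD_SUMMARY) for full-text queries and a lower-cased NOT_ANALYZED copy (e.g. FIELD_SUMMARY_NOT_ANALYZED) for exact matching and sorting. The following is a minimal, self-contained sketch of that pattern using hypothetical field names, not the actual ZEP schema constants:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

public class DualFieldSketch {
    // Hypothetical field names; the real constants (FIELD_SUMMARY, FIELD_SUMMARY_NOT_ANALYZED, ...)
    // are defined elsewhere in the ZEP project.
    public static Document build(String summary) {
        Document doc = new Document();
        // Tokenized copy for full-text queries; norms disabled because length-based scoring is not wanted.
        doc.add(new Field("summary", summary, Store.NO, Index.ANALYZED_NO_NORMS));
        // Lower-cased verbatim copy for exact matching, prefix queries and sorting.
        doc.add(new Field("summary_not_analyzed", summary.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
        return doc;
    }
}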
Example 76
Project: agile-itsm-master File: Lucene.java View source code |
private boolean indexarDocGemeo(PalavraGemeaDTO palGemeaDTO) throws IOException {
    this.excluirPalavraGemea(palGemeaDTO);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    Directory indexDir = FSDirectory.open(new File(dirGemeas));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(indexDir, config);
    Document doc = new Document();
    doc.add(new Field("palavra", palGemeaDTO.getPalavra(), Store.YES, Index.ANALYZED));
    doc.add(new Field("correspondente", palGemeaDTO.getPalavraCorrespondente(), Store.YES, Index.ANALYZED));
    NumericField id = new NumericField("id", Store.YES, true);
    id.setLongValue(palGemeaDTO.getIdPalavraGemea());
    doc.add(id);
    indexWriter.addDocument(doc);
    indexWriter.close();
    indexDir.close();
    return true;
}
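The example stores the record id in an indexed NumericField, which makes it possible to delete an entry by exact numeric match before re-adding it (presumably what excluirPalavraGemea does; that is an assumption, the helper's body is not shown). A minimal sketch of such a delete against the Lucene 3.5 API, reusing the "id" field and the dirGemeas path from the example above:

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class TwinWordDeleteSketch {
    // Removes the document whose NumericField "id" equals the given value.
    public void deleteById(String dirGemeas, long id) throws Exception {
        Directory indexDir = FSDirectory.open(new File(dirGemeas));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        IndexWriter writer = new IndexWriter(indexDir, config);
        // A one-point numeric range matches the trie-encoded NumericField exactly.
        writer.deleteDocuments(NumericRangeQuery.newLongRange("id", id, id, true, true));
        writer.close();
        indexDir.close();
    }
}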
Example 77
Project: tml-master File: Repository.java View source code |
/**
 * Inserts a new text passage into the Repository.
 *
 * @param content the content of the document
 * @param title the title of the document
 * @param url the url of the document
 * @param type the type of the document ("document", "sentence" or "paragraph")
 * @param parent the id of the parent document (when type is segment)
 * @return the Lucene Document that was just added
 * @throws IOException
 * @throws SQLException
 */
private Document addTextPassageToOpenIndex(String content, String type, String parent, String parentDocument, String externalId, String title, String url) throws IOException, SQLException {
    Document document = new Document();
    document.add(new Field(this.getLuceneContentField(), content, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS));
    document.add(new Field(this.getLuceneExternalIdField(), externalId, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field(this.getLuceneTitleField(), title, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field(this.getLuceneUrlField(), url, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field("indexdate", Calendar.getInstance().getTime().toString(), Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field(this.getLuceneParentField(), parent, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field("type", type, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field("parent", parentDocument, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    this.getDbConnection().insertDocument(this, document);
    Term term = new Term("externalid", externalId);
    luceneIndexWriter.updateDocument(term, document);
    return document;
}
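Two details are worth calling out: updateDocument(term, document) keyed on the externalid field gives upsert semantics, and the content field is indexed with TermVector.WITH_POSITIONS, so per-document term positions can be read back later. A hedged sketch of reading those positions with the Lucene 3.x reader API follows; the field name "contents" and the index path are hypothetical stand-ins for getLuceneContentField() and the repository's actual directory:

import java.io.File;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TermVectorSketch {
    // docId is a Lucene-internal document number, e.g. obtained from a search hit.
    public static void printPositions(String indexPath, int docId) throws Exception {
        Directory dir = FSDirectory.open(new File(indexPath));
        IndexReader reader = IndexReader.open(dir);
        // The cast is safe only because the field was indexed WITH_POSITIONS.
        TermPositionVector vector = (TermPositionVector) reader.getTermFreqVector(docId, "contents");
        if (vector != null) {
            String[] terms = vector.getTerms();
            for (int i = 0; i < terms.length; i++) {
                System.out.println(terms[i] + " -> " + java.util.Arrays.toString(vector.getTermPositions(i)));
            }
        }
        reader.close();
        dir.close();
    }
}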
Example 78
Project: opensearchserver-master File: Indexed.java View source code |
public final Index getLuceneIndex(String indexAnalyzer) {
    if (this == NO)
        return Index.NO;
    return indexAnalyzer == null ? Index.NOT_ANALYZED : Index.ANALYZED;
}
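The helper maps a field configuration onto the three Field.Index modes used throughout these examples. A minimal sketch (hypothetical field names, independent of the OpenSearchServer classes) showing what each mode means when the field is constructed:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

public class IndexModeSketch {
    public static Document build(String value) {
        Document doc = new Document();
        doc.add(new Field("stored_only", value, Store.YES, Index.NO));        // stored for retrieval, not searchable
        doc.add(new Field("keyword", value, Store.YES, Index.NOT_ANALYZED));  // searchable as a single exact token
        doc.add(new Field("full_text", value, Store.YES, Index.ANALYZED));    // tokenized by the IndexWriter's analyzer
        return doc;
    }
}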
Example 79
Project: cyclos-master File: DocumentBuilder.java View source code |
/**
 * Adds a string field, which may be analyzed or not
 */
public DocumentBuilder add(final String name, final String value, final boolean analyzed) {
    if (StringUtils.isNotEmpty(value)) {
        final Field field = new Field(name, value, Store.YES, analyzed ? Index.ANALYZED : Index.NOT_ANALYZED);
        document.add(field);
    }
    return this;
}
Example 80
Project: eclipse-instasearch-master File: StorageIndexer.java View source code |
private static org.apache.lucene.document.Field createLuceneField(Field fieldName, String value) {
    return new org.apache.lucene.document.Field(fieldName.toString(), value, Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED);
}
Example 81
Project: eadventure-master File: ModelIndex.java View source code |
/**
 * Adds a property to a node.
 *
 * @param e the node
 * @param field the property name
 * @param value the property value
 * @param searchable if this field is to be indexed and used in "anywhere" searches
 */
public static void addProperty(DependencyNode e, String field, String value, boolean searchable) {
    e.getDoc().add(new Field(field, value, Store.YES, searchable ? Index.ANALYZED : Index.NO));
}
Example 82
Project: skalli-master File: LuceneIndex.java View source code |
private void addEntityToIndex(IndexWriter writer, T entity) throws IOException {
    List<IndexEntry> fields = indexEntity(entity);
    Document doc = LuceneUtil.fieldsToDocument(fields);
    doc.add(new Field(FIELD_UUID, entity.getUuid().toString(), Store.YES, Index.NOT_ANALYZED));
    writer.addDocument(doc);
}