Java Examples for org.apache.lucene.document.Field.Index

The following Java examples will help you understand the usage of org.apache.lucene.document.Field.Index. These source code samples are taken from various open source projects.
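Before the project samples, here is a minimal, self-contained sketch of what the individual Field.Index constants mean. It targets the Lucene 3.x API, and the field names and values are purely illustrative (they are not taken from any of the projects below):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

Document doc = new Document();
// ANALYZED: run the value through the analyzer; the usual choice for full-text fields
doc.add(new Field("body", "some free text", Field.Store.NO, Field.Index.ANALYZED));
// NOT_ANALYZED: index the value as a single token; the usual choice for ids and keywords
doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
// NOT_ANALYZED_NO_NORMS: like NOT_ANALYZED, but skips norms (saving one byte per field per document)
doc.add(new Field("type", "article", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
// NO: do not index at all; the value is only retrievable if it is stored
doc.add(new Field("payload", "opaque data", Field.Store.YES, Field.Index.NO));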

Example 1
Project: hibernate-search-master  File: DocumentBuilderIndexedEntity.java
/**
	 * Builds the Lucene {@code Document} for a given entity instance and its id.
	 *
	 * @param tenantId the identifier of the tenant or null if there isn't one
	 * @param instance The entity for which to build the matching Lucene {@code Document}
	 * @param id the entity id.
	 * @param fieldToAnalyzerMap this map gets populated while generating the {@code Document}.
	 * It allows specifying, for any document field, a named analyzer to use. This parameter cannot be {@code null}.
	 * @param objectInitializer used to ensure that all objects are initialized
	 * @param conversionContext a {@link org.hibernate.search.bridge.spi.ConversionContext} object.
	 * @param includedFieldNames list of field names to consider. Others can be excluded. Null if all fields are considered.
	 *
	 * @return The Lucene {@code Document} for the specified entity.
	 */
public Document getDocument(String tenantId, Object instance, Serializable id, Map<String, String> fieldToAnalyzerMap, InstanceInitializer objectInitializer, ConversionContext conversionContext, String[] includedFieldNames) {
    if (fieldToAnalyzerMap == null) {
        throw new IllegalArgumentException("fieldToAnalyzerMap cannot be null");
    }
    //sensible default for outside callers
    if (objectInitializer == null) {
        objectInitializer = getInstanceInitializer();
    }
    Document doc = new Document();
    FacetHandling faceting = new FacetHandling();
    Class<?> entityType = objectInitializer.getClass(instance);
    float documentLevelBoost = getMetadata().getClassBoost(instance);
    // add the class name of the entity to the document
    if (containsFieldName(ProjectionConstants.OBJECT_CLASS, includedFieldNames)) {
        @SuppressWarnings("deprecation") Field classField = new Field(ProjectionConstants.OBJECT_CLASS, entityType.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
        doc.add(classField);
    }
    addTenantIdIfRequired(tenantId, doc);
    // now add the entity id to the document
    if (containsFieldName(idFieldName, includedFieldNames)) {
        DocumentFieldMetadata idFieldMetaData = idPropertyMetadata.getFieldMetadata(idFieldName);
        LuceneOptions luceneOptions = new LuceneOptionsImpl(idFieldMetaData, idFieldMetaData.getBoost(), documentLevelBoost);
        final FieldBridge contextualizedBridge = conversionContext.oneWayConversionContext(getIdBridge());
        conversionContext.setClass(entityType);
        if (idPropertyName != null) {
            conversionContext.pushProperty(idPropertyName);
        }
        try {
            contextualizedBridge.set(idFieldMetaData.getAbsoluteName(), id, doc, luceneOptions);
            addSortFieldDocValues(doc, idPropertyMetadata, documentLevelBoost, id);
        } finally {
            if (idPropertyName != null) {
                conversionContext.popProperty();
            }
        }
    }
    // finally add all other document fields
    Set<String> processedFieldNames = new HashSet<>();
    buildDocumentFields(instance, doc, faceting, getMetadata(), fieldToAnalyzerMap, processedFieldNames, conversionContext, objectInitializer, documentLevelBoost, false, nestingContextFactory.createNestingContext(getTypeMetadata().getType()));
    doc = faceting.build(doc);
    return doc;
}
Example 2
Project: zoie-master  File: TweetInterpreter.java
@Override
public ZoieIndexable convertAndInterpret(String tweet) {
    try {
        final String text = tweet;
        final long uid = id++;
        return new AbstractZoieIndexable() {

            @Override
            public IndexingReq[] buildIndexingReqs() {
                Document doc = new Document();
                doc.add(new Field("contents", text, Store.NO, Index.ANALYZED));
                return new IndexingReq[] { new IndexingReq(doc) };
            }

            @Override
            public long getUID() {
                return uid;
            }

            @Override
            public boolean isDeleted() {
                return false;
            }

            @Override
            public boolean isSkip() {
                return false;
            }
        };
    } catch (Exception e) {
        return new AbstractZoieIndexable() {

            @Override
            public IndexingReq[] buildIndexingReqs() {
                return null;
            }

            @Override
            public long getUID() {
                return 0;
            }

            @Override
            public boolean isDeleted() {
                return false;
            }

            @Override
            public boolean isSkip() {
                return true;
            }
        };
    }
}
Example 3
Project: cloud-zoie-master  File: ThrottledLuceneNRTDataConsumer.java
public void consume(Collection<proj.zoie.api.DataConsumer.DataEvent<V>> events) throws ZoieException {
    if (_writer == null) {
        throw new ZoieException("Internal IndexWriter null, perhaps not started?");
    }
    if (events.size() > 0) {
        for (DataEvent<V> event : events) {
            ZoieIndexable indexable = _interpreter.convertAndInterpret(event.getData());
            if (indexable.isSkip())
                continue;
            try {
                _writer.deleteDocuments(new Term(DOCUMENT_ID_FIELD, String.valueOf(indexable.getUID())));
            } catch (IOException e) {
                throw new ZoieException(e.getMessage(), e);
            }
            IndexingReq[] reqs = indexable.buildIndexingReqs();
            for (IndexingReq req : reqs) {
                Analyzer localAnalyzer = req.getAnalyzer();
                Document doc = req.getDocument();
                Field uidField = new Field(DOCUMENT_ID_FIELD, String.valueOf(indexable.getUID()), Store.NO, Index.NOT_ANALYZED_NO_NORMS);
                uidField.setOmitTermFreqAndPositions(true);
                doc.add(uidField);
                if (localAnalyzer == null)
                    localAnalyzer = _analyzer;
                try {
                    _writer.addDocument(doc, localAnalyzer);
                } catch (IOException e) {
                    throw new ZoieException(e.getMessage(), e);
                }
            }
        }
        int numdocs;
        try {
            // for realtime indexing, commit is not needed per the Lucene mailing list
            //_writer.commit();
            numdocs = _writer.numDocs();
        } catch (IOException e) {
            throw new ZoieException(e.getMessage(), e);
        }
        logger.info("flushed " + events.size() + " events to index, index now contains " + numdocs + " docs.");
    }
}
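Example 3 implements an update as an explicit deleteDocuments followed by addDocument. As a side note, Lucene's IndexWriter also offers updateDocument, which performs the same delete-then-add as a single call; a sketch for the common case where an indexable produces one document (not part of the original project):

Term uidTerm = new Term(DOCUMENT_ID_FIELD, String.valueOf(indexable.getUID()));
_writer.updateDocument(uidTerm, doc, localAnalyzer); // deletes by term, then adds the new document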
Example 4
Project: Hibernate-Search-on-action-master  File: PersonPkBridge.java
public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
    PersonPK id = (PersonPK) value;
    Store store = luceneOptions.getStore();
    Index index = luceneOptions.getIndex();
    TermVector termVector = luceneOptions.getTermVector();
    Float boost = luceneOptions.getBoost();
    // store each sub-property in its own field
    Field field = new Field(name + ".firstName", id.getFirstName(), store, index, termVector);
    field.setBoost(boost);
    document.add(field);
    field = new Field(name + ".lastName", id.getLastName(), store, index, termVector);
    field.setBoost(boost);
    document.add(field);
    // store the unique string representation in the named field
    field = new Field(name, objectToString(id), store, index, termVector);
    field.setBoost(boost);
    document.add(field);
}
Example 5
Project: luja-master  File: FieldAnnotated.java
public Document toDocument() {
    Document document = new Document();
    document.add(new NumericField("intNumber", Store.YES, true).setIntValue(intNumber));
    document.add(new org.apache.lucene.document.Field("date", DateTools.timeToString(date.toDateTimeAtStartOfDay(DateTimeZone.UTC).getMillis(), Resolution.DAY.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
    document.add(new org.apache.lucene.document.Field("time", DateTools.timeToString(time.getMillis(), Resolution.MILLISECOND.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
    document.add(new org.apache.lucene.document.Field("localTime", DateTools.timeToString(localTime.toDateTime(DateTimeZone.UTC).getMillis(), Resolution.MILLISECOND.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
    document.add(new org.apache.lucene.document.Field("javaDate", DateTools.timeToString(javaDate.getTime(), Resolution.DAY.asLuceneResolution()), Store.YES, Index.NOT_ANALYZED));
    document.add(new org.apache.lucene.document.Field("code", code, Store.YES, Index.NOT_ANALYZED));
    document.add(new org.apache.lucene.document.Field("name", name, Store.YES, Index.NOT_ANALYZED));
    document.add(new org.apache.lucene.document.Field("tokenized", tokenized, Store.YES, Index.ANALYZED));
    document.add(new org.apache.lucene.document.Field("locale", locale.toString(), Store.YES, Index.NOT_ANALYZED));
    return document;
}
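Because the date fields in Example 5 are indexed as NOT_ANALYZED strings produced by DateTools, range searches must use the same encoding. A sketch of the query side (Lucene 3.x), assuming startDate and endDate are java.util.Date values that are not part of the original class:

String lower = DateTools.dateToString(startDate, DateTools.Resolution.DAY);
String upper = DateTools.dateToString(endDate, DateTools.Resolution.DAY);
Query byDate = new TermRangeQuery("date", lower, upper, true, true); // inclusive bounds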
Example 6
Project: thrudb-java-master  File: ThrudexLuceneHandler.java
/**
	 * Add/Replace a document
	 */
public void put(Document d) throws ThrudexException, TException {
    // make sure index is valid
    if (!isValidIndex(d.index))
        throw new ThrudexExceptionImpl("No Index Found: " + d.index);
    // make sure document has a key
    if (!d.isSetKey() || d.key.trim().equals(""))
        throw new ThrudexExceptionImpl("No Document key found");
    // Start new lucene document
    org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
    luceneDocument.add(new org.apache.lucene.document.Field(LuceneIndex.DOCUMENT_KEY, d.key, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED));
    // Start analyzer
    Analyzer defaultAnalyzer = getAnalyzer(org.thrudb.thrudex.Analyzer.STANDARD);
    PerFieldAnalyzerWrapper qAnalyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
    // Add fields
    for (Field field : d.fields) {
        if (!field.isSetKey())
            throw new ThrudexExceptionImpl("Field key not set");
        // Convert Field store type to Lucene type
        org.apache.lucene.document.Field.Store fieldStoreType;
        if (field.isStore())
            fieldStoreType = org.apache.lucene.document.Field.Store.YES;
        else
            fieldStoreType = org.apache.lucene.document.Field.Store.NO;
        // Create Lucene Field
        org.apache.lucene.document.Field luceneField = new org.apache.lucene.document.Field(field.key, field.value, fieldStoreType, org.apache.lucene.document.Field.Index.ANALYZED);
        if (field.isSetWeight())
            luceneField.setBoost(field.weight);
        luceneDocument.add(luceneField);
        // Create sortable field?
        if (field.isSetSortable() && field.sortable) {
            luceneDocument.add(new org.apache.lucene.document.Field(field.key + "_sort", field.value, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED));
        }
        // Add field specific analyzer to qAnalyzer
        qAnalyzer.addAnalyzer(field.key, getAnalyzer(field.getAnalyzer()));
    }
    // Add payload
    if (d.isSetPayload()) {
        luceneDocument.add(new org.apache.lucene.document.Field(LuceneIndex.PAYLOAD_KEY, d.payload, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED));
    }
    // Document is now ready to put into the index
    indexMap.get(d.index).put(d.key, luceneDocument, qAnalyzer);
}
Example 7
Project: wesearch-master  File: IndexerCreator.java
/**
	 * This method has to create indexes for classes of the ontology
	 * @return A list of indexes
	 */
public static List<IndexLucene> createIndexerForClasses() {
    List<IndexLucene> indexers = new LinkedList<IndexLucene>();
    indexers.add(new IndexLucene("label", "label", Index.ANALYZED, TermVector.YES, true));
    indexers.add(new IndexLucene("comment", "comment", Index.ANALYZED, TermVector.YES, true));
    indexers.add(0, new IndexLucene("class", "class", Index.ANALYZED, TermVector.YES, true));
    return indexers;
}
Example 8
Project: graphdb-traversal-context-master  File: IndexType.java
Fieldable instantiateField(String key, Object value, Index analyzed) {
    Fieldable field = null;
    if (value instanceof Number) {
        Number number = (Number) value;
        NumericField numberField = new NumericField(key, Store.YES, true);
        if (value instanceof Long) {
            numberField.setLongValue(number.longValue());
        } else if (value instanceof Float) {
            numberField.setFloatValue(number.floatValue());
        } else if (value instanceof Double) {
            numberField.setDoubleValue(number.doubleValue());
        } else {
            numberField.setIntValue(number.intValue());
        }
        field = numberField;
    } else {
        field = new Field(key, value.toString(), Store.YES, analyzed);
    }
    return field;
}
Example 9
Project: neo4j-lucene4-index-master  File: IndexType.java
IndexableField instantiateField(String key, Object value, Index analyzed) {
    IndexableField field = null;
    if (value instanceof Number) {
        Number number = (Number) value;
        final IndexableField numberField;
        if (value instanceof Long) {
            numberField = new LongField(key, number.longValue(), Store.YES);
        } else if (value instanceof Float) {
            numberField = new FloatField(key, number.floatValue(), Store.YES);
        } else if (value instanceof Double) {
            numberField = new DoubleField(key, number.doubleValue(), Store.YES);
        } else {
            numberField = new IntField(key, number.intValue(), Store.YES);
        }
        field = numberField;
    } else {
        field = new Field(key, value.toString(), Store.YES, analyzed);
    }
    return field;
}
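Example 9 is a Lucene 4 port of the same IndexType class shown in Examples 8 and 10. In Lucene 4 the Field.Index enum was removed in favor of typed field classes; a rough mapping, with illustrative field names:

doc.add(new StringField("id", "42", Field.Store.YES));        // ~ Store.YES, Index.NOT_ANALYZED
doc.add(new TextField("body", "free text", Field.Store.NO));  // ~ Store.NO, Index.ANALYZED
doc.add(new StoredField("payload", "opaque"));                // ~ Store.YES, Index.NO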
Example 10
Project: neo4j-mobile-android-master  File: IndexType.java
Fieldable instantiateField(String key, Object value, Index analyzed) {
    Fieldable field = null;
    if (value instanceof Number) {
        Number number = (Number) value;
        NumericField numberField = new NumericField(key, Store.YES, true);
        if (value instanceof Long) {
            numberField.setLongValue(number.longValue());
        } else if (value instanceof Float) {
            numberField.setFloatValue(number.floatValue());
        } else if (value instanceof Double) {
            numberField.setDoubleValue(number.doubleValue());
        } else {
            numberField.setIntValue(number.intValue());
        }
        field = numberField;
    } else {
        field = new Field(key, value.toString(), Store.YES, analyzed);
    }
    return field;
}
Example 11
Project: cloudtm-data-platform-master  File: DocumentBuilderIndexedEntity.java
protected void checkDocumentId(XProperty member, PropertiesMetadata propertiesMetadata, boolean isRoot, String prefix, ConfigContext context, PathsContext pathsContext) {
    Annotation idAnnotation = getIdAnnotation(member, context);
    NumericField numericFieldAnn = member.getAnnotation(NumericField.class);
    if (idAnnotation != null) {
        String attributeName = getIdAttributeName(member, idAnnotation);
        if (pathsContext != null) {
            pathsContext.markEncounteredPath(prefix + attributeName);
        }
        if (isRoot) {
            if (explicitDocumentId) {
                if (idAnnotation instanceof DocumentId) {
                    throw log.duplicateDocumentIdFound(getBeanClass().getName());
                } else {
                    //If it's not a DocumentId it's a JPA @Id: ignore it as we already have a @DocumentId
                    return;
                }
            }
            if (idAnnotation instanceof DocumentId) {
                explicitDocumentId = true;
            }
            idKeywordName = prefix + attributeName;
            FieldBridge fieldBridge = BridgeFactory.guessType(null, numericFieldAnn, member, reflectionManager);
            if (fieldBridge instanceof TwoWayFieldBridge) {
                idBridge = (TwoWayFieldBridge) fieldBridge;
            } else {
                throw new SearchException("Bridge for document id does not implement TwoWayFieldBridge: " + member.getName());
            }
            Float boost = AnnotationProcessingHelper.getBoost(member, null);
            if (boost != null) {
                idBoost = boost.floatValue();
            }
            ReflectionHelper.setAccessible(member);
            idGetter = member;
        } else {
            //component should index their document id
            ReflectionHelper.setAccessible(member);
            propertiesMetadata.fieldGetters.add(member);
            propertiesMetadata.fieldGetterNames.add(member.getName());
            String fieldName = prefix + attributeName;
            propertiesMetadata.fieldNames.add(fieldName);
            propertiesMetadata.fieldStore.add(Store.YES);
            Field.Index index = AnnotationProcessingHelper.getIndex(Index.YES, Analyze.NO, Norms.YES);
            propertiesMetadata.fieldIndex.add(index);
            propertiesMetadata.fieldTermVectors.add(AnnotationProcessingHelper.getTermVector(TermVector.NO));
            propertiesMetadata.fieldNullTokens.add(null);
            propertiesMetadata.fieldBridges.add(BridgeFactory.guessType(null, null, member, reflectionManager));
            propertiesMetadata.fieldBoosts.add(AnnotationProcessingHelper.getBoost(member, null));
            propertiesMetadata.precisionSteps.add(getPrecisionStep(null));
            propertiesMetadata.dynamicFieldBoosts.add(AnnotationProcessingHelper.getDynamicBoost(member));
            // property > entity analyzer (no field analyzer)
            Analyzer analyzer = AnnotationProcessingHelper.getAnalyzer(member.getAnnotation(org.hibernate.search.annotations.Analyzer.class), context);
            if (analyzer == null) {
                analyzer = propertiesMetadata.analyzer;
            }
            if (analyzer == null) {
                throw new AssertionFailure("Analyzer should not be undefined");
            }
            addToScopedAnalyzer(fieldName, analyzer, index);
        }
    }
}
Example 12
Project: PartyDJ-master  File: HighlighterTest.java
public void testMultiSearcher() throws Exception {
    // setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer1.addDocument(d);
    writer1.optimize();
    writer1.close();
    IndexReader reader1 = IndexReader.open(ramDir1, true);
    // setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer2.addDocument(d);
    writer2.optimize();
    writer2.close();
    IndexReader reader2 = IndexReader.open(ramDir2, true);
    IndexSearcher searchers[] = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1, true);
    searchers[1] = new IndexSearcher(ramDir2, true);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.parse("multi*");
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    // at this point the multisearcher calls combine(query[])
    hits = multiSearcher.search(query, null, 1000);
    // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    Query expandedQueries[] = new Query[2];
    expandedQueries[0] = query.rewrite(reader1);
    expandedQueries[1] = query.rewrite(reader2);
    query = query.combine(expandedQueries);
    // create an instance of the highlighter with the tags used to surround
    // highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
    for (int i = 0; i < hits.totalHits; i++) {
        String text = multiSearcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
        String highlightedText = highlighter.getBestFragment(tokenStream, text);
        System.out.println(highlightedText);
    }
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
}
Example 13
Project: PersonalityExtraction-master  File: CreateLuceneIndex.java
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.out.println("Args: index-dir");
        System.exit(-1);
    }
    File indexDir = new File(args[0]);
    if (indexDir.exists()) {
        System.out.println("Index directory already exists: " + indexDir.getAbsolutePath());
        System.exit(-2);
    }
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), analyzer, true, MaxFieldLength.UNLIMITED);
    for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING) {
        final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
        /*
             * We will create Lucene documents with a searchable "fullContent" field and
             * stored "title", "url" and "snippet" fields for clustering.
             */
        doc.add(new Field("fullContent", d.getSummary(), Store.NO, Index.ANALYZED));
        doc.add(new Field("title", d.getTitle(), Store.YES, Index.NO));
        doc.add(new Field("snippet", d.getSummary(), Store.YES, Index.NO));
        doc.add(new Field("url", d.getContentUrl(), Store.YES, Index.NO));
        writer.addDocument(doc);
    }
    writer.close();
}
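Note that "title", "snippet" and "url" above combine Store.YES with Index.NO: such fields cannot be searched, but their values come back with each hit. A sketch of the retrieval side (searcher and scoreDoc are assumed to exist):

Document hit = searcher.doc(scoreDoc.doc);
String title = hit.get("title");        // available because the field was stored
String body = hit.get("fullContent");   // null: Store.NO, the field was only indexed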
Example 14
Project: pylucene-master  File: TestIndexWriter.java
/**
     * Make sure we skip wicked long terms.
    */
public void testWickedLongTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
    char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE - 1];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    final String bigTerm = new String(chars);
    // Max term length is 16383, so this content produces
    // a too-long term:
    String contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir, true);
    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));
    // Make sure position is still incremented when
    // massive term is skipped:
    TermPositions tps = reader.termPositions(new Term("content", "another"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());
    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
    reader.close();
    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
    sa.setMaxTokenLength(100000);
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();
    reader = IndexReader.open(dir, true);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();
    dir.close();
}
Example 15
Project: Solbase-master  File: DocumentLoader.java
public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException {
    Document document = new Document();
    Get documentGet = new Get(SolbaseUtil.randomize(docNum));
    if (fieldNames == null || fieldNames.size() == 0) {
        // get all columns (this skips the meta info)
        documentGet.addFamily(Bytes.toBytes("field"));
    } else {
        for (byte[] fieldName : fieldNames) {
            documentGet.addColumn(Bytes.toBytes("field"), fieldName);
        }
    }
    Result documentResult = null;
    // if docTable is set, reuse the instance; otherwise create a new one and close it when done
    if (this.docTable == null) {
        HTableInterface docTable = null;
        try {
            docTable = SolbaseUtil.getDocTable();
            documentResult = docTable.get(documentGet);
        } finally {
            SolbaseUtil.releaseTable(docTable);
        }
    } else {
        documentResult = this.docTable.get(documentGet);
    }
    if (documentResult == null || documentResult.isEmpty()) {
        return null;
    }
    // TODO, get from result
    Long versionIdentifier = 0L;
    NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(Bytes.toBytes("field"));
    for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) {
        Field field = null;
        String fieldName = Bytes.toString(fieldColumn.getKey());
        byte[] value;
        ByteBuffer v = ByteBuffer.wrap(fieldColumn.getValue());
        int vlimit = v.limit() + v.arrayOffset();
        if (v.array()[vlimit - 1] != Byte.MAX_VALUE && v.array()[vlimit - 1] != Byte.MIN_VALUE) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        } else if (v.array()[vlimit - 1] == Byte.MAX_VALUE) { // binary value
            value = new byte[vlimit - 1];
            System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
            field = new Field(fieldName, value, Store.YES);
            document.add(field);
        } else if (v.array()[vlimit - 1] == Byte.MIN_VALUE) { // string value
            value = new byte[vlimit - 1];
            System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
            // Check for multi-fields
            String fieldString = new String(value, "UTF-8");
            if (fieldString.indexOf(Bytes.toString(SolbaseUtil.delimiter)) >= 0) {
                StringTokenizer tok = new StringTokenizer(fieldString, Bytes.toString(SolbaseUtil.delimiter));
                while (tok.hasMoreTokens()) {
                    // update logic
                    if (schema != null) {
                        SchemaField sfield = schema.getFieldOrNull(fieldName);
                        if (sfield.getType() instanceof EmbeddedIndexedIntField) {
                            EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
                            EmbeddedSortField sf = new EmbeddedSortField(fieldName, tok.nextToken(), Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
                            document.add(sf);
                        } else {
                            Field f = sfield.createField(tok.nextToken(), 1.0f);
                            // null fields are not added
                            if (f != null) {
                                document.add(f);
                            }
                        }
                    } else {
                        field = new Field(fieldName, tok.nextToken(), Store.YES, Index.ANALYZED);
                        document.add(field);
                    }
                }
            } else {
                // update logic
                if (schema != null) {
                    SchemaField sfield = schema.getFieldOrNull(fieldName);
                    if (sfield.getType() instanceof EmbeddedIndexedIntField) {
                        EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
                        EmbeddedSortField sf = new EmbeddedSortField(fieldName, fieldString, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
                        document.add(sf);
                    } else {
                        Field f = sfield.createField(fieldString, 1.0f);
                        // null fields are not added
                        if (f != null) {
                            document.add(f);
                        }
                    }
                } else {
                    field = new Field(fieldName, fieldString, Store.YES, Index.ANALYZED);
                    document.add(field);
                }
            }
        }
    }
    return new CachedObjectWrapper<Document, Long>(document, versionIdentifier, System.currentTimeMillis());
}
Example 16
Project: solrcene-master  File: TestIndexWriter.java
public void testOptimizeMaxNumSegments() throws IOException {
    MockDirectoryWrapper dir = newDirectory(random);
    final Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));
    for (int numDocs = 38; numDocs < 500; numDocs += 38) {
        LogDocMergePolicy ldmp = new LogDocMergePolicy();
        ldmp.setMinMergeDocs(1);
        ldmp.setMergeFactor(5);
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy(ldmp));
        for (int j = 0; j < numDocs; j++) writer.addDocument(doc);
        writer.close();
        SegmentInfos sis = new SegmentInfos();
        sis.read(dir);
        final int segCount = sis.size();
        ldmp = new LogDocMergePolicy();
        ldmp.setMergeFactor(5);
        writer = new IndexWriter(dir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(ldmp));
        writer.optimize(3);
        writer.close();
        sis = new SegmentInfos();
        sis.read(dir);
        final int optSegCount = sis.size();
        if (segCount < 3)
            assertEquals(segCount, optSegCount);
        else
            assertEquals(3, optSegCount);
    }
    dir.close();
}
Example 17
Project: ansj_seg-master  File: NearTest.java
public static void createIndex() throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, new AnsjAnalysis());
    Directory directory = FSDirectory.open(new File("c:/index"));
    IndexWriter writer = new IndexWriter(directory, conf);
    String str = "文化人;文化人谈文化";
    String[] values = str.split(";");
    for (String value : values) {
        Document doc = new Document();
        Field field = new Field("test", value, Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
        //			field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        doc.add(field);
        writer.addDocument(doc);
        writer.commit();
    }
    writer.close();
}
Example 18
Project: datahotel-master  File: IndexBean.java
@SuppressWarnings("rawtypes")
public void update(Metadata metadata) {
    MetadataLogger logger = metadata.getLogger();
    Timestamp ts = new Timestamp(FOLDER_CACHE_INDEX, metadata.getLocation(), "timestamp");
    if (metadata.getUpdated() == ts.getTimestamp()) {
        logger.info("Index up to date.");
        return;
    }
    logger.info("Building index.");
    long i = 0;
    try {
        File filename = Filesystem.getFile(FOLDER_SLAVE, metadata.getLocation(), FILE_DATASET);
        Directory dir = FSDirectory.open(Filesystem.getFolder(FOLDER_CACHE_INDEX, metadata.getLocation()));
        StandardAnalyzer analyzer = new StandardAnalyzer(version, new HashSet());
        IndexWriterConfig writerConfig = new IndexWriterConfig(version, analyzer);
        IndexWriter writer = new IndexWriter(dir, writerConfig);
        writer.deleteAll();
        CSVReader csv = csvReaderFactory.open(filename);
        while (csv.hasNext()) {
            try {
                i++;
                Map<String, String> line = csv.getNextLine();
                Document doc = new Document();
                String searchable = "";
                for (FieldLight f : fieldBean.getFields(metadata)) {
                    String value = line.get(f.getShortName());
                    if (value == null)
                        logger.info("Field not found: " + f.getShortName());
                    // TODO if (f.getGroupable())
                    if (value.matches("[0-9.,]+"))
                        doc.add(new Field(f.getShortName(), value, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
                    else
                        doc.add(new Field(f.getShortName(), value, Store.YES, Index.ANALYZED));
                    if (f.getSearchable())
                        searchable += " " + line.get(f.getShortName());
                }
                if (!searchable.trim().isEmpty())
                    doc.add(new Field("searchable", searchable.trim(), Store.NO, Index.ANALYZED));
                writer.addDocument(doc);
            } catch (Exception e) {
                logger.info("[" + e.getClass().getSimpleName() + (e.getStackTrace().length > 0 ? "][" + e.getStackTrace()[0].getFileName() + ":" + e.getStackTrace()[0].getLineNumber() : "") + "] Unable to index line " + i + ". (" + String.valueOf(e.getMessage()) + ")");
            }
            if (i % 10000 == 0)
                logger.info("Document " + i);
        }
        writer.optimize();
        writer.commit();
        writer.close();
        dir.close();
        ts.setTimestamp(metadata.getUpdated());
        ts.save();
    } catch (Exception e) {
        logger.log(Level.WARNING, e.getMessage(), e);
    }
}
Example 19
Project: Europeana-Creative-master  File: ExtendedLireIndexer.java
protected Document buildDocument(LireObject s, String id) throws IOException, CorruptIndexException, BoFException {
    Document doc = new Document();
    // access private field by reflection
    // MPEG-7
    getM_sfaALL().addFieldToDoc(doc, s, getM_toppivs());
    // ID
    getM_sfaALL().AddIDField(doc, id);
    System.out.println("id " + id);
    // add URL to doc
    doc.add(new org.apache.lucene.document.Field("THMBURL", s.getThmbURL(), org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED, org.apache.lucene.document.Field.TermVector.NO));
    return doc;
}
Example 20
Project: Genoogle-master  File: LuceneIndexer.java
public static void main(String[] args) throws IOException, IllegalSymbolException, NoSuchElementException, ParseException {
    Directory indexDir = FSDirectory.open(new File("./index"));
    if (new File("./index").exists()) {
        IndexSearcher is = new IndexSearcher(indexDir);
        Query q = new TermQuery(new Term("header", "100"));
        TopDocs search = is.search(q, 20);
        System.out.println(search.totalHits);
        System.out.println(search.scoreDocs[0]);
    } else {
        final boolean forceFormatting = true;
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_31, new StandardAnalyzer(Version.LUCENE_31));
        IndexWriter indexWriter = new IndexWriter(indexDir, indexWriterConfig);
        BufferedReader is = new BufferedReader(new FileReader("/Users/albrecht/genoogle/files/fasta/ecoli.nt"));
        RichSequenceStreamReader readFastaDNA = IOTools.readFasta(is, DNAAlphabet.SINGLETON);
        while (readFastaDNA.hasNext()) {
            RichSequence s;
            try {
                s = readFastaDNA.nextRichSequence();
            } catch (IllegalSymbolException e) {
                if (forceFormatting) {
                    continue;
                } else {
                    throw e;
                }
            }
            int id = getNextSequenceId();
            String gi = s.getGi();
            String name = s.getName();
            String type = s.getType();
            String accession = s.getAccession();
            String description = s.getDescription();
            String header = s.getHeader();
            System.out.println(id);
            System.out.println(gi);
            System.out.println(name);
            System.out.println(type);
            System.out.println(accession);
            System.out.println(description);
            Document doc = new Document();
            doc.add(new Field("header", header, Store.YES, Index.ANALYZED));
            doc.add(new Field("gi", gi, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("name", name, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("type", type, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("accession", accession, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("description", description, Store.YES, Index.ANALYZED));
            doc.add(new Field("id", Integer.toString(id), Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("file", "ecoli.nt", Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("db", "ECOLI_DB", Store.YES, Index.NOT_ANALYZED));
            indexWriter.addDocument(doc);
        }
        indexWriter.optimize();
        indexWriter.close();
    }
}
Example 21
Project: high-scale-lucene-master  File: TestIndexWriterReader.java
public void testUpdateDocument() throws Exception {
    boolean optimize = true;
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    // create the index
    createIndexNoClose(!optimize, "index1", writer);
    // writer.flush(false, true, true);
    // get a reader
    IndexReader r1 = writer.getReader();
    assertTrue(r1.isCurrent());
    String id10 = r1.document(10).getField("id").stringValue();
    Document newDoc = r1.document(10);
    newDoc.removeField("id");
    newDoc.add(new Field("id", Integer.toString(8000), Store.YES, Index.NOT_ANALYZED));
    writer.updateDocument(new Term("id", id10), newDoc);
    assertFalse(r1.isCurrent());
    IndexReader r2 = writer.getReader();
    assertTrue(r2.isCurrent());
    assertEquals(0, count(new Term("id", id10), r2));
    assertEquals(1, count(new Term("id", Integer.toString(8000)), r2));
    r1.close();
    writer.close();
    assertTrue(r2.isCurrent());
    IndexReader r3 = IndexReader.open(dir1, true);
    assertTrue(r3.isCurrent());
    assertTrue(r2.isCurrent());
    assertEquals(0, count(new Term("id", id10), r3));
    assertEquals(1, count(new Term("id", Integer.toString(8000)), r3));
    writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    assertTrue(r2.isCurrent());
    assertTrue(r3.isCurrent());
    writer.close();
    assertFalse(r2.isCurrent());
    assertTrue(!r3.isCurrent());
    r2.close();
    r3.close();
    dir1.close();
}
Example 22
Project: lucene-Korean-Analyzer-master  File: SynonymDictionaryIndex.java
public synchronized void indexingDictionary(List<String> synonyms) {
    try {
        indexWriter.deleteAll();
        indexWriter.commit();
        int recordCnt = 0;
        for (String syn : synonyms) {
            String[] synonymWords = syn.split(",");
            Document doc = new Document();
            for (int i = 0, size = synonymWords.length; i < size; i++) {
                String fieldValue = synonymWords[i];
                Field field = new Field("syn", fieldValue, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
                doc.add(field);
                recordCnt++;
            //end inner for
            }
            indexWriter.addDocument(doc);
        }
        //end outer for
        indexWriter.commit();
        logger.info("��어 색� 단어 갯수 : {}", recordCnt);
    } catch (Exception e) {
        throw new IllegalStateException();
    }
}
Example 23
Project: lucene-korean-master  File: SynonymDictionaryIndex.java
public synchronized void indexingDictionary(List<String> synonyms) {
    try {
        indexWriter.deleteAll();
        indexWriter.commit();
        int recordCnt = 0;
        for (String syn : synonyms) {
            String[] synonymWords = syn.split(",");
            Document doc = new Document();
            for (int i = 0, size = synonymWords.length; i < size; i++) {
                String fieldValue = synonymWords[i];
                Field field = new Field("syn", fieldValue, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
                doc.add(field);
                recordCnt++;
            //end inner for
            }
            indexWriter.addDocument(doc);
        }
        //end outer for
        indexWriter.commit();
        logger.info("��어 색� 단어 갯수 : {}", recordCnt);
    } catch (Exception e) {
        throw new IllegalStateException();
    }
}
Example 24
Project: NLP-master  File: NearTest.java
public static void createIndex() throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, new AnsjAnalysis());
    Directory directory = FSDirectory.open(new File("c:/index"));
    IndexWriter writer = new IndexWriter(directory, conf);
    String str = "文化人;文化人谈文化";
    String[] values = str.split(";");
    for (String value : values) {
        Document doc = new Document();
        Field field = new Field("test", value, Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
        //			field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        doc.add(field);
        writer.addDocument(doc);
        writer.commit();
    }
    writer.close();
}
Example 25
Project: Solbase-Lucene-master  File: HighlighterTest.java
public void testMultiSearcher() throws Exception {
    // setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer1.addDocument(d);
    writer1.optimize();
    writer1.close();
    IndexReader reader1 = IndexReader.open(ramDir1);
    // setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer2.addDocument(d);
    writer2.optimize();
    writer2.close();
    IndexReader reader2 = IndexReader.open(ramDir2);
    IndexSearcher searchers[] = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1);
    searchers[1] = new IndexSearcher(ramDir2);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.parse("multi*");
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    // at this point the multisearcher calls combine(query[])
    hits = multiSearcher.search(query);
    // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
    Query expandedQueries[] = new Query[2];
    expandedQueries[0] = query.rewrite(reader1);
    expandedQueries[1] = query.rewrite(reader2);
    query = query.combine(expandedQueries);
    // create an instance of the highlighter with the tags used to surround
    // highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
        String highlightedText = highlighter.getBestFragment(tokenStream, text);
        System.out.println(highlightedText);
    }
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
}
Example 26
Project: tika-master  File: MetadataAwareLuceneIndexer.java
public void indexContentSpecificMet(File file) throws Exception {
    Metadata met = new Metadata();
    try (InputStream is = new FileInputStream(file)) {
        tika.parse(is, met);
        Document document = new Document();
        for (String key : met.names()) {
            String[] values = met.getValues(key);
            for (String val : values) {
                document.add(new Field(key, val, Store.YES, Index.ANALYZED));
            }
        }
        // add the document once, after all metadata fields have been collected
        writer.addDocument(document);
    }
}
Example 27
Project: yarep-master  File: DateIndexerSearcherImplV1.java
/**
     * Get revision from index file
     * @param path2 Absolute path of index file
     * @return TODO
     */
private Revision getRevisionFromIndexFile(String path2) throws Exception, IndexOutOfSyncException {
    if (path2 != null) {
        if (new File(path2).isFile()) {
            String revisionName = getRevisionName(path2);
            if (revisionName != null) {
                try {
                    //log.debug("Get revision name from index file '" + path2 + "' for node '" + nodePath + "'.");
                    return new VirtualFileSystemRevision(repo, nodePath, revisionName);
                } catch (NoSuchRevisionException e) {
                    log.warn("No revision for revision name '" + revisionName + "' of index file: " + path2);
                    throw new IndexOutOfSyncException(path2);
                }
            } else {
                log.warn("Index file '" + path2 + "' does not seem to contain a revision name!");
                return null;
            }
        } else {
            log.warn("No such index file: " + path2);
            return null;
        }
    } else {
        //log.debug("No path.");
        return null;
    }
}
Example 28
Project: apache-nutch-fork-master  File: TestIndexSorter.java
protected void setUp() throws Exception {
    if (conf == null)
        conf = NutchConfiguration.create();
    // create test index
    testDir = new File("indexSorter-test-" + System.currentTimeMillis());
    if (!testDir.mkdirs()) {
        throw new Exception("Can't create test dir " + testDir.toString());
    }
    LOG.info("Creating test index: " + testDir.getAbsolutePath());
    File plain = new File(testDir, INDEX_PLAIN);
    Directory dir = FSDirectory.open(plain);
    IndexWriter writer = new IndexWriter(dir, new NutchDocumentAnalyzer(conf), true, MaxFieldLength.UNLIMITED);
    // create test documents
    for (int i = 0; i < NUM_DOCS; i++) {
        Document doc = new Document();
        for (int k = 0; k < fieldNames.length; k++) {
            Field f;
            Store s;
            Index ix;
            String val = null;
            if (fieldNames[k].equals("id")) {
                s = Store.YES;
                ix = Index.NOT_ANALYZED;
                val = String.valueOf(i);
            } else if (fieldNames[k].equals("host")) {
                s = Store.YES;
                ix = Index.NOT_ANALYZED;
                val = "www.example" + i + ".com";
            } else if (fieldNames[k].equals("site")) {
                s = Store.NO;
                ix = Index.NOT_ANALYZED;
                val = "www.example" + i + ".com";
            } else if (fieldNames[k].equals("content")) {
                s = Store.NO;
                ix = Index.ANALYZED;
                val = "This is the content of the " + i + "-th document.";
            } else if (fieldNames[k].equals("boost")) {
                s = Store.YES;
                ix = Index.NO;
                // XXX note that this way we ensure different values of encoded boost
                // XXX note also that for this reason we can't reliably test more than
                // XXX 255 documents.
                float boost = Similarity.decodeNorm((byte) (i + 1));
                val = String.valueOf(boost);
                doc.setBoost(boost);
            } else {
                s = Store.YES;
                ix = Index.ANALYZED;
                if (fieldNames[k].equals("anchor")) {
                    val = "anchors to " + i + "-th page.";
                } else if (fieldNames[k].equals("url")) {
                    val = "http://www.example" + i + ".com/" + i + ".html";
                }
            }
            f = new Field(fieldNames[k], val, s, ix);
            doc.add(f);
        }
        writer.addDocument(doc);
    }
    writer.optimize();
    writer.close();
}
Example 29
Project: capedwarf-blue-master  File: DocumentFieldBridge.java
@SuppressWarnings("unchecked")
public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
    com.google.appengine.api.search.Document googleDocument = (com.google.appengine.api.search.Document) value;
    document.add(new org.apache.lucene.document.Field(CacheValue.MATCH_ALL_DOCS_FIELD_NAME, CacheValue.MATCH_ALL_DOCS_FIELD_VALUE, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    for (Field field : googleDocument.getFields()) {
        if (field.getType() == null) {
            throw new IllegalStateException("Field " + field.getName() + " of document " + googleDocument.getId() + " has null type!");
        }
        String prefixedFieldName = fieldNamePrefixer.getPrefixedFieldName(field.getName(), field.getType());
        String prefixedAllFieldName = fieldNamePrefixer.getPrefixedFieldName(CacheValue.ALL_FIELD_NAME, field.getType());
        if (field.getType() == Field.FieldType.NUMBER) {
            luceneOptions.addNumericFieldToDocument(prefixedFieldName, field.getNumber(), document);
            luceneOptions.addNumericFieldToDocument(prefixedAllFieldName, field.getNumber(), document);
        } else if (field.getType() == Field.FieldType.GEO_POINT) {
            spatialFieldBridgeByGrid.set(prefixedFieldName, Point.fromDegrees(field.getGeoPoint().getLatitude(), field.getGeoPoint().getLongitude()), document, luceneOptions);
            document.getFields();
        } else {
            luceneOptions.addFieldToDocument(prefixedFieldName, convertToString(field), document);
            luceneOptions.addFieldToDocument(prefixedAllFieldName, convertToString(field), document);
        }
    }
}
Example 30
Project: hsearch-obsolete-master  File: LuceneIndexManager.java
public void insert(HDocument hdoc) throws Exception {
    Document doc = new Document();
    for (com.bizosys.hsearch.common.Field fld : hdoc.fields) {
        ByteField bf = fld.getByteField();
        Store store = (fld.isStore()) ? Field.Store.YES : Field.Store.NO;
        Index index = (fld.isAnalyze()) ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
        doc.add(new Field(bf.name, bf.getValue().toString(), store, index));
    }
    doc.add(new Field("id", hdoc.getTenantDocumentKey(), Field.Store.YES, Field.Index.ANALYZED));
    if (null != hdoc.docType)
        doc.add(new Field("type", hdoc.docType, Field.Store.YES, Field.Index.ANALYZED));
    if (null != hdoc.url)
        doc.add(new Field("url", hdoc.url, Field.Store.YES, Field.Index.ANALYZED));
    if (null != hdoc.title)
        doc.add(new Field("title", hdoc.title, Field.Store.YES, Field.Index.ANALYZED));
    if (null != hdoc.preview)
        doc.add(new Field("preview", hdoc.preview, Field.Store.YES, Field.Index.ANALYZED));
    if (null != hdoc.cacheText)
        doc.add(new Field("cache", hdoc.cacheText, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.addDocument(doc);
}
Example 31
Project: jtrac-master  File: Item.java
/**
     * Lucene DocumentCreator implementation
     */
public Document createDocument() {
    Document d = new Document();
    d.add(new org.apache.lucene.document.Field("id", getId() + "", Store.YES, Index.NO));
    d.add(new org.apache.lucene.document.Field("type", "item", Store.YES, Index.NO));
    StringBuffer sb = new StringBuffer();
    if (getSummary() != null) {
        sb.append(getSummary());
    }
    if (getDetail() != null) {
        if (sb.length() > 0) {
            sb.append(" | ");
        }
        sb.append(getDetail());
    }
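    // Index.TOKENIZED is the pre-Lucene-2.4 name for what later became Index.ANALYZED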
    d.add(new org.apache.lucene.document.Field("text", sb.toString(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 32
Project: katta-master  File: LuceneClientTest.java
@Test
public void testGetBinaryDetails() throws Exception {
    File index = _temporaryFolder.newFolder("indexWithBinaryData");
    String textFieldName = "textField";
    String binaryFieldName = "binaryField";
    String textFieldContent = "sample text";
    byte[] bytesFieldContent = new byte[] { 1, 2, 3 };
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(index), createIndexWriter());
    Document document = new Document();
    document.add(new Field(binaryFieldName, bytesFieldContent));
    document.add(new Field(textFieldName, textFieldContent, Store.NO, Index.ANALYZED));
    indexWriter.addDocument(document);
    indexWriter.close();
    DeployClient deployClient = new DeployClient(_clusterRule.getCluster().getProtocol());
    IndexState indexState = deployClient.addIndex(index.getName(), index.getParentFile().getAbsolutePath(), 1).joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);
    ILuceneClient client = new LuceneClient(_clusterRule.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_35, "", new KeywordAnalyzer()).parse(textFieldName + ": " + textFieldContent);
    final Hits hits = client.search(query, new String[] { index.getName() }, 10);
    assertNotNull(hits);
    assertEquals(1, hits.getHits().size());
    final Hit hit = hits.getHits().get(0);
    final MapWritable details = client.getDetails(hit);
    final Set<Writable> keySet = details.keySet();
    assertEquals(1, keySet.size());
    final Writable writable = details.get(new Text(binaryFieldName));
    assertNotNull(writable);
    assertThat(writable, instanceOf(BytesWritable.class));
    BytesWritable bytesWritable = (BytesWritable) writable;
    // getBytes() returns the full backing array, so cap it to the actual length first
    bytesWritable.setCapacity(bytesWritable.getLength());
    assertArrayEquals(bytesFieldContent, bytesWritable.getBytes());
    client.close();
}
Example 33
Project: l4ia-master  File: FastVectorHighlighterSample.java
static void makeIndex() throws IOException {
    IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.UNLIMITED);
    for (String d : DOCS) {
        Document doc = new Document();
        doc.add(new Field(F, d, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
        writer.addDocument(doc);
    }
    writer.close();
}
Example 34
Project: maven-indexer-master  File: MinimalArtifactInfoIndexCreator.java
public void updateLegacyDocument(ArtifactInfo ai, Document doc) {
    updateDocument(ai, doc);
    // legacy!
    if (ai.getPrefix() != null) {
        doc.add(new Field(ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    }
    if (ai.getGoals() != null) {
        doc.add(new Field(ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str(ai.getGoals()), Field.Store.YES, Field.Index.NO));
    }
    doc.removeField(ArtifactInfo.GROUP_ID);
    doc.add(new Field(ArtifactInfo.GROUP_ID, ai.getGroupId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
}
Example 35
Project: nutchbase-master  File: TestIndexSorter.java View source code
protected void setUp() throws Exception {
    if (conf == null)
        conf = NutchConfiguration.create();
    // create test index
    testDir = new File("indexSorter-test-" + System.currentTimeMillis());
    if (!testDir.mkdirs()) {
        throw new Exception("Can't create test dir " + testDir.toString());
    }
    LOG.info("Creating test index: " + testDir.getAbsolutePath());
    File plain = new File(testDir, INDEX_PLAIN);
    Directory dir = FSDirectory.getDirectory(plain);
    IndexWriter writer = new IndexWriter(dir, new NutchDocumentAnalyzer(conf), true);
    // create test documents
    for (int i = 0; i < NUM_DOCS; i++) {
        Document doc = new Document();
        for (int k = 0; k < fieldNames.length; k++) {
            Field f;
            Store s;
            Index ix;
            String val = null;
            if (fieldNames[k].equals("id")) {
                s = Store.YES;
                ix = Index.UN_TOKENIZED;
                val = String.valueOf(i);
            } else if (fieldNames[k].equals("host")) {
                s = Store.YES;
                ix = Index.UN_TOKENIZED;
                val = "www.example" + i + ".com";
            } else if (fieldNames[k].equals("site")) {
                s = Store.NO;
                ix = Index.UN_TOKENIZED;
                val = "www.example" + i + ".com";
            } else if (fieldNames[k].equals("content")) {
                s = Store.NO;
                ix = Index.TOKENIZED;
                val = "This is the content of the " + i + "-th document.";
            } else if (fieldNames[k].equals("boost")) {
                s = Store.YES;
                ix = Index.NO;
                // XXX note that this way we ensure different values of encoded boost
                // XXX note also that for this reason we can't reliably test more than
                // XXX 255 documents.
                float boost = Similarity.decodeNorm((byte) (i + 1));
                val = String.valueOf(boost);
                doc.setBoost(boost);
            } else {
                s = Store.YES;
                ix = Index.TOKENIZED;
                if (fieldNames[k].equals("anchor")) {
                    val = "anchors to " + i + "-th page.";
                } else if (fieldNames[k].equals("url")) {
                    val = "http://www.example" + i + ".com/" + i + ".html";
                }
            }
            f = new Field(fieldNames[k], val, s, ix);
            doc.add(f);
        }
        writer.addDocument(doc);
    }
    writer.optimize();
    writer.close();
}
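The boost branch leans on Lucene's lossy norm encoding: a boost is squeezed into a single byte, so only 256 distinct values survive the round trip, which is exactly why the comment above warns against testing more than 255 documents. A sketch of the round trip (assuming the static Similarity.encodeNorm/decodeNorm pair from the same Lucene generation as this sample):

float original = 1.72f;
byte encoded = Similarity.encodeNorm(original);   // float -> one byte, lossy
float recovered = Similarity.decodeNorm(encoded); // close to, but rarely equal to, the original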
Example 36
Project: partake-master  File: EventSearchService.java View source code
/**
     * Creates a Lucene document from the given event and its tickets.
     */
private Document makeDocument(Event event, List<EventTicket> tickets) {
    StringBuilder builder = new StringBuilder();
    builder.append(event.getTitle()).append(" ");
    builder.append(event.getSummary()).append(" ");
    builder.append(event.getAddress()).append(" ");
    builder.append(event.getPlace()).append(" ");
    builder.append(Util.removeTags(event.getDescription()));
    long beginTime = event.getBeginDate().getTime();
    long deadlineTime = event.acceptsSomeTicketsTill(tickets).getTime();
    Document doc = new Document();
    doc.add(new Field("ID", event.getId(), Store.YES, Index.NOT_ANALYZED));
    doc.add(new Field("CATEGORY", event.getCategory(), Store.NO, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS));
    doc.add(new Field("CREATED-AT", TimeUtil.getTimeString(event.getCreatedAt().getTime()), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field("BEGIN-TIME", TimeUtil.getTimeString(beginTime), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field("DEADLINE-TIME", TimeUtil.getTimeString(deadlineTime), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field("TITLE", event.getTitle(), Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS));
    doc.add(new Field("CONTENT", builder.toString(), Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS));
    return doc;
}
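Because the time fields are indexed NOT_ANALYZED, each TimeUtil.getTimeString(...) value is a single term, and lexicographic range queries behave as date ranges provided the string format sorts chronologically. A hedged sketch (the bound strings are placeholders in whatever format TimeUtil.getTimeString produces):

// Hypothetical range search over BEGIN-TIME, inclusive at both ends.
Query inRange = new TermRangeQuery("BEGIN-TIME", "20120301000000", "20120331235959", true, true);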
Example 37
Project: RSSOwl-master  File: SearchDocument.java View source code
/**
   * Creates a new <code>Field</code> from the given {@link IPerson}.
   *
   * @param fieldConstant the constant identifying the target field
   * @param person the value of the field
   * @param store one of the {@link Store} constants.
   * @param index one of the {@link Index} constants.
   * @return Field the {@link Field} that can be used for indexing.
   */
protected Field createPersonField(int fieldConstant, IPerson person, Store store, Index index) {
    if (person == null)
        return null;
    /* Add Name and EMail */
    if (person.getName() != null && person.getEmail() != null) {
        //$NON-NLS-1$
        return createStringField(fieldConstant, person.getName() + " " + person.getEmail().toString(), store, index);
    }
    /* Add Name if present */
    if (person.getName() != null)
        return createStringField(fieldConstant, person.getName(), store, index);
    else /* Add EMail if present */
    if (person.getEmail() != null)
        return createURIField(fieldConstant, person.getEmail().toString(), store, Index.UN_TOKENIZED);
    return null;
}
Example 38
Project: sensei-master  File: SenseiSchema.java View source code
public static SenseiSchema build(JSONObject schemaObj) throws JSONException, ConfigurationException {
    SenseiSchema schema = new SenseiSchema();
    schema.setSchemaObj(schemaObj);
    schema._fieldDefMap = new HashMap<String, FieldDefinition>();
    JSONObject tableElem = schemaObj.optJSONObject("table");
    if (tableElem == null) {
        throw new ConfigurationException("empty schema");
    }
    schema._uidField = tableElem.getString("uid");
    schema._deleteField = tableElem.optString("delete-field", "");
    schema._skipField = tableElem.optString("skip-field", "");
    schema._srcDataStore = tableElem.optString("src-data-store", "");
    schema._srcDataField = tableElem.optString("src-data-field", "src_data");
    schema._compressSrcData = tableElem.optBoolean("compress-src-data", true);
    JSONArray columns = tableElem.optJSONArray("columns");
    int count = 0;
    if (columns != null) {
        count = columns.length();
    }
    for (int i = 0; i < count; ++i) {
        JSONObject column = columns.getJSONObject(i);
        try {
            String n = column.getString("name");
            String t = column.getString("type");
            String frm = column.optString("from");
            FieldDefinition fdef = new FieldDefinition();
            fdef.formatter = null;
            fdef.fromField = frm.length() > 0 ? frm : n;
            fdef.isMeta = true;
            fdef.isMulti = column.optBoolean("multi");
            fdef.isActivity = column.optBoolean("activity");
            fdef.name = n;
            String delimString = column.optString("delimiter");
            if (delimString != null && delimString.trim().length() > 0) {
                fdef.delim = delimString;
            }
            fdef.hasWildCards = column.optBoolean("wildcard");
            if (fdef.hasWildCards) {
                Assert.isTrue(fdef.fromField.equals(fdef.name), "Cannot have a different \"from\" field with wildcards");
                fdef.wildCardPattern = Pattern.compile(fdef.name);
            }
            schema._fieldDefMap.put(n, fdef);
            if (t.equals("int")) {
                MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(int.class);
                String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
                fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
                fdef.type = int.class;
            } else if (t.equals("short")) {
                MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(short.class);
                String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
                fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
                fdef.type = int.class;
            } else if (t.equals("long")) {
                MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(long.class);
                String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
                fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
                fdef.type = long.class;
            } else if (t.equals("float")) {
                MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(float.class);
                String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
                fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
                fdef.type = double.class;
            } else if (t.equals("double")) {
                MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(double.class);
                String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
                fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
                fdef.type = double.class;
            } else if (t.equals("char")) {
                fdef.formatter = null;
            } else if (t.equals("string")) {
                fdef.formatter = null;
            } else if (t.equals("boolean")) {
                MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(boolean.class);
                String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
                fdef.type = boolean.class;
            } else if (t.equals("date")) {
                String f = "";
                try {
                    f = column.optString("format");
                } catch (Exception ex) {
                    logger.error(ex.getMessage(), ex);
                }
                if (f.isEmpty())
                    throw new ConfigurationException("Date format cannot be empty.");
                fdef.formatter = new SimpleDateFormat(f);
                fdef.type = Date.class;
            } else if (t.equals("text")) {
                fdef.isMeta = false;
                String idxString = column.optString("index", null);
                String storeString = column.optString("store", null);
                String tvString = column.optString("termvector", null);
                Index idx = idxString == null ? Index.ANALYZED : DefaultSenseiInterpreter.INDEX_VAL_MAP.get(idxString.toUpperCase());
                Store store = storeString == null ? Store.NO : DefaultSenseiInterpreter.STORE_VAL_MAP.get(storeString.toUpperCase());
                TermVector tv = tvString == null ? TermVector.NO : DefaultSenseiInterpreter.TV_VAL_MAP.get(tvString.toUpperCase());
                if (idx == null || store == null || tv == null) {
                    throw new ConfigurationException("Invalid indexing parameter specification");
                }
                IndexSpec indexingSpec = new IndexSpec();
                indexingSpec.store = store;
                indexingSpec.index = idx;
                indexingSpec.tv = tv;
                fdef.textIndexSpec = indexingSpec;
            }
        } catch (Exception e) {
            throw new ConfigurationException("Error parsing schema: " + column, e);
        }
    }
    JSONArray facetsList = schemaObj.optJSONArray("facets");
    if (facetsList != null) {
        for (int i = 0; i < facetsList.length(); i++) {
            JSONObject facet = facetsList.optJSONObject(i);
            if (facet != null) {
                schema.facets.add(FacetDefinition.valueOf(facet));
            }
        }
    }
    return schema;
}
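Since Field.Index, Field.Store and Field.TermVector are plain enums in Lucene 3.x, the *_VAL_MAP lookups above could also be written with Enum.valueOf, trading the null returned by a map miss for an IllegalArgumentException. A minimal sketch (not Sensei's actual helper):

// Hypothetical equivalent of the three map lookups; throws on unknown names.
Index idx = idxString == null ? Index.ANALYZED : Index.valueOf(idxString.toUpperCase());
Store store = storeString == null ? Store.NO : Store.valueOf(storeString.toUpperCase());
TermVector tv = tvString == null ? TermVector.NO : TermVector.valueOf(tvString.toUpperCase());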
Example 39
Project: Solandra-master  File: IndexReader.java View source code
public Document document(int docNum, FieldSelector selector) throws CorruptIndexException, IOException {
    Document doc = getDocumentCache().get(docNum);
    if (doc != null) {
        logger.debug("Found doc in cache");
        return doc;
    }
    String docId = getDocIndexToDocId().get(docNum);
    if (docId == null)
        return null;
    Map<Integer, String> keyMap = new HashMap<Integer, String>();
    keyMap.put(docNum, CassandraUtils.hashKey(indexName + CassandraUtils.delimeter + docId));
    List<byte[]> fieldNames = null;
    // Parallel for Solr Performance  
    if (selector != null && selector instanceof SolandraFieldSelector) {
        List<Integer> otherDocIds = ((SolandraFieldSelector) selector).getOtherDocsToCache();
        fieldNames = ((SolandraFieldSelector) selector).getFieldNames();
        logger.debug("Going to bulk load " + otherDocIds.size() + " documents");
        for (Integer otherDocNum : otherDocIds) {
            if (otherDocNum == docNum)
                continue;
            if (getDocumentCache().containsKey(otherDocNum))
                continue;
            String docKey = getDocIndexToDocId().get(otherDocNum);
            if (docKey == null)
                continue;
            keyMap.put(otherDocNum, CassandraUtils.hashKey(indexName + CassandraUtils.delimeter + docKey));
        }
    }
    ColumnParent columnParent = new ColumnParent();
    columnParent.setColumn_family(CassandraUtils.docColumnFamily);
    SlicePredicate slicePredicate = new SlicePredicate();
    if (fieldNames == null || fieldNames.size() == 0) {
        // get all columns ( except this skips meta info )
        slicePredicate.setSlice_range(new SliceRange(new byte[] {}, CassandraUtils.finalToken.getBytes("UTF-8"), false, 100));
    } else {
        slicePredicate.setColumn_names(fieldNames);
    }
    long start = System.currentTimeMillis();
    try {
        Map<String, List<ColumnOrSuperColumn>> docMap = client.multiget_slice(CassandraUtils.keySpace, Arrays.asList(keyMap.values().toArray(new String[] {})), columnParent, slicePredicate, ConsistencyLevel.ONE);
        for (Map.Entry<Integer, String> key : keyMap.entrySet()) {
            List<ColumnOrSuperColumn> cols = docMap.get(key.getValue());
            if (cols == null) {
                logger.warn("Missing document in multiget_slice for: " + key.getValue());
                continue;
            }
            Document cacheDoc = new Document();
            for (ColumnOrSuperColumn col : cols) {
                Field field = null;
                String fieldName = new String(col.column.name);
                //In case __META__ slips through
                if (Arrays.equals(col.column.name, CassandraUtils.documentMetaField.getBytes())) {
                    logger.debug("Filtering out __META__ key");
                    continue;
                }
                byte[] value;
                if (col.column.value[col.column.value.length - 1] != Byte.MAX_VALUE && col.column.value[col.column.value.length - 1] != Byte.MIN_VALUE) {
                    throw new CorruptIndexException("Lucandra field is not properly encoded: " + docId + "(" + fieldName + ")");
                } else if (col.column.value[col.column.value.length - 1] == Byte.MAX_VALUE) {
                    //Binary
                    value = new byte[col.column.value.length - 1];
                    System.arraycopy(col.column.value, 0, value, 0, col.column.value.length - 1);
                    field = new Field(fieldName, value, Store.YES);
                    cacheDoc.add(field);
                } else if (col.column.value[col.column.value.length - 1] == Byte.MIN_VALUE) {
                    //String
                    value = new byte[col.column.value.length - 1];
                    System.arraycopy(col.column.value, 0, value, 0, col.column.value.length - 1);
                    //Check for multi-fields
                    String fieldString = new String(value, "UTF-8");
                    if (fieldString.indexOf(CassandraUtils.delimeter) >= 0) {
                        StringTokenizer tok = new StringTokenizer(fieldString, CassandraUtils.delimeter);
                        while (tok.hasMoreTokens()) {
                            field = new Field(fieldName, tok.nextToken(), Store.YES, Index.ANALYZED);
                            cacheDoc.add(field);
                        }
                    } else {
                        field = new Field(fieldName, fieldString, Store.YES, Index.ANALYZED);
                        cacheDoc.add(field);
                    }
                }
            }
            //Mark the required doc
            if (key.getKey().equals(docNum))
                doc = cacheDoc;
            getDocumentCache().put(key.getKey(), cacheDoc);
        }
        long end = System.currentTimeMillis();
        logger.debug("Document read took: " + (end - start) + "ms");
        return doc;
    } catch (Exception e) {
        throw new IOException(e.getLocalizedMessage());
    }
}
Example 40
Project: step-master  File: FieldConfig.java View source code
/**
     * Gets a numerical field
     * 
     * @param fieldValue the field value
     * @return the field
     */
public Fieldable getField(final Number fieldValue) {
    final NumericField field = new NumericField(this.name, this.store, this.index == Index.ANALYZED);
    if (fieldValue instanceof Double) {
        field.setDoubleValue((Double) fieldValue);
    } else if (fieldValue instanceof Integer) {
        field.setIntValue((Integer) fieldValue);
    } else if (fieldValue instanceof Long) {
        field.setLongValue((Long) fieldValue);
    } else {
        throw new StepInternalException("Unsupported type: " + fieldValue.getClass());
    }
    return field;
}
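NumericField ignores the Field.Index enum entirely: the boolean in its constructor decides whether the value is trie-indexed, which is why the code above collapses this.index == Index.ANALYZED into that flag. A minimal usage sketch (the field name and values are assumptions):

// Index a long and search it with a numeric range query (Lucene 2.9+).
NumericField price = new NumericField("price", Field.Store.YES, true);
price.setLongValue(1250L);
doc.add(price); // assumes an existing Document doc
Query cheap = NumericRangeQuery.newLongRange("price", 0L, 2000L, true, true);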
Example 41
Project: recommenders-master  File: FileSnippetRepository.java View source code
private void indexSnippet(IndexWriter writer, ISnippet snippet, String path) throws IOException {
    Document doc = new Document();
    doc.add(new Field(F_PATH, path, Store.YES, Index.NO));
    doc.add(new Field(F_UUID, snippet.getUuid().toString(), Store.NO, Index.NOT_ANALYZED));
    String name = snippet.getName();
    doc.add(new Field(F_NAME, name, Store.YES, Index.ANALYZED));
    String description = snippet.getDescription();
    doc.add(new Field(F_DESCRIPTION, description, Store.YES, Index.ANALYZED));
    for (String tag : snippet.getTags()) {
        doc.add(new Field(F_TAG, tag, Store.YES, Index.ANALYZED_NO_NORMS));
    }
    for (String extraSearchTerm : snippet.getExtraSearchTerms()) {
        doc.add(new Field(F_EXTRA_SEARCH_TERM, extraSearchTerm, Store.YES, Index.ANALYZED));
    }
    for (Location location : expandLocation(snippet.getLocation())) {
        Field field = new Field(F_LOCATION, getIndexString(location), Store.NO, Index.NOT_ANALYZED);
        field.setBoost(0);
        doc.add(field);
    }
    for (ProjectCoordinate dependency : snippet.getNeededDependencies()) {
        doc.add(new Field(F_DEPENDENCY, getDependencyString(dependency), Store.YES, Index.ANALYZED));
    }
    if (snippet.getLocation() == Location.FILE) {
        if (snippet.getFilenameRestrictions().isEmpty()) {
            doc.add(new Field(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION, Store.NO, Index.NOT_ANALYZED));
        }
        for (String restriction : snippet.getFilenameRestrictions()) {
            doc.add(new Field(F_FILENAME_RESTRICTION, restriction.toLowerCase(), Store.NO, Index.NOT_ANALYZED));
        }
    } else {
        doc.add(new Field(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION, Store.NO, Index.NOT_ANALYZED));
    }
    writer.addDocument(doc);
}
Example 42
Project: AdServing-master  File: GeoIpIndex.java View source code
public void importIPs(String path) {
    try {
        if (!path.endsWith("/")) {
            path += "/";
        }
        Directory directory = FSDirectory.open(new File(db, "geo"));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, new StandardAnalyzer(Version.LUCENE_31));
        IndexWriter writer = new IndexWriter(directory, config);
        List<String> cnames = new ArrayList<String>();
        BufferedReader br = new BufferedReader(new FileReader(path + "GeoLiteCity-Blocks.csv"));
        CSVReader reader = new CSVReader(br, ',', '\"', 2);
        //			Scanner scanner = new Scanner(new FileReader(filename));
        //			boolean firstLine = true;
        int count = 0;
        String[] values;
        Map<String, Map<String, String>> locations = getLocations(path);
        while ((values = reader.readNext()) != null) {
            String ipfrom = values[0];
            String ipto = values[1];
            String locid = values[2];
            Map<String, String> location = locations.get(locid);
            Document doc = new Document();
            doc.add(new Field("city", location.get("city"), Store.YES, Index.ANALYZED));
            doc.add(new Field("postalcode", location.get("postalcode"), Store.YES, Index.ANALYZED));
            doc.add(new Field("country", location.get("country"), Store.YES, Index.ANALYZED));
            doc.add(new Field("region", location.get("region"), Store.YES, Index.ANALYZED));
            doc.add(new Field("latitude", location.get("latitude"), Store.YES, Index.ANALYZED));
            doc.add(new Field("longitude", location.get("longitude"), Store.YES, Index.ANALYZED));
            NumericField ipfromField = new NumericField("ipfrom", 8, Store.YES, true);
            ipfromField.setLongValue(Long.parseLong(ipfrom.trim()));
            doc.add(ipfromField);
            NumericField iptoField = new NumericField("ipto", 8, Store.YES, true);
            iptoField.setLongValue(Long.parseLong(ipto.trim()));
            doc.add(iptoField);
            //				doc.add(new NumericField("ipto", ipto, Store.YES, Index.ANALYZED));
            writer.addDocument(doc);
            count++;
            if (count % 100 == 0) {
                writer.commit();
            }
        }
        System.out.println(count + " Einträge importiert"); // German: "entries imported"
        writer.optimize();
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 43
Project: ClusterBasedRelevanceFeedback-master  File: PersistentSnapshotDeletionPolicy.java View source code
/**
   * Persists all snapshots information. If the given id and segment are not
   * null, it persists their information as well.
   */
private void persistSnapshotInfos(String id, String segment) throws IOException {
    writer.deleteAll();
    Document d = new Document();
    d.add(new Field(SNAPSHOTS_ID, "", Store.YES, Index.NO));
    for (Entry<String, String> e : super.getSnapshots().entrySet()) {
        d.add(new Field(e.getKey(), e.getValue(), Store.YES, Index.NO));
    }
    if (id != null) {
        d.add(new Field(id, segment, Store.YES, Index.NO));
    }
    writer.addDocument(d);
    writer.commit();
}
Example 44
Project: elasticsearch-server-master  File: TypeParsers.java View source code
public static Field.Index parseIndex(String fieldName, String index) throws MapperParsingException {
    index = Strings.toUnderscoreCase(index);
    if ("no".equals(index)) {
        return Field.Index.NO;
    } else if ("not_analyzed".equals(index)) {
        return Field.Index.NOT_ANALYZED;
    } else if ("analyzed".equals(index)) {
        return Field.Index.ANALYZED;
    } else {
        throw new MapperParsingException("Wrong value for index [" + index + "] for field [" + fieldName + "]");
    }
}
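A small usage sketch of the mapping above (hypothetical call site):

Field.Index idx = TypeParsers.parseIndex("title", "not_analyzed"); // Field.Index.NOT_ANALYZED
// Camel case is normalized first via Strings.toUnderscoreCase, so "notAnalyzed" yields
// the same result, while an unknown value such as "fancy" throws MapperParsingException.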
Example 45
Project: extension-aws-master  File: RelatedKeywordLuceneSearcher.java View source code
public void indexWord(String inWord, HitTracker inResults, Searcher inTypeSearcher) throws Exception {
    if (inWord == null || inWord.equals("")) {
        return;
    }
    HashSet<String> terms = new HashSet<String>();
    int count = 0;
    for (Object o : inResults) {
        count++;
        if (count > 50) {
            //Don't scan the entire result set
            break;
        }
        String keywords = inResults.getValue(o, "keywords");
        if (keywords != null) {
            for (String keyword : keywords.split(" ")) {
                keyword = keyword.trim();
                if (keyword.length() > 1 && !keyword.equals(inWord)) {
                    terms.add(keyword);
                }
            }
        }
        if (terms.size() > 9)
            break;
    }
    //Now check for categories?
    count = 0;
    if (terms.size() < 9) {
        for (Object o : inResults) {
            count++;
            if (count > 50) {
                //Don't scan the entire result set
                break;
            }
            String catalogid = inResults.getValue(o, "catalogid");
            String categoryid = inResults.getValue(o, "category");
            if (catalogid != null && categoryid != null && !"index".equals(categoryid)) {
                CategoryArchive archive = getMediaArchive(catalogid).getCategoryArchive();
                for (String keyword : categoryid.split(" ")) {
                    keyword = keyword.trim();
                    if (keyword.length() > 0 && !keyword.equals(inWord)) {
                        Category cat = archive.getCategory(keyword);
                        if (cat != null) {
                            keyword = cat.getName();
                            terms.add(keyword);
                        }
                    }
                }
            }
            if (terms.size() > 9)
                break;
        }
    }
    Document doc = new Document();
    StringBuffer saved = new StringBuffer();
    StringBuffer savedenc = new StringBuffer();
    //Find out how many asset hits exists
    for (String synonym : terms) {
        SearchQuery typeQuery = inTypeSearcher.createSearchQuery();
        synonym = synonym.replaceAll("\\(.*?\\)", "");
        synonym = synonym.replace("(", "").replace(")", "").replace("-", "");
        typeQuery.addStartsWith("description", synonym);
        typeQuery.setHitsName("relatedkeywords");
        try {
            int hits = inTypeSearcher.search(typeQuery).getTotal();
            if (hits > 1) {
                saved.append(synonym);
                saved.append(" (");
                saved.append(hits);
                saved.append(")");
                saved.append(";");
                synonym = synonym.replace(' ', '_').replace(";", " ");
                savedenc.append(synonym);
                savedenc.append(" ");
            }
        } catch (Exception ex) {
            log.error(ex);
        }
    }
    // Save the collected synonyms into the index
    if (saved.length() > 0) {
        doc.add(new Field("synonyms", saved.toString(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    }
    doc.add(new Field("synonymsenc", savedenc.toString(), Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field("word", inWord.replace(" ", "_"), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    /* Timestamp */
    String timestamp = DateTools.dateToString(new Date(), Resolution.SECOND);
    doc.add(new Field("timestamp", timestamp, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    getIndexWriter().addDocument(doc, getAnalyzer());
    clearIndex();
}
Example 46
Project: guj.com.br-master  File: LuceneIndexer.java View source code
private Document createDocument(Post p) {
    Document d = new Document();
    d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p.getId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p.getForumId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p.getTopicId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p.getUserId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.DATE, this.settings.formatDateTime(p.getTime()), Store.YES, Index.UN_TOKENIZED));
    // We index the subject and message text together because, when searching, we only care about
    // the matches, not where they occurred. The real subject and contents are fetched from the database.
    d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject() + " " + p.getText(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 47
Project: hibernate-hql-parser-master  File: ClassBasedLucenePropertyHelper.java View source code
public boolean isAnalyzed(Class<?> type, String... propertyPath) {
    EntityIndexBinding entityIndexBinding = getIndexBinding(type);
    if (isIdentifierProperty(entityIndexBinding, propertyPath)) {
        return false;
    }
    TypeMetadata metadata = getLeafTypeMetadata(type, propertyPath);
    Index index = metadata.getPropertyMetadataForProperty(propertyPath[propertyPath.length - 1]).getFieldMetadata().iterator().next().getIndex();
    return EnumSet.of(Field.Index.ANALYZED, Field.Index.ANALYZED_NO_NORMS).contains(index);
}
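In Lucene 3.x the same test can be written with the enum's own accessor, since ANALYZED and ANALYZED_NO_NORMS are exactly the constants for which isAnalyzed() is true. A one-line alternative (a sketch, assuming the 3.x Field.Index API):

// Equivalent to the EnumSet membership check above.
return index.isAnalyzed();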
Example 48
Project: hipergate-master  File: Indexer.java View source code
public static void add(IndexWriter oIWrt, Map oKeywords, Map oTexts, Map oUnStored) throws ClassNotFoundException, IOException, IllegalArgumentException, NoSuchFieldException, IllegalAccessException, InstantiationException, NullPointerException {
    String sFieldName;
    Object oFieldValue;
    Document oDoc = new Document();
    // *******************************************
    // Index keywords as stored untokenized fields
    Iterator oKeys = oKeywords.keySet().iterator();
    while (oKeys.hasNext()) {
        sFieldName = (String) oKeys.next();
        oFieldValue = oKeywords.get(sFieldName);
        if (null == oFieldValue)
            oFieldValue = "";
        if (oFieldValue.getClass().getName().equals("java.util.Date"))
            oDoc.add(new Field(sFieldName, DateTools.dateToString((Date) oFieldValue, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
        else
            oDoc.add(new Field(sFieldName, (String) oFieldValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
    }
    // wend
    // ******************************************************
    // Index titles, authors, etc. as stored tokenized fields
    Iterator oTxts = oTexts.keySet().iterator();
    while (oTxts.hasNext()) {
        sFieldName = (String) oTxts.next();
        oFieldValue = oTexts.get(sFieldName);
        if (null == oFieldValue)
            oFieldValue = "";
        oDoc.add(new Field(sFieldName, (String) oFieldValue, Field.Store.YES, Field.Index.ANALYZED));
    }
    // wend
    // *********************************************
    // Index full texts as unstored tokenized fields
    Iterator oUnStor = oUnStored.keySet().iterator();
    while (oUnStor.hasNext()) {
        sFieldName = (String) oUnStor.next();
        oFieldValue = oUnStored.get(sFieldName);
        if (null == oFieldValue)
            oFieldValue = "";
        oDoc.add(new Field(sFieldName, (String) oFieldValue, Field.Store.NO, Field.Index.ANALYZED));
    }
    // wend
    oIWrt.addDocument(oDoc);
}
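A minimal call-site sketch for the three maps, which become (Store.YES, NOT_ANALYZED), (Store.YES, ANALYZED) and (Store.NO, ANALYZED) fields respectively (all names and values are hypothetical):

Map oKeywords = new HashMap();
oKeywords.put("guid", "a1b2c3");
oKeywords.put("created", new Date()); // Date values are converted via DateTools
Map oTexts = new HashMap();
oTexts.put("title", "Quarterly report");
Map oUnStored = new HashMap();
oUnStored.put("body", "Full text that is searchable but not stored");
Indexer.add(oIWrt, oKeywords, oTexts, oUnStored);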
Example 49
Project: IPAddressZipCodeStateCountryLuceneJavaSearch-master  File: IndexIpAddressTask.java View source code
/**
   * Adds IPSearch data to the index.
   * 
   * @param bean
   *          the bean it needs to index
   * @throws IOException
   */
public void addLocation(IpSearchCityBean bean) throws IOException {
    Document doc = new Document();
    doc.add(new NumericField("ip_start", Field.Store.YES, true).setLongValue(bean.getIpStart()));
    doc.add(new NumericField("ip_end", Field.Store.YES, true).setLongValue(bean.getIpEnd()));
    doc.add(new NumericField("ip_start_a", Field.Store.NO, true).setLongValue((bean.getIpStart() / 16777216l) % 256));
    doc.add(new NumericField("ip_start_b", Field.Store.NO, true).setLongValue((bean.getIpStart() / 65536) % 256));
    doc.add(new NumericField("ip_start_c", Field.Store.NO, true).setLongValue((bean.getIpStart() / 256) % 256));
    doc.add(new NumericField("ip_start_d", Field.Store.NO, true).setLongValue((bean.getIpStart()) % 256));
    doc.add(new NumericField("ip_end_a", Field.Store.NO, true).setLongValue((bean.getIpEnd() / 16777216l) % 256));
    doc.add(new NumericField("ip_end_b", Field.Store.NO, true).setLongValue((bean.getIpEnd() / 65536) % 256));
    doc.add(new NumericField("ip_end_c", Field.Store.NO, true).setLongValue((bean.getIpEnd() / 256) % 256));
    doc.add(new NumericField("ip_end_d", Field.Store.NO, true).setLongValue((bean.getIpEnd()) % 256));
    doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(bean.getLat()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(bean.getLon()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    // some of these fields have a chance of being null
    addToDoc(doc, "city", bean.getCity(), Field.Store.YES, Field.Index.ANALYZED);
    addToDoc(doc, "zip_code", bean.getZipCode(), Field.Store.YES, Field.Index.ANALYZED);
    addToDoc(doc, "country_code", bean.getCountryCode(), Field.Store.YES, Field.Index.ANALYZED);
    addToDoc(doc, "country_name", bean.getCountryName(), Field.Store.YES, Field.Index.ANALYZED);
    addToDoc(doc, "metro_code", bean.getMetroCode(), Field.Store.YES, Field.Index.ANALYZED);
    addToDoc(doc, "region_code", bean.getRegionCode(), Field.Store.YES, Field.Index.ANALYZED);
    addToDoc(doc, "region_name", bean.getRegionName(), Field.Store.YES, Field.Index.ANALYZED);
    IProjector projector = new SinusoidalProjector();
    int startTier = 5;
    int endTier = 15;
    for (; startTier <= endTier; startTier++) {
        CartesianTierPlotter ctp;
        ctp = new CartesianTierPlotter(startTier, projector, tierPrefix);
        double boxId = ctp.getTierBoxId(bean.getLat(), bean.getLon());
        doc.add(new Field(ctp.getTierFieldName(), NumericUtils.doubleToPrefixCoded(boxId), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    }
    writer.addDocument(doc);
}
Example 50
Project: jabylon-master  File: PropertyFileAnalyzer.java View source code
public List<Document> createDocuments(PropertyFileDescriptor descriptor) {
    PropertyFile file = descriptor.loadProperties();
    List<Document> documents = new ArrayList<Document>(file.getProperties().size());
    Map<String, Property> masterProperties = Collections.emptyMap();
    if (!descriptor.isMaster()) {
        PropertyFile masterFile = descriptor.getMaster().loadProperties();
        masterProperties = masterFile.asMap();
    }
    EList<Property> properties = file.getProperties();
    for (Property property : properties) {
        Document doc = new Document();
        ProjectLocale locale = descriptor.getProjectLocale();
        ProjectVersion version = locale.getParent();
        Project project = version.getParent();
        Field projectField = new Field(QueryService.FIELD_PROJECT, project.getName(), Store.YES, Index.NOT_ANALYZED);
        doc.add(projectField);
        Field versionField = new Field(QueryService.FIELD_VERSION, version.getName(), Store.YES, Index.NOT_ANALYZED);
        doc.add(versionField);
        if (locale.isMaster()) {
            //mark the master files specifically
            Field localeField = new Field(QueryService.FIELD_LOCALE, QueryService.MASTER, Store.YES, Index.NOT_ANALYZED);
            doc.add(localeField);
        } else if (locale.getLocale() != null) {
            Field localeField = new Field(QueryService.FIELD_LOCALE, locale.getLocale().toString(), Store.YES, Index.NOT_ANALYZED);
            doc.add(localeField);
            //only add the master to a localized document
            if (masterProperties.get(property.getKey()) != null && masterProperties.get(property.getKey()).getValue() != null) {
                Field masterValueField = new Field(QueryService.FIELD_MASTER_VALUE, masterProperties.get(property.getKey()).getValue(), Store.YES, Index.ANALYZED);
                doc.add(masterValueField);
            }
            if (masterProperties.get(property.getKey()) != null && masterProperties.get(property.getKey()).getComment() != null) {
                Field masterCommentField = new Field(QueryService.FIELD_MASTER_COMMENT, masterProperties.get(property.getKey()).getComment(), Store.YES, Index.ANALYZED);
                doc.add(masterCommentField);
            }
        }
        Field uriField = new Field(QueryService.FIELD_URI, descriptor.getLocation().toString(), Store.YES, Index.NOT_ANALYZED);
        doc.add(uriField);
        Field pathField = new Field(QueryService.FIELD_FULL_PATH, descriptor.fullPath().toString(), Store.YES, Index.NOT_ANALYZED);
        doc.add(pathField);
        CDOID cdoID = descriptor.cdoID();
        StringBuilder builder = new StringBuilder();
        CDOIDUtil.write(builder, cdoID);
        Field idField = new Field(QueryService.FIELD_CDO_ID, builder.toString(), Store.YES, Index.NOT_ANALYZED);
        doc.add(idField);
        Field comment = new Field(QueryService.FIELD_COMMENT, nullSafe(property.getComment()), Store.YES, Index.ANALYZED);
        doc.add(comment);
        Field key = new Field(QueryService.FIELD_KEY, nullSafe(property.getKey()), Store.YES, Index.NOT_ANALYZED);
        doc.add(key);
        Field analyzedKey = new Field(QueryService.FIELD_KEY, nullSafe(property.getKey()), Store.YES, Index.ANALYZED);
        doc.add(analyzedKey);
        Field value = new Field(QueryService.FIELD_VALUE, nullSafe(property.getValue()), Store.YES, Index.ANALYZED);
        doc.add(value);
        String templateLocation = descriptor.getMaster() == null ? "" : descriptor.getMaster().getLocation().toString();
        Field templateLoc = new Field(QueryService.FIELD_TEMPLATE_LOCATION, templateLocation, Store.YES, Index.NOT_ANALYZED);
        doc.add(templateLoc);
        documents.add(doc);
    }
    return documents;
}
Example 51
Project: jcr-master  File: TestChangesHolder.java View source code
public void testSerNDeserializeDocs() throws Exception {
    //System.out.println("###       testSerNDeserializeDocs    ###");
    Collection<Document> add = new ArrayList<Document>(3);
    Document doc = new Document();
    doc.setBoost(2.0f);
    Field fieldFull = new Field("full", "full-value", Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
    fieldFull.setBoost(2.0f);
    fieldFull.setOmitTermFreqAndPositions(true);
    doc.add(fieldFull);
    Field fieldEmpty = new Field("empty", "empty-value", Store.NO, Index.NOT_ANALYZED, TermVector.NO);
    doc.add(fieldEmpty);
    add.add(doc);
    doc = new Document();
    doc.add(fieldFull);
    add.add(doc);
    doc = new Document();
    doc.add(fieldEmpty);
    add.add(doc);
    ByteArrayOutputStream baos = null;
    int total = 100000;
    long start;
    Collection<String> remove = Collections.emptyList();
    Collection<Document> addResult = null;
    start = System.currentTimeMillis();
    for (int i = 0; i < total; i++) {
        baos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(baos);
        oos.writeObject(new ChangesHolder(remove, add));
        oos.close();
    }
    //System.out.println("Custom serialization: total time = " + (System.currentTimeMillis() - start) + ", size = " + baos.size());
    start = System.currentTimeMillis();
    for (int i = 0; i < total; i++) {
        ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()));
        addResult = ((ChangesHolder) ois.readObject()).getAdd();
        ois.close();
    }
    //System.out.println("Custom deserialization: total time = " + (System.currentTimeMillis() - start));
    checkDocs(addResult);
    start = System.currentTimeMillis();
    for (int i = 0; i < total; i++) {
        baos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(baos);
        oos.writeObject(add);
        oos.close();
    }
    //System.out.println("Native serialization: total time = " + (System.currentTimeMillis() - start) + ", size = " + baos.size());
    start = System.currentTimeMillis();
    for (int i = 0; i < total; i++) {
        ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()));
        addResult = (Collection<Document>) ois.readObject();
        ois.close();
    }
    //System.out.println("Native deserialization: total time = " + (System.currentTimeMillis() - start));
    checkDocs(addResult);
}
Example 52
Project: jforum2-master  File: LuceneIndexer.java View source code
private Document createDocument(Post p) {
    Document d = new Document();
    d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p.getId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p.getForumId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p.getTopicId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p.getUserId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.DATE, this.settings.formatDateTime(p.getTime()), Store.YES, Index.UN_TOKENIZED));
    // We index the subject and message text together because, when searching, we only care about
    // the matches, not where they occurred. The real subject and contents are fetched from the database.
    d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject() + " " + p.getText(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 53
Project: mdrill-master  File: FieldTermStack.java View source code
public static void main(String[] args) throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer);
    Query query = parser.parse("a x:b");
    FieldQuery fieldQuery = new FieldQuery(query, true, false);
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
    Document doc = new Document();
    doc.add(new Field("f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    doc.add(new Field("f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir, true);
    new FieldTermStack(reader, 0, "f", fieldQuery);
    reader.close();
}
Example 54
Project: neo4j-components-svn-master  File: SimpleFulltextIndex.java View source code
private void doIndex(IndexWriter writer, long nodeId, String predicate, Object literal) {
    try {
        Document doc = new Document();
        doc.add(new Field(KEY_ID, String.valueOf(nodeId), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX, getLiteralReader().read(literal), Store.YES, Index.ANALYZED));
        doc.add(new Field(KEY_PREDICATE, predicate, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX_SOURCE, literal.toString(), Store.YES, Index.NOT_ANALYZED));
        writer.addDocument(doc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 55
Project: ocms-master  File: ContentTableManagerDBImpl.java View source code
public void setIndexProp(ContentTable ct) {
    ct.setAllowIndex(1);
    Set<ContentField> fields = ct.getContentFieldsSet();
    if (fields != null) {
        for (ContentField field : fields) {
            String indexType = field.getIndexType();
            String fieldType = field.getFieldType();
            if (!StringUtils.hasText(indexType) || indexType.equalsIgnoreCase(Index.NO.toString())) {
                if (fieldType.equalsIgnoreCase("varchar") || fieldType.equalsIgnoreCase("text")) {
                    field.setIndexType(Index.TOKENIZED.toString());
                } else {
                    field.setIndexType(Index.UN_TOKENIZED.toString());
                }
            }
            field.setStoreType(Store.COMPRESS.toString());
            contentFieldDao.saveContentField(field);
        }
    }
    this.saveContentTable(ct);
}
Example 56
Project: opencms-core-master  File: CmsSearchField.java View source code
/**
     * Creates a Lucene field with the given name from the configuration and the provided content.<p>
     * 
     * If no valid content is provided (that is the content is either <code>null</code> or 
     * only whitespace), then no field is created and <code>null</code> is returned.<p>
     * 
     * @param name the name of the field to create
     * @param content the content to create the field with
     * 
     * @return a Lucene field with the given name from the configuration and the provided content
     */
public Field createField(String name, String content) {
    if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
        content = getDefaultValue();
    }
    if (content != null) {
        Index index = Field.Index.NO;
        if (isIndexed()) {
            if (isTokenizedAndIndexed()) {
                index = Field.Index.ANALYZED;
            } else {
                index = Field.Index.NOT_ANALYZED;
            }
        }
        Field.Store store = Field.Store.NO;
        if (isStored() || isCompressed()) {
            store = Field.Store.YES;
        }
        Field result = new Field(name, content, store, index);
        if (getBoost() != BOOST_DEFAULT) {
            result.setBoost(getBoost());
        }
        return result;
    }
    return null;
}
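The nested conditionals reproduce the truth table behind Field.Index.toIndex(indexed, analyzed), so the same decision can be collapsed into one call (a sketch, assuming the Lucene 3.x static helper):

// (indexed=false, *)             -> Index.NO
// (indexed=true,  analyzed=true) -> Index.ANALYZED
// (indexed=true,  analyzed=false)-> Index.NOT_ANALYZED
Index index = Field.Index.toIndex(isIndexed(), isTokenizedAndIndexed());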
Example 57
Project: opencms-master  File: CmsSearchField.java View source code
/**
     * Creates a Lucene field with the given name from the configuration and the provided content.<p>
     * 
     * If no valid content is provided (that is the content is either <code>null</code> or 
     * only whitespace), then no field is created and <code>null</code> is returned.<p>
     * 
     * @param name the name of the field to create
     * @param content the content to create the field with
     * 
     * @return a Lucene field with the given name from the configuration and the provided content
     */
public Field createField(String name, String content) {
    if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
        content = getDefaultValue();
    }
    if (content != null) {
        Index index = Field.Index.NO;
        if (isIndexed()) {
            if (isTokenizedAndIndexed()) {
                index = Field.Index.ANALYZED;
            } else {
                index = Field.Index.NOT_ANALYZED;
            }
        }
        Field.Store store = Field.Store.NO;
        if (isStored() || isCompressed()) {
            store = Field.Store.YES;
        }
        Field result = new Field(name, content, store, index);
        if (getBoost() != BOOST_DEFAULT) {
            result.setBoost(getBoost());
        }
        return result;
    }
    return null;
}
Example 58
Project: querydsl-master  File: LuceneQueryTest.java View source code
@Test
// FIXME
@Ignore
public void sorted_by_different_locales() throws Exception {
    Document d1 = new Document();
    Document d2 = new Document();
    Document d3 = new Document();
    d1.add(new Field("sort", "aÄ", Store.YES, Index.NOT_ANALYZED));
    d2.add(new Field("sort", "ab", Store.YES, Index.NOT_ANALYZED));
    d3.add(new Field("sort", "aa", Store.YES, Index.NOT_ANALYZED));
    writer = createWriter(idx);
    writer.addDocument(d1);
    writer.addDocument(d2);
    writer.addDocument(d3);
    writer.close();
    IndexReader reader = IndexReader.open(idx);
    searcher = new IndexSearcher(reader);
    query = new LuceneQuery(new LuceneSerializer(true, true, Locale.ENGLISH), searcher);
    assertEquals(3, query.fetch().size());
    List<Document> results = query.where(sort.startsWith("a")).orderBy(sort.asc()).fetch();
    assertEquals(3, results.size());
    assertEquals("aa", results.get(0).getField("sort").stringValue());
    assertEquals("aÄ", results.get(1).getField("sort").stringValue());
    assertEquals("ab", results.get(2).getField("sort").stringValue());
    query = new LuceneQuery(new LuceneSerializer(true, true, new Locale("fi", "FI")), searcher);
    results = query.where(sort.startsWith("a")).orderBy(sort.asc()).fetch();
    assertEquals("aa", results.get(0).getField("sort").stringValue());
    assertEquals("ab", results.get(1).getField("sort").stringValue());
    assertEquals("aÄ", results.get(2).getField("sort").stringValue());
}
Example 59
Project: sakai-cle-master  File: IndexUpdateTransactionImpl.java View source code
/*
	 * (non-Javadoc)
	 * 
	 * @see org.sakaiproject.search.transaction.impl.IndexTransactionImpl#doBeforePrepare()
	 */
@Override
protected void doBeforePrepare() throws IndexTransactionException {
    try {
        transactionId = manager.getSequence().getNextId();
        Document savepointMarker = new Document();
        savepointMarker.add(new Field("_txid", String.valueOf(transactionId), Store.YES, Index.NOT_ANALYZED));
        savepointMarker.add(new Field("_txts", String.valueOf(System.currentTimeMillis()), Store.YES, Index.NOT_ANALYZED));
        savepointMarker.add(new Field("_worker", String.valueOf(Thread.currentThread().getName()), Store.YES, Index.NOT_ANALYZED));
        getInternalIndexWriter();
        indexWriter.addDocument(savepointMarker);
        indexWriter.close();
        indexWriter = null;
        // save all items
        searchBuilderItemSerializer.saveTransactionList(tempIndex, getItems());
    } catch (Exception ex) {
        throw new IndexTransactionException("Failed to prepare transaction", ex);
    }
    super.doBeforePrepare();
}
Example 60
Project: swf-all-master  File: LuceneIndexer.java View source code
private Document getDocument(Record r) throws IOException {
    if (!hasIndexedFields()) {
        return null;
    }
    Document doc = new Document();
    boolean addedFields = false;
    for (String columnName : indexedColumns) {
        ModelReflector<?> reflector = Database.getTable(tableName).getReflector();
        String fieldName = reflector.getFieldName(columnName);
        Object value = reflector.get(r, fieldName);
        if (!ObjectUtil.isVoid(value)) {
            TypeRef<?> ref = Database.getJdbcTypeHelper(reflector.getPool()).getTypeRef(reflector.getFieldGetter(fieldName).getReturnType());
            TypeConverter<?> converter = ref.getTypeConverter();
            if (!ref.isBLOB()) {
                addedFields = true;
                if (Reader.class.isAssignableFrom(ref.getJavaClass())) {
                    doc.add(new Field(fieldName, converter.toString(value), Field.Store.NO, Index.ANALYZED));
                } else {
                    Class<? extends Model> referredModelClass = indexedReferenceColumns.get(columnName);
                    String sValue = converter.toString(value);
                    if (ref.isNumeric() && referredModelClass != null) {
                        ModelReflector<?> referredModelReflector = ModelReflector.instance(referredModelClass);
                        Model referred = Database.getTable(referredModelClass).get(((Number) converter.valueOf(value)).intValue());
                        if (referred != null) {
                            doc.add(new Field(fieldName.substring(0, fieldName.length() - "_ID".length()), StringUtil.valueOf(referred.getRawRecord().get(referredModelReflector.getDescriptionField())), Field.Store.YES, Field.Index.ANALYZED));
                        }
                    }
                    doc.add(new Field(fieldName, sValue, Field.Store.YES, Field.Index.ANALYZED));
                }
            }
        } else {
            addedFields = true;
            if (indexedReferenceColumns.containsKey(fieldName)) {
                doc.add(new Field(fieldName.substring(0, fieldName.length() - "_ID".length()), "NULL", Field.Store.YES, Field.Index.ANALYZED));
            }
            doc.add(new Field(fieldName, "NULL", Field.Store.YES, Field.Index.ANALYZED));
        }
    }
    if (addedFields) {
        doc.add(new Field("ID", StringUtil.valueOf(r.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    } else {
        doc = null;
    }
    return doc;
}
Example 61
Project: trydone-master  File: LuceneIndexer.java View source code
private Document createDocument(Post p) {
    Document d = new Document();
    d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p.getId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p.getForumId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p.getTopicId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p.getUserId()), Store.YES, Index.UN_TOKENIZED));
    d.add(new Field(SearchFields.Keyword.DATE, this.settings.formatDateTime(p.getTime()), Store.YES, Index.UN_TOKENIZED));
    // We index the subject and message text together because, when searching, we only care about
    // the matches, not where they occurred. The real subject and contents are fetched from the database.
    d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject() + " " + p.getText(), Store.NO, Index.TOKENIZED));
    return d;
}
Example 62
Project: arastreju-master  File: ArastrejuIndex.java View source code
// ----------------------------------------------------
private Document createDocument(ResourceNode node) {
    Document doc = new Document();
    doc.add(new Field(IndexFields.QUALIFIED_NAME, node.toURI(), Store.YES, Index.ANALYZED));
    Set<Statement> asserted = node.getAssociations();
    Set<Statement> inferred = new HashSet<Statement>();
    for (Statement stmt : asserted) {
        for (Inferencer inferencer : inferencers) {
            inferencer.addInferenced(stmt, inferred);
        }
        addFields(doc, stmt);
    }
    for (Statement stmt : inferred) {
        addFields(doc, stmt);
    }
    return doc;
}
Example 63
Project: openmicroscopy-master  File: FullTextBridge.java View source code
/**
     * Adds all ownership and time-based details to the index document for the given
     * object.
     *
     * @param name
     * @param object
     * @param document
     * @param opts
     */
public void set_details(final String name, final IObject object, final Document document, final LuceneOptions opts) {
    final LuceneOptions stored = new SimpleLuceneOptions(opts, Store.YES);
    final LuceneOptions storedNotAnalyzed = new SimpleLuceneOptions(opts, Index.NOT_ANALYZED, Store.YES);
    Details details = object.getDetails();
    if (details != null) {
        Experimenter e = details.getOwner();
        if (e != null && e.isLoaded()) {
            String omename = e.getOmeName();
            String firstName = e.getFirstName();
            String lastName = e.getLastName();
            add(document, "details.owner.omeName", omename, stored);
            add(document, "details.owner.firstName", firstName, opts);
            add(document, "details.owner.lastName", lastName, opts);
        }
        ExperimenterGroup g = details.getGroup();
        if (g != null && g.isLoaded()) {
            String groupName = g.getName();
            add(document, "details.group.name", groupName, stored);
        }
        Event creationEvent = details.getCreationEvent();
        if (creationEvent != null) {
            add(document, "details.creationEvent.id", creationEvent.getId().toString(), storedNotAnalyzed);
            if (creationEvent.isLoaded()) {
                String creation = DateBridge.DATE_SECOND.objectToString(creationEvent.getTime());
                add(document, "details.creationEvent.time", creation, storedNotAnalyzed);
            }
        }
        Event updateEvent = details.getUpdateEvent();
        if (updateEvent != null) {
            add(document, "details.updateEvent.id", updateEvent.getId().toString(), storedNotAnalyzed);
            if (updateEvent.isLoaded()) {
                String update = DateBridge.DATE_SECOND.objectToString(updateEvent.getTime());
                add(document, "details.updateEvent.time", update, storedNotAnalyzed);
            }
        }
        Permissions perms = details.getPermissions();
        if (perms != null) {
            add(document, "details.permissions", perms.toString(), stored);
        }
    }
}
Example 64
Project: mylyn.tasks-master  File: TaskListIndex.java View source code
/**
	 * call to wait until index maintenance has completed
	 *
	 * @throws InterruptedException
	 */
public void waitUntilIdle() throws InterruptedException {
    if (!Platform.isRunning() && reindexDelay != 0L) {
        // job join() behaviour is not the same when platform is not running
        Logger.getLogger(TaskListIndex.class.getName()).warning("Index job joining may not work properly when Eclipse platform is not running");
    }
    maintainIndexJob.join();
}
Example 66
Project: CMISBox-master  File: Storage.java View source code
private void index(StoredItem si) throws Exception {
    org.apache.lucene.document.Document ldoc = new org.apache.lucene.document.Document();
    ldoc.add(new Field(Storage.FIELD_PATH, si.getPath(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_TYPE, si.getType(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_ID, si.getId(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_VERSION, si.getVersion(), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_LOCAL_MODIFIED, DateTools.timeToString(si.getLocalModified(), Resolution.MILLISECOND), Store.YES, Index.NOT_ANALYZED));
    ldoc.add(new Field(Storage.FIELD_REMOTE_MODIFIED, DateTools.timeToString(si.getRemoteModified(), Resolution.MILLISECOND), Store.YES, Index.NOT_ANALYZED));
    this.writer.addDocument(ldoc);
    this.log.debug(String.format("Indexed %s", ldoc));
}
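Because DateTools.timeToString produces lexicographically ordered strings, the NOT_ANALYZED date fields above can be filtered with a plain term range. A minimal sketch, assuming an IndexSearcher is already open on the same index:
import java.io.IOException;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;

// Sketch: find items whose FIELD_LOCAL_MODIFIED falls within the last hour.
static TopDocs modifiedLastHour(IndexSearcher searcher) throws IOException {
    long now = System.currentTimeMillis();
    String lower = DateTools.timeToString(now - 60L * 60 * 1000, Resolution.MILLISECOND);
    String upper = DateTools.timeToString(now, Resolution.MILLISECOND);
    // DateTools strings sort chronologically, so a term range is a time range.
    TermRangeQuery query = new TermRangeQuery(Storage.FIELD_LOCAL_MODIFIED, lower, upper, true, true);
    return searcher.search(query, 10);
}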
Example 67
Project: comm-master  File: LuceneUtils.java View source code
/**
	 * Creates the index for a list of beans, replacing any existing entries.
	 * 
	 * @param list the beans to index
	 * @return {@code true} on success, {@code false} if indexing failed
	 */
public boolean createrIndex(List<BbsBean> list) {
    Directory directory = null;
    IndexWriter indexWriter = null;
    try {
        // open the index directory
        directory = FSDirectory.open(new File(indexDir));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        indexWriter = new IndexWriter(directory, iwc);
        indexWriter.deleteAll();
        for (int i = 0; i < list.size(); i++) {
            Document doc = new Document();
            BbsBean bean = list.get(i);
            doc.add(new Field(LuceneType.ALL_TYPE, LuceneType.TYPE_TOPIC, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(LuceneType.ALL_ID, bean.getTopicId(), Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(LuceneType.ALL_TITLE, bean.getTitle(), Store.YES, Index.ANALYZED));
            doc.add(new Field(LuceneType.ALL_CONTENT, bean.getRevertContent().replaceAll("<\\S[^>]+>", "").replaceAll("<p>", ""), Store.YES, Index.ANALYZED));
            doc.add(new Field(LuceneType.ALL_URL, bean.getRevertUrl(), Store.YES, Index.NOT_ANALYZED));
            // add the document to the index
            indexWriter.addDocument(doc);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return true;
}
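Reading this index back is symmetric: the ANALYZED fields (title, content) go through a QueryParser built with the same analyzer, while NOT_ANALYZED fields such as ALL_ID need exact TermQuery matches. A minimal search sketch against the same Lucene 3.6 API, assuming the indexDir and analyzer fields from the example:
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public List<String> searchTitles(String text) throws Exception {
    Directory directory = FSDirectory.open(new File(indexDir));
    IndexReader reader = IndexReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    List<String> urls = new ArrayList<String>();
    try {
        // The same analyzer used at index time must parse the query text.
        Query query = new QueryParser(Version.LUCENE_36, LuceneType.ALL_TITLE, analyzer).parse(text);
        for (ScoreDoc sd : searcher.search(query, 10).scoreDocs) {
            urls.add(searcher.doc(sd.doc).get(LuceneType.ALL_URL));
        }
    } finally {
        searcher.close();
        reader.close();
        directory.close();
    }
    return urls;
}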
Example 68
Project: jucy-master  File: TextIndexer.java View source code
public synchronized Set<HashValue> search(Set<String> keys, Set<String> excludes, Collection<String> endings) {
    // if the inverted index is empty, there can be no results
    if (presentHashes.isEmpty()) {
        return Collections.<HashValue>emptySet();
    }
    BooleanQuery bq = new BooleanQuery();
    for (String s : keys) {
        if (s.contains(" ")) {
            PhraseQuery pq = new PhraseQuery();
            for (String subterm : s.split(" ")) {
                pq.add(new Term(FIELD_CONTENT, subterm));
            }
            bq.add(pq, BooleanClause.Occur.MUST);
        } else {
            bq.add(new TermQuery(new Term(FIELD_CONTENT, s)), BooleanClause.Occur.MUST);
        }
    }
    for (String s : excludes) {
        if (s.contains(" ")) {
            PhraseQuery pq = new PhraseQuery();
            for (String subterm : s.split(" ")) {
                pq.add(new Term(FIELD_CONTENT, subterm));
            }
            bq.add(pq, BooleanClause.Occur.MUST_NOT);
        } else {
            bq.add(new TermQuery(new Term(FIELD_CONTENT, s)), BooleanClause.Occur.MUST_NOT);
        }
    }
    if (!endings.isEmpty()) {
        BooleanQuery equery = new BooleanQuery();
        for (String s : endings) {
            equery.add(new TermQuery(new Term(FIELD_ENDING, s)), BooleanClause.Occur.SHOULD);
        }
        bq.add(equery, BooleanClause.Occur.MUST);
    }
    Set<HashValue> found = new HashSet<HashValue>();
    try {
        IndexSearcher searcher = new IndexSearcher(index, true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(25, false);
        searcher.search(bq, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc sd : hits) {
            int docId = sd.doc;
            Document d = searcher.doc(docId);
            found.add(HashValue.createHash(d.getBinaryValue(FIELD_HASH)));
        }
        searcher.close();
    } catch (Exception e) {
        logger.warn(e, e);
    }
    return found;
}
Example 69
Project: xcmis-master  File: LuceneIndexer.java View source code
/**
    * 
    * @see org.xcmis.search.content.ContentIndexer#createDocument(org.xcmis.search.content.ContentEntry)
    */
public Document createDocument(ContentEntry contentEntry) {
    final Document doc = new Document();
    //  UUID
    doc.add(new Field(FieldNames.UUID, contentEntry.getIdentifier(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
    //root
    if (contentEntry.getParentIdentifiers().length == 0) {
        doc.add(new Field(FieldNames.PARENT, indexConfiguration.getRootParentUuid(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
        doc.add(new Field(FieldNames.LABEL, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
    } else {
        //parent uuids
        for (int i = 0; i < contentEntry.getParentIdentifiers().length; i++) {
            String parentIdentifier = contentEntry.getParentIdentifiers()[i];
            doc.add(new Field(FieldNames.PARENT, parentIdentifier, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
            doc.add(new Field(FieldNames.LABEL, contentEntry.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
        }
    }
    //table names
    for (int i = 0; i < contentEntry.getTableNames().length; i++) {
        doc.add(new Field(FieldNames.TABLE_NAME, contentEntry.getTableNames()[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
    }
    for (int i = 0; i < contentEntry.getProperties().length; i++) {
        Property property = contentEntry.getProperties()[i];
        if (isIndexed(property.getName())) {
            addProperty(doc, property);
        }
    }
    return doc;
}
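All fields here use NOT_ANALYZED_NO_NORMS, so each value is indexed as a single exact term with no length normalization; lookups are therefore plain term matches. A minimal query-side sketch, assuming a searcher open on the same index:
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

// NOT_ANALYZED fields match verbatim: no tokenization, no lowercasing.
static TopDocs findChildren(IndexSearcher searcher, String parentUuid) throws IOException {
    return searcher.search(new TermQuery(new Term(FieldNames.PARENT, parentUuid)), 100);
}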
Example 70
Project: Bee-Browser-master  File: LuceneHandler.java View source code
protected void write(Entity entity, Document doc) {
    String schema = entity.getSchema();
    if (schema == null)
        schema = "";
    String[] fields = schema.split("\\|");
    for (int i = 0; i < fields.length && i + 1 < fields.length; i += 2) {
        String kind = fields[i];
        String fname = fields[i + 1];
        if (Entity.STRING.equalsIgnoreCase(kind)) {
            Field field = new Field(fname, entity.getString(fname), Store.YES, Index.NOT_ANALYZED_NO_NORMS);
            doc.add(field);
        } else if (Entity.DOUBLE.equalsIgnoreCase(kind)) {
            NumericField field = new NumericField(fname, Store.YES, true);
            field.setDoubleValue(entity.getDouble(fname));
            doc.add(field);
        } else if (Entity.FLOAT.equalsIgnoreCase(kind)) {
            NumericField field = new NumericField(fname, Store.YES, true);
            field.setFloatValue(entity.getFloat(fname));
            doc.add(field);
        } else if (Entity.INTEGER.equalsIgnoreCase(kind)) {
            NumericField field = new NumericField(fname, Store.YES, true);
            field.setIntValue(entity.getInteger(fname));
            doc.add(field);
        } else if (Entity.LONG.equalsIgnoreCase(kind)) {
            NumericField field = new NumericField(fname, Store.YES, true);
            field.setLongValue(entity.getLong(fname));
            doc.add(field);
        } else if (Entity.ANALYZED.equalsIgnoreCase(kind)) {
            Field field = new Field(fname, entity.getString(fname), Store.YES, Index.ANALYZED);
            doc.add(field);
        }
    }
}
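NumericField writes a trie encoding rather than plain terms, so the numeric fields above must be queried with NumericRangeQuery instead of a term range. A minimal sketch, using a hypothetical "price" field of the DOUBLE kind:
import org.apache.lucene.search.NumericRangeQuery;

// Matches documents whose trie-encoded "price" lies in [10.0, 99.99].
NumericRangeQuery<Double> priceRange =
        NumericRangeQuery.newDoubleRange("price", 10.0, 99.99, true, true);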
Example 71
Project: jedit-CtagsInterface-master  File: TagIndex.java View source code
public Origin getOrigin(OriginType type, String id, boolean createIfNotExists) {
    Origin origin = new Origin(type, id);
    if (!createIfNotExists)
        return origin;
    final boolean[] b = { false };
    String query = DOCTYPE_FLD + ":" + ORIGIN_DOC_TYPE + " AND " + TYPE_FLD + ":" + type.name + " AND " + ORIGIN_ID_FLD + ":" + escape(id);
    runQuery(query, 1, new DocHandler() {

        public void handle(Document doc) {
            b[0] = true;
        }
    });
    if (!b[0]) {
        startActivity();
        Document doc = new Document();
        doc.add(new Field(DOCTYPE_FLD, ORIGIN_DOC_TYPE, Store.YES, Index.ANALYZED));
        doc.add(new Field(TYPE_FLD, type.name, Store.YES, Index.ANALYZED));
        doc.add(new Field(ORIGIN_ID_FLD, id, Store.YES, Index.ANALYZED));
        try {
            writer.addDocument(doc);
        } catch (IOException e) {
            e.printStackTrace();
        }
        endActivity();
    }
    return origin;
}
Example 72
Project: moxie-master  File: LuceneExecutor.java View source code
/**
	 * This completely indexes the repository and will destroy any existing
	 * index.
	 * 
	 * @param repository
	 * @return IndexResult
	 */
public IndexResult reindex(String repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(repository)) {
        return result;
    }
    try {
        MoxieCache moxieCache = config.getMoxieCache();
        IMavenCache repositoryCache = config.getMavenCache(repository);
        Collection<File> files = repositoryCache.getFiles("." + org.moxie.Constants.POM);
        IndexWriter writer = getIndexWriter(repository);
        for (File pomFile : files) {
            try {
                Pom pom = PomReader.readPom(moxieCache, pomFile);
                String date = DateTools.timeToString(pomFile.lastModified(), Resolution.MINUTE);
                Document doc = new Document();
                doc.add(new Field(FIELD_PACKAGING, pom.packaging, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field(FIELD_GROUPID, pom.groupId, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_ARTIFACTID, pom.artifactId, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_VERSION, pom.version, Store.YES, Index.ANALYZED));
                if (!StringUtils.isEmpty(pom.name)) {
                    doc.add(new Field(FIELD_NAME, pom.name, Store.YES, Index.ANALYZED));
                }
                if (!StringUtils.isEmpty(pom.description)) {
                    doc.add(new Field(FIELD_DESCRIPTION, pom.description, Store.YES, Index.ANALYZED));
                }
                doc.add(new Field(FIELD_DATE, date, Store.YES, Index.ANALYZED));
                // add the pom to the index
                writer.addDocument(doc);
            } catch (Exception e) {
                logger.log(Level.SEVERE, MessageFormat.format("Exception while reindexing {0} in {1}", pomFile, repository), e);
            }
            result.artifactCount++;
        }
        writer.commit();
        resetIndexSearcher(repository);
        result.success();
    } catch (Exception e) {
        logger.log(Level.SEVERE, "Exception while reindexing " + repository, e);
    }
    return result;
}
Example 73
Project: neo4j-rdf-master  File: SimpleFulltextIndex.java View source code
private void doIndex(IndexWriter writer, long nodeId, String predicate, Object literal) {
    try {
        Document doc = new Document();
        doc.add(new Field(KEY_ID, String.valueOf(nodeId), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX, getLiteralReader().read(literal), Store.YES, Index.ANALYZED));
        doc.add(new Field(KEY_PREDICATE, predicate, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(KEY_INDEX_SOURCE, literal.toString(), Store.YES, Index.NOT_ANALYZED));
        writer.addDocument(doc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 74
Project: wonder-master  File: ERLuceneAdaptorChannel.java View source code
@Override
public void insertRow(NSDictionary row, EOEntity entity) {
    try {
        Document doc = new Document();
        fillWithDictionary(doc, row, entity);
        doc.add(new Field(EXTERNAL_NAME_KEY, entity.externalName(), Store.NO, Index.NOT_ANALYZED));
        writer().addDocument(doc);
    } catch (EOGeneralAdaptorException e) {
        throw e;
    } catch (Throwable e) {
        throw new ERLuceneAdaptorException("Failed to insert '" + entity.name() + "' with row " + row + ": " + e.getMessage(), e);
    }
}
Example 75
Project: zenoss-zep-master  File: LuceneEventIndexMapper.java View source code
public static Document fromEventSummary(EventSummary summary, Map<String, EventDetailItem> detailsConfig, boolean isArchive) throws ZepException {
    Document doc = new Document();
    // Archive events don't store serialized protobufs - see ZEN-2159
    if (!isArchive) {
        doc.add(new Field(FIELD_PROTOBUF, compressProtobuf(summary)));
    }
    // Store the UUID for more lightweight queries against the index
    doc.add(new Field(FIELD_UUID, summary.getUuid(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_CURRENT_USER_NAME, summary.getCurrentUserName(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new IntField(FIELD_STATUS, summary.getStatus().getNumber(), Store.YES));
    doc.add(new LongField(FIELD_COUNT, summary.getCount(), Store.YES));
    doc.add(new LongField(FIELD_LAST_SEEN_TIME, summary.getLastSeenTime(), Store.YES));
    doc.add(new LongField(FIELD_FIRST_SEEN_TIME, summary.getFirstSeenTime(), Store.NO));
    doc.add(new LongField(FIELD_STATUS_CHANGE_TIME, summary.getStatusChangeTime(), Store.NO));
    doc.add(new LongField(FIELD_UPDATE_TIME, summary.getUpdateTime(), Store.NO));
    Event event = summary.getOccurrence(0);
    doc.add(new Field(FIELD_FINGERPRINT, event.getFingerprint(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_SUMMARY, event.getSummary(), Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_SUMMARY_NOT_ANALYZED, event.getSummary().toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new IntField(FIELD_SEVERITY, event.getSeverity().getNumber(), Store.YES));
    doc.add(new Field(FIELD_EVENT_CLASS, event.getEventClass(), Store.NO, Index.ANALYZED_NO_NORMS));
    // Store with a trailing slash to make lookups simpler
    doc.add(new Field(FIELD_EVENT_CLASS_NOT_ANALYZED, event.getEventClass().toLowerCase() + "/", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_AGENT, event.getAgent(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_MONITOR, event.getMonitor(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_EVENT_KEY, event.getEventKey(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_EVENT_CLASS_KEY, event.getEventClassKey(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_EVENT_GROUP, event.getEventGroup(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_MESSAGE, event.getMessage(), Store.NO, Index.ANALYZED_NO_NORMS));
    for (EventTag tag : event.getTagsList()) {
        for (String tagUuid : tag.getUuidList()) {
            doc.add(new Field(FIELD_TAGS, tagUuid, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        }
    }
    EventActor actor = event.getActor();
    String uuid = actor.getElementUuid();
    if (uuid != null && !uuid.isEmpty()) {
        doc.add(new Field(FIELD_TAGS, uuid, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    }
    String id = actor.getElementIdentifier();
    doc.add(new Field(FIELD_ELEMENT_IDENTIFIER, id, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_IDENTIFIER_NOT_ANALYZED, id.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    String title = actor.getElementTitle();
    doc.add(new Field(FIELD_ELEMENT_TITLE, title, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_TITLE_NOT_ANALYZED, title.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    String subUuid = actor.getElementSubUuid();
    if (subUuid != null && !subUuid.isEmpty()) {
        doc.add(new Field(FIELD_TAGS, subUuid, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    }
    String subId = actor.getElementSubIdentifier();
    doc.add(new Field(FIELD_ELEMENT_SUB_IDENTIFIER, subId, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_SUB_IDENTIFIER_NOT_ANALYZED, subId.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    String subTitle = actor.getElementSubTitle();
    doc.add(new Field(FIELD_ELEMENT_SUB_TITLE, subTitle, Store.NO, Index.ANALYZED_NO_NORMS));
    doc.add(new Field(FIELD_ELEMENT_SUB_TITLE_NOT_ANALYZED, subTitle.toLowerCase(), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    // find details for indexing
    List<EventDetail> evtDetails = event.getDetailsList();
    // Details with no value are indexed using a default value so we can search for None's.
    // The value used to index the null details depends on the type of the detail:
    //     - Null numeric details are indexed using the Java min Integer
    //     - Null text details are indexed using the bell character
    // The values defined in the zep facade for null details must match the above values
    Iterator<Map.Entry<String, EventDetailItem>> it = detailsConfig.entrySet().iterator();
    while (it.hasNext()) {
        boolean found = false;
        Map.Entry<String, EventDetailItem> entry = it.next();
        // make sure that entry doesn't exist in the regular document
        for (EventDetail eDetail : evtDetails) {
            String detailName = eDetail.getName();
            if (entry.getKey().equals(detailName)) {
                found = true;
                break;
            }
        }
        if (!found) {
            String detailKeyName = DETAIL_INDEX_PREFIX + entry.getKey();
            EventDetailItem detailDefn = detailsConfig.get(entry.getKey());
            switch(detailDefn.getType()) {
                case INTEGER:
                    doc.add(new IntField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                case FLOAT:
                    doc.add(new FloatField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                case LONG:
                    doc.add(new LongField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                case DOUBLE:
                    doc.add(new DoubleField(detailKeyName, Integer.MIN_VALUE, Store.NO));
                    break;
                default:
                    doc.add(new Field(detailKeyName, Character.toString((char) 07), Store.NO, Index.NOT_ANALYZED_NO_NORMS));
                    break;
            }
        }
    }
    for (EventDetail eDetail : evtDetails) {
        String detailName = eDetail.getName();
        EventDetailItem detailDefn = detailsConfig.get(detailName);
        if (detailDefn != null) {
            String detailKeyName = DETAIL_INDEX_PREFIX + detailDefn.getKey();
            for (String detailValue : eDetail.getValueList()) {
                switch(detailDefn.getType()) {
                    case STRING:
                        doc.add(new Field(detailKeyName, detailValue, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
                        break;
                    case INTEGER:
                        try {
                            int intValue = Integer.parseInt(detailValue);
                            doc.add(new IntField(detailKeyName, intValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(int) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case FLOAT:
                        try {
                            float floatValue = Float.parseFloat(detailValue);
                            doc.add(new FloatField(detailKeyName, floatValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(float) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case LONG:
                        try {
                            long longValue = Long.parseLong(detailValue);
                            doc.add(new LongField(detailKeyName, longValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(long) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case DOUBLE:
                        try {
                            double doubleValue = Double.parseDouble(detailValue);
                            doc.add(new DoubleField(detailKeyName, doubleValue, Store.NO));
                        } catch (Exception e) {
                            logger.warn("Invalid numeric(double) data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case IP_ADDRESS:
                        try {
                            if (!detailValue.isEmpty()) {
                                final InetAddress addr = IpUtils.parseAddress(detailValue);
                                createIpAddressFields(doc, detailKeyName, addr);
                            }
                        } catch (Exception e) {
                            logger.warn("Invalid IP address data reported for detail {}: {}", detailName, detailValue);
                        }
                        break;
                    case PATH:
                        createPathFields(doc, detailKeyName, detailValue);
                        break;
                    default:
                        logger.warn("Configured detail {} uses unknown data type: {}, skipping", detailName, detailDefn.getType());
                        break;
                }
            }
        }
    }
    return doc;
}
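Under the sentinel convention described in the comments above, an "is null" filter on a detail reduces to an ordinary query. A hedged sketch, with hypothetical detail keys:
import org.apache.lucene.index.Term;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

// Text details were indexed with the bell character when unset.
Query nullTextDetail = new TermQuery(
        new Term(DETAIL_INDEX_PREFIX + "location", Character.toString((char) 7)));
// Numeric details used Integer.MIN_VALUE; a one-value range matches it.
Query nullIntDetail = NumericRangeQuery.newIntRange(
        DETAIL_INDEX_PREFIX + "priority", Integer.MIN_VALUE, Integer.MIN_VALUE, true, true);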
Example 76
Project: agile-itsm-master  File: Lucene.java View source code
private boolean indexarDocGemeo(PalavraGemeaDTO palGemeaDTO) throws IOException {
    this.excluirPalavraGemea(palGemeaDTO);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    Directory indexDir = FSDirectory.open(new File(dirGemeas));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(indexDir, config);
    Document doc = new Document();
    doc.add(new Field("palavra", palGemeaDTO.getPalavra(), Store.YES, Index.ANALYZED));
    doc.add(new Field("correspondente", palGemeaDTO.getPalavraCorrespondente(), Store.YES, Index.ANALYZED));
    NumericField id = new NumericField("id", Store.YES, true);
    id.setLongValue(palGemeaDTO.getIdPalavraGemea());
    doc.add(id);
    indexWriter.addDocument(doc);
    indexWriter.close();
    indexDir.close();
    return true;
}
Example 77
Project: tml-master  File: Repository.java View source code
/**
	 * Inserts a new text passage into the Repository.
	 *
	 * @param content
	 *            the content of the document
	 * @param type
	 *            the type of the document ("document", "sentence" or
	 *            "paragraph")
	 * @param parent
	 *            the id of the parent document (when type is segment)
	 * @param parentDocument
	 * @param externalId
	 *            the external identifier, used as the key when updating an
	 *            existing document
	 * @param title
	 *            the title of the document
	 * @param url
	 *            the url of the document
	 * @return the Lucene Document that was just added
	 * @throws IOException
	 * @throws SQLException
	 */
private Document addTextPassageToOpenIndex(String content, String type, String parent, String parentDocument, String externalId, String title, String url) throws IOException, SQLException {
    Document document = new Document();
    document.add(new Field(this.getLuceneContentField(), content, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS));
    document.add(new Field(this.getLuceneExternalIdField(), externalId, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field(this.getLuceneTitleField(), title, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field(this.getLuceneUrlField(), url, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field("indexdate", Calendar.getInstance().getTime().toString(), Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field(this.getLuceneParentField(), parent, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field("type", type, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    document.add(new Field("parent", parentDocument, Store.YES, Index.NOT_ANALYZED, TermVector.NO));
    this.getDbConnection().insertDocument(this, document);
    Term term = new Term("externalid", externalId);
    luceneIndexWriter.updateDocument(term, document);
    return document;
}
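Indexing the content field with TermVector.WITH_POSITIONS makes its per-document term statistics readable at search time. A minimal sketch, assuming an IndexReader open on the same index:
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;

// Sketch: read back the term vector for one document's content field.
static void printTerms(IndexReader reader, int docId, String contentField) throws IOException {
    TermFreqVector vector = reader.getTermFreqVector(docId, contentField);
    if (vector != null) { // null when the field was indexed without term vectors
        String[] terms = vector.getTerms();
        int[] freqs = vector.getTermFrequencies(); // parallel to terms
        for (int i = 0; i < terms.length; i++) {
            System.out.println(terms[i] + " x" + freqs[i]);
        }
    }
}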
Example 78
Project: opensearchserver-master  File: Indexed.java View source code
public final Index getLuceneIndex(String indexAnalyzer) {
    if (this == NO)
        return Index.NO;
    return indexAnalyzer == null ? Index.NOT_ANALYZED : Index.ANALYZED;
}
Example 79
Project: cyclos-master  File: DocumentBuilder.java View source code
/**
     * Adds a string field, which may be analyzed or not
     */
public DocumentBuilder add(final String name, final String value, final boolean analyzed) {
    if (StringUtils.isNotEmpty(value)) {
        final Field field = new Field(name, value, Store.YES, analyzed ? Index.ANALYZED : Index.NOT_ANALYZED);
        document.add(field);
    }
    return this;
}
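A typical call chain against this builder, with illustrative field names and values:
// "name" is tokenized for full-text matching; "username" stays one exact term.
builder.add("name", "John Smith", true)    // Index.ANALYZED
       .add("username", "jsmith", false);  // Index.NOT_ANALYZED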
Example 80
Project: eclipse-instasearch-master  File: StorageIndexer.java View source code
private static org.apache.lucene.document.Field createLuceneField(Field fieldName, String value) {
    return new org.apache.lucene.document.Field(fieldName.toString(), value, Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED);
}
Example 81
Project: eadventure-master  File: ModelIndex.java View source code
/**
	 * Adds a property to a node.
	 * @param e the node
	 * @param field the property name
	 * @param value the property value
	 * @param searchable if this field is to be indexed and used in "anywhere"
	 * searches
	 */
public static void addProperty(DependencyNode e, String field, String value, boolean searchable) {
    e.getDoc().add(new Field(field, value, Store.YES, searchable ? Index.ANALYZED : Index.NO));
}
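With searchable set to false the value is still stored on the document, but Index.NO keeps it out of the inverted index entirely. Illustrative calls (the node and values are hypothetical):
ModelIndex.addProperty(node, "name", "Dark cave", true);        // Index.ANALYZED, matches "anywhere" searches
ModelIndex.addProperty(node, "internal-id", "scene.12", false); // Index.NO, stored only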
Example 82
Project: skalli-master  File: LuceneIndex.java View source code
private void addEntityToIndex(IndexWriter writer, T entity) throws IOException {
    List<IndexEntry> fields = indexEntity(entity);
    Document doc = LuceneUtil.fieldsToDocument(fields);
    doc.add(new Field(FIELD_UUID, entity.getUuid().toString(), Store.YES, Index.NOT_ANALYZED));
    writer.addDocument(doc);
}