博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Lucene3.6第一篇--创建索引
阅读量:5281 次
发布时间:2019-06-14

本文共 5493 字,大约阅读时间需要 18 分钟。

lucene 3.6 source and Binary code 

import java.io.File;import java.io.FileReader;import java.io.IOException;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.SimpleFSDirectory;import org.apache.lucene.util.Version;public class Indexer {    public static void main(String[] args) throws Exception {//        args=new String[2];//        args[0]="D:/workspace/LuceneAction/lib/IndexDir";//        args[1]="D:/workspace/HaiduShare/src/com/weishangye/share";        if (args.length != 2) {            throw new Exception("Usege Java: " + Indexer.class.getName()                    + "
"); } //索引文件存放路径 Directory indexDir = new SimpleFSDirectory(new File(args[0])); //原始数据文件 File dataDir = new File(args[1]); long start = System.currentTimeMillis(); //遍历文件夹创建索引文件 int numIndexed = Index(indexDir, dataDir); long end = System.currentTimeMillis(); System.out.println("Index " + numIndexed + "files took " + (end - start) + " milliseconds"); } public static int Index(Directory indexDir, File dataDir) throws IOException { if (!dataDir.exists() || !dataDir.isDirectory()) { throw new IOException(dataDir+"is not exist or a directory"); } IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); IndexWriter writer=new IndexWriter(indexDir,config); //是否合并索引文件(过时) //writer.setUseCompoundFile(false); IndexDirectory(writer,dataDir); int numIndexed=writer.numDocs(); //优化索引(过时) //writer.optimize(); writer.close(); return numIndexed; } private static void IndexDirectory(IndexWriter writer,File dir) throws IOException{ File[] files =dir.listFiles(); for (int i = 0; i < files.length; i++) { File f=files[i]; if (f.isDirectory()) { IndexDirectory(writer, f); } else { IndexFile(writer,f); } } } private static void IndexFile(IndexWriter writer,File f) throws IOException { if (f.isHidden()||!f.exists()||!f.canRead()) { return; } System.out.println("Indexing "+f.getCanonicalPath()); Document doc=new Document(); doc.add(new Field("contents", new FileReader(f))); doc.add(new Field("filename",new FileReader(f.getCanonicalPath()))); writer.addDocument(doc); }}

 

package com.meetlucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line tool that runs a query against the {@code contents} field of an
 * on-disk Lucene 3.6 index and prints the top ten hits.
 */
public class Searcher {
    /** Name of the field holding the path of the indexed file. */
    public static final String FIELD_FILENAME = "filename";
    /** Name of the field the query is parsed against. */
    public static final String FIELD_CONTENTS = "contents";

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the index directory, {@code args[1]} is the
     *             query string
     * @throws IllegalArgumentException if exactly two arguments are not supplied or
     *                                  the index directory does not exist
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public static void main(String[] args) throws Exception {
        // The command-line arguments are used as given; they are no longer replaced
        // by hard-coded debug values, so the check below is meaningful.
        if (args.length != 2) {
            throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
                    + " <index dir> <query>");
        }
        // Directory holding the index to search.
        File indexDir = new File(args[0]);
        String q = args[1];
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new IllegalArgumentException(indexDir
                    + " does not exist or is not a directory");
        }
        search(indexDir, q);
    }

    /**
     * Parses {@code q} with a {@link StandardAnalyzer}, searches the index in
     * {@code indexDir} and prints the hit count, the elapsed time and, for each of
     * the top ten hits, the document id and its stored field values.
     *
     * @param indexDir directory containing the index
     * @param q        query string in QueryParser syntax
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void search(File indexDir, String q) throws Exception {
        Directory fsDir = new SimpleFSDirectory(indexDir);
        try {
            IndexReader indexReader = IndexReader.open(fsDir);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
                QueryParser queryParser =
                        new QueryParser(Version.LUCENE_36, FIELD_CONTENTS, analyzer);
                Query query = queryParser.parse(q);

                long start = System.currentTimeMillis();
                // TopDocs is used here in place of the older Hits class.
                TopDocs topDocs = indexSearcher.search(query, 10);
                long end = System.currentTimeMillis();

                System.out.println("Found " + topDocs.totalHits + " document(s)(in "
                        + (end - start) + " milliseconds) that matched query '" + q + "':");
                for (ScoreDoc sd : topDocs.scoreDocs) {
                    Document doc = indexSearcher.doc(sd.doc);
                    // NOTE(review): doc.get() returns null for fields that were not
                    // stored at index time; confirm which fields the indexer stores.
                    System.out.println(sd.doc + "-->" + doc.get(FIELD_FILENAME)
                            + "-->" + doc.get(FIELD_CONTENTS));
                }
            } finally {
                // Release the index files held open by the reader.
                indexReader.close();
            }
        } finally {
            fsDir.close();
        }
    }
}

转载于:https://www.cnblogs.com/a282421083/archive/2012/09/23/2698622.html

你可能感兴趣的文章
shell脚本
查看>>
Upload Image to .NET Core 2.1 API
查看>>
【雷电】源代码分析(二)-- 进入游戏攻击
查看>>
Linux中防火墙centos
查看>>
[JS]递归对象或数组
查看>>
linux sed命令
查看>>
程序存储问题
查看>>
优雅地书写回调——Promise
查看>>
PHP的配置
查看>>
Struts框架----进度1
查看>>
Round B APAC Test 2017
查看>>
MySQL 字符编码问题详细解释
查看>>
css & input type & search icon
查看>>
C# 强制关闭当前程序进程(完全Kill掉不留痕迹)
查看>>
语音识别中的MFCC的提取原理和MATLAB实现
查看>>
MetaWeblog API Test
查看>>
c# 文件笔记
查看>>
类和结构
查看>>
心得25--JDK新特性9-泛型1-加深介绍
查看>>
安装NVIDIA驱动时禁用自带nouveau驱动
查看>>