/**
 * Small Lucene demo: indexes every regular file found directly under
 * {@link #LUCENE_DOC_PATH} into an index at {@link #LUCENE_INDEX_PATH},
 * runs a keyword query against the "content" field and prints the hits
 * to standard output.
 */
public class FileDocument {
    /** Folder whose files are indexed by {@link #buildIndex()}. */
    public final static String LUCENE_DOC_PATH = "d://temp/doc";
    /** Location of the index written by {@link #buildIndex()} and read by {@link #search(String)}. */
    public final static String LUCENE_INDEX_PATH = "d://temp/index";

    /**
     * Boost applied to the document at the same position in the folder listing.
     * NOTE(review): index 6 is 1.0f while its neighbours are 0.6f and 0.8f -
     * possibly a typo for 0.7f; left unchanged because it affects ranking.
     */
    private static final float[] BOOSTS = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 1.0f, 0.8f, 0.9f, 1.0f};

    /** Not used inside this class; kept because it is visible to the rest of the package. */
    Directory dir;
    /** Reader over the content of the file most recently passed to {@link #document(File)}. */
    BufferedReader reader;
    /** Analyzer shared by indexing and query parsing. */
    static StandardAnalyzer analyzer = new StandardAnalyzer();

    /**
     * Builds the Lucene document for one file: stored, untokenized "path" and
     * "modify" (last-modified time at day resolution) fields plus a "content"
     * field fed from a reader over the file.
     *
     * <p>The reader is stored in {@link #reader}; the caller is responsible for
     * closing it once the document has been indexed.
     *
     * @param f file to describe
     * @return the new document
     * @throws FileNotFoundException if {@code f} cannot be opened for reading
     */
    public Document document(File f) throws FileNotFoundException {
        Document doc = new Document();
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.add(new Field("modify",
                DateTools.timeToString(f.lastModified(), DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        // Open the stream only after the cheap fields are built, so nothing is
        // left open if building those fields fails.
        // NOTE(review): InputStreamReader without a charset decodes with the
        // platform default; confirm that matches the encoding of the indexed files.
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        doc.add(new Field("content", reader));
        return doc;
    }

    /**
     * Recreates the index at {@link #LUCENE_INDEX_PATH} from the regular files
     * directly under {@link #LUCENE_DOC_PATH} and prints the elapsed time.
     * Subdirectories and other non-regular entries are skipped.
     *
     * @throws FileNotFoundException if {@link #LUCENE_DOC_PATH} is not a directory
     * @throws IOException if the folder cannot be listed or indexing fails
     */
    public void buildIndex() throws IOException {
        long start = System.currentTimeMillis();
        File folder = new File(LUCENE_DOC_PATH);
        if (!folder.isDirectory()) {
            // Previously this case fell through to close() on a null writer.
            throw new FileNotFoundException("Not a directory: " + LUCENE_DOC_PATH);
        }
        String[] files = folder.list();
        if (files == null) {
            // File.list() returns null when the directory cannot be read.
            throw new IOException("Cannot list directory: " + LUCENE_DOC_PATH);
        }

        IndexWriter iwriter = new IndexWriter(LUCENE_INDEX_PATH, analyzer, true);
        try {
            for (int i = 0; i < files.length; i++) {
                File file = new File(folder, files[i]);
                if (!file.isFile()) {
                    continue; // a directory cannot be opened as a stream
                }
                indexFile(iwriter, file, i);
                // Tuning knobs left disabled by the original author:
                // iwriter.setMaxMergeDocs(2000);    // caps the number of documents per segment
                // iwriter.setMergeFactor(50);       // flush to disk once 50 documents are buffered (default 10)
                // iwriter.setMaxFieldLength(20000); // number of terms indexed per field (default 10000)
                // iwriter.setUseCompoundFile(true); // compound index format (default true); false = multi-file format
            }
            // iwriter.optimize(); // does not speed up indexing; best called once after a large bulk build
        } finally {
            // Always release the writer, even when indexing a file fails.
            iwriter.close();
        }
        // Disabled experiment kept from the original:
        // IndexReader r = IndexReader.open(LUCENE_INDEX_PATH);
        // r.deleteDocument(0); // marks the first document deleted (not a physical delete)
        // r.undeleteAll();     // restores it again
        // r.close();
        long end = System.currentTimeMillis();
        long time = end - start;
        System.out.println("建立索引用时:" + time);
    }

    /** Adds one file to the index with its positional boost, then closes its content reader. */
    private void indexFile(IndexWriter iwriter, File file, int position) throws IOException {
        Document doc = this.document(file);
        BufferedReader contentReader = reader;
        try {
            System.out.println("正在索引文件:" + file.getName());
            // Entries beyond the boost table keep the document's default boost
            // instead of overrunning the array.
            if (position < BOOSTS.length) {
                doc.setBoost(BOOSTS[position]);
            }
            iwriter.addDocument(doc);
        } finally {
            if (contentReader != null) {
                contentReader.close();
            }
        }
    }

    /**
     * Parses {@code keyWords} as a query on the "content" field and runs it
     * against the index at {@link #LUCENE_INDEX_PATH}.
     *
     * @param keyWords query text in QueryParser syntax
     * @return the hits, or {@code null} if the query could not be parsed or the
     *         search failed (the stack trace is printed in that case)
     * @throws CorruptIndexException if the index cannot be opened because it is corrupt
     * @throws IOException if the index cannot be opened or closed
     */
    public final Hits search(String keyWords) throws CorruptIndexException, IOException {
        Hits hits = null;
        IndexSearcher isearcher = new IndexSearcher(LUCENE_INDEX_PATH);
        System.out.println("正在搜索的关键字:" + keyWords);
        try {
            QueryParser qp = new QueryParser("content", analyzer);
            Query query = qp.parse(keyWords);
            long start = System.currentTimeMillis();
            hits = isearcher.search(query);
            long end = System.currentTimeMillis();
            System.out.println("完成搜索共耗时:" + (end - start) + "毫秒!");
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // On success the searcher is deliberately left open: the returned
            // Hits is read later by printResult (presumably through this
            // searcher - confirm against the Hits API). With no result there is
            // nothing left to read, so release it.
            if (hits == null) {
                isearcher.close();
            }
        }
        return hits;
    }

    /**
     * Prints every hit (path, score, internal id, "modify" and "content" values),
     * or an apology when {@code hits} is {@code null} or empty, followed by a
     * separator line.
     *
     * @param hits search result; may be {@code null}
     * @throws CorruptIndexException declared for callers; per-hit I/O failures are reported and skipped
     * @throws IOException declared for callers; per-hit I/O failures are reported and skipped
     */
    public void printResult(Hits hits) throws CorruptIndexException, IOException {
        if (null == hits || hits.length() == 0) {
            System.out.println("对不起,没有找到你想要的内容!");
        } else {
            for (int i = 0; i < hits.length(); i++) {
                try {
                    Document doc = hits.doc(i);
                    System.out.println("这是查到的第" + (i + 1) + "个结果:" + doc.get("path") + " 分数: " + hits.score(i));
                    // Runtime.getRuntime().exec(" cmd /c start " + doc.get("path")); // could be used to open the file
                    System.out.println("文档内部的id是:" + hits.id(i));
                    System.out.println("这是查到的第" + (i + 1) + "个结果:" + doc.get("modify"));
                    // NOTE(review): "content" is added from a Reader without a
                    // Store option in document(); this likely prints null - confirm.
                    System.out.println("这是查到的第" + (i + 1) + "个结果:" + doc.get("content"));
                } catch (IOException e) {
                    e.printStackTrace();
                    System.out.println("索引错误!");
                }
            }
            // Disabled paged output kept from the original:
            // final int page = 3;
            // for (int start = 0; start < hits.length(); start += page) { // show results page by page
            //     int end = Math.min(hits.length(), start + page);
            //     for (int i = start; i < end; i++) {
            //         Document doc = hits.doc(i);
            //         System.out.println("这是查到的第" + (i + 1) + "个结果:" + doc.get("path") + "分数 " + hits.score(i));
            //     }
            //     if (hits.length() > end) {
            //         System.out.println("more Y/N?");
            //         BufferedReader buf = new BufferedReader(new InputStreamReader(System.in));
            //         String line = buf.readLine();
            //         if (line.length() == 0 || line.charAt(0) == 'N') {
            //             break;
            //         }
            //     }
            // }
        }
        System.out.println("----------------------------------------");
    }

    /** Rebuilds the index, searches it for a fixed keyword and prints the result. */
    public static void main(String[] args) throws IOException {
        FileDocument fd = new FileDocument();
        fd.buildIndex();
        Hits h = fd.search("奥运会");
        fd.printResult(h);
    }
}
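// Web-page residue from the article this listing was copied from (not part of the program):
// title "Lucene search engine"; footer "latest recommended article published 2025-08-20 13:25:49".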