/**
* @Description:
* @Version: 1.0
*/
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import java.io.IOException;
import java.io.StringReader;
import java.util.*;
public class Test {
/**
* 对语句进行分词
* @param text 语句
* @return 分词后的集合
* @throws IOException
*/
private static Map segment(String text) throws IOException {
Map<String,Integer> map = new HashMap<String,Integer>();
StringReader re = new StringReader(text);
IKSegmenter ik = new IKSegmenter(re, false);//true 使用smart分词,false使用最小颗粒分词
Lexeme lex; while ((lex = ik.next()) != null) { if(lex.getLexemeText().length()>1){ if(map.containsKey(lex.getLexemeText())){ map.put(lex.getLexemeText(),map.get(lex.getLexemeText())+1); }else{ map.put(lex.getLexemeText(),1); } } } return map; } public static void main(String[] args) throws IOException { Map<String,Integer> map = segment("中国,中国,我爱你"); System.out.println(map.toString()); } }
输出结果: ; Q5 W! V9 y) `( k8 G 整理简单工具类,代码如下:# t4 A u Z. a' |% u. l' \% L8 E + b& b- w" W0 j# q4 i9 f) M- r: a7 l% y+ X IKanalyzer分词器源码下载>> 7 n# N! k) C3 N5 }+ G