SentenceAnalyzer
Posted on February 20, 2006 - Filed Under Uncategorized |
/**
 * Rewrites a sentence as a sequence of space-prefixed tokens.
 *
 * <p>Characters listed in the punctuation table are dropped. A run of ASCII
 * letters and digits is kept together as one token; every other character
 * becomes a token of its own. Each token is preceded by a single space, so a
 * non-empty result always starts with a space.
 *
 * <p>The class holds no mutable state.
 */
public class SentenceAnalyzer {

    /**
     * Punctuation characters that {@link #analyze(String)} removes.
     *
     * <p>Non-ASCII entries are written as Unicode escapes so the table does
     * not depend on the encoding the source file is compiled with.
     *
     * <p>NOTE(review): the previous table listed '!' and ':' twice. The
     * duplicates may have been full-width forms lost in a transcoding step;
     * confirm against the intended character list.
     */
    private static final String INTERPUNCTION =
            "!?,@\"'{}();:><~_.[]/-"
            + "\u2026"          // horizontal ellipsis
            + "\u201C\u201D"    // left and right curly double quotes
            + "\u300A\u300B";   // CJK double angle brackets

    /** Creates an analyzer; the punctuation table is shared by all instances. */
    public SentenceAnalyzer() {
    }

    /**
     * Tokenizes {@code str}.
     *
     * <ul>
     *   <li>Punctuation from the table is skipped. Skipping does not end an
     *       alphanumeric run, so {@code "hi,there"} yields {@code " hithere"}.</li>
     *   <li>A space character emits only the separator, so {@code "a b"}
     *       yields {@code " a  b"} (two spaces between the tokens).</li>
     *   <li>The input is read by Unicode code point, so a surrogate pair stays
     *       together as one token.</li>
     * </ul>
     *
     * @param str the text to tokenize; {@code null} is treated as empty
     * @return the tokenized text, or {@code ""} for {@code null} or empty input
     */
    public String analyze(String str) {
        if (str == null) {
            return "";
        }
        int length = str.length();
        StringBuilder buffer = new StringBuilder(length);
        boolean lastWasAlphanumeric = false;
        int index = 0;
        while (index < length) {
            int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            if (INTERPUNCTION.indexOf(codePoint) >= 0) {
                // Dropped without touching lastWasAlphanumeric, so a run
                // continues across the removed punctuation.
                continue;
            }
            // Supplementary code points do not fit in a char and are never
            // ASCII letters or digits.
            boolean alphanumeric = codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT
                    && isAlphanumeric((char) codePoint);
            // Merge consecutive English letters and digits: a separator is
            // emitted unless this character continues an alphanumeric run.
            if (!(alphanumeric && lastWasAlphanumeric)) {
                buffer.append(' ');
            }
            if (codePoint != ' ') {
                buffer.appendCodePoint(codePoint);
            }
            lastWasAlphanumeric = alphanumeric;
        }
        return buffer.toString();
    }

    /**
     * Tells whether {@code ch} is an ASCII letter or an ASCII digit.
     *
     * @param ch the character to test
     * @return {@code true} for {@code a-z}, {@code A-Z} and {@code 0-9}
     */
    public boolean isAlphanumeric(char ch) {
        return (ch >= 'a' && ch <= 'z')
                || (ch >= 'A' && ch <= 'Z')
                || (ch >= '0' && ch <= '9');
    }

    /**
     * Prints the tokenization of a sample sentence.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SentenceAnalyzer analyzer = new SentenceAnalyzer();
        System.out.println(analyzer.analyze("姻缘天注定;-)你对你的另一半有何看法;-)"));
    }
}
Most Commented Posts
Comments
Leave a Reply