使用迭代器的树Java Trie匹配
我有一个作业,涉及创建一个公司名称的Trie(从文件中读取),然后读取一篇新闻文章输入,并计算Trie中的公司名称在文章中出现的次数
我已经编写了一个相当标准的Trie结构,但是对于作业来说,让三元组包含完整的单词而不仅仅是每个字符更有意义
更复杂的是,文件中的每个公司名称都有一个“主要名称”,可以有多个“次要名称”。例如:微软公司、微软、Xbox——名字始终是主要名称
作业要求我计算文章中任何公司名称的所有匹配项,但在打印结果时只返回公司的主要名称。因此,我的三元组有字符串primeName数据字段,以及标准的isEnd bool。但是,在我的例子中,isEnd表示指定的节点及其父节点是否构成完整的公司名称
例如,文章“微软公司刚刚发布了一款新的Xbox游戏机”我需要返回与“Microsoft:2”类似的内容,因为Microsoft Corporation和Xbox都有相同的主要公司名称,即Microsoft
我在getHits()方法中使用了一个迭代器,但当我找到一个命中时,我需要查看数组中的下一个单词,以确保它不是一个继续,然后再决定是停止还是继续。问题是调用iter。next()不仅仅是“偷看”下一个值,它还会向前移动,基本上会导致我跳过单词
例如,如果你看下面的代码和我的例子,在“Best”得到点击后,它应该会看到“Buy”是一个孩子,下一次它循环得到一个匹配的“Buy”,但因为我已经调用了iter。next()要查看While循环中的“购买”,下一次迭代完全跳过“购买”。有没有什么方法可以让我在While循环中简单地窥视下一个iter值,而不实际移动到它?此外,对这段代码的任何改进都将不胜感激!我肯定有很多地方我草率地实现了一些东西
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
public class BuildTrie {
// Class Methods
public static void main(String[] args) throws IOException {
Trie Companies = new Trie();
String filename = "companies.dat";
try {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line;
while ((line = reader.readLine()) != null) {
// Split line by tab character
String[] aliases = line.replaceAll("\\p{P}", "").split("\t");
// Loop over each "alias" of specific company
for (int n = 0; n < aliases.length; n++) {
String[] name = aliases[n].split(" ");
// Insert each alias into Trie with index 0 as primary
Companies.insert(name, aliases[0]);
}
}
reader.close();
} catch (Exception e) {
System.err.format("Exception occurred trying to read '%s'.", filename);
e.printStackTrace();
}
/*System.out.println("Article Input: ");
try (BufferedReader input = new BufferedReader(new InputStreamReader(System.in))) {
String line;
while ((line = input.readLine()) != null) {
if (".".equals(line)) break;
String[] items = line.trim().replaceAll("\\p{P}", "").split("\\s+");
for (int i = 0; i < items.length; i++) {
Companies.words.add(items[i]);
//System.out.println(items[i]);
}
}
}*/
Companies.articleAdd("The");
Companies.articleAdd("company");
Companies.articleAdd("Best");
Companies.articleAdd("Buy");
Companies.articleAdd("sell");
Companies.articleAdd("Xbox");
Companies.getHits();
}
}
// Trie Node, which stores a character and the children in a HashMap
class TrieNode {
// Data Fields
private String word;
HashMap<String,TrieNode> children;
boolean bIsEnd;
private String primary = "";
// Constructors
public TrieNode() {
children = new HashMap<>();
bIsEnd = false;
}
public TrieNode(String st, String prime) {
word = st;
children = new HashMap<>();
bIsEnd = false;
primary = prime;
}
// Trie Node Methods
public HashMap<String,TrieNode> getChildren() {
return children;
}
public String getValue() {
return word;
}
public void setIsEnd(boolean val) {
bIsEnd = val;
}
public boolean isEnd() {
return bIsEnd;
}
public String getPrime() {
return primary;
}
}
class Trie {
private ArrayList<String> article = new ArrayList<String>();
private HashMap<String,Integer> hits = new HashMap<String,Integer>();
// Constructor
public Trie() {
root = new TrieNode();
}
// Insert article text
public void articleAdd(String word) {
article.add(word);
}
// Method to insert a new company name to Trie
public void insert(String[] names, String prime) {
// Find length of the given name
int length = names.length;
//TrieNode currNode = root;
HashMap<String,TrieNode> children = root.children;
// Traverse through all words of given name
for( int i=0; i<length; i++)
{
String name = names[i];
System.out.println("Iter: " + name);
TrieNode t;
// If there is already a child for current word of given name
if( children.containsKey(name))
t = children.get(name);
else // Else create a child
{
System.out.println("Inserting node " + name + " prime is " + prime);
t = new TrieNode(name, prime);
children.put( name, t );
}
children = t.getChildren();
int j = names.length-1;
if(i==j){
t.setIsEnd(true);
System.out.println("WordEnd");
}
}
}
public void getHits() {
// String[] articleArr = article.toArray(new String[0]);
// Initialize reference to traverse through Trie
// TrieNode crawl = root;
// int level, prevMatch = 0;
Iterator<String> iter = article.iterator();
TrieNode currNode = root;
while (iter.hasNext()) {
String word = iter.next();
System.out.println("Iter: " + word);
// HashMap of current node's children
HashMap<String,TrieNode> child = currNode.getChildren();
// If hit in currNode's children
if (child.containsKey(word)) {
System.out.println("Node exists: " + word);
// Update currNode to be node that matched
currNode = child.get(word);
System.out.println(currNode.isEnd());
String next = "";
// If currNode is leaf and next node has no match in children, were done
if (iter.hasNext()) {next = iter.next();}
if (currNode.isEnd() && !child.containsKey(next)) {
System.out.println("Matched word: " + word);
System.out.println("Primary: " + currNode.getPrime());
currNode = root;
} else {
// Else next node is continuation
}
} else {
// Else ignore next word and reset
currNode = root;
}
}
}
private TrieNode root;
}
共 (0) 个答案