java xpath获取表中的行

1 周，3 日 Questions & Answers 926

我有一个html文件，比如：http://scholar.google.gr/citations?user=v9xULZwAAAAJ&hl=el

该文件中有一个包含项目的表格。我想用xpath获得前20篇文章（如果有的话）

我试图找到第一篇文章：

// Evaluates the expression against the first parsed document (docList.get(0)) and asks
// for a string result: the <a> inside the td with id 'col-title' of the second row (tr[2]).
String str = (String) xpath.evaluate("//form[contains(@id,'citationsForm')]/div[2]/div[1]/table/tbody/tr[2]/td[@id='col-title']/a", docList.get(0), XPathConstants.STRING);

这样可以正常工作！结果：现代信息检索

对于所有文章：

// The tr step carries no row index here, so the expression can match one <a> per table row.
// NOTE(review): the result is still requested as XPathConstants.STRING; XPath 1.0 string
// conversion of a node-set uses only its first node in document order, which would explain
// why a single title comes back. Confirm against the javax.xml.xpath documentation.
String str = (String) xpath.evaluate("//form[contains(@id,'citationsForm')]/div[2]/div[1]/table/tbody/tr/td[@id='col-title']/a", docList.get(0), XPathConstants.STRING);

但这不起作用。有什么想法吗？

谢谢！

编辑：我还尝试了：

        // Evaluate against the first parsed document and request a node-set, so every
        // matching <a> element is returned instead of a single string.
        NodeList result = (NodeList)xpath.evaluate("//form[contains(@id,'citationsForm')]/div[2]/div[1]/table/tbody/tr/td[@id='col-title']/a",
        docList.get(0), XPathConstants.NODESET);
        ArrayList<String>liste = new ArrayList<String>();
        for(int i=0; i<result.getLength();i++){
            // NOTE(review): per the org.w3c.dom.Node docs, getNodeValue() is null for element
            // nodes, so this likely prints "null"; getTextContent() returns the link text. Confirm.
            System.out.println(result.item(i).getNodeValue());
            // NOTE(review): getNodeName() yields the tag name, not the article title;
            // presumably getTextContent() was intended here as well.
            liste.add(result.item(i).getNodeName());
        }

编辑2：全部代码如下

类 FileOperations：

package xmlparse;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.DomSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.w3c.dom.Document;

/**
 * Reads every regular file in a fixed directory as HTML text and converts each one
 * into a W3C DOM {@link Document} with HtmlCleaner.
 */
public class FileOperations {

    /** Directory scanned for HTML files (hard-coded, machine-specific location). */
    private static final String path = "C:\\Users\\Dimitris\\Desktop\\authors";

    /**
     * Reads and converts every regular file found directly inside {@link #path}.
     *
     * <p>I/O failures are logged rather than propagated; documents converted before the
     * failure are still returned.
     *
     * @return the successfully converted documents, possibly empty, never {@code null}
     */
    public ArrayList<Document> getXmlDocumt() {
        ArrayList<Document> xmlFileList = new ArrayList<>();

        try {
            for (File f : listFiles(path)) {
                Document doc = ConvertHtml2Xml(readfile(f.getAbsolutePath()));
                // ConvertHtml2Xml yields null when the DOM could not be built; keeping such
                // entries out of the list spares callers a NullPointerException later on.
                if (doc != null) {
                    xmlFileList.add(doc);
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(FileOperations.class.getName()).log(Level.SEVERE, null, ex);
        }
        return xmlFileList;
    }

    /**
     * Collects the regular files (sub-directories are skipped) directly inside a directory.
     *
     * @param directoryName path of the directory to scan
     * @return the regular files found, possibly empty
     * @throws IOException if the path is not a directory or cannot be listed
     */
    private ArrayList<File> listFiles(String directoryName) throws IOException {
        ArrayList<File> htmlfilelist = new ArrayList<>();

        // File.listFiles() returns null (not an empty array) when the path is not a
        // directory or an I/O error occurs; report that instead of failing with an NPE.
        File[] fList = new File(directoryName).listFiles();
        if (fList == null) {
            throw new IOException("Cannot list directory: " + directoryName);
        }

        for (File file : fList) {
            if (file.isFile()) {
                htmlfilelist.add(file);
            }
        }
        return htmlfilelist;
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>NOTE(review): {@link FileReader} decodes with the platform default charset on older
     * JDKs; confirm that this matches the encoding of the saved pages.
     *
     * @param file path of the file to read
     * @return the file content, with each line terminated by {@code '\n'}
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    private String readfile(String file) throws FileNotFoundException, IOException {
        StringBuilder content = new StringBuilder(1024);
        // try-with-resources closes the reader (and the wrapped FileReader) on every path.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String s;
            while ((s = br.readLine()) != null) {
                // readLine() strips the line terminator; put one back so that markup split
                // across lines (e.g. a tag name and its attribute) is not glued together.
                content.append(s).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Cleans an HTML string with HtmlCleaner and serializes the result to a DOM document.
     *
     * @param html the HTML markup to convert
     * @return the DOM document, or {@code null} if the DOM could not be created (the
     *         failure is logged)
     */
    private Document ConvertHtml2Xml(String html) {
        TagNode tagNode = new HtmlCleaner().clean(html);
        Document doc = null;

        try {
            doc = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
        } catch (ParserConfigurationException ex) {
            Logger.getLogger(FileOperations.class.getName()).log(Level.SEVERE, null, ex);
        }

        return doc;
    }

}

类XpathQueries：

XPath xpath;
    ArrayList<Document> docList;

    /**
     * Creates the XPath evaluator and stores a fresh copy of the documents returned by
     * {@code FileOperations#getXmlDocumt()}.
     */
    public XpathQueries() {
        this.xpath = XPathFactory.newInstance().newXPath();
        this.docList = new ArrayList<>(new FileOperations().getXmlDocumt());
    }

    /**
     * Prints the text of the first 20 title links (fewer if fewer match) found in the
     * first parsed document, one per line. Prints nothing when no document was loaded.
     *
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public void getArticle() throws XPathExpressionException {
        if (docList.isEmpty()) {
            // No document was parsed, so there is nothing to query.
            return;
        }

        // Request a node-set: with XPathConstants.STRING only the first matching node's
        // text is returned, which is why a single title used to be printed.
        org.w3c.dom.NodeList links = (org.w3c.dom.NodeList) xpath.evaluate(
                "//*[@id='col-title']/a", docList.get(0), XPathConstants.NODESET);

        final int maxArticles = 20;
        int count = Math.min(links.getLength(), maxArticles);
        for (int i = 0; i < count; i++) {
            // getTextContent() yields the link text; getNodeValue() is null for elements.
            System.out.println(links.item(i).getTextContent());
        }
    }
}

Python中文网

有 Java 编程相关的问题?

java xpath获取表中的行

共 (2) 个答案

# 1 楼答案

# 2 楼答案