有 Java 编程相关的问题?

你可以在下面搜索框中键入要查询的问题!

Java:使用 Saxon/XQuery 时如何设置 URI 解析器或 XML 目录(catalog)解析器

我正在用 Java 开发一个简单的命令行应用程序,用于从一个大型 XML 数据集(15000 多个 XML 文件)中挖掘数据。我选择使用 Saxon S9API 作为 XQuery 处理器。只要计算机能够访问互联网,Saxon 使用的解析器就可以解析 xsi:noNamespaceSchemaLocation 中的 URI(我猜其他任何 URI 也是如此),一切都能正常工作。

我已经在 Stack Overflow 上搜索过,也用 Google 做过一般性搜索,想找到如何为 XQuery 处理器提供 XML 目录(catalog)的答案,但至今还没有找到一个清楚的解释。

下面是我目前的简单代码。如前所述,在可以访问互联网时它工作正常:


    package ipd.part.info.mining.app;

    import java.io.File;
    import java.util.List;
    import java.util.Scanner;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    import net.sf.saxon.Configuration;
    import net.sf.saxon.TransformerFactoryImpl;
    import net.sf.saxon.s9api.DOMDestination;
    import net.sf.saxon.s9api.Processor;
    import net.sf.saxon.s9api.QName;
    import net.sf.saxon.s9api.SaxonApiException;
    import net.sf.saxon.s9api.XQueryCompiler;
    import net.sf.saxon.s9api.XQueryEvaluator;
    import net.sf.saxon.s9api.XQueryExecutable;
    import net.sf.saxon.s9api.XdmAtomicValue;
    import net.sf.saxon.lib.*;
    import static org.apache.xerces.jaxp.JAXPConstants.JAXP_SCHEMA_LANGUAGE;
    import static org.apache.xerces.jaxp.JAXPConstants.W3C_XML_SCHEMA;
    import org.apache.xerces.util.XMLCatalogResolver;
    import org.apache.xml.resolver.tools.CatalogResolver;
    import org.w3c.dom.Document;
    import org.xml.sax.ErrorHandler;

    /**
     * Command-line tool that runs an XQuery over every {@code *.xml} file in a
     * user-supplied directory and writes the collected part information to
     * {@code output.xml} in a user-supplied output directory.
     *
     * @author tfurst
     */
    public class IPDPartInfoMiningApp {

        private static final Logger LOGGER = Logger.getLogger(IPDPartInfoMiningApp.class.getName());

        /**
         * Query evaluated against the export directory. The external variable
         * {@code $path} is the directory URI; every {@code itemSequenceNumber}
         * element found in the collection yields one {@code part} element,
         * ordered by normalised part number.
         */
        private static final String PARTS_QUERY =
                "declare variable $path external;\n" +
                "\n" +
                "let $coll := collection(concat($path,'?select=*.xml'))//itemSequenceNumber \n" +
                "\n" +
                "return\n" +
                "<parts>\n" +
                "{\n" +
                "    for $mod in $coll\n" +
                "    let $pn := normalize-space($mod/partNumber)\n" +
                "    let $nomen := $mod/partIdentSegment[1]/descrForPart\n" +
                "    let $smr := $mod/locationRcmdSegment/locationRcmd/sourceMaintRecoverability\n" +
                "    order by $pn\n" +
                "    return <part pn=\"{$pn}\" nomen=\"{$nomen}\" smr=\"{$smr}\"/>\n" +
                "}\n" +
                "</parts>";

        private static Scanner scanner = new Scanner(System.in);
        private static String ietmPath;
        private static String outputPath;

        private static CatalogResolver resolver;
        private static XMLCatalogResolver xres;
        private static ErrorHandler eHandler;

        private static DocumentBuilderFactory DBF;
        private static DocumentBuilder DB;

        /**
         * Prompts for the export directory and the report directory, evaluates
         * {@link #PARTS_QUERY} with Saxon and serialises the resulting DOM to
         * {@code output.xml} inside the report directory.
         *
         * @param args the command line arguments (not used)
         */
        public static void main(String[] args) {
            initDb();
            if (DB == null) {
                // initDb() has already logged the cause; without a builder the
                // result document cannot be created, so stop here instead of
                // failing later with a NullPointerException.
                LOGGER.log(Level.SEVERE, "XML document builder is not available; aborting.");
                return;
            }

            try {
                System.out.println("Enter path to complete IETM Export:");
                ietmPath = scanner.nextLine();
                System.out.println("Enter path to save report:");
                outputPath = scanner.nextLine();

                Processor proc = new Processor(true);
                XQueryCompiler comp = proc.newXQueryCompiler();
                XQueryExecutable exp = comp.compile(PARTS_QUERY);

                // NOTE(review): DB is only used to create this empty result
                // document. The entity resolver installed on it in initDb() is
                // presumably not consulted when Saxon itself parses the files
                // for collection() - confirm, and register the catalog on the
                // Saxon Configuration if offline resolution is required.
                Document dom = DB.newDocument();

                // Everything up to (not including) the last '/' of the URI is
                // handed to the query as the collection base.
                String exportUri = new File(ietmPath).toPath().toUri().toString();
                String collectionBase = exportUri.substring(0, exportUri.lastIndexOf("/"));

                XQueryEvaluator ev = exp.load();
                ev.setExternalVariable(new QName("path"), new XdmAtomicValue(collectionBase));
                ev.run(new DOMDestination(dom));

                // Identity transform: serialise the DOM result to the report file.
                TransformerFactoryImpl tfact = new net.sf.saxon.TransformerFactoryImpl();
                Transformer trans = tfact.newTransformer();
                DOMSource src = new DOMSource(dom);
                StreamResult res = new StreamResult(new File(outputPath + File.separator + "output.xml"));
                trans.transform(src, res);
            } catch (SaxonApiException | TransformerException ex) {
                LOGGER.log(Level.SEVERE, "Failed to evaluate the query or write the report", ex);
            }
        }

        /**
         * Builds a Xerces {@link XMLCatalogResolver} that uses the same catalog
         * files as the given Apache {@link CatalogResolver}.
         *
         * @param resolver resolver whose catalog manager supplies the catalog file list
         * @return a new resolver whose catalog list holds the absolute path of
         *         every catalog file, in the order reported by the manager
         */
        private static XMLCatalogResolver createXMLCatalogResolver(CatalogResolver resolver) {
            List<?> files = resolver.getCatalog().getCatalogManager().getCatalogFiles();
            String[] catalogs = new String[files.size()];

            // Each entry gets its own slot; the index must advance with the
            // list position or all paths would overwrite element 0.
            for (int i = 0; i < catalogs.length; i++) {
                catalogs[i] = new File(files.get(i).toString()).getAbsolutePath();
            }

            XMLCatalogResolver xcr = new XMLCatalogResolver();
            xcr.setCatalogList(catalogs);
            return xcr;
        }

        /**
         * Initialises the catalog resolvers, the error handler and the
         * namespace-aware {@link DocumentBuilder} stored in the static fields.
         * If the builder cannot be configured the failure is logged and
         * {@code DB} is left {@code null}.
         */
        private static void initDb() {
            try {
                resolver = new CatalogResolver();
                eHandler = new DocumentErrorHandler();
                xres = createXMLCatalogResolver(resolver);
                DBF = DocumentBuilderFactory.newInstance();
                DBF.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
                DBF.setNamespaceAware(true);
                DB = DBF.newDocumentBuilder();
                DB.setEntityResolver(xres);
                DB.setErrorHandler(eHandler);
            } catch (ParserConfigurationException ex) {
                LOGGER.log(Level.SEVERE, "Unable to configure the XML document builder", ex);
            }
        }

    }

当我断开计算机的网络连接后,会收到如下错误:

C:\Users\tfurst\Desktop\XQuery Test\testXml\test\tool>java -jar IPD_Part_Info_Mining_App.jar
Enter path to complete IETM Export:
C:\Users\tfurst\Desktop\Wire Repl Testing
Enter path to save report:
C:\Users\tfurst\Desktop\Wire Repl Testing\report
Error on line 6 column 2
  collection(): failed to parse XML file
  file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: I/O error reported by XML parser processing file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: Read timed out
Aug 20, 2019 2:55:23 PM ipd.part.info.mining.app.IPDPartInfoMiningApp main
SEVERE: null
net.sf.saxon.s9api.SaxonApiException: collection(): failed to parse XML file file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: I/O error reported by XML parser processing file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: Read timed out
        at net.sf.saxon.s9api.XQueryEvaluator.run(XQueryEvaluator.java:372)
        at ipd.part.info.mining.app.IPDPartInfoMiningApp.main(IPDPartInfoMiningApp.java:80)
Caused by: net.sf.saxon.trans.XPathException: collection(): failed to parse XML file file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: I/O error reported by XML parser processing file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: Read timed out
        at net.sf.saxon.resource.XmlResource.getItem(XmlResource.java:113)
        at net.sf.saxon.functions.CollectionFn$2.mapItem(CollectionFn.java:246)
        at net.sf.saxon.expr.ItemMappingIterator.next(ItemMappingIterator.java:113)
        at net.sf.saxon.expr.ItemMappingIterator.next(ItemMappingIterator.java:108)
        at net.sf.saxon.expr.ItemMappingIterator.next(ItemMappingIterator.java:108)
        at net.sf.saxon.om.FocusTrackingIterator.next(FocusTrackingIterator.java:85)
        at net.sf.saxon.expr.ContextMappingIterator.next(ContextMappingIterator.java:59)
        at net.sf.saxon.expr.sort.DocumentOrderIterator.<init>(DocumentOrderIterator.java:47)
        at net.sf.saxon.expr.sort.DocumentSorter.iterate(DocumentSorter.java:230)
        at net.sf.saxon.expr.flwor.ForClausePush.processTuple(ForClausePush.java:34)
        at net.sf.saxon.expr.flwor.FLWORExpression.process(FLWORExpression.java:841)
        at net.sf.saxon.expr.instruct.ElementCreator.processLeavingTail(ElementCreator.java:337)
        at net.sf.saxon.expr.instruct.ElementCreator.processLeavingTail(ElementCreator.java:284)
        at net.sf.saxon.expr.instruct.Instruction.process(Instruction.java:151)
        at net.sf.saxon.query.XQueryExpression.run(XQueryExpression.java:411)
        at net.sf.saxon.s9api.XQueryEvaluator.run(XQueryEvaluator.java:370)
        ... 1 more


C:\Users\tfurst\Desktop\XQuery Test\testXml\test\tool>pause
Press any key to continue . . .

我相信解决方案可能相对简单,很可能只是我忽略了什么。在使用 XSL 转换时,我知道如何处理这个问题:提供一个目录(catalog)文件以及模式(schema)的位置即可。提前感谢您的帮助,非常感谢。


共 (1) 个答案

  1. # 1 楼答案

    要使用XML目录文件,应在代码中执行以下操作:

            // Create the Saxon processor and an XQuery compiler from it.
            Processor proc = new Processor(false); //false for Saxon-HE
            XQueryCompiler compiler = proc.newXQueryCompiler();
            // Register the XML catalog file on the processor's underlying Configuration.
            // "path/catalog.xml" looks like a placeholder for the real catalog location.
            // NOTE(review): the meaning of the trailing boolean argument is not visible
            // here - confirm against the Saxon XmlCatalogResolver API documentation.
            XmlCatalogResolver.setCatalog("path/catalog.xml", proc.getUnderlyingConfiguration(), false);
            ...