有 Java 编程相关的问题?

你可以在下面搜索框中键入要查询的问题!

java中的XML文档遍历器

每个人都知道我们可以使用DocumentTraversal的节点编辑器遍历整个xml文档。 我的应用程序需要一些额外的工作,因此我决定在java Stack的支持下编写自己的XML遍历程序<>

这是我的代码(我不擅长编码,所以代码和逻辑可能看起来很混乱)

public class test
{
    private static Stack<Node> gStack = new Stack<Node>();

    public static void main(String[] args) throws XPathExpressionException
    {
        String str =
            "<section>"
                + "<paragraph>This example combines regular wysiwyg editing of a document with very controlled editing of semantic rich content. The main content can be"
                + "edited like you would in a normal word processor. Though the difference is that the content remains schema valid XML because Xopus will not allow you to perform actions"
                + "on the document that would render it invalid.</paragraph>"
                + "<paragraph>The table is an example of controlled style. The style of the table is controlled by three attributes:</paragraph>"
                + "<unorderedlist>"
                + "<item><paragraph><emphasis>alternaterowcolor</emphasis>, do all rows have the same color, or should the background color alternate?</paragraph></item>"
                + "<item><paragraph><emphasis>border</emphasis>, a limited choice of border styles.</paragraph></item>"
                + "<item><paragraph><emphasis>color</emphasis>, a limited choice of colors.</paragraph></item>"
                + "</unorderedlist>"
                + "<paragraph>You have quite some freedom to style the table, but you can't break the predefined style.</paragraph>"
                + "</section>";

        Document domDoc = null;
        try
        {
            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            ByteArrayInputStream bis = new ByteArrayInputStream(str.getBytes());
            domDoc = docBuilder.parse(bis);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }

        Element root = null;
        NodeList list = domDoc.getChildNodes();
        for (int i = 0; i < list.getLength(); i++)
        {
            if (list.item(i) instanceof Element)
            {
                root = (Element) list.item(i);
                break;
            }
        }

        NodeList nlist = root.getChildNodes();

        System.out.println("root = " + root.getNodeName() + "  child count = " + nlist.getLength());
        domTraversor(root);
    }

    private static void domTraversor(Node node)
    {
        if (node.getNodeName().equals("#text"))
        {
            System.out.println("textElem = " + node.getTextContent());
            if (node.getNextSibling() != null)
            {
                gStack.push(node.getNextSibling());
                domTraversor(node.getNextSibling());
            }
            else
            {
                if (node.getParentNode().getNextSibling() != null)
                    domTraversor(node.getParentNode().getNextSibling());
            }
        }
        else
        {
            if (node.getChildNodes().getLength() > 1)
            {
                gStack.push(node);
                Node n = node.getFirstChild();
                if (n.getNodeName().equals("#text"))
                {
                    System.out.println("textElem = " + n.getTextContent());
                    if (n.getNextSibling() != null)
                    {
                        gStack.push(n.getNextSibling());
                        domTraversor(n.getNextSibling());
                    }
                }
                else
                {
                    gStack.push(n);
                    domTraversor(n);
                }
            }
            else if (node.getChildNodes().getLength() == 1)
            {
                Node fnode = node.getFirstChild();
                if (fnode.getChildNodes().getLength() > 1)
                {
                    gStack.push(fnode);
                    domTraversor(fnode);
                }
                else
                {
                    if (!fnode.getNodeName().equals("#text"))
                    {
                        gStack.push(fnode);
                        domTraversor(fnode);
                    }
                    else
                    {
                        System.out.println("textElem = " + fnode.getTextContent());
                        if (fnode.getNodeName().equals("#text"))
                        {
                            if (node.getNextSibling() != null)
                            {
                                gStack.push(node.getNextSibling());
                                domTraversor(node.getNextSibling());
                            }
                            else
                            {
                                if (!gStack.empty())
                                {
                                    Node sibPn = gStack.pop();
                                    if (sibPn.getNextSibling() == null)
                                    {
                                        sibPn = gStack.pop();
                                    }
                                    domTraversor(sibPn.getNextSibling());
                                }
                            }
                        }
                        else
                        {
                            if (fnode.getNextSibling() != null)
                            {
                                domTraversor(fnode.getNextSibling());
                            }
                            else
                            {
                                if (!gStack.empty())
                                {
                                    Node sibPn = gStack.pop().getNextSibling();
                                    domTraversor(sibPn);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

它可以很好地处理某些xml文档,但不能处理具有类似标记的文档

<unorderedlist>
    <item>
        <paragraph>
            <emphasis>alternaterowcolor</emphasis>
            , do all rows have the same color, or should the background
            color
            alternate?
        </paragraph>
    </item>
    <item>
        <paragraph>
            <emphasis>border</emphasis>
            , a limited choice of border styles.
        </paragraph>
    </item>
    <item>
        <paragraph>
            <emphasis>color</emphasis>
            , a limited choice of colors.
        </paragraph>
    </item>
</unorderedlist>

这里是一个场景,如果任何元素有三个以上的嵌套子元素,我的代码就会停止,不再继续

是否有更好的实施方案,请建议


共 (1) 个答案

  1. # 1 楼答案

    这样试试

    Element e;
    NodeList n;
    Document doc=StudyParser.XMLfromString(xmlString);
    String starttag=doc.getFirstChild().getNodeName();
       Log.e("start",starttag );
       n=doc.getElementsByTagName(starttag);
       for(int i=0;i<n.getLength();i++){
           e=(Element)n.item(i);
           NodeList np = e.getElementsByTagName("item");
           for(int j=0;j<np.getLength();j++){
               e=(Element)n.item(i);
               try{
               String para=StudyParser.getValue(e, "paragraph");
               Log.e("paravalue",para);
               String emp=StudyParser.getValue(e, "emphasis");       
               Log.e("empval",emp);
               }catch(Exception e){
                   e.printStackTrace();
               }
           }
       }
    

    StudyParser课程

        import java.io.BufferedInputStream;
        import java.io.ByteArrayOutputStream;
        import java.io.IOException;
        import java.io.InputStream;
        import java.io.InputStreamReader;
        import java.io.StringReader;
        import java.io.UnsupportedEncodingException;
        import java.net.MalformedURLException;
    
        import javax.xml.parsers.DocumentBuilder;
        import javax.xml.parsers.DocumentBuilderFactory;
        import javax.xml.parsers.ParserConfigurationException;
    
        import org.apache.http.HttpEntity;
        import org.apache.http.HttpResponse;
        import org.apache.http.client.methods.HttpPost;
        import org.apache.http.impl.client.DefaultHttpClient;
        import org.apache.http.util.EntityUtils;
        import org.w3c.dom.Document;
        import org.w3c.dom.Element;
        import org.w3c.dom.Node;
        import org.w3c.dom.NodeList;
        import org.xml.sax.InputSource;
        import org.xml.sax.SAXException;
    
    
    
        public class StudyParser {
        public StudyParser() {
    
        }
    
    public final static Document XMLfromString(String xml){
         Document doc = null;
    
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
                try {
    
              DocumentBuilder db = dbf.newDocumentBuilder();
    
              InputSource is = new InputSource();
                  is.setCharacterStream(new StringReader(xml));
                  doc = db.parse(is); 
    
            } catch (ParserConfigurationException e) {
              System.out.println("XML parse error: " + e.getMessage());
              return null;
            } catch (SAXException e) {
              System.out.println("Wrong XML file structure: " + e.getMessage());
                    return null;
            } catch (IOException e) {
              System.out.println("I/O exeption: " + e.getMessage());
              return null;
            }
    
                return doc;
    
      }
    public static String getXMLstring(String xml){   
          String line = null;
    
          try {
    
            DefaultHttpClient httpClient = new DefaultHttpClient();
            HttpPost httpPost = new HttpPost(xml);
    
            HttpResponse httpResponse = httpClient.execute(httpPost);
            HttpEntity httpEntity = httpResponse.getEntity();
            line = EntityUtils.toString(httpEntity);
    
          } catch (UnsupportedEncodingException e) {
            line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
          } catch (MalformedURLException e) {
            line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
          } catch (IOException e) {
            line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
          }
    
          return line;
    
      }
    public static String getXML(InputStream is)throws IOException {
    
        BufferedInputStream bis = new BufferedInputStream(is);
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        int result = bis.read();
        while(result != -1) {
          byte b = (byte)result;
          buf.write(b);
          result = bis.read();
        }        
        return buf.toString();
    }
    public final static String getElementValue( Node elem ) {
           Node kid;
           if( elem != null){
               if (elem.hasChildNodes()){
                   for( kid = elem.getFirstChild(); kid != null; kid = kid.getNextSibling() ){
                       if( kid.getNodeType() == Node.TEXT_NODE  ){
                           return kid.getNodeValue();
                       }
    
                   }
               }
           }
           return "";
       }
     public static int numResults(Document doc){    
            Node results = doc.getDocumentElement();
            int res = -1;
    
            try{
              res = Integer.valueOf(results.getAttributes().getNamedItem("Categories").getNodeValue());
            }catch(Exception e ){
              res = -1;
            }
    
            return res;
          }
    
          public static String getValue(Element item, String str) {    
            NodeList n = item.getElementsByTagName(str);    
            return StudyParser.getElementValue(n.item(0));
          }
    
    
    }
    

    只是动态xml的普通演示我假设了相同的xml,但没有使用getElementByTagName有许多属性可以查看

       doc = StudyParser.XMLfromString(xml);
       String starttag=doc.getFirstChild().getNodeName();
       Log.e("start",starttag );
       n=doc.getElementsByTagName(starttag);
       for(int i=0;i<n.getLength();i++){
           e=(Element)n.item(i);
               try{
                 Log.e("1234",""+ e.getTextContent());
    
               }catch(Exception e){
                   e.printStackTrace();
               }
    
       }