有 Java 编程相关的问题?

你可以在下面搜索框中键入要查询的问题!

使用 Java 的 StAX 解析器解析 XML

您好,我有一个包含多个起始标签的大型 XML 文件,请帮我解析它并按组显示。我的 XML 文件如下:

我希望将每个用户、内容和会话ID添加到文件的一行中,如果会话ID不同,则添加到第二行

例如:输出文件应为 1 ConversationID 用户名 对话 + 用户名 对话 …… 2 另一个 ConversationID 用户名 对话 + 用户名 对话

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Data provided by Bloomberg LP. -->
<FileDump>
<Version>IBXML 1.3</Version>
<Conversation Perspective=" " RoomType="P">
<RoomID>PCHAT-0x3000001CA8361</RoomID>
<StartTime>03/31/2016 13:39:01</StartTime>
<StartTimeUTC>1459431541</StartTimeUTC>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 13:39:01</DateTime>
<DateTimeUTC>1459431541</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 14:56:22</DateTime>
<DateTimeUTC>1459436182</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantLeft InteractionType="N" DeviceType="M">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 19:30:01</DateTime>
<DateTimeUTC>1459452601</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantLeft>
<ParticipantLeft InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 19:33:56</DateTime>
<DateTimeUTC>1459452836</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantLeft>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 19:45:16</DateTime>
<DateTimeUTC>1459453516</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantLeft InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 23:08:09</DateTime>
<DateTimeUTC>1459465689</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantLeft>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 23:14:23</DateTime>
<DateTimeUTC>1459466063</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<Message InteractionType="N">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:10:57</DateTime>
<DateTimeUTC>1459469457</DateTimeUTC>
<Content>
abcdefgghhhhhh
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>WVU</LoginName>
<FirstName>WHEELOCK</FirstName>
<LastName>VU</LastName>
<UUID>8266852</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>WVU@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>WHEELOCKVU@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:14:05</DateTime>
<DateTimeUTC>1459469645</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantEntered InteractionType="N">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:29:19</DateTime>
<DateTimeUTC>1459470559</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<Message InteractionType="N">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:29:19</DateTime>
<DateTimeUTC>1459470559</DateTimeUTC>
<Content>
ajdakjgdljsgdsafhkafa
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:29:19</DateTime>
<DateTimeUTC>1459470559</DateTimeUTC>
<Content>
akjdgljsafdlshf;kdsjf
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N">
<User>
<LoginName>WVU</LoginName>
<FirstName>WHEELOCK</FirstName>
<LastName>VU</LastName>
<UUID>8266852</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>WVU@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>WHEELOCKVU@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:39:32</DateTime>
<DateTimeUTC>1459471172</DateTimeUTC>
<Content>
sagdksajdlsahd
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 01:01:27</DateTime>
<DateTimeUTC>1459472487</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<Message InteractionType="N">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 01:31:29</DateTime>
<DateTimeUTC>1459474289</DateTimeUTC>
<Content>
ajdslsahdsj;a
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 02:49:46</DateTime>
<DateTimeUTC>1459478986</DateTimeUTC>
<Content>
sagdkjsagdkjashdlasjd
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 02:49:46</DateTime>
<DateTimeUTC>1459478986</DateTimeUTC>
<Content>
jsdhkshdksjdlsjdlks
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 03:47:37</DateTime>
<DateTimeUTC>1459482457</DateTimeUTC>
<Content>
jshdkshdksjdlskld
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress></CorporateEmailAddress>
</User>
<DateTime>04/01/2016 03:47:37</DateTime>
<DateTimeUTC>1459482457</DateTimeUTC>
<Content>
aasasasasas
</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<EndTime>04/01/2016 03:47:37</EndTime>
<EndTimeUTC>1459482457</EndTimeUTC>
</Conversation>
</FileDump>

共 (1) 个答案

  1. # 1 楼答案

    如果所有 Content、LoginName 和 ConversationID 节点中的文本都能放入内存,那么解决方案可以类似于我在下面给出的代码(从 input.xml 读取,写入 output.txt;此外,我假设您希望行号为 1、2、……,并且像问题中所说的那样用“+”符号分隔不同的消息)。

    但是,如果这些数据无法放入内存,那么您需要先使用 StAX 将其按 (ConversationID, LoginName, Content) 的格式提取到一个文件中,然后在外部存储(磁盘或多台机器)上对其进行外部排序,再把具有相同 ConversationID 的连续行合并。或者,也可以把初始 XML 拆分成若干个文件,用下面的方法分别处理每个文件;之后仍然需要合并生成的文件,但这样可能会更容易。

    import java.io.FileInputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.Writer;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;
    
    import javax.xml.stream.XMLInputFactory;
    import javax.xml.stream.XMLStreamConstants;
    import javax.xml.stream.XMLStreamException;
    import javax.xml.stream.XMLStreamReader;
    
    import org.apache.commons.lang3.StringUtils;
    
    public class Solution {
    
        private static final String ROOM_ID = "RoomID";
        private static final String CONTENT = "Content";
        private static final String LOGIN_NAME = "LoginName";
        private static final String CONVERSATION_ID = "ConversationID";
        private static final String FILE_DUMP = "FileDump";
        private static final String MESSAGE = "Message";
        private static final String CONVERSATION = "Conversation";
        private static final String START_TIME = "StartTime";
    
        static class ConversationInfo {
            private String startTimeStr;
    
            private String conversationId;
    
            private final Set<String> users = new HashSet<>();
    
            private final List<Message> messages = new ArrayList<>();
    
            @Override
            public String toString() {
                return String.format("%s %s (%d) %s", startTimeStr, conversationId, users.size(),
                     StringUtils.join(messages, " + "));
            }
        }
    
        static class Message {
    
            public final String userName;
    
            public final String content;
    
            public Message(String name, String content) {
                this.userName = name;
                this.content = content;
            }
    
            @Override
            public String toString() {
                return userName + " " + content;
            }
        }
    
        public static void main(String[] args)
                throws XMLStreamException, IOException {
            XMLInputFactory xf = XMLInputFactory.newFactory();
            List<ConversationInfo> m = new ArrayList<>();
            try (FileInputStream fin = new FileInputStream("input.xml")) {
                XMLStreamReader xr = xf.createXMLStreamReader(fin);
                LOOP: while (xr.hasNext()) {
                    int event = xr.next();
                    switch (event) {
                        case XMLStreamConstants.START_ELEMENT: {
                            String elName = xr.getLocalName();
                            if (CONVERSATION.equals(elName)) {
                                ConversationInfo convInfo = parseConversation(xr);
                                if (convInfo != null) {
                                    m.add(convInfo);
                                }
                            }
                            break;
                        }
                        case XMLStreamConstants.END_ELEMENT: {
                            String elName = xr.getLocalName();
                            if (FILE_DUMP.equals(elName)) {
                                break LOOP;
                            }
                            break;
                        }
                        case XMLStreamConstants.END_DOCUMENT:
                            throw new IllegalStateException("xml not well-formed: <"
                                + FILE_DUMP + "> tag not closed");
                    }
                }
            }
            try (FileWriter w = new FileWriter("output.txt")) {
                int i = 1;
                for (ConversationInfo convInfo : m) {
                    w.write(String.format("%d %s\n", i++, convInfo));
                }
            }
        }
    
        private static ConversationInfo parseConversation(XMLStreamReader xr)
                throws XMLStreamException {
            ConversationInfo convInfo = new ConversationInfo();
            while (xr.hasNext()) {
                int event = xr.next();
                switch (event) {
                    case XMLStreamConstants.START_ELEMENT: {
                        String elName = xr.getLocalName();
                        if (MESSAGE.equals(elName)) {
                            Message message = parseMessage(xr);
                            if (message != null) {
                                convInfo.messages.add(message);
                                convInfo.users.add(message.userName);
                            }
                        } else if (START_TIME.equals(elName)) {
                            convInfo.startTimeStr = xr.getElementText();
                        } else if (ROOM_ID.equals(elName)) {
                            convInfo.conversationId = xr.getElementText();
                        }
                        break;
                    }
                    case XMLStreamConstants.END_ELEMENT: {
                        String elName = xr.getLocalName();
                        if (CONVERSATION.equals(elName)) {
                            return convInfo;
                        }
                        break;
                    }
                    case XMLStreamConstants.END_DOCUMENT:
                        throw new XMLStreamException("xml not well-formed: <"
                            + CONVERSATION + "> tag not closed");
                }
            }
            throw new XMLStreamException(
                "unexpected end of xml file while parsing a conversation");
        }
    
        private static Message parseMessage(XMLStreamReader xr)
                throws XMLStreamException {
            String userName = null;
            String content = null;
            while (xr.hasNext()) {
                int event = xr.next();
                switch (event) {
                    case XMLStreamConstants.START_ELEMENT: {
                        String elName = xr.getLocalName();
                        if (LOGIN_NAME.equals(elName)) {
                            userName = xr.getElementText();
                        } else if (CONTENT.equals(elName)) {
                            content = StringUtils.trimToEmpty(xr.getElementText());
                        }
                        break;
                    }
                    case XMLStreamConstants.END_ELEMENT: {
                        String elName = xr.getLocalName();
                        if (MESSAGE.equals(elName)) {
                            return new Message(userName, content);
                        }
                        break;
                    }
                    case XMLStreamConstants.END_DOCUMENT:
                        throw new XMLStreamException("xml not well-formed: <"
                            + MESSAGE + "> tag not closed");
                }
            }
            throw new XMLStreamException(
                "unexpected end of xml file while parsing a message");
        }
    }