前言

python使用minidom处理xml还是比较麻烦的,网上很多资料都是断断续续的一部分,不成体系。这里写一个demo,把常用的xml解析操作:读写文件、解析节点、添加节点、解析属性、添加属性、解析节点值和修改节点值等,都包含进来,供各位读者参考。

实现

demo实现的功能是从input.xml文件中读取xml并解析,将解析内容输出,然后再将内容组装成xml写进output.xml文件中。
input.xml文件内容如下:

<school>
    <class>
        <name>class1</name>
        <student age='8'>zhangsan</student>
        <student age='8'>lisi</student>
    </class>
    <class>
        <name>class2</name>
        <student age='8'>wangwu</student>
        <student age='8'>zhaoliu</student>
    </class>
</school>

代码如下:

#!/usr/bin/python3
# coding=utf-8
import os
import xml.dom.minidom as minidom
import codecs


class Student:
    """A single student record: a name plus an age."""

    def __init__(self):
        # Start out as an "empty" student; callers fill the fields in.
        self.name = ''
        self.age = 0

    def __str__(self):
        """Render the student as ``[name:<name>,age:<age>]``."""
        pieces = ("[name:", self.name, ",age:", str(self.age), "]")
        return "".join(pieces)


class Class:
    """A school class: a name plus the list of its students."""

    def __init__(self):
        self.name = ''
        self.students = []

    def __str__(self):
        """Render as ``[name:<name>,Student:[<student>,<student>,]]``.

        Every student, including the last one, is followed by a comma.
        """
        header = '[name:' + self.name + ',Student:['
        members = ''.join(str(pupil) + ',' for pupil in self.students)
        return header + members + ']]'


def _node_text(node):
    """Return the concatenated character data of *node*'s direct text children.

    An element without any text content (e.g. ``<name/>``) yields ``''``
    instead of raising ``IndexError``.
    """
    return ''.join(
        child.data
        for child in node.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )


def ReadXml(xml_path="input.xml"):
    """Parse a school XML file into a list of ``Class`` objects.

    Args:
        xml_path: Path of the XML file to read. Defaults to ``"input.xml"``,
            the file the original demo always read.

    Returns:
        A list with one ``Class`` per ``<class>`` element below the document
        root, each carrying its name and its ``Student`` entries. The list is
        empty when ``xml_path`` is not an existing file.

    Raises:
        xml.parsers.expat.ExpatError: If the file is not well-formed XML.
        ValueError: If an ``age`` attribute is present but is not an integer.
    """
    if not os.path.isfile(xml_path):
        return []
    dom = minidom.parse(xml_path)
    school_node = dom.documentElement  # root element of the document

    result = []
    for class_node in school_node.getElementsByTagName('class'):
        temp_class = Class()

        # A class without a <name> child keeps the default empty name.
        name_nodes = class_node.getElementsByTagName("name")
        if name_nodes:
            temp_class.name = _node_text(name_nodes[0])

        for student_node in class_node.getElementsByTagName('student'):
            tmp_student = Student()
            tmp_student.name = _node_text(student_node)

            # getAttribute returns '' for a missing attribute; keep the
            # Student default age in that case rather than failing in int('').
            age_text = student_node.getAttribute('age').strip()
            if age_text:
                tmp_student.age = int(age_text)

            temp_class.students.append(tmp_student)
        result.append(temp_class)
    return result


# Write the class list out as XML
def WriteXml(class_list, output_path='output.xml'):
    """Serialize *class_list* as a ``<school>`` XML document.

    Args:
        class_list: Iterable of objects exposing ``name`` and ``students``;
            each student exposes ``name`` and ``age``.
        output_path: Destination file. Defaults to ``'output.xml'``, the file
            the original demo always wrote.

    The document is written UTF-8 encoded with one element per line and no
    indentation.
    """
    dom = minidom.Document()
    school_node = dom.createElement("school")
    dom.appendChild(school_node)

    for tmp_class in class_list:
        class_node = dom.createElement("class")  # <class> element
        school_node.appendChild(class_node)

        # <name> holds the class name as a text node.
        class_name_node = dom.createElement("name")
        class_name_node.appendChild(dom.createTextNode(str(tmp_class.name)))
        class_node.appendChild(class_name_node)

        for student in tmp_class.students:
            student_node = dom.createElement("student")
            # The age is stored as an attribute, the name as the element text.
            student_node.setAttribute('age', str(student.age))
            student_node.appendChild(dom.createTextNode(str(student.name)))
            class_node.appendChild(student_node)

    # The codecs writer encodes the text minidom emits; the `encoding`
    # argument of writexml only fills in the XML declaration.
    with codecs.open(output_path, 'wb', 'utf-8') as f:
        dom.writexml(f, newl='\n', encoding='utf-8')


if __name__ == '__main__':
    # Parse the input file, echo every parsed class, then write it back out.
    class_list = ReadXml()
    for tmp_class in class_list:
        # Call form of print: the statement form is a SyntaxError on Python 3.
        print(tmp_class)

    WriteXml(class_list)

生成的output.xml如下:

<?xml version="1.0" encoding="utf-8"?>
<school>
<class>
<name>class1</name>
<student age="8">zhangsan</student>
<student age="8">lisi</student>
</class>
<class>
<name>class2</name>
<student age="8">wangwu</student>
<student age="8">zhaoliu</student>
</class>
</school>

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐