如何判断某处的这个词是否可以替换……（Python 脚本）

#!/usr/bin/python
"""Replace one word with another in every matching file below a directory.

Each file's access and modification times are restored after it is
processed, so the edit does not show up as a fresh change.
"""
import os
import time
from stat import *  # kept as in the original script; names not used below


def fileExtension(s):
    """Return the extension of file name *s* wrapped in '|' delimiters.

    The delimiters let the caller test membership in a '|'-separated list
    such as '|html|htm|' without matching partial extensions.
    Returns '' when *s* contains no '.' at all.
    """
    i = s.rfind('.')
    if i == -1:
        return ''
    return '|' + s[i + 1:] + '|'


def _toBytes(s):
    """Return *s* as bytes, encoding text as UTF-8 when necessary."""
    if isinstance(s, bytes):
        return s
    # NOTE(review): assumes target files are UTF-8/ASCII compatible when the
    # search strings contain non-ASCII characters -- confirm for your data.
    return s.encode('utf-8')


def _raiseWalkError(err):
    """Re-raise directory listing errors instead of silently skipping them."""
    raise err


def _replaceInFile(path):
    """Replace oldStr with newStr inside *path*, preserving its timestamps."""
    st = os.stat(path)
    old = _toBytes(oldStr)
    new = _toBytes(newStr)
    # Binary mode keeps line endings and undecodable bytes untouched.
    with open(path, 'rb+') as fw:
        original = fw.read()
        updated = original.replace(old, new)
        if updated != original:
            fw.seek(0)
            fw.write(updated)
            # Drop leftover bytes when the new content is shorter.
            fw.truncate()
    # Put the original access/modification times back (reading alone may
    # already have touched the access time).
    os.utime(path, (st.st_atime, st.st_mtime))


def changeFiles(root='.'):
    """Apply the oldStr -> newStr replacement to matching files under *root*.

    A file matches when its extension is listed in the module-level
    ``extStr``; files without any extension never match.  Sub-directories
    are visited only when the module-level ``checkSubDirs`` is true.
    Symbolic links to directories are not followed.

    root -- directory to start from; defaults to the current directory.
    """
    for dirpath, dirnames, filenames in os.walk(root, onerror=_raiseWalkError):
        if not checkSubDirs:
            # Emptying the list in place stops os.walk from descending.
            del dirnames[:]
        for name in filenames:
            ext = fileExtension(name)
            # An empty extension would be "found" at index 0 of any string,
            # so it has to be rejected explicitly.
            if not ext or ext not in extStr:
                continue
            path = os.path.join(dirpath, name)
            # Skip broken symlinks and special files.
            if not os.path.isfile(path):
                continue
            _replaceInFile(path)


# ==============================================================================
# ==================================== MAIN ====================================
oldStr = 'MyWord'          # text to search for
newStr = 'Myword'          # replacement text
extStr = '|html|htm|'      # '|'-delimited list of extensions to process
checkSubDirs = True        # also process files in sub-directories

changeFiles()

2条回答

网友

1楼 · 编辑于 2024-10-02 12:36:16

lxml 有助于完成这类任务。

html = """
<html>
<body>
    <h1>MyWord</h1>
    <a href="http://MyWord">MyWord</a>
    <img src="images/MyWord.png"/>
    <div class="MyWord">
        <p>MyWord!</p>
        MyWord
    </div>
    MyWord
</body><!-- MyWord -->
</html>
"""

import re

import lxml.etree as etree

# Parse the markup so that only text content is rewritten; attribute values
# (href, src, class) are never touched because they are not part of
# .text or .tail.
tree = etree.fromstring(html)
for elem in tree.iter():
    # .text is the text directly inside the node (including comment bodies).
    if elem.text:
        elem.text = re.sub(r'MyWord', 'Myword', elem.text)
    # .tail is the text that follows the node's closing tag.
    if elem.tail:
        elem.tail = re.sub(r'MyWord', 'Myword', elem.tail)

# tostring() returns ASCII-encoded bytes by default; decode so the markup
# prints as plain text on both Python 2 and Python 3.
print(etree.tostring(tree).decode('ascii'))

上面打印的是：

<html>
<body>
    <h1>Myword</h1>
    <a href="http://MyWord">Myword</a>
    <img src="images/MyWord.png"/>
    <div class="MyWord">
        <p>Myword!</p>
        Myword
    </div>
    Myword
</body><!-- Myword -->
</html>

注意：如果还需要对 <script> 标签中的内容进行特殊处理，上述代码就需要写得稍微复杂一些，例如：

<script>
    var title = "MyWord"; // this should change to "Myword"
    var hoverImage = "images/MyWord-hover.png"; // this should not change
</script>

网友

2楼 · 编辑于 2024-10-02 12:36:16

可以使用正则表达式（regex）。下面是一个可以作为起点的示例，希望对您有所帮助：

import re

html = """
    <href="MyWord" />
    MyWord
"""

# The negative lookbehind (?<!href=") skips any occurrence that directly
# follows href=" so that link targets keep their original spelling.  It only
# guards that exact prefix; other attributes or longer URLs are not protected.
result = re.sub(r'(?<!href=")MyWord', 'myword', html)
print(result)
output: '\n    <href="MyWord" />\n    myword\n'

参考：http://docs.python.org/library/re.html

相关问题更多 >

编程相关推荐

热门问题

热门文章