Python regex groupdict为组返回单个字符而不是字符串

#!/bin/python import sys import getopt import re import base64 #################################################################################### # # Setting up global vars and functions # #################################################################################### # Assemble Pattern Dictionary pattern={} pattern["HexOct"]=re.compile(r'([\"\'])(?P<obf_code>(\\[xX012]?[\dA-Fa-f]{2})*)\1') pattern["Base64"]=re.compile(r'([\"\'])(?P<obf_code>[\dA-Za-z\/\+]{15,}={0,2})\1') # Assemble more precise Pattern handling: sub_pattern={} sub_pattern["HexOct"]=re.compile(r'((?P<Hex>\\[xX][\dA-Fa-f]{2})|(?P<Oct>\\[012]?[\d]{2}))') #print pattern # trying to Debug Pattern Dicts #print sub_pattern # trying to Debug Pattern Dicts # Global Var init file_in="" file_out="" code_string="" format_code = False # Prints the Help screen def usage(): print "How to use deobf.py:" print "-----------------------------------------------------------\n" print "$ python deobf.py -i {inputfile.php} [-o {outputfile.txt}]\n" print "Other options include:" print "-----------------------------------------------------------" print "-f : Format - Format the output code with indentations" print "-h : Help - Prints this info\n" print "-----------------------------------------------------------" print "You can also use the long forms:" print "-i : --in" print "-o : --out" print "-f : --format" print "-h : --Help" # Combination wrapper for the above two functions def deHexOct(obf_code): match = re.search(sub_pattern["HexOct"],obf_code) if match: # Find and process Hex obfuscated elements for HexObj in match.groupdict()["Hex"]: print match.groupdict()["Hex"] print "Processing:" print HexObj.pattern obf_code.replace(HexObj,chr(int(HexObj),16)) # Find and process Oct obfuscated elements for OctObj in set(match.groupdict()["Oct"]): print "Processing:" print OctObj obf_code.replace(OctObj,chr(int(OctObj),8)) return obf_code # Crunch the Data def deObfuscate(file_string): # Identify HexOct sections and process match = re.search(pattern["HexOct"],file_string) if match: print "HexOct Obfuscation found." for HexOctObj in match.groupdict()["obf_code"]: print "Processing:" print HexOctObj file_string.replace(HexOctObj,deHexOct(HexOctObj)) # Identify B64 sections and process match = re.search(pattern["Base64"],file_string) if match: print "Base64 Obfuscation found." for B64Obj in match.groupdict()["obf_code"]: print "Processing:" print B64Obj file_string.replace(B64Obj,base64.b64decode(B64Obj)) # Return the (hopefully) deobfuscated string return file_string # File to String def loadFile(file_path): try: file_data = open(file_path) file_string = file_data.read() file_data.close() return file_string except ValueError,TypeError: print "[ERROR] Problem loading the File: " + file_path # String to File def saveFile(file_path,file_string): try: file_data = open(file_path,'w') file_data.write(file_string) file_data.close() except ValueError,TypeError: print "[ERROR] Problem saving the File: " + file_path #################################################################################### # # Main body of Script # #################################################################################### # Getting the args try: opts, args = getopt.getopt(sys.argv[1:], "hi:o:f", ["help","in","out","format"]) except getopt.GetoptError: usage() sys.exit(2) # Handling the args for opt, arg in opts: if opt in ("-h", "--help"): usage() sys.exit() elif opt in ("-i", "--in"): file_in = arg print "Designated input file: "+file_in elif opt in ("-o", "--out"): file_out = arg print "Designated output file: "+file_out elif opt in ("-f", "--format"): format_code = True print "Code Formatting mode enabled" # Checking the input if file_in =="": print "[ERROR] - No Input File Specified" usage() sys.exit(2) # Checking or assigning the output if file_out == "": file_out = file_in+"-deObfuscated.txt" print "[INFO] - No Output File Specified - Automatically assigned: "+file_out # Zhu Li, Do the Thing! code_string=loadFile(file_in) deObf_String=deObfuscate(str(code_string)) saveFile(file_out,deObf_String)

1条回答

网友

1楼 · 发布于 2024-06-26 02:26:20

您的正则表达式可以很好地匹配组，但是您将迭代匹配组中的字符。在

这将给出您刚刚匹配的字符串：match.groupdict()["Hex"]

这将迭代字符串中的字符：

for HexObj in match.groupdict()["Hex"]:

您希望迭代搜索，因此使用re.finditer()而不是re.search()。比如说：

^{pr2}$

另外，字符串前面的r只会停止Python将反斜杠解释为转义符，而正则表达式需要使用反斜杠来进行转义。另一种方法是将正则表达式中的每个反斜杠加倍；这样就不需要r前缀，但正则表达式的可读性可能会变得更差。在

相关问题更多 >

编程相关推荐

热门问题

热门文章