用dict中的值计算对数似然比

from collections import Counter

# Mnemonics that are kept; every other whitespace-separated token is ignored.
_OPCODES = frozenset([
    "add", "call", "cmp", "mov", "jnz", "jmp", "jz", "lea", "pop", "push",
    "retn", "sub", "test", "xor",
])


def n_gram_opcodes(source, n):
    """Count opcode n-grams found in a text file.

    The file is split on whitespace, tokens that are not one of the known
    opcode mnemonics are dropped, and n-grams are formed over the remaining
    opcode sequence (so opcodes separated only by non-opcode tokens are
    treated as adjacent).

    Args:
        source: Path of the file to read.
        n: Length of the n-grams to count.

    Returns:
        A Counter mapping each n-tuple of opcodes to its number of
        occurrences.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(source) as handle:
        tokens = handle.read().split()
    opcodes = [token for token in tokens if token in _OPCODES]
    # Zipping n progressively shifted views of the list yields every window
    # of n neighbouring opcodes.
    return Counter(zip(*[opcodes[i:] for i in range(n)]))

import math

# The placeholder value for 0 counts
epsilon = 0.0001


def opcode_llr(opcode, freq_table_before, freq_table_after):
    """Score a single opcode from two trigram frequency tables.

    Args:
        opcode: A single opcode mnemonic, e.g. 'mov'.
        freq_table_before: Frequency table of opcode trigrams *before*
            extraction.
        freq_table_after: Frequency table of opcode trigrams *after*
            extraction.

        The keys of both tables are tuples of strings, e.g.
        {('mov', 'mov', 'mov'): 5.0, ('mov', 'jmp', 'mov'): 7.0, ...}

    Returns:
        (f1 + f2 + f3) * log(t_b / t_a), where t_b and t_a are the number of
        distinct keys in the before/after tables (``epsilon`` when a table is
        empty) and f1..f3 are ``epsilon`` plus the number of keys in
        ``freq_table_after`` holding ``opcode`` at position 0, 1 and 2.

    NOTE(review): only the keys are inspected -- the stored frequency values
    are never read, and ``freq_table_before`` contributes only its size.
    Confirm this matches the intended log-likelihood-ratio definition.
    """
    distinct_before = len(freq_table_before) or epsilon
    distinct_after = len(freq_table_after) or epsilon

    # One smoothed tally per trigram position (0, 1, 2).
    position_hits = [epsilon] * 3
    for trigram in freq_table_after:
        for slot, mnemonic in enumerate(trigram):
            if mnemonic == opcode:
                position_hits[slot] += 1

    first, second, third = position_hits
    ratio = float(distinct_before) / distinct_after
    return (first + second + third) * math.log(ratio)

1条回答

网友

1楼 · 发布于 2024-09-28 20:45:05

这是一种从 Counter 计算对数似然比（LLR）的通用方法。

from collections import Counter
import random
import math

def CntToLLR(cnt):
    """Convert a table of counts into per-key log-odds.

    For a key with count ``y`` out of ``n`` total samples the value is
    ``log(y) - log(n - y)``, i.e. the log of the odds of drawing that key.

    Args:
        cnt: A mapping (e.g. ``collections.Counter``) from keys to counts.

    Returns:
        A dict with the same keys as ``cnt`` mapping each key to its
        log-odds. A key with a zero count maps to ``-inf``; a key that
        accounts for every sample maps to ``+inf``.

    Raises:
        ValueError: If a count is negative or exceeds the total (propagated
            from ``math.log``).
    """
    n = sum(cnt.values())   # total number of samples
    LLR = {}                # dict to store LLRs (same keys as counter)
    for x, y in cnt.items():  # x is the key, and y the count
        if y == 0:
            # log(0) is undefined; the limit of the log-odds is -infinity.
            LLR[x] = float("-inf")
        elif n - y == 0:
            # This key holds every sample, so its odds are unbounded.
            LLR[x] = float("inf")
        else:
            LLR[x] = math.log(y) - math.log(n - y)
    return LLR

# Populate a counter with 100 random draws from range(10)
cnt = Counter(random.randrange(10) for _ in range(100))

llrs = CntToLLR(cnt)

# Convert the dictionary to a list of (key, value) pairs.
# dict.items() works on both Python 2 and Python 3; the original
# dict.iteritems() exists only on Python 2 and raises AttributeError on 3.
llrs = list(llrs.items())

相关问题更多 >

编程相关推荐

热门问题

热门文章