为Pizzly Fusion表展平JSON文件

2024-09-29 23:23:49 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试展平来自 Kallisto -> Pizzly 管道的 JSON 文件。GitHub 页面提供了一个 Python 脚本 flatten_json.py(见下文)。我还是 Python 的初学者,还没有弄清楚如何正确运行这个脚本来生成基因表(gene table)。我在网上找不到任何相关的教程或使用说明(vignette)。任何指导都会很有帮助。

import sys
import json
from collections import OrderedDict

####
# gene1_name gene1_id, gene2_name, gene2_id, type, pair, split, txlist

def loadJSON(fn):
    """Load a JSON file and return the value under its top-level 'genes' key.

    Args:
        fn: Path of the JSON file to read.

    Returns:
        The value stored under ``'genes'``. JSON objects are decoded as
        ``OrderedDict`` instances, so key order from the file is preserved.

    Raises:
        KeyError: If the document has no top-level ``'genes'`` key.
    """
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's default text encoding.
    with open(fn, encoding='utf-8') as f:
        JJ = json.load(f, object_pairs_hook=OrderedDict)
    return JJ['genes']

def outputGeneTable(fusions, outf, filters = None):
    """Write a tab-separated table of gene fusions to ``outf``.

    A header line is written first, followed by one line per fusion record.

    Args:
        fusions: Iterable of mappings, each providing 'geneA' and 'geneB'
            (mappings with 'name' and 'id'), 'paircount', 'splitcount' and
            'transcripts' (an iterable of mappings with 'fasta_record').
        outf: Writable text stream; only its ``write`` method is used.
        filters: Accepted for interface compatibility; not used here.
    """
    header = (
        "geneA.name",
        "geneA.id",
        "geneB.name",
        "geneB.id",
        "paircount",
        "splitcount",
        "transcripts.list",
    )
    outf.write('\t'.join(header))
    outf.write('\n')

    for fusion in fusions:
        # Look the fields up in column order so a missing key fails on the
        # same field regardless of how the row is assembled afterwards.
        row = [
            fusion['geneA']['name'],
            fusion['geneA']['id'],
            fusion['geneB']['name'],
            fusion['geneB']['id'],
            str(fusion['paircount']),
            str(fusion['splitcount']),
        ]
        records = []
        for transcript in fusion['transcripts']:
            records.append(transcript['fasta_record'])
        # All transcript records share a single column, separated by ';'.
        row.append(';'.join(records))

        outf.write('\t'.join(row))
        outf.write('\n')

def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Usage: python flatten_json.py fusion.out.json [genetable.txt]",
        "",
        "       outputs a flat table listing all gene fusions, if the output file is not",
        "       specified it prints to standard output",
    )
    for line in help_lines:
        print(line)


if __name__ == "__main__":
    # Command-line entry point:
    #   python flatten_json.py fusion.out.json [genetable.txt]
    nargs = len(sys.argv)
    if nargs <= 1:
        # No input file given: show the help text and do nothing else.
        usage()
    else:
        infn = sys.argv[1]
        fusions = loadJSON(infn)
        if nargs == 3:
            # The context manager closes the output file even if writing the
            # table raises, so the handle is never leaked.
            with open(sys.argv[2], 'w') as outf:
                outputGeneTable(fusions, outf)
        else:
            # No output path was given (or extra arguments were supplied):
            # write the table to standard output.
            outputGeneTable(fusions, sys.stdout)

Tags: name import id json if def sys write

热门问题