我正在尝试扁平化来自 Kallisto -> Pizzly 流程的 JSON 文件。GitHub 页面提供了一个 Python 脚本 flatten_json.py(见下文)。我还是 Python 初学者,还没有弄清楚如何正确运行这个脚本来生成基因表(gene table)。我在网上找不到任何相关的教程或使用示例(vignette)。任何指导都会很有帮助。
import sys
import json
from collections import OrderedDict
####
# gene1_name gene1_id, gene2_name, gene2_id, type, pair, split, txlist
def loadJSON(fn):
    """Load a JSON result file and return its top-level ``genes`` entry.

    Args:
        fn: Path of the JSON file to read.

    Returns:
        The value stored under the top-level ``"genes"`` key. JSON objects
        are decoded as ``OrderedDict`` so the key order of the file is kept.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file content is not valid JSON.
        KeyError: If the document has no top-level ``"genes"`` key.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(fn, encoding="utf-8") as f:
        JJ = json.load(f, object_pairs_hook=OrderedDict)
    return JJ['genes']
def outputGeneTable(fusions, outf, filters=None):
    """Write a tab-separated table with one row per gene fusion.

    The first line written is a header with the columns ``geneA.name``,
    ``geneA.id``, ``geneB.name``, ``geneB.id``, ``paircount``,
    ``splitcount`` and ``transcripts.list``. Each following line holds the
    corresponding values of one fusion; the ``fasta_record`` values of its
    transcripts are joined with ``;`` in the last column.

    Args:
        fusions: Iterable of fusion records. Each record is indexed with
            the keys ``geneA``/``geneB`` (each providing ``name`` and
            ``id``), ``paircount``, ``splitcount`` and ``transcripts``
            (an iterable of records providing ``fasta_record``).
        outf: Writable text stream; only its ``write`` method is used.
        filters: Accepted for interface compatibility; not used here.

    Raises:
        KeyError: If a record lacks one of the keys listed above.
    """
    header = "geneA.name geneA.id geneB.name geneB.id paircount splitcount transcripts.list".split()
    outf.write('\t'.join(header))
    outf.write('\n')
    for gf in fusions:
        row = [
            gf['geneA']['name'],
            gf['geneA']['id'],
            gf['geneB']['name'],
            gf['geneB']['id'],
            gf['paircount'],
            gf['splitcount'],
            ';'.join(str(tp['fasta_record']) for tp in gf['transcripts']),
        ]
        # Convert every field so a non-string value (e.g. a numeric id)
        # cannot make str.join raise TypeError.
        outf.write('\t'.join(str(field) for field in row))
        outf.write('\n')
def usage():
    """Print the command-line usage message to standard output."""
    print("Usage: python flatten_json.py fusion.out.json [genetable.txt]")
    print("")
    print(" outputs a flat table listing all gene fusions, if the output file is not")
    print(" specified it prints to standard output")
if __name__ == "__main__":
    # Command line: flatten_json.py fusion.out.json [genetable.txt]
    nargs = len(sys.argv)
    if nargs <= 1:
        # No input file given: show the help text and do nothing else.
        usage()
    else:
        infn = sys.argv[1]
        fusions = loadJSON(infn)
        # Write to standard output unless exactly one extra argument
        # (the output path) was supplied.
        outf = sys.stdout
        if nargs == 3:
            outf = open(sys.argv[2], 'w')
        try:
            outputGeneTable(fusions, outf)
        finally:
            # Close only a file opened here, and do so even when writing
            # the table raised; never close sys.stdout.
            if outf is not sys.stdout:
                outf.close()
目前没有回答
相关问题 更多 >
编程相关推荐