<p>这是另一种方案。我试着让它在你添加更多文件时更易于管理。脚本可以在命令行上运行,每个要添加的文件对应一个参数。基因/样本名称存储在字典中以提高查找效率。所需 JSON 对象的格式化在每个类的 format() 方法中完成。希望这对你有帮助。</p>
<pre><code>import csv, json, sys
class Sample(object):
    """A named sample together with the list of "extras" recorded for it."""

    def __init__(self, name, extras):
        """Create a sample.

        name   -- the sample's name
        extras -- the first extras entry for this sample; it is stored as
                  the only element of a new list
        """
        self.name = name
        self.extras = [extras]

    def format(self):
        """Return this sample as a dict with 'sample' and 'extras' keys."""
        return {'sample': self.name, 'extras': self.extras}

    def add_extras(self, extras):
        """Append every item of the iterable ``extras`` to this sample's list."""
        # New extras are always added; nothing is merged or de-duplicated.
        self.extras.extend(extras)
class Gene(object):
    """A named gene holding its samples in a dict keyed by sample name."""

    def __init__(self, name, samples):
        """Create a gene.

        name    -- the gene's name
        samples -- dict of sample objects; it is stored as-is (not copied)
        """
        self.name = name
        self.samples = samples

    def format(self):
        """Return this gene as a dict with 'gene' and 'samples' keys.

        'samples' is the list of each sample's format() result, sorted by
        the value of its 'sample' key.
        """
        formatted = [sample.format() for sample in self.samples.values()]
        formatted.sort(key=lambda entry: entry['sample'])
        return {'gene': self.name, 'samples': formatted}

    def create_or_add_samples(self, new_samples):
        """Merge the sample objects in the dict ``new_samples`` into this gene.

        A sample whose name is already present contributes its extras to the
        existing sample; any other sample is stored under its own name.
        """
        for sample in new_samples.values():
            if sample.name in self.samples:
                self.samples[sample.name].add_extras(sample.extras)
            else:
                self.samples[sample.name] = sample
class Genes(object):
    """Collection of gene objects stored in a dict keyed by gene name."""

    def __init__(self):
        """Start with no genes."""
        self.genes = {}

    def format(self):
        """Return every gene's format() result as a list sorted by 'gene'."""
        formatted = [gene.format() for gene in self.genes.values()]
        return sorted(formatted, key=lambda entry: entry['gene'])

    def create_or_add_gene(self, gene):
        """Store ``gene``, or merge its samples into the gene of the same name."""
        if gene.name not in self.genes:
            self.genes[gene.name] = gene
        else:
            self.genes[gene.name].create_or_add_samples(gene.samples)
def row_to_gene(headers, row):
    """Build a Gene containing a single Sample from one data row.

    headers -- column names; headers[i] names the value row[i]
    row     -- the values of one data line

    The value under the "gene" header becomes the gene name and the value
    under the "sample" header becomes the sample name; both default to ""
    when the column is absent. Every other column is collected into the
    sample's extras dict, keyed by its header.

    Raises IndexError if the row has more values than there are headers.
    """
    gene_name = ""
    sample_name = ""
    extras = {}
    for index, value in enumerate(row):
        header = headers[index]
        if header == "gene":
            gene_name = value
        elif header == "sample":
            sample_name = value
        else:
            extras[header] = value
    # The gene starts out with exactly one sample, keyed by the sample name.
    return Gene(gene_name, {sample_name: Sample(sample_name, extras)})
if __name__ == '__main__':
    # Usage: pass one or more tab-delimited files as command-line arguments.
    # The first non-blank line of each file is treated as its header row.
    delim = "\t"
    genes = Genes()
    files = sys.argv[1:]
    for path in files:
        print("Reading " + str(path))
        with open(path, 'r') as f1:
            reader = csv.reader(f1, delimiter=delim)
            headers = []
            for row in reader:
                if not row:
                    # csv.reader yields [] for a blank line; skipping it
                    # avoids creating a gene and sample with empty names.
                    continue
                if not headers:
                    headers = row
                else:
                    genes.create_or_add_gene(row_to_gene(headers, row))
    # Serialize the merged result once all files are read, then both print
    # it and save it to json_output.txt.
    result = json.dumps(genes.format(), indent=4)
    print(result)
    with open('json_output.txt', 'w') as output:
        output.write(result)
</code></pre>