<p>该文件似乎包含双重编码的JSON。我试着用Python的<code>csv</code>库解析它,但没有成功。</p>
<p>不过,您可以使用<code>re</code>模块拆分每一行,然后手动解码JSON。下面的脚本会打印解析后的JSON数据以及该行的其余字段:</p>
<pre><code>import re
import json
# Splits one data row into three parts: the leading field, the quoted
# JSON field, and everything after it (starting at the comma that precedes
# the next quoted field).
row_pattern = re.compile(r'^(.*?),(".*?")(,".*)')

with open('file.csv', newline='') as malformed_csvfile:
    next(malformed_csvfile, None)  # skip headers (no error on an empty file)
    for line in malformed_csvfile:
        match = row_pattern.match(line)
        if match is None:
            # Fail with a readable message instead of a bare IndexError.
            raise ValueError('line does not match the expected layout: %r' % line)
        d1, j, d2 = match.groups()
        # The JSON is double-encoded: the first loads() decodes the quoted
        # field into a str, the second parses that str into Python objects.
        json_data = json.loads(json.loads(j))
        # Re-join the non-JSON fields, split on commas and drop the quotes.
        rest_of_line = [field.strip('"') for field in (d1.strip() + d2.strip()).split(',')]
        print(rest_of_line)
        print(json.dumps(json_data, indent=4))
</code></pre>
<p>输出:</p>
<pre><code>['2911', '072017', 'GSTR3B', '652704b1-e9e4-4f3f-a1c8-148f076ac2a9', '2018-02-05 14:06:28.709000', '2018-11-18 05:36:24.625000', 'FILED', 'NULL', 'NULL', 'NULL']
{
"sup_details": {
"osup_det": {
"txval": 1232800.0,
"iamt": 196459.0,
"camt": 12723.0,
"samt": 12723.0,
"csamt": 0.0
},
"osup_zero": {
"txval": 0.0,
"iamt": 0.0,
"camt": 0.0,
"samt": 0.0,
"csamt": 0.0
},
"osup_nil_exmp": {
"txval": 0.0,
"iamt": 0.0,
"camt": 0.0,
"samt": 0.0,
"csamt": 0.0
},
"isup_rev": {
"txval": 120000.0,
"iamt": 0.0,
"camt": 10800.0,
"samt": 10800.0,
"csamt": 0.0
},
"osup_nongst": {
"txval": 0.0,
"iamt": 0.0,
"camt": 0.0,
"samt": 0.0,
"csamt": 0.0
}
},
...and so on.
</code></pre>
<p><code>file.csv</code>的内容即问题中给出的文本:</p>
<pre><code>"id","data","return_period","return_type","taxable_entity_id","created_at","updated_at","filing_status","exception","fiscal_year","exception_info"
2911,"{\"sup_details\":{\"osup_det\":{\"txval\":1232800.00,\"iamt\":196459.00,\"camt\":12723.00,\"samt\":12723.00,\"csamt\":0.00},\"osup_zero\":{\"txval\":0.00,\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},\"osup_nil_exmp\":{\"txval\":0.00,\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},\"isup_rev\":{\"txval\":120000.00,\"iamt\":0.00,\"camt\":10800.00,\"samt\":10800.00,\"csamt\":0.00},\"osup_nongst\":{\"txval\":0.00,\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00}},\"itc_elg\":{\"itc_avl\":[{\"ty\":\"IMPG\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"IMPS\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"ISRC\",\"iamt\":0.00,\"camt\":10800.00,\"samt\":10800.00,\"csamt\":0.00},{\"ty\":\"ISD\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"OTH\",\"iamt\":0.00,\"camt\":4320.00,\"samt\":4320.00,\"csamt\":0.00}],\"itc_rev\":[{\"ty\":\"RUL\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"OTH\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00}],\"itc_net\":{\"iamt\":0.00,\"camt\":15120.00,\"samt\":15120.00,\"csamt\":0.00},\"itc_inelg\":[{\"ty\":\"RUL\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"OTH\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00}]},\"tx_pmt\":{\"tx_py\":[{\"trans_typ\":\"30002\",\"liab_ldg_id\":237161,\"sgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cess\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"igst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00}},{\"trans_typ\":\"30003\",\"liab_ldg_id\":237162,\"sgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cess\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"igst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00}}],\"pdcash\":[{\"liab_ldg_id\":\"237161\",\"trans_typ\":\"30002\",\"ipd\":196459.00,\"cpd\":8403.00,\"spd\":8403.00,\"cspd\":0.00,\"i_intrpd\":0.00,\"c_intrpd\
":0.00,\"s_intrpd\":0.00,\"cs_intrpd\":0.00,\"i_lfeepd\":0.00,\"c_lfeepd\":0.00,\"s_lfeepd\":0.00,\"cs_lfeepd\":0.00},{\"liab_ldg_id\":\"237162\",\"trans_typ\":\"30003\",\"ipd\":0.00,\"cpd\":10800.00,\"spd\":10800.00,\"cspd\":0.00,\"i_intrpd\":0.00,\"c_intrpd\":0.00,\"s_intrpd\":0.00,\"cs_intrpd\":0.00,\"i_lfeepd\":0.00,\"c_lfeepd\":0.00,\"s_lfeepd\":0.00,\"cs_lfeepd\":0.00}],\"pditc\":{\"liab_ldg_id\":\"237161\",\"trans_typ\":\"30002\",\"i_pdi\":0.00,\"i_pdc\":0.00,\"i_pds\":0.00,\"c_pdi\":0.00,\"c_pdc\":4320.00,\"s_pdi\":0.00,\"s_pds\":4320.00,\"cs_pdcs\":0.00}},\"intr_ltfee\":{\"ltfee_details\":{\"camt\":0.00,\"samt\":0.00}},\"gstin\":\"37ACUPY7876Q1ZR\",\"ret_period\":\"072017\"}","072017","GSTR3B","652704b1-e9e4-4f3f-a1c8-148f076ac2a9","2018-02-05 14:06:28.709000","2018-11-18 05:36:24.625000","FILED",NULL,NULL,NULL
2918,"{\"sup_details\":{\"osup_det\":{\"txval\":1372882.00,\"iamt\":235139.00,\"camt\":5990.00,\"samt\":5990.00,\"csamt\":0.00},\"osup_zero\":{\"txval\":0.00,\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},\"osup_nil_exmp\":{\"txval\":0.00,\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},\"isup_rev\":{\"txval\":286800.00,\"iamt\":0.00,\"camt\":25812.00,\"samt\":25812.00,\"csamt\":0.00},\"osup_nongst\":{\"txval\":0.00,\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00}},\"itc_elg\":{\"itc_avl\":[{\"ty\":\"IMPG\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"IMPS\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"ISRC\",\"iamt\":0.00,\"camt\":25812.00,\"samt\":25812.00,\"csamt\":0.00},{\"ty\":\"ISD\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"OTH\",\"iamt\":50750.00,\"camt\":15515.00,\"samt\":15515.00,\"csamt\":0.00}],\"itc_rev\":[{\"ty\":\"RUL\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"OTH\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00}],\"itc_net\":{\"iamt\":50750.00,\"camt\":41327.00,\"samt\":41327.00,\"csamt\":0.00},\"itc_inelg\":[{\"ty\":\"RUL\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00},{\"ty\":\"OTH\",\"iamt\":0.00,\"camt\":0.00,\"samt\":0.00,\"csamt\":0.00}]},\"tx_pmt\":{\"tx_py\":[{\"trans_typ\":\"30002\",\"liab_ldg_id\":1726731,\"sgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cess\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"igst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00}},{\"trans_typ\":\"30003\",\"liab_ldg_id\":1726732,\"sgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cgst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"cess\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00},\"igst\":{\"intr\":0.00,\"tx\":0.00,\"fee\":0.00}}],\"pdcash\":[{\"liab_ldg_id\":\"1726731\",\"trans_typ\":\"30002\",\"ipd\":92115.00,\"cpd\":0.00,\"spd\":0.00,\"cspd\":0.00,\"i_intrpd\":0.00,\"c_int
rpd\":0.00,\"s_intrpd\":0.00,\"cs_intrpd\":0.00,\"i_lfeepd\":0.00,\"c_lfeepd\":0.00,\"s_lfeepd\":0.00,\"cs_lfeepd\":0.00},{\"liab_ldg_id\":\"1726732\",\"trans_typ\":\"30003\",\"ipd\":0.00,\"cpd\":25812.00,\"spd\":25812.00,\"cspd\":0.00,\"i_intrpd\":0.00,\"c_intrpd\":0.00,\"s_intrpd\":0.00,\"cs_intrpd\":0.00,\"i_lfeepd\":0.00,\"c_lfeepd\":0.00,\"s_lfeepd\":0.00,\"cs_lfeepd\":0.00}],\"pditc\":{\"liab_ldg_id\":\"1726731\",\"trans_typ\":\"30002\",\"i_pdi\":50750.00,\"i_pdc\":46137.00,\"i_pds\":46137.00,\"c_pdi\":0.00,\"c_pdc\":5990.00,\"s_pdi\":0.00,\"s_pds\":5990.00,\"cs_pdcs\":0.00}},\"intr_ltfee\":{\"ltfee_details\":{\"camt\":0.00,\"samt\":0.00}},\"gstin\":\"37ACUPY7876Q1ZR\",\"ret_period\":\"082017\"}","082017","GSTR3B","652704b1-e9e4-4f3f-a1c8-148f076ac2a9","2018-02-05 14:07:22.333000","2018-11-18 05:36:25.525000","FILED",NULL,NULL,NULL
</code></pre>