我正在遍历一个复杂的 JSON 对象,它在 Python 中被加载为字典。下面是该 JSON 文件的示例,其中感兴趣的数据已用注释标出。
{
"name":"ns1:timeSeriesResponseType",
"nil":false,
"value":{
"queryInfo":{ },
"timeSeries":[
{
"variable":{ },
"values":[
{
"qualifier":[ ],
"censorCode":[ ],
"value":[
{
"codedVocabularyTerm":null,
"censorCode":null,
"offsetTypeID":null,
"accuracyStdDev":null,
"timeOffset":null,
"qualifiers":[
"P", # data of interest
"Ice" # data of interest
],
"qualityControlLevelCode":null,
"sampleID":null,
"dateTimeAccuracyCd":null,
"methodCode":null,
"codedVocabulary":null,
"sourceID":null,
"oid":null,
"dateTimeUTC":null,
"offsetValue":null,
"metadataTime":null,
"labSampleCode":null,
"methodID":null,
"value":"-999999",
"dateTime":"2015-02-24T03:30:00.000-05:00",
"offsetTypeCode":null,
"sourceCode":null
},
{
"codedVocabularyTerm":null,
"censorCode":null,
"offsetTypeID":null,
"accuracyStdDev":null,
"timeOffset":null,
"qualifiers":[ ],
"qualityControlLevelCode":null,
"sampleID":null,
"dateTimeAccuracyCd":null,
"methodCode":null,
"codedVocabulary":null,
"sourceID":null,
"oid":null,
"dateTimeUTC":null,
"offsetValue":null,
"metadataTime":null,
"labSampleCode":null,
"methodID":null,
"value":"-999999", # data of interest
"dateTime":"2015-02-24T04:00:00.000-05:00", # data of interest
"offsetTypeCode":null,
"sourceCode":null
}
],
"sample":[ ],
"source":[ ],
"offset":[ ],
"units":null,
"qualityControlLevel":[ ],
"method":[ ]
}
],
"sourceInfo":{ },
"name":"USGS:03193000:00060:00011"
},
{ }, # more of the needed data is stored in here
{ }, # more of the needed data is stored in here
{ } # more of the needed data is stored in here
]
},
"declaredType":"org.cuahsi.waterml.TimeSeriesResponseType",
"scope":"javax.xml.bind.JAXBElement$GlobalScope",
"globalScope":true,
"typeSubstituted":false
}
下面是我的代码,用于单步执行/遍历字典以获取我想要的数据,并将其存储在一个格式更简单的字典中:
^{pr2}$
我的问题是:作为 Python 新手,有人能指出我代码中明显的低效之处吗?我相信在很多年的时间里,<或者我的json文件的值都是无效的。对于这种层层嵌套的结构(字典中包含字典列表,列表中的字典又包含字典列表),有没有一种简单的方法可以搜索并返回其中的键值对?
编辑:
基于@Alex Martelli提供的解决方案,这里是代码的新的、更高效的精简版本:
# Building the output dictionary.
# Expects `result` (the parsed JSON response), `outputDict` and `outputList`
# to be defined by the surrounding script.
for series in result["value"]["timeSeries"]:
    # Fresh lists for every series: nothing can carry over into the next
    # series, so no copy-then-clear step on shared lists is needed.
    dateTimes = []
    values = []
    qualifiers = []

    # A missing "values" / "value" key simply contributes no readings.
    for group in series.get("values", []):
        for reading in group.get("value", []):
            # Each field is tested independently, so the three lists can end
            # up with different lengths when a reading lacks one of them.
            if "value" in reading:
                values.append(reading["value"])
            if "dateTime" in reading:
                dateTimes.append(reading["dateTime"])
            if "qualifiers" in reading:
                qualifiers.append(reading["qualifiers"])

    # Series without a "name" are not recorded.
    if "name" in series:
        seriesName = series["name"]
        outputList.append(seriesName)
        outputDict[seriesName] = {
            "dateTime": dateTimes,
            "values": values,
            "qualifiers": qualifiers,
        }
工作原理相同,少了 4 行代码,运行也更快。不错。
编辑:
基于@Two Bit Alchemist提出的解决方案,该方案同样有效:
# Building the output dictionary.
# Expects `result` (the parsed JSON response), `outputDict` and `outputList`
# to be defined by the surrounding script.
for series in result["value"]["timeSeries"]:
    # Debug output; the parenthesised form behaves the same on Python 2 and 3.
    print(series)

    # Only the first entry of "values" is read. A missing key or an empty
    # list raises KeyError / IndexError here rather than being skipped.
    readings = series["values"][0]["value"]

    # Per-series lists are built fresh, so nothing can carry over into the
    # next series and no copy-then-clear step on shared lists is needed.
    # Each "qualifiers" entry is itself a list, e.g. ["P", "Ice"].
    qualifiers = [reading['qualifiers'] for reading in readings]
    values = [reading['value'] for reading in readings]
    dateTimes = [reading['dateTime'] for reading in readings]

    # Series without a "name" are not recorded.
    if "name" in series:
        seriesName = series["name"]
        outputList.append(seriesName)
        outputDict[seriesName] = {
            "dateTime": dateTimes,
            "values": values,
            "qualifiers": qualifiers,
        }
我看到的唯一问题是,我始终无法完全确定 ["values"] 列表中的第一个位置就是我想要的数据。而且我失去了 "if" 语句所提供的检查——这些检查本应确保:当错误的查询语句返回了结果时,不会因此引入错误。
编辑:
# Messages for the HTTP error statuses that get a specific report; any other
# non-200 status is reported as "Unknown Response".
errorMessages = {
    400: "Bad Request",
    403: "Access Forbidden",
    404: "Gage location(s) not Found",
    500: "Internal Server Error",
    503: "Service Unavailable",
}

try:
    # `query` is expected to be defined by the surrounding script.
    # NOTE(review): the body is assumed to be JSON because of the settings
    # in the query -- confirm against the code that builds `query`.
    response = requests.get(url=query)

    # The returned JSON is only parsed when the HTTP status code indicates
    # a successful query execution.
    if response.status_code == 200:
        result = response.json()

        # Containers for the simplified result.
        outputDict = {}
        outputList = []

        # Building the output dictionary
        for series in result["value"]["timeSeries"]:
            # Debug output; the parenthesised form behaves the same on
            # Python 2 and 3.
            print(series)

            # Only the first entry of "values" is read. A missing key or an
            # empty list raises KeyError / IndexError, which the handler
            # below turns into an "Error" status.
            readings = series["values"][0]["value"]

            # Per-series lists are built fresh, so nothing can carry over
            # into the next series.
            # Each "qualifiers" entry is itself a list, e.g. ["P", "Ice"].
            qualifiers = [reading['qualifiers'] for reading in readings]
            values = [reading['value'] for reading in readings]
            dateTimes = [reading['dateTime'] for reading in readings]

            # Series without a "name" are not recorded.
            if "name" in series:
                seriesName = series["name"]
                outputList.append(seriesName)
                outputDict[seriesName] = {
                    "dateTime": dateTimes,
                    "values": values,
                    "qualifiers": qualifiers,
                }

        # Tracking how long it took to process the data.
        # `now` is expected to be set by the surrounding script.
        elapsed = time.time() - now
        print("Runtime: " + str(elapsed))
        out = {"Status": 'ok', "Results": [[{"myResult": outputDict}]]}
    else:
        reason = errorMessages.get(response.status_code, "Unknown Response")
        raise Exception(reason + ", " + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
except Exception as exc:
    # Catch Exception rather than everything, so that SystemExit and
    # KeyboardInterrupt still propagate.
    out = {"Status": 'Error', "Message": str(exc)}

# NOTE(review): the source lost its indentation; printing `out` on both the
# success and the error path is assumed -- confirm.
print(out)
就我对这个问题的理解,我认为你最初那种绕弯子的做法过于笨重,反而掩盖了一个非常简单的解决方案。如果我仍然理解有误或过度简化了,请指正。尽管这个结构非常复杂,但如果其中可变的部分只是 `timeSeries` 处列表的长度,你就可以访问该列表并对其进行迭代,在迭代过程中反复取出“感兴趣的数据”。我不清楚这些数据具体是什么,因此无法给出合适的示例数据结构,甚至无法为它在程序后续使用时的存储方式起个像样的变量名,所以我只把它们存进一个大列表里,仅用来说明我的意思:(原文此处的代码示例已缺失)
如果在我硬编码索引 0 的其他位置也存在重复,只需在那里引入 for 循环即可,例如:
^{pr2}$
如果你担心某些值不存在,只需准备好捕获字典(或类似对象)抛出的 `KeyError`。例如,在我写了如下代码的地方:(原文此处的代码示例已缺失)
如果 `value_list` 中的元素不足 2 个(即不满足 >= 2),可能会引发 `IndexError`;如果第二个元素不是一个将 "value" 映射到某个值的字典,则可能引发 `KeyError`。你可以捕获其中一个或两个异常,把它们合并处理或分别处理,也可以忽略它们继续执行。(原文此处依次给出了几种写法,以“或者”分隔,代码示例已缺失)
你的 `# handle whatever` 部分的代码完全可以只是 `pass`,意思是“我知道这种情况可能发生,但不必为此大惊小怪”。如果你不捕获这些异常,它们就会“冒泡”到执行上下文的顶层,并导致程序崩溃。

你问“我的代码中有没有明显的低效之处”,答案是肯定的,尤其是在你对字典进行循环的地方(这样会按顺序取出字典的所有键,时间复杂度为 `O(N)`,即所需时间与字典中键的数量成正比),而不是直接把它们当作字典来使用(这只需要 `O(1)` 的时间,即常数时间,而且相当快)。例如,在你这样写的地方:(原文此处的代码示例已缺失)
你应该改为:
^{pr2}$
更深层的结构也可作类似处理。还可以进一步优化,例如:(原文此处的代码示例已缺失)
以避免重复索引(同样适用于更深的层级)。
相关问题 更多 >
编程相关推荐