通过 Python 向 Couchbase 批量插入（bulk insertion）数据

# Load rows from a CSV file and upsert each one into a Couchbase bucket,
# one document per row, keyed by the row's timestamp in microseconds.
import calendar
import csv
import datetime
import time

from couchbase import Couchbase
from couchbase.bucket import Bucket
from couchbase.exceptions import CouchbaseError
from couchbase.exceptions import CouchbaseTransientError

BYTES_PER_BATCH = 1024 * 256  # 256K

c = Bucket('couchbase://localhost/bulk-load')

with open('/home/royshah/Desktop/bulk_try/roy.csv') as csvfile:
    # The first four lines are skipped (presumably header rows -- confirm).
    lines = csvfile.readlines()[4:]

for line in lines:
    data_tmp = line.strip().split(',')

    # The first column is a quoted timestamp such as
    # "2024-05-04 09:36:01.123456"; drop the quotes before parsing.
    strDate = data_tmp[0].replace("\"", "")
    timerecord = datetime.datetime.strptime(strDate, '%Y-%m-%d %H:%M:%S.%f')

    # calendar.timegm() interprets the time tuple as UTC and has whole-second
    # resolution, so the microseconds are added back separately.
    ts = calendar.timegm(timerecord.timetuple()) * 1000000 + timerecord.microsecond

    datastore = [ts] + data_tmp[1:]

    # Build the document body on the fly from the remaining CSV columns.
    # NOTE(review): the 'col1 ' key carries a trailing space in the original;
    # kept byte-for-byte -- confirm whether that is intended.
    stre = {'col1 ': datastore[1],
            'col2': datastore[2],
            'col3': datastore[3],
            'col4': datastore[4],
            'col5': datastore[5],
            'col6': datastore[6]}

    # datastore[0] (the timestamp) is the document id and stre is the value.
    # The original called the undefined name `cb`; the bucket is bound to `c`.
    c.upsert(str(datastore[0]), stre)

1条回答

网友

1楼 · 发布于 2024-05-04 09:36:01

我尝试把官方文档（docs）中的示例改编成了适合您用例的版本。您可能还需要调整一两处细节，但整体思路应该是清楚的。

# Bulk-load a CSV file into a Couchbase bucket.
#
# Rows are grouped into batches of roughly BYTES_PER_BATCH bytes and each
# batch is written with a single upsert_multi() call. When a batch fails with
# a transient error, only the items that failed are retried.
import calendar
import datetime
import json

from couchbase.bucket import Bucket
from couchbase.exceptions import CouchbaseTransientError

BYTES_PER_BATCH = 1024 * 256  # 256K

c = Bucket('couchbase://localhost/bulk-load')

batches = []
cur_batch = {}
cur_size = 0
batches.append(cur_batch)

with open('/home/royshah/Desktop/bulk_try/roy.csv') as csvfile:
    # The first four lines are skipped (presumably header rows -- confirm).
    lines = csvfile.readlines()[4:]

for line in lines:
    # Format the data: the first column is a quoted timestamp.
    data_tmp = line.strip().split(',')
    strDate = data_tmp[0].replace("\"", "")
    timerecord = datetime.datetime.strptime(strDate,
                                            '%Y-%m-%d %H:%M:%S.%f')

    # calendar.timegm() interprets the time tuple as UTC and has whole-second
    # resolution, so the microseconds are added back separately.
    timestamp = (calendar.timegm(timerecord.timetuple()) * 1000000
                 + timerecord.microsecond)

    # Build the key/value pair for this row.
    datastore = [timestamp] + data_tmp[1:]
    # NOTE(review): the 'col1 ' key carries a trailing space in the original;
    # kept byte-for-byte -- confirm whether that is intended.
    value = {'col1 ': datastore[1],
             'col2': datastore[2],
             'col3': datastore[3],
             'col4': datastore[4],
             'col5': datastore[5],
             'col6': datastore[6]}

    key = str(datastore[0])
    cur_batch[key] = value

    # len() of a dict is its entry count, not its size, so estimate the
    # payload from the JSON encoding instead. The extra 24 is the per-item
    # allowance used by the original snippet (presumably protocol overhead).
    cur_size += len(key) + len(json.dumps(value)) + 24

    if cur_size > BYTES_PER_BATCH:
        cur_batch = {}
        batches.append(cur_batch)
        cur_size = 0

# The last batch is empty when the file had no rows or the final row closed
# a batch; drop it so an empty request is never sent.
batches = [b for b in batches if b]

print("Have {} batches".format(len(batches)))
num_completed = 0
while batches:
    batch = batches[-1]
    try:
        c.upsert_multi(batch)
    except CouchbaseTransientError as e:
        print(e)
        ok, fail = e.split_results()
        # Rebuild the retry batch from the batch that was just attempted;
        # every failed key is by construction present in it.
        new_batch = dict((k, batch[k]) for k in fail)
        batches.pop()
        if new_batch:
            batches.append(new_batch)
        num_completed += len(ok)
        print("Retrying {}/{} items".format(len(new_batch), len(ok)))
    else:
        num_completed += len(batch)
        batches.pop()

相关问题更多 >

编程相关推荐

热门问题

热门文章