from google.cloud import storage

# Instantiate a client
storage_client = storage.Client()

# Get the GCS bucket (bucket_name is the name of your bucket, as a string)
bucket = storage_client.get_bucket(bucket_name)

# Get all blobs in the bucket (including all "subdirectories")
blobs_all = list(bucket.list_blobs())

# Get blobs in a specific subdirectory
blobs_specific = list(bucket.list_blobs(prefix='path/to/subfolder/'))
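Because GCS is flat, "subdirectories" are just shared name prefixes. If you only want the entries one level down, list_blobs can group them for you: with delimiter='/', the blobs the iterator yields are the "files" at that level, and once the iterator has been consumed, its prefixes attribute holds the "subdirectory" prefixes. A minimal sketch, assuming a placeholder bucket name and prefix:

from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket('my-bucket')  # placeholder bucket name

# delimiter='/' stops each listed name at the first '/' past the prefix,
# so deeper objects are rolled up into iterator.prefixes instead.
iterator = bucket.list_blobs(prefix='path/to/', delimiter='/')
files = [blob.name for blob in iterator]  # consume the iterator first
subdirs = sorted(iterator.prefixes)       # populated during iteration
print(files)
print(subdirs)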
import json

import googleapiclient.discovery

BUCKET = 'bucket-sounds'

def create_service():
    return googleapiclient.discovery.build('storage', 'v1')

def list_bucket(bucket):
    """Returns a list of metadata for the objects within the given bucket."""
    service = create_service()

    # Create a request to objects.list to retrieve a list of objects.
    fields_to_return = 'nextPageToken,items(name,size,contentType,metadata(my-key))'
    # req = service.objects().list(bucket=bucket, fields=fields_to_return)  # returns everything
    # req = service.objects().list(bucket=bucket, fields=fields_to_return, prefix='UrbanSound')  # returns everything; UrbanSound is the top-level dir in the bucket
    # req = service.objects().list(bucket=bucket, fields=fields_to_return, prefix='UrbanSound/FREE')  # returns the file FREESOUNDCREDITS.TXT
    # req = service.objects().list(bucket=bucket, fields=fields_to_return, prefix='UrbanSound/FREESOUNDCREDITS.txt', delimiter='/')  # same as above
    # req = service.objects().list(bucket=bucket, fields=fields_to_return, prefix='UrbanSound/data/dog_bark', delimiter='/')  # returns nothing
    req = service.objects().list(bucket=bucket, fields=fields_to_return, prefix='UrbanSound/data/dog_bark/', delimiter='/')  # returns the files in the dog_bark dir

    all_objects = []
    # If there are too many items to list in one request, list_next() will
    # automatically handle paging with the pageToken.
    while req:
        resp = req.execute()
        all_objects.extend(resp.get('items', []))
        req = service.objects().list_next(req, resp)
    return all_objects

# usage
print(json.dumps(list_bucket(BUCKET), indent=2))
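Since list_bucket returns the raw metadata dicts from the JSON API, pulling out just the object names client-side is a one-liner (a small usage sketch reusing list_bucket from above):

names = [obj['name'] for obj in list_bucket(BUCKET)]
print(names)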
import sys

from google.cloud import storage

BUCKET = 'bucket-sounds'

# Create a Cloud Storage client.
gcs = storage.Client()

def my_list_bucket(bucket_name, limit=sys.maxsize):
    """Print the names of up to `limit` objects in the bucket."""
    a_bucket = gcs.lookup_bucket(bucket_name)
    bucket_iterator = a_bucket.list_blobs()
    for resource in bucket_iterator:
        print(resource.name)
        limit = limit - 1
        if limit <= 0:
            break

my_list_bucket(BUCKET, limit=5)
Here is an update to this answer thread:
You can use the GCS API client library for Python. See the Samples and Libraries for Google Cloud Storage documentation page for the relevant links to documentation and downloads.
In your example, first I'd point out that you're confusing the term "bucket". I recommend reading the Key Terms page of the documentation. What you're talking about are object name prefixes.
You can start with the list-objects.py sample on GitHub. Looking at the list reference page, you'll want to pass prefix=abc/xyz and delimiter=/.
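As a rough sketch of that call (the bucket name and prefix below are placeholders, not values from this thread), the JSON API puts the matching object names in items and the grouped "subdirectory" prefixes in a separate prefixes list:

import googleapiclient.discovery

service = googleapiclient.discovery.build('storage', 'v1')

# prefix narrows the listing; delimiter='/' rolls deeper names into "prefixes".
req = service.objects().list(bucket='my-bucket', prefix='abc/xyz/', delimiter='/',
                             fields='items(name),prefixes,nextPageToken')
resp = req.execute()
print([item['name'] for item in resp.get('items', [])])  # the "files"
print(resp.get('prefixes', []))                          # the "subdirectories"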
I also needed to simply list the contents of a bucket. Ideally I wanted something similar to what tf.gfile provides; tf.gfile supports determining whether an entry is a file or a directory.
I tried the various links provided by @jterrace above, but my results were not ideal, so it is worth showing them.
Given a bucket that contains "directories" and "files", it is hard to navigate the "filesystem" to find items of interest. I've provided some comments in the code quoted above about how it behaves.
In both cases, I was using a Datalab notebook with credentials included. Given these results, I will need to use string parsing to determine which files are in a particular directory. If anyone knows how to extend these methods, or another method that parses directories similarly to tf.gfile, please reply.
Method one
This produces results like the following:
Method two
This produces output like the following.
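For the tf.gfile-style behavior asked about above, tf.io.gfile itself understands gs:// paths when TensorFlow is installed with GCS filesystem support; listdir gives the entries one level down and isdir distinguishes files from directories. A minimal sketch under that assumption, using this thread's bucket path as an example:

import tensorflow as tf

path = 'gs://bucket-sounds/UrbanSound/data'  # example path from this thread

# listdir returns the immediate entries, like os.listdir.
for entry in tf.io.gfile.listdir(path):
    full = path + '/' + entry.rstrip('/')
    kind = 'dir ' if tf.io.gfile.isdir(full) else 'file'
    print(kind, entry)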