在 Colab notebook 中定义 Google Drive（谷歌云端硬盘）中文件的路径

# Fetch the Cornell Movie-Dialogs corpus archive from `origin` and unpack it
# (extract=True). The result is used below as the path of the local archive.
path_to_zip = tf.keras.utils.get_file(
    'cornell_movie_dialogs.zip',
    origin='http://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip',
    extract=True)

# The corpus folder is looked up next to the archive file.
# NOTE(review): assumes the extracted folder is named exactly
# "cornell movie-dialogs corpus" and sits beside the archive - confirm
# against the installed Keras version.
path_to_dataset = os.path.join(
    os.path.dirname(path_to_zip), "cornell movie-dialogs corpus")

# The two corpus files read by load_conversations().
path_to_movie_lines = os.path.join(path_to_dataset, 'movie_lines.txt')
path_to_movie_conversations = os.path.join(
    path_to_dataset, 'movie_conversations.txt')

def preprocess_sentence(sentence):
    """Normalize one utterance into lowercase, space-separated tokens.

    The sentence is lowercased and stripped, each of ``? . ! ,`` is
    surrounded by spaces so it becomes its own token, and every run of
    characters other than ASCII letters and those four punctuation marks
    is replaced by a single space.

    Args:
        sentence: Raw text of a single utterance.

    Returns:
        The cleaned sentence with no leading or trailing whitespace.
    """
    sentence = sentence.lower().strip()
    # creating a space between a word and the punctuation following it
    # eg: "he is a boy." => "he is a boy ."
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    # collapse runs of spaces and double quotes into a single space
    sentence = re.sub(r'[" "]+', " ", sentence)
    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    sentence = re.sub(r"[^a-zA-Z?.!,]+", " ", sentence)
    # No start/end tokens are added here; callers that need them must add
    # them separately.
    return sentence.strip()


def load_conversations():
    """Build (input, output) sentence pairs from the movie dialog files.

    Reads ``path_to_movie_lines`` to map each line id to its text, then
    walks ``path_to_movie_conversations`` and pairs every utterance with
    the one that follows it in the same conversation. Both sides are
    passed through ``preprocess_sentence``. Loading stops as soon as
    ``MAX_SAMPLES`` pairs have been collected.

    Fields in both files are separated by ``' +++$+++ '``. Blank lines are
    skipped. Undecodable bytes are ignored in both files.

    Returns:
        A tuple ``(inputs, outputs)`` of equal-length lists of cleaned
        sentences, where ``outputs[i]`` is the reply to ``inputs[i]``.

    Raises:
        IndexError: If a non-blank line has fewer fields than expected.
        KeyError: If a conversation references an unknown line id.
    """
    separator = ' +++$+++ '

    # dictionary of line id to text
    id2line = {}
    with open(path_to_movie_lines, errors='ignore') as file:
        for line in file:
            if not line.strip():
                continue
            parts = line.replace('\n', '').split(separator)
            id2line[parts[0]] = parts[4]

    inputs, outputs = [], []
    with open(path_to_movie_conversations, 'r', errors='ignore') as file:
        for line in file:
            if not line.strip():
                continue
            parts = line.replace('\n', '').split(separator)
            # The last field looks like "['L1', 'L2']": drop the brackets,
            # split on ', ', then drop the quotes around each line id.
            conversation = [
                line_id[1:-1] for line_id in parts[3][1:-1].split(', ')
            ]
            for prompt_id, reply_id in zip(conversation, conversation[1:]):
                inputs.append(preprocess_sentence(id2line[prompt_id]))
                outputs.append(preprocess_sentence(id2line[reply_id]))
                if len(inputs) >= MAX_SAMPLES:
                    return inputs, outputs
    return inputs, outputs

1条回答

网友

1楼 · 发布于 2024-06-28 23:40:04

我想你不能直接从Colab使用Google驱动器的文件。首先，您必须在colab中运行以下代码才能将Google驱动器装载到colab：

# Mount Google Drive into the Colab runtime at /gdrive.
from google.colab import drive
drive.mount('/gdrive')
# IPython magic (works only in a notebook cell): switch the working
# directory to the mount point.
%cd /gdrive

那么

^{pr2}$

并将数据集路径更改为：

# NOTE(review): 'data actual' looks like the answerer's own folder name -
# presumably it should be replaced with the folder that holds your corpus files.
path_to_dataset = 'data actual'

或者您不再需要 path_to_dataset 这个变量，只需删除它，然后将代码改为：

# Bare file names are relative paths, so they are resolved against the
# current working directory.
path_to_movie_lines = 'movie_lines.txt' 
path_to_movie_conversations = 'movie_conversations.txt'

试着删除 path_to_dataset。这两种方法中应该有一种是可行的。

在我以前的项目中，我没有使用 path_to_dataset，而是直接使用 dataset.txt 文件，就像它们与我的 notebook 在同一个文件夹中一样。

对不起，我的英语不好。

相关问题更多 >

编程相关推荐

热门问题

热门文章