Python script should run only in subdirectories, not in b


My python script loops over subdirectories with os.walk. I run the script from the current working directory and want it to work only in the current working directory and its subdirectories. However, the script goes back to the root directory and picks up all files from there. For example, the directory structure is:

folder1
  - sub1
folder2
  - sub2
  - sub3

If I run the script in folder1, I only want the files in folder1 and sub1, but my script also uses the files from folder2, sub2 and sub3. My script is long, but I hope someone can give me a short answer on how to change all the os.walk lines in the script so that Python does not go into the other folders.

#next step
print('Start merging contig files')

for root, dirs, files in os.walk(os.getcwd()):
    filepath = os.path.join(root, 'genes.faa.genespercontig.csv')
    if os.path.isfile(filepath):
        with open(filepath, 'r') as f1:
            df1 = pd.read_csv(f1, header=None, delim_whitespace=True, names = ["contig", "genes"])
            df1['genome'] = os.path.basename(os.path.dirname(filepath))
    else:
        continue

    filepath = os.path.join(root, 'hmmer.analyze.txt.results.txt')
    if os.path.isfile(filepath):
        with open(filepath, 'r') as f2:
            df2 = pd.read_csv(f2, header=None, delim_whitespace=True, names = ["contig", "SCM"])
            df2['genome'] = os.path.basename(os.path.dirname(filepath))
    else:
        continue

    filepath = os.path.join(root, 'genes.fna.output_blastplasmiddb.out.count_plasmiddbhit.out')
    if os.path.isfile(filepath):
        with open(filepath, 'r') as f3:
            df3 = pd.read_csv(f3, header=None, delim_whitespace=True, names = ["contig", "plasmid_genes"])
            df3['genome'] = os.path.basename(os.path.dirname(filepath))
    else:
        continue

    #merge dataframes
    dfmerge1 = pd.merge(df1, df2, on=['genome', 'contig'], how='outer')
    df_end = pd.merge(dfmerge1, df3, on=['genome', 'contig'], how='outer')

    #set NaN in columns to 0
    nan_cols = df_end.columns[df_end.isnull().any(axis=0)]
    for col in nan_cols:
        df_end[col] = df_end[col].fillna(0).astype(int)

    #add column with genes/SCM en round to 2 decimals
    df_end['SCM/genes'] = df_end['SCM']/df_end['genes']
    df_end['SCM/genes'] = df_end['SCM/genes'].round(2)

    #add column with genes/plasmid_genes en round to 2 decimals
    df_end['plasmid_genes/genes'] = df_end['plasmid_genes']/df_end['genes']
    df_end['plasmid_genes/genes'] = df_end['plasmid_genes/genes'].round(2)      

    df_end.to_csv(os.path.join(root,'outputgenesdf.csv'))
print('extra columns;done')

#next step
#CURRENT DIRECTORY 
cd = os.path.dirname(os.getcwd())

# concatenate csv files
dfList = []

for root, dirs, files in os.walk(cd):
    for fname in files:
        if re.match("outputgenesdf.csv", fname):
            frame = pd.read_csv(os.path.join(root, fname))
            dfList.append(frame)    

df = pd.concat(dfList)
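
For context, a minimal sketch of what the two starting points used in this script actually walk, assuming the folder1/folder2 layout above and that the script is started from folder1 (os.walk(top) only ever visits top and the directories below it):

import os

cwd = os.getcwd()              # .../folder1 when the script is started there
parent = os.path.dirname(cwd)  # the directory that contains folder1 and folder2

print([root for root, dirs, files in os.walk(cwd)])     # folder1 and sub1 only
print([root for root, dirs, files in os.walk(parent)])  # also folder2, sub2 and sub3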

1 Answer

Regarding:

[...] and want that the script only works in the current working directory and its subdirectories.[...]

You could try this:

def next_file(directory=os.getcwd(), max_depth=0, depth=0):

    if max_depth < 0 or depth <= max_depth:

        for name in os.listdir(directory):
            with_path = os.path.join(directory, name)

            if os.path.isfile(with_path):
                yield with_path
            else:
                for a_file in next_file(directory=with_path, max_depth=max_depth, depth=depth+1):
                    yield a_file

and process the files with:

for a_file in next_file(max_depth=1):
    print('processing file: %s' % a_file)
    # do your stuff here

Use max_depth to control how many levels of nested directories are processed: 0 reads only the files in the current directory, and -1 processes all directories (like os.walk does).
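
For comparison, the same kind of depth limit can be expressed with os.walk itself by pruning the dirs list in place. This is only an illustrative, untested sketch (walk_limited is a made-up name; the max_depth convention mirrors the one above, with a negative value meaning no limit):

import os

def walk_limited(top, max_depth=0):
    # Behaves like os.walk(top), but never descends more than max_depth
    # levels below top; a negative max_depth means no limit.
    top = os.path.abspath(top)
    for root, dirs, files in os.walk(top):
        yield root, dirs, files
        rel = os.path.relpath(root, top)
        depth = 0 if rel == '.' else rel.count(os.sep) + 1
        if 0 <= max_depth <= depth:
            dirs[:] = []  # emptying dirs in place stops os.walk from descending

# current working directory and its direct subdirectories only
for root, dirs, files in walk_limited(os.getcwd(), max_depth=1):
    print(root)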

Edit

I made a small change to the file iteration method.

Here is a complete (untested) version of your script:

def next_file(current_dir=os.getcwd(), max_depth=0, depth=0):

    if max_depth < 0 or depth <= max_depth:

        for name in os.listdir(current_dir):
            with_path = os.path.join(current_dir, name)

            if os.path.isfile(with_path):
                yield current_dir, name
            else:
                for directory, name in next_file(current_dir=with_path, max_depth=max_depth, depth=depth+1):
                    yield directory, name


for directory, name in next_file(max_depth=1):
    print('file: %s' % name)

print('Start merging contig files')

## for root, dirs, files in os.walk(os.getcwd()):
for directory, name in next_file(max_depth=1):

    ## filepath = os.path.join(root, 'genes.faa.genespercontig.csv')
    filepath = os.path.join(directory, name)

    ## if os.path.isfile(filepath):
    if name == 'genes.faa.genespercontig.csv':
        with open(filepath, 'r') as f1:
            df1 = pd.read_csv(f1, header=None, delim_whitespace=True, names = ["contig", "genes"])
            ## df1['genome'] = os.path.basename(os.path.dirname(filepath))
            df1['genome'] = filepath
    ## else:  # Not necessary
    ##     continue

    ## filepath = os.path.join(root, 'hmmer.analyze.txt.results.txt')
    ## if os.path.isfile(filepath):
    if name == 'hmmer.analyze.txt.results.txt':
        with open(filepath, 'r') as f2:
            df2 = pd.read_csv(f2, header=None, delim_whitespace=True, names = ["contig", "SCM"])
            ## df2['genome'] = os.path.basename(os.path.dirname(filepath))
            df2['genome'] = filepath
    ## else:
    ##     continue

    ## filepath = os.path.join(root, 'genes.fna.output_blastplasmiddb.out.count_plasmiddbhit.out')
    ## if os.path.isfile(filepath):
    if name == 'genes.fna.output_blastplasmiddb.out.count_plasmiddbhit.out':
        with open(filepath, 'r') as f3:
            df3 = pd.read_csv(f3, header=None, delim_whitespace=True, names = ["contig", "plasmid_genes"])
            ## df3['genome'] = os.path.basename(os.path.dirname(filepath))
            df3['genome'] = filepath
    ## else:
    ##     continue

#merge dataframes
dfmerge1 = pd.merge(df1, df2, on=['genome', 'contig'], how='outer')
df_end = pd.merge(dfmerge1, df3, on=['genome', 'contig'], how='outer')

#set NaN in columns to 0
nan_cols = df_end.columns[df_end.isnull().any(axis=0)]
for col in nan_cols:
    df_end[col] = df_end[col].fillna(0).astype(int)

#add column with genes/SCM en round to 2 decimals
df_end['SCM/genes'] = df_end['SCM']/df_end['genes']
df_end['SCM/genes'] = df_end['SCM/genes'].round(2)

#add column with genes/plasmid_genes en round to 2 decimals
df_end['plasmid_genes/genes'] = df_end['plasmid_genes']/df_end['genes']
df_end['plasmid_genes/genes'] = df_end['plasmid_genes/genes'].round(2)      

#CURRENT DIRECTORY 
cd = os.path.dirname(os.getcwd())
df_end.to_csv(os.path.join(cd,'outputgenesdf.csv'))
print('extra columns;done')

#next step
# concatenate csv files
dfList = []

## I'm not sure what you want to achieve with this:
for root, dirs, files in os.walk(cd):
    for fname in files:
        if re.match("outputgenesdf.csv", fname):
            frame = pd.read_csv(os.path.join(root, fname))
            dfList.append(frame)    

df = pd.concat(dfList)
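
If the intent of this last step is simply to collect every outputgenesdf.csv that lies below the current working directory (rather than below its parent), a recursive glob would be one possible alternative; an untested sketch, not part of the original answer:

import glob
import os
import pandas as pd

# '**' with recursive=True matches the starting directory and any depth below it
paths = glob.glob(os.path.join(os.getcwd(), '**', 'outputgenesdf.csv'), recursive=True)
df = pd.concat([pd.read_csv(p) for p in paths])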

Note that your file-handling logic is untested (I only tested the file iteration method). I'm not familiar with pandas, but in each dataframe you set a genome attribute, df1['genome'], which now holds the path of the file. I'm not sure that is what you want.
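
If the original behaviour is what was wanted (store the name of the folder that contains the file rather than its full path), the same value can be recovered from the (directory, name) pair; a small sketch with made-up values:

import os

directory = '/data/folder1/sub1'                      # hypothetical path
name = 'genes.faa.genespercontig.csv'
filepath = os.path.join(directory, name)

print(os.path.basename(os.path.dirname(filepath)))    # 'sub1' (what the original script stored)
print(os.path.basename(directory))                    # 'sub1' (same value, taken from directory)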

Finally, I don't understand how your last step is supposed to merge the outputgenesdf.csv files into a single dataframe.

Hope this helps.
