使用GEOparse合并探针ID和基因ID

2024-10-04 11:35:07 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试使用Python包GEOparse下载微阵列数据。我已经到了合并探针和基因ID的步骤:我想用ENTREZ_GENE_ID替换ID_REF。

然而,这行不通。以下是我目前所写的代码,我参照的是这篇教程:https://geoparse.readthedocs.io/en/latest/Analyse_hsa-miR-124a-3p_transfection_time-course.html

# Import tools
import GEOparse
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Fetch the three GEO series in order, storing the downloads in one folder.
download_dir = "C:/Users/Highf_000/PycharmProjects/TFTest"
gse1, gse2, gse3 = (
    GEOparse.get_GEO(geo=accession, destdir=download_dir)
    for accession in ("GSE99039", "GSE6613", "GSE72267")
)

# Load the per-series GSM lists from the local CSV files.
# NOTE(review): each file is presumably one GSM accession per line, since the
# lists are later used to select sample columns -- confirm against the files.
def _read_lines(path):
    """Return the lines of the text file at *path*, without line endings."""
    with open(path) as handle:
        return handle.read().splitlines()


GSE99039_GPL570 = _read_lines("GSE99039_GPL570.csv")
GSE6613_GPL96 = _read_lines("GSE6613_GPL96.csv")
GSE72267_GPL571 = _read_lines("GSE72267_GPL571.csv")

# Attach each series' phenotype table to the series object as `.gsm` and
# print it. Every series must read its OWN phenotype_data; assigning
# gse1.phenotype_data to gse2/gse3 would show the wrong samples for them.
gse1.gsm = gse1.phenotype_data
print(gse1.gsm)
gse2.gsm = gse2.phenotype_data
print(gse2.gsm)
gse3.gsm = gse3.phenotype_data
print(gse3.gsm)

# Look up the platform (GPL) table attached to each series.
# NOTE(review): these are bare expressions, so when run as a script the
# results are discarded; they are only echoed in an interactive session or
# notebook. Presumably these tables carry the probe annotation (e.g. the
# ENTREZ_GENE_ID column) needed for the probe-to-gene merge -- confirm, and
# assign them to variables if they are to be used.
gse1.gpls['GPL570'].table
gse2.gpls['GPL96'].table
gse3.gpls['GPL571'].table

# gse1: pivot the 'VALUE' column across samples, then keep only the columns
# named in GSE99039_GPL570.
gse1_values = gse1.pivot_samples('VALUE')
pivoted_control_samples = gse1_values[GSE99039_GPL570]
print(pivoted_control_samples)


# gse1: drop the least-expressed probes.
# Per-probe median across the selected samples (one value per row).
pivoted_control_samples_average = pivoted_control_samples.median(axis=1)
# Print number of probes before filtering
print("Number of probes before filtering: ", len(pivoted_control_samples_average))
# The cut-off is the 25th percentile of the per-probe medians (not the
# literal value 0.25); probes at or above it are kept.
expression_threshold = pivoted_control_samples_average.quantile(0.25)
expressed_probes = pivoted_control_samples_average[pivoted_control_samples_average >= expression_threshold].index.tolist()
# Print number of probes at or above the cut-off
print("Number of probes above threshold: ", len(expressed_probes))
# Restrict the full gse1 matrix to the retained probes. `.loc` does the
# label-based row selection; the old `.ix` indexer was removed in pandas 1.0
# and raises AttributeError there.
samples = gse1.pivot_samples("VALUE").loc[expressed_probes]

Tags: importdataascontrolsamplesprintaveragegsm