我已经从Matlab过渡到Python,我面临着索引/子集设置方面的问题。我有两个数据帧。一个是CT,另一个是oco。我必须根据lat/lon从oco中提取值,并从CT数据帧中提取一些余量。我的Matlab代码运行良好,但当我尝试使用Python时,它给出了不同的值
以下是Matlab代码:
clc
clear
dataleng = [];
myFolderct = '/Users/farhanmustafa/Documents/analysis2/year2012/';
if ~isdir(myFolderct)
errorMessage = sprintf('Error: The following folder does not exist:\n%s', myFolder);
uiwait(warndlg(errorMessage));
return;
end
dategg = [];
CTall = []; SXCT = []; GO1 = [];
myFolder = '/Users/farhanmustafa/Documents/analysis2/gosat2012/';
if ~isdir(myFolder)
errorMessage = sprintf('Error: The following folder does not exist:\n%s', myFolder);
uiwait(warndlg(errorMessage));
return;
end
filePattern = fullfile(myFolder,'*.nc4');
theFiles = dir(filePattern);
dataleng = [dataleng;length(theFiles)];
for k = 1:1
baseFileName = theFiles(k).name;
fullFileName = fullfile(myFolder, baseFileName);
fprintf(1, 'Now reading %s\n', fullFileName);
ncid = netcdf.open(fullFileName,'NC_NOWRITE');
varid = netcdf.inqVarID(ncid,'longitude');
lon = netcdf.getVar(ncid,varid);
varid = netcdf.inqVarID(ncid,'latitude');
lat = netcdf.getVar(ncid,varid);
varid = netcdf.inqVarID(ncid,'xco2');
xco2 = netcdf.getVar(ncid,varid);
varid = netcdf.inqVarID(ncid,'xco2_uncertainty');
err = netcdf.getVar(ncid,varid);
varid = netcdf.inqVarID(ncid,'xco2_quality_flag');
qflag = netcdf.getVar(ncid,varid);
qflagd = double(qflag);
varid = netcdf.inqVarID(ncid,'xco2_averaging_kernel');
KA = netcdf.getVar(ncid,varid);
KAm = flip(KA);
varid = netcdf.inqVarID(ncid,'pressure_levels');
ocopl = netcdf.getVar(ncid,varid);
ocoplm = flip(ocopl*100);
varid = netcdf.inqVarID(ncid,'pressure_weight');
pw = netcdf.getVar(ncid,varid);
pwm = flip(pw);
varid = netcdf.inqVarID(ncid,'xco2_apriori');
Xa = netcdf.getVar(ncid,varid);
Xam = Xa';
varid = netcdf.inqVarID(ncid,'co2_profile_apriori');
Pa = netcdf.getVar(ncid,varid);
Pam = flip(Pa);
varid = netcdf.inqVarID(ncid,'time');
t = netcdf.getVar(ncid,varid);
t = double(t)/86400 + datenum([1970 01 01 00 00 00]);
[Y,M,D,H,MM,SS] = datevec(double(t));
oco = [t,Y,M,D,H,lon,lat,xco2,err, qflagd];
qflag_filter = (oco(:,10)==0);
t = oco(qflag_filter,1);
Y = oco(qflag_filter,2);
M = oco(qflag_filter,3);
D = oco(qflag_filter,4);
H = oco(qflag_filter,5);
lon = oco(qflag_filter,6);
lat = oco(qflag_filter,7);
xco2 = oco(qflag_filter,8);
err = oco(qflag_filter,9);
qflagd = oco(qflag_filter,10);
oco = [t,Y,M,D,H,lon,lat,xco2,err, qflagd];
filePatternct = fullfile(myFolderct,'*.nc');
theFilesct = dir(filePatternct);
dataleng = [dataleng;length(theFilesct)];
baseFileNamect = theFilesct(k).name;
fullFileNamect = fullfile(myFolderct, baseFileNamect);
fprintf(1, 'Now reading %s\n', fullFileNamect);
ncid = netcdf.open(fullFileNamect,'NC_NOWRITE');
varid = netcdf.inqVarID(ncid,'longitude') ;
lonct = netcdf.getVar(ncid,varid);
varid = netcdf.inqVarID(ncid,'latitude');
latct = netcdf.getVar(ncid,varid);
varid = netcdf.inqVarID(ncid,'co2');
ct = double(netcdf.getVar(ncid,varid));
varid = netcdf.inqVarID(ncid,'pressure');
ctgph = double(netcdf.getVar(ncid,varid));
varid = netcdf.inqVarID(ncid,'time');
tct = netcdf.getVar(ncid,varid);
datect = double(tct) + datenum([2000 01 01 00 00 00]);
[Yct,Mct,Dct,HHct] = datevec(double(tct) + datenum([2000 01 01 00 00 00]));
netcdf.close(ncid)
l1 = find(lonct>=28.5 & lonct<=142.5);
l2 = find(latct>=-7 & latct<=55);
CT =[];
CTgph = [];
load /Users/farhanmustafa/Documents/MATLAB/grid_arrengiment_index2.mat
for ii = 1:3:23
ih = find(HHct==ii);
ct1 = ct(l1,l2,1,ih); ct2 = ct(l1,l2,2,ih);ct3 = ct(l1,l2,3,ih);
ct4 = ct(l1,l2,4,ih);ct5 = ct(l1,l2,5,ih);ct6 = ct(l1,l2,6,ih);
ct7 = ct(l1,l2,7,ih);ct8 = ct(l1,l2,8,ih);ct9 = ct(l1,l2,9,ih);
ct10 = ct(l1,l2,10,ih);ct11 = ct(l1,l2,11,ih);ct12 = ct(l1,l2,12,ih);
ct13 = ct(l1,l2,13,ih);ct14 = ct(l1,l2,14,ih);ct15 = ct(l1,l2,15,ih);
ct16 = ct(l1,l2,16,ih);ct17 = ct(l1,l2,17,ih);ct18 = ct(l1,l2,18,ih);
ct19 = ct(l1,l2,19,ih);ct20 = ct(l1,l2,20,ih);ct21 = ct(l1,l2,21,ih);
ct22 = ct(l1,l2,22,ih);ct23 = ct(l1,l2,23,ih);ct24 = ct(l1,l2,24,ih);
ct25 = ct(l1,l2,25,ih);
gph1 = ctgph(l1,l2,1,ih);gph2 = ctgph(l1,l2,2,ih);gph3 = ctgph(l1,l2,3,ih);
gph4 = ctgph(l1,l2,4,ih);gph5 = ctgph(l1,l2,5,ih);gph6 = ctgph(l1,l2,6,ih);
gph7 = ctgph(l1,l2,7,ih);gph8 = ctgph(l1,l2,8,ih);gph9 = ctgph(l1,l2,9,ih);
gph10 = ctgph(l1,l2,10,ih);gph11 = ctgph(l1,l2,11,ih);gph12 = ctgph(l1,l2,12,ih);
gph13 = ctgph(l1,l2,13,ih);gph14 = ctgph(l1,l2,14,ih);gph15 = ctgph(l1,l2,15,ih);
gph16 = ctgph(l1,l2,16,ih);gph17 = ctgph(l1,l2,17,ih);gph18 = ctgph(l1,l2,18,ih);
gph19 = ctgph(l1,l2,19,ih);gph20 = ctgph(l1,l2,20,ih);gph21 = ctgph(l1,l2,21,ih);
gph22 = ctgph(l1,l2,22,ih);gph23 = ctgph(l1,l2,23,ih);gph24 = ctgph(l1,l2,24,ih);
gph25 = ctgph(l1,l2,25,ih);
lonaf= lonct(l1);lataf = latct(l2);
ct1 = ct1(:);ct2 = ct2(:);ct3 = ct3(:);ct4 = ct4(:);ct5 = ct5(:);
ct6 = ct6(:);ct7 = ct7(:);ct8 = ct8(:);ct9 = ct9(:);ct10 = ct10(:);
ct11 = ct11(:);ct12 = ct12(:);ct13 = ct13(:);ct14 = ct14(:);ct15 = ct15(:);
ct16 = ct16(:);ct17 = ct17(:);ct18 = ct18(:);ct19 = ct19(:);ct20 = ct20(:);
ct21 = ct21(:);ct22 = ct22(:);ct23 = ct23(:);ct24 = ct24(:);ct25 = ct25(:);
f = ~isnan(gph1);
gph1 = gph1(:);gph2 = gph2(:);gph3 = gph3(:);gph4 = gph4(:);gph5 = gph5(:);
gph6 = gph6(:);gph7 = gph7(:);gph8 = gph8(:);gph9 = gph9(:);gph10 = gph10(:);
gph11 = gph11(:);gph12 = gph12(:);gph13 = gph13(:);gph14 = gph14(:);gph15 = gph15(:);
gph16 = gph16(:);gph17 = gph17(:);gph18 = gph18(:);gph19 = gph19(:);gph20 = gph20(:);
gph21 = gph21(:);gph22 = gph22(:);gph23 = gph23(:);gph24 = gph24(:);gph25 = gph25(:);
[n,m,] = find(f==1);
yy = [];
for ii = 1:length(n)
yy = [yy;datect(ih),Yct(ih),Mct(ih),Dct(ih),HHct(ih)];
end
CT = [CT;yy(1:length(n),:),lonaf(n),lataf(m),ct1,ct2,ct3,ct4,ct5,ct6,ct7,ct8...
,ct9,ct10,ct11,ct12,ct13,ct14,ct15,ct16,ct17,ct18,ct19,ct20,ct21,ct22,ct23,ct24,ct25];
CTgph = [CTgph;yy(1:length(n),:),lonaf(n),lataf(m),gph1,gph2,gph3,gph4,gph5,gph6,gph7,gph8...
,gph9,gph10,gph11,gph12,gph13,gph14,gph15,gph16,gph17,gph18,gph19,gph20,gph21,gph22,gph23,gph24,gph25];
end
CTall = [CTall;CT];
di = CT(:,1); ilon = CT(:,6); ilat=CT(:,7);
gd= oco(:,1); glon = oco(:,6); glat=oco(:,7);
for ii = 1:length(CT)
indx = find(glon(:)<=(ilon(ii)+1.5) & glon(:)>=(ilon(ii)-1.5) & ...
glat(:)<=(ilat(ii)+1.5) & glat(:)>=(ilat(ii)-1.5));
if indx~=0
GO1 = [GO1;oco(indx,:)];
end
end
end
Python代码如下所示:
import os
import glob
import numpy as np
import xarray as xr
from datetime import datetime as dt
from datetime import timedelta
import pandas as pd
import matplotlib.pyplot as plt
import re
def find_files_in_dir(path, substrs):
file_list = []
for root, directory, files in os.walk(path):
for f in files:
for s in substrs:
if s in f:
file_list.append(os.path.join(root, f))
file_list.sort()
return file_list
myFolder = '/Users/farhanmustafa/Documents/analysis2/gosat2012/'
sat_substrs = ['.nc4']
myFolderct = '/Users/farhanmustafa/Documents/analysis2/year2012/'
ct_substrs = ['.nc']
sat_file_list = find_files_in_dir(myFolder, sat_substrs)
ct_file_list = find_files_in_dir(myFolderct, ct_substrs)
CTall = pd.DataFrame(list(zip(*[]))).add_prefix('Col')
GO1 = pd.DataFrame(list(zip(*[]))).add_prefix('Col')
for i in range(len(sat_file_list)):
print("Reading", sat_file_list[i])
ds = xr.open_dataset(sat_file_list[i])
ds = ds.swap_dims({'sounding_id': 'time'})
lon_sat = ds.longitude.values.tolist()
lat_sat = ds.latitude.values.tolist()
xco2 = ds.xco2.values.tolist()
err = ds.xco2_uncertainty.values.tolist()
qflag = ds.xco2_quality_flag.values.tolist()
KA = ds.xco2_averaging_kernel
KAm = np.flip(KA, 0)
ocopl = ds.pressure_levels
ocoplm = np.flip((ocopl*100), 0)
pw = ds.pressure_weight
pwm = np.flip(pw)
Xa = ds.xco2_apriori
Xam = Xa.T
Pa = ds.co2_profile_apriori
time_sat = ds.time.values.tolist()
time_sat = pd.to_datetime(time_sat)
oco = pd.DataFrame(list(zip(*[time_sat, lon_sat, lat_sat, xco2, err, qflag]))).add_prefix('Col')
ds.close()
print("Reading", ct_file_list[i])
ds = xr.open_dataset(ct_file_list[i])
longitude=slice(28.5, 142.5)
latitude=slice(-7, 55)
ct = ds.co2.sel(longitude=longitude, latitude=latitude)
ctgph = ds.pressure
#time = ds.time.dt.hour[7]
#time = time.time.values
#ct_df = pd.DataFrame(list(zip(*[]))).add_prefix('Col')
CT = pd.DataFrame(list(zip(*[]))).add_prefix('Col')
#gph_df = pd.DataFrame(list(zip(*[]))).add_prefix('Col')
CTgph = pd.DataFrame(list(zip(*[]))).add_prefix('Col')
for i in [0,1,2,3,4,5,6,7]:
time = ds.time.dt.hour[i]
time = time.time.values
ct1 = (ct.sel(level=1)).isel(time=i).values.flatten()
ct2 = (ct.sel(level=2)).isel(time=i).values.flatten()
ct3 = (ct.sel(level=3)).isel(time=i).values.flatten()
ct4 = (ct.sel(level=4)).isel(time=i).values.flatten()
ct5 = (ct.sel(level=5)).isel(time=i).values.flatten()
ct6 = (ct.sel(level=6)).isel(time=i).values.flatten()
ct7 = (ct.sel(level=7)).isel(time=i).values.flatten()
ct8 = (ct.sel(level=8)).isel(time=i).values.flatten()
ct9 = (ct.sel(level=9)).isel(time=i).values.flatten()
ct10 = (ct.sel(level=10)).isel(time=i).values.flatten()
ct11 = (ct.sel(level=11)).isel(time=i).values.flatten()
ct12 = (ct.sel(level=12)).isel(time=i).values.flatten()
ct13 = (ct.sel(level=13)).isel(time=i).values.flatten()
ct14 = (ct.sel(level=14)).isel(time=i).values.flatten()
ct15 = (ct.sel(level=15)).isel(time=i).values.flatten()
ct16 = (ct.sel(level=16)).isel(time=i).values.flatten()
ct17 = (ct.sel(level=17)).isel(time=i).values.flatten()
ct18 = (ct.sel(level=18)).isel(time=i).values.flatten()
ct19 = (ct.sel(level=19)).isel(time=i).values.flatten()
ct20 = (ct.sel(level=20)).isel(time=i).values.flatten()
ct21 = (ct.sel(level=21)).isel(time=i).values.flatten()
ct22 = (ct.sel(level=22)).isel(time=i).values.flatten()
ct23 = (ct.sel(level=23)).isel(time=i).values.flatten()
ct24 = (ct.sel(level=24)).isel(time=i).values.flatten()
ct25 = (ct.sel(level=25)).isel(time=i).values.flatten()
gph1 = (ctgph.sel(boundary=1)).isel(time=i).values.flatten()
gph2 = (ctgph.sel(boundary=2)).isel(time=i).values.flatten()
gph3 = (ctgph.sel(boundary=3)).isel(time=i).values.flatten()
gph4 = (ctgph.sel(boundary=4)).isel(time=i).values.flatten()
gph5 = (ctgph.sel(boundary=5)).isel(time=i).values.flatten()
gph6 = (ctgph.sel(boundary=6)).isel(time=i).values.flatten()
gph7 = (ctgph.sel(boundary=7)).isel(time=i).values.flatten()
gph8 = (ctgph.sel(boundary=8)).isel(time=i).values.flatten()
gph9 = (ctgph.sel(boundary=9)).isel(time=i).values.flatten()
gph10 = (ctgph.sel(boundary=10)).isel(time=i).values.flatten()
gph11 = (ctgph.sel(boundary=11)).isel(time=i).values.flatten()
gph12 = (ctgph.sel(boundary=12)).isel(time=i).values.flatten()
gph13 = (ctgph.sel(boundary=13)).isel(time=i).values.flatten()
gph14 = (ctgph.sel(boundary=14)).isel(time=i).values.flatten()
gph15 = (ctgph.sel(boundary=15)).isel(time=i).values.flatten()
gph16 = (ctgph.sel(boundary=16)).isel(time=i).values.flatten()
gph17 = (ctgph.sel(boundary=17)).isel(time=i).values.flatten()
gph18 = (ctgph.sel(boundary=18)).isel(time=i).values.flatten()
gph19 = (ctgph.sel(boundary=19)).isel(time=i).values.flatten()
gph20 = (ctgph.sel(boundary=20)).isel(time=i).values.flatten()
gph21 = (ctgph.sel(boundary=21)).isel(time=i).values.flatten()
gph22 = (ctgph.sel(boundary=22)).isel(time=i).values.flatten()
gph23 = (ctgph.sel(boundary=23)).isel(time=i).values.flatten()
gph24 = (ctgph.sel(boundary=24)).isel(time=i).values.flatten()
gph25 = (ctgph.sel(boundary=25)).isel(time=i).values.flatten()
listOfStrings2 = [time]*len(ct1)
lonct = ct.longitude.values.tolist()
latct = ct.latitude.values.tolist()
lonct = [a for a in lonct
for b in latct if a != b]
latct = [b for a in lonct
for b in latct if a != b]
CT2 = pd.DataFrame(list(zip(*[listOfStrings2,lonct,latct,ct1,ct2,ct3,ct4,ct5,ct6,ct7,ct8,ct9,ct10,ct11,ct12,ct13,ct14,ct15,ct16,ct17,ct18,ct19,ct20,ct21,ct22,ct23,ct24,ct25]))).add_prefix('Col')
CTgph = pd.DataFrame(list(zip(*[listOfStrings2,lonct,latct,gph1,gph2,gph3,gph4,gph5,gph6,gph7,gph8,gph9,gph10,gph11,gph12,gph13,gph14,gph15,gph16,gph17,gph18,gph19,gph20,gph21,gph22,gph23,gph24,gph25]))).add_prefix('Col')
#gph_df = gph_df.append(gph_df2)
CT = CT.append(CT2)
ds.close()
CTall = CTall.append(CT)
di = CT.Col0
ilon = CT.Col1
ilat = CT.Col2
sat_date = oco.Col0
sat_lon = oco.Col1
sat_lat = oco.Col2
for ii in range(len(CT)):
subset = oco[(ilat[ii]<=(sat_lat[:]+1.5)) & (ilat[ii]>=(sat_lat[:]-1.5)) &
(ilon[ii]<=(sat_lon[:]+1.5)) & (ilon[ii]>=(sat_lon[:]-1.5))]
GO1 = GO1.append(subset)
请注意,两种情况下的数据帧是相同的。我会感谢有人帮助我解决这个问题
目前没有回答
相关问题 更多 >
编程相关推荐