How do I keep my Python code from being Killed?

Posted 2024-09-21 07:23:18


I have a fairly long Python 3 script that oversamples a large number of images, fits each one with a 2D Gaussian to find the relevant parameters of the source in the image (centroid coordinates, amplitude, etc.), shifts it to the center, normalizes it, and finally stacks all of the images with a median. I am trying to do this with data from 50,000 images. I already extracted and saved the data in an earlier script, which produced a binary file called initial_data.data holding the data for all the images (numpy arrays); I also used the astropy wcs package to find good initial guesses for the centroid coordinates, so those are saved in centroid_coords.data (in the same order). The first time I tried to run the code on all the images, it printed Killed during the oversampling part. I then decided to try splitting the images into subsections and running the code on one subsection at a time. That is fine for a few subsections, but if I can only operate on about 5,000 images at a time it becomes far too tedious and impractical! Here is my code, which starts by asking which third (1/3) of the images to operate on:

import pickle
import warnings
import numpy as np
from scipy import optimize, interpolate
from scipy.optimize import OptimizeWarning
from astropy.io import fits
from astropy.nddata import Cutout2D

# Promote OptimizeWarning to an error so the try/except around curve_fit can catch it
warnings.simplefilter('error', OptimizeWarning)

frac = int(input('Subset 1, 2, or 3? ')) # select which subset of images to operate on
oversampled_file = 'oversampled_images_frac%d.data' % (frac)
params_file = 'gparams_2D_frac%d.data' % (frac)
final_images_file = 'images_b4stacking_2D_frac%d.data' % (frac)
FWHM_file = 'FWHM_2D_frac%d.data' % (frac)
fitsfilename = 'stacked_images_frac%d.fits' % (frac)

# Define subsets of total image data based on chosen fraction
if frac==1:
    start, end = 0, 50000//3
    print('Will operate on 1st subsection: %d elements' % (end-start))
elif frac==2:
    start, end = 50000//3, 2*50000//3
    print('Will operate on 2nd subsection: %d elements' % (end-start))
elif frac==3:
    start, end = 2*50000//3, 50000 # end at 50000 (not 49999) so the last image is included
    print('Will operate on 3rd subsection: %d elements' % (end-start))
else:
    raise ValueError('Subsection of 1, 2, or 3 not detected') # stop here rather than fail later with undefined start/end

# Read in a subset of initial data and centroid coordinates
with open('initial_data.data', 'rb') as f_r:
    initial_data = pickle.load(f_r)[start:end]
with open('centroid_coords.data', 'rb') as f_r:
    centroid_coords = pickle.load(f_r)[start:end]

# Oversample images
def oversample(data): # pixels -> NxN pixels
    oversampled = []
    N = 5
    c=1
    for data_set in data:
        Y, X = np.shape(data_set)
        x = np.linspace(0, 0.5, X)
        y = np.linspace(0, 0.5, Y)

        f = interpolate.interp2d(x, y, data_set, kind='cubic')

        Xnew = np.linspace(0, 0.5, X*N)
        Ynew = np.linspace(0, 0.5, Y*N)
        new_data = f(Xnew, Ynew)
        oversampled.append(new_data)

        if c%50==0:
            print('Oversampling %f%% complete' % (c*100/len(data)))
        c+=1
    return np.array(oversampled) # array of 2D arrays

resampled_data = oversample(initial_data)

# Save oversampled image data -- array by array to save RAM
with open(oversampled_file, 'wb') as f:
    for image in resampled_data:
        pickle.dump(image, f)

# Fit to 2D Gaussian
def gaussian_func(xy, x0, y0, sigma_x, sigma_y, amp, theta, offset): # (x0, y0) is center
    x, y = xy

    a = (np.cos(theta))**2/(2*sigma_x**2) + (np.sin(theta))**2/(2*sigma_y**2)
    b = -np.sin(2*theta)/(4*sigma_x**2) + np.sin(2*theta)/(4*sigma_y**2)
    c = (np.sin(theta))**2/(2*sigma_x**2) + (np.cos(theta))**2/(2*sigma_y**2)

    inner = a * (x-x0)**2
    inner += 2*b*(x-x0)*(y-y0)
    inner += c * (y-y0)**2
    return (offset + amp * np.exp(-inner)).ravel()

def Sigma2width(sigma):
    return 2 * np.sqrt(2*np.log(2)) * sigma

def generate(data_set):
    xvec = np.arange(0, np.shape(data_set)[1], 1)
    yvec = np.arange(0, np.shape(data_set)[0], 1)
    X, Y = np.meshgrid(xvec, yvec)
    return X, Y

def fit_to_model(data):
    gaussian_params = []
    N = 5
    theta_guess = 6
    c=1
    for i in range(len(data)):
        data_set = data[i]
        # Guess parameters

        # Centroid and amplitude
        offset = np.median(data_set)
        x0, y0 = centroid_coords[i][0]*N, centroid_coords[i][1]*N
        x0, y0 = int(round(x0)), int(round(y0))

        if 300 < x0 < 500 and 300 < y0 < 500: # skips images that likely have inaccurate centroids
            subimage = data_set[y0-80:y0+80, x0-80:x0+80]
            amp = np.max(subimage)
            bg_sub_amp = amp-offset

            # Sigmas and offset
            ylim, xlim = np.shape(subimage)
            x, y = np.arange(0, xlim, 1), np.arange(0, ylim, 1)
            ypix, xpix = np.where(subimage==amp)

            y_range = np.take(subimage-offset, ypix[0], axis=0)
            x_range = np.take(subimage-offset, xpix[0], axis=1)

            half_max = bg_sub_amp/2
            d_x = x_range - half_max
            d_y = y_range - half_max
            indices_x = np.where(d_x > 0)[0]
            indices_y = np.where(d_y > 0)[0]
            width_x = len(indices_x) # estimate of integer pixels only
            width_y = len(indices_y)

            sigma_x = width_x/(2*np.sqrt(2*np.log(2)))
            sigma_y = width_y/(2*np.sqrt(2*np.log(2)))

            # Guesses
            guesses = [x0, y0, sigma_x, sigma_y, amp, theta_guess, offset]

            # Fit to Gaussian
            xx, yy = generate(data_set)

            try:
                pred_params, uncert_cov = optimize.curve_fit(gaussian_func, (xx.ravel(), yy.ravel()), data_set.ravel(), p0=guesses)
            except (OptimizeWarning, RuntimeError): # OptimizeWarning is only catchable because it is raised as an error (see warnings.simplefilter at the top)
                print('Failed to find fit; skipping...')
                continue

            gaussian_params.append(pred_params)

            if c%10==0:
                print('2D Gaussian fit complete for %f%% of images' % (c*100/len(data)))
            c+=1

        else: #not a good Gaussian fit (doesn't have centroid coords I want)
            gaussian_params.append([])
    return gaussian_params

params = fit_to_model(resampled_data)

# Save Gaussian params to data file
with open(params_file, 'wb') as f:
    for param_set in params:
        pickle.dump(param_set, f)

# Make centered cutouts of size 600x600 pixels
def cutout(data_list):
    centered, FWHM_images = [], []
    size = 600

    c=1
    for i in range(len(data_list)):
        data_set = data_list[i]
        if len(params[i]) > 0: # comparing a numpy array to [] with != is unreliable; check length instead
            if np.shape(data_set)[0] == np.shape(data_set)[1]: #eliminates images that had objects too close to the edge
                x, y = params[i][0], params[i][1]
                FWHM_x, FWHM_y = Sigma2width(np.abs(params[i][2])), Sigma2width(np.abs(params[i][3]))

                # Pass on images that have bad fit (too big FWHMs)
                if FWHM_x > 50 or FWHM_y > 50:
                    continue

                FWHM_images.append((FWHM_x, FWHM_y))
                cutout = Cutout2D(data_set, (x, y), size).data
                centered.append(cutout)

                if c%50==0:
                    print('Centered cutouts complete for %f%% of images' % (c*100/len(data_list)))
                c+=1
    return centered, FWHM_images

centered_cutouts, FWHM_images = cutout(resampled_data)

# Normalize
def normalize(data):
    img_count = 1
    norm = []
    for data_set in data:
        data_set *= 1/data_set.max()
        norm.append(data_set)

        if img_count%50==0:
            print('Normalization complete for %f%% of images' % (img_count*100/len(data)))
        img_count+=1
    print('NUMBER OF FINAL IMAGES:', len(norm)) # img_count ends one past the true count
    return np.array(norm)

final_images = normalize(centered_cutouts)

# Save final (normalized) image data and individual FWHMs before stacking
with open(final_images_file, 'wb') as f:
    for image in final_images:
        pickle.dump(image, f)

with open(FWHM_file, 'wb') as f:
    for width in FWHM_images:
        pickle.dump(width, f)

# Stack images
stacked_image = np.median(final_images, axis=0)

# Write to new FITS file
hdu = fits.PrimaryHDU(stacked_image)
hdu.writeto(fitsfilename, overwrite=True)

As you can see, there are multiple checkpoints in my code where newly computed data is saved to binary data files. That is because, if an error occurs, I want to be able to simply read the last saved data back in rather than run everything again, since that takes a while. I believe one third of my 50,000 images takes about 11 hours, so I ran it overnight in screen. However, I now see another Killed message. Here are the last two output lines from my attempt to run this code:

[the two output lines were not preserved in the archived copy of this post]
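A side note on the checkpoints: because each file is written with repeated pickle.dump calls (one object per image), reading one back means calling pickle.load repeatedly until the file is exhausted. A minimal read-back sketch, assuming the file layout written by the code above:

import pickle

def read_checkpoint(filename):
    # Collect every object written by successive pickle.dump calls
    items = []
    with open(filename, 'rb') as f:
        while True:
            try:
                items.append(pickle.load(f))
            except EOFError: # raised by pickle.load at end of file
                return items

# e.g. resampled_data = read_checkpoint('oversampled_images_frac1.data')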

I usually have plenty of RAM available (close to 100 GB) since I am working on a large server. The numpy array for each initial image typically has shape 157x158, and after oversampling they of course grow to larger than 700x700.
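For what it's worth, a back-of-the-envelope estimate (assuming float64 arrays, i.e. 8 bytes per pixel, and the 5x oversampling above) suggests the Killed message is the Linux out-of-memory killer: one subsection of oversampled images alone nearly fills that RAM, and the np.array(oversampled) call at the end of oversample() has to allocate a second contiguous copy while the list still exists.

per_image = (157*5) * (158*5) * 8    # ~5 MB per oversampled image
subset = per_image * (50000 // 3)    # ~83 GB for a single third of the images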

I don't want to split the images into many small parts, because then I would be sitting here all day running them one by one. Is there anything else I can do to make sure the code runs without being Killed? Also, I could probably get away with saving only the image data at the very end of the code to a file per subsection, because what I really need is to take the median over all the images (from all the subsections) together. Maybe generators would prove useful? I have read a bit about them but am not sure whether they would actually help here. I also have access to multiple CPUs, but I am not sure how to use multiprocessing in Python, if that would even help; the sketch below is the kind of thing I imagine. Thank you!
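For concreteness, here is the sort of generator-plus-multiprocessing pipeline I have in mind. This is only a sketch: process_one is a hypothetical stand-in for the real per-image work (oversample, 2D Gaussian fit, cutout, normalize), and the pool size and the processed.data filename are arbitrary.

import pickle
from multiprocessing import Pool

def stream_images(filename):
    # Generator: yield pickled arrays one at a time instead of loading all 50000
    with open(filename, 'rb') as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                break

def process_one(image):
    # Hypothetical stand-in for the per-image steps in the script above
    return image

if __name__ == '__main__':
    with Pool(processes=8) as pool, open('processed.data', 'wb') as f_out:
        # imap pulls work from the generator as the workers need it, so the
        # full set of images never has to sit in RAM at the same time
        for result in pool.imap(process_one, stream_images('initial_data.data'), chunksize=16):
            pickle.dump(result, f_out)

The single-process version of the same idea is just "for image in stream_images(...)", dumping each result as it is produced, which already caps memory at roughly one image at a time.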

