当初始 df 包含日期时间列时，pandas 的 apply 返回的是 DataFrame

# 5x4 frame of random floats with columns A-D; every column has the same dtype.
df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'))


def random(row):
    """Return a fixed 8-element tuple, ignoring ``row``.

    The tuple is deliberately longer than the number of columns in ``df``
    so that the result of ``df.apply(random, axis=1)`` cannot be mapped
    back onto the original columns.
    """
    # Return a tuple with more elements than df has columns
    return (1, 2, 3, 4, 5, 6, 7, 8)


df.apply(random, axis=1)
# Output, returns new series as expected:
# 0    (1, 2, 3, 4, 5, 6, 7, 8)
# 1    (1, 2, 3, 4, 5, 6, 7, 8)
# 2    (1, 2, 3, 4, 5, 6, 7, 8)
# 3    (1, 2, 3, 4, 5, 6, 7, 8)
# 4    (1, 2, 3, 4, 5, 6, 7, 8)

--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes) 4262 blocks = form_blocks(arrays, names, axes) -> 4263 mgr = BlockManager(blocks, axes) 4264 mgr._consolidate_inplace() /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in __init__(self, blocks, axes, do_integrity_check, fastpath) 2760 if do_integrity_check: -> 2761 self._verify_integrity() 2762 /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in _verify_integrity(self) 2970 if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: -> 2971 construction_error(tot_items, block.shape[1:], self.axes) 2972 if len(self.items) != tot_items: /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e) 4232 raise ValueError("Shape of passed values is {0}, indices imply {1}".format( -> 4233 passed, implied)) 4234 ValueError: Shape of passed values is (5, 8), indices imply (5, 5) During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-29-b57dd4b93995> in <module>() ----> 1 df.apply(random,axis=1) /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds) 4150 if reduce is None: 4151 reduce = True -> 4152 return self._apply_standard(f, axis, reduce=reduce) 4153 else: 4154 return self._apply_broadcast(f, axis) /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _apply_standard(self, func, axis, ignore_failures, reduce) 4263 index = None 4264 -> 4265 result = self._constructor(data=results, index=index) 4266 result.columns = res_index 4267 
/Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy) 264 dtype=dtype, copy=copy) 265 elif isinstance(data, dict): --> 266 mgr = self._init_dict(data, index, columns, dtype=dtype) 267 elif isinstance(data, ma.MaskedArray): 268 import numpy.ma.mrecords as mrecords /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype) 400 arrays = [data[k] for k in keys] 401 --> 402 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) 403 404 def _init_ndarray(self, values, index, columns, dtype=None, copy=False): /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype) 5406 axes = [_ensure_index(columns), _ensure_index(index)] 5407 -> 5408 return create_block_manager_from_arrays(arrays, arr_names, axes) 5409 5410 /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes) 4265 return mgr 4266 except ValueError as e: -> 4267 construction_error(len(arrays), arrays[0].shape, axes, e) 4268 4269 /Users/jguillette/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e) 4231 raise ValueError("Empty data passed with indices specified.") 4232 raise ValueError("Shape of passed values is {0}, indices imply {1}".format( -> 4233 passed, implied)) 4234 4235 ValueError: Shape of passed values is (5, 8), indices imply (5, 5)

2条回答

网友

1楼 · 编辑于 2024-10-02 10:20:08

https://github.com/pandas-dev/pandas/blob/v0.22.0/pandas/core/frame.py#L236-L6142

class DataFrame(NDFrame):
def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
    """Call ``func`` on each Series taken along ``axis`` and assemble the results.

    Parameters
    ----------
    func : callable
        Called once per generated Series as ``func(series)``.
    axis : {0, 1}
        0 generates one Series per column (via ``self._ixs(i, axis=1)``);
        1 generates one Series per element of ``self.values``, labelled
        with ``self.columns``.
    ignore_failures : bool, default False
        If True, exceptions raised by ``func`` are swallowed and only the
        successful positions are kept in the result index.
    reduce : bool, default True
        If True, first attempt the ``lib.reduce`` fast path; any exception
        raised there is ignored and the generic loop below is used instead.

    Returns
    -------
    Series or frame
        The ``lib.reduce`` result wrapped in a Series when the fast path
        succeeds. Otherwise, if the first result ``is_sequence``, an object
        built with ``self._constructor`` (transposed for ``axis == 1``);
        else a Series of the per-call results indexed by ``res_index``.

    Raises
    ------
    AssertionError
        If ``axis`` is neither 0 nor 1.
    Exception
        Whatever ``func`` raises when ``ignore_failures`` is False, with
        ``'occurred at index <label>'`` appended to ``e.args``.
    """
    # skip if we are mixed datelike and trying reduce across axes
    # GH6125
    # i.e. for row-wise application on a mixed-dtype frame that includes
    # datelike data, the reduce fast path is turned off
    if (reduce and axis == 1 and self._is_mixed_type and
            self._is_datelike_mixed_type):
        reduce = False
    # try to reduce first (by default)
    # this only matters if the reduction in values is of different dtype
    # e.g. if we want to apply to a SparseFrame, then can't directly reduce
    if reduce:
        values = self.values

        # we cannot reduce using non-numpy dtypes,
        # as demonstrated in gh-12244
        if not is_extension_type(values):
            # Create a dummy Series from an empty array
            index = self._get_axis(axis)
            empty_arr = np.empty(len(index), dtype=values.dtype)
            dummy = Series(empty_arr, index=self._get_axis(axis),
                           dtype=values.dtype)

            try:
                labels = self._get_agg_axis(axis)
                result = lib.reduce(values, func, axis=axis, dummy=dummy,
                                    labels=labels)
                return Series(result, index=labels)
            except Exception:
                # fast path failed for any reason: fall through to the
                # generic per-Series loop below
                pass

    # mixed-dtype frames build their row Series with object dtype
    dtype = object if self._is_mixed_type else None
    if axis == 0:
        # one Series per column
        series_gen = (self._ixs(i, axis=1)
                      for i in range(len(self.columns)))
        res_index = self.columns
        res_columns = self.index
    elif axis == 1:
        # one Series per element of self.values, named after its index label
        res_index = self.index
        res_columns = self.columns
        values = self.values
        series_gen = (Series.from_array(arr, index=res_columns, name=name,
                                        dtype=dtype)
                      for i, (arr, name) in enumerate(zip(values,
                                                          res_index)))
    else:  # pragma : no cover
        raise AssertionError('Axis must be 0 or 1, got %s' % str(axis))

    # i stays None until the first iteration starts; the error handler
    # below relies on that to know whether a position is available
    i = None
    # keys collects the names of the processed Series; it is not read
    # again within this function
    keys = []
    # results maps the position of each Series to func's return value
    results = {}
    if ignore_failures:
        successes = []
        for i, v in enumerate(series_gen):
            try:
                results[i] = func(v)
                keys.append(v.name)
                successes.append(i)
            except Exception:
                # failures are deliberately dropped in this mode
                pass
        # so will work with MultiIndex
        if len(successes) < len(res_index):
            res_index = res_index.take(successes)
    else:
        try:
            for i, v in enumerate(series_gen):
                results[i] = func(v)
                keys.append(v.name)
        except Exception as e:
            if hasattr(e, 'args'):
                # make sure i is defined
                if i is not None:
                    # annotate the exception with the label being processed
                    k = res_index[i]
                    e.args = e.args + ('occurred at index %s' %
                                       pprint_thing(k), )
            raise

    # Only the first result decides which kind of object is built.
    if len(results) > 0 and is_sequence(results[0]):
        if not isinstance(results[0], Series):
            # non-Series sequences are labelled with the labels of the
            # axis the Series were drawn across
            # NOTE(review): presumably this is where a sequence whose length
            # differs from len(res_columns) fails with a shape ValueError
            # - confirm against the constructor
            index = res_columns
        else:
            index = None

        result = self._constructor(data=results, index=index)
        result.columns = res_index

        if axis == 1:
            # results were stored one per column; flip back to one per row
            result = result.T
        result = result._convert(datetime=True, timedelta=True, copy=False)

    else:

        result = Series(results)
        result.index = res_index

    return result

我找到了你这个问题对应的源代码；正如代码注释所说，详细原因可以参考 GH6125。我的解决办法有点笨，如下所示。

^{pr2}$

第二个解决方案是确保 func 返回一个 pd.Series（看起来会比较慢）

def func(line):
    """Return the same three-element Series for every row; ``line`` is unused."""
    labels = ['a', 'x', 'y']
    return pd.Series(labels)
# Call func once per row (axis=1); func returns a Series for each row.
df.apply(func, axis=1)

希望有帮助。

网友

2楼 · 编辑于 2024-10-02 10:20:08

根据 df 的数据类型不同，pandas 处理 apply 返回值的方式也不同。在第一个示例中，所有列的数据类型都是 float；而在添加列 E 之后，数据类型是混合的，这导致 pandas 试图用返回的值重建 DataFrame。我不知道这种行为背后的原因，但下面的写法应该可以解决你的问题：

# Cast the frame to object dtype first, then call random once per row (axis=1).
df.astype(object).apply(random,axis=1)
Out[64]: 
0    (1, 2, 3, 4, 5, 6, 7, 8)
1    (1, 2, 3, 4, 5, 6, 7, 8)
2    (1, 2, 3, 4, 5, 6, 7, 8)
3    (1, 2, 3, 4, 5, 6, 7, 8)
4    (1, 2, 3, 4, 5, 6, 7, 8)

相关问题更多 >

编程相关推荐

热门问题

热门文章