R 中的 cut 函数将 x 的取值范围划分为若干区间,并根据每个值所落入的区间对 x 中的值进行编码。在 R 中我可以用 cut 函数把数据分成几个小组;在 Python 代码中该怎样实现同样的功能?

82   68   86   94   89   63   77   76   84    89
75   78   81   82   76   99   80   84   89    88
60   83   72   83   85   56   86   68   75   100
90   84   75   86   74   77   95   63   80    76
100   43  76   81   79   74   96   52   69    86'



# Interval limits: each pair of consecutive values delimits one span.
tu = (0,60,70,80,90,100)

# Python 2 print statements: echo the raw input text and the limits.
print raw_data
print tu

from collections import OrderedDict
# First limit, last limit, and every limit except the first.
tu0 = tu[0]
tuL = tu[-1]
tuF = tu[1:]
# Map each limit tu[i] (i >= 1) to a one-item list holding the slice
# (tu[i-1], tu[i]); the report at the end prints that first item and
# "length of the list minus one" as the count for the span.
d = OrderedDict((tu[i], [tu[i-1:i+1]])
                for i in xrange(1,len(tu)))

# Walk over the whitespace-separated numbers of raw_data as ints.
# NOTE(review): the nested if/for statements below have no bodies, so the
# snippet does not parse as shown. The statements that append x to the
# matching list in d appear to have been lost -- TODO restore them from the
# original answer.
for x in map(int,raw_data.split()):
    if x>=tu0:
        if x==tuL:
            for lim in tuF:
                if x<lim:
# One line per span: the span slice, then len(list) - 1.
print '\n'.join('%-15s%s' % (d[k][0],len(d[k])-1) for k in d)






难点在于:当原始数据中的某个数字等于界限序列 tu = (0,60,70,80,90,100) 的最后一个值(本例中为 100)时,这个数字必须被计入最后一个区间,即本例中的 [90,100](数学记法);而前面的各个区间,如 [0,60[、[60,70[ ……,都不包含其上限。


from collections import OrderedDict

# Interval limits. Consecutive pairs form the spans [0,60[, [60,70[, ...;
# only the last span, [90,100], also contains its upper limit.
tu = [0,60,70,80,90,100]
print( 'raw_data :%s\n\ntu :\n%s' % (raw_data,tu),'\n' )

tu0, tuLast = tu[0], tu[-1]
tu_from1 = tu[1:]

# Key: upper limit of a span. Value: a list that starts with the
# (lower, upper) pair and then collects the numbers falling in that span.
d = OrderedDict((upper, [(lower, upper)])
                for lower, upper in zip(tu, tu_from1))

print ('OrderedDictionary , before:\n%s'
       % '\n'.join('key:%3s   value:%s'  % item for item in d.items()))

# the core of the code is here:
for x in map(int, raw_data.split()):
    # Numbers outside [tu0, tuLast] belong to no span. Skipping them on both
    # sides avoids the StopIteration that an exhausted next() would raise for
    # a number above the last limit.
    if not tu0 <= x <= tuLast:
        continue
    for lim in tu_from1:
        # x < lim picks the first span whose upper limit exceeds x; the
        # second test lets x == tuLast land in the last span.
        if x < lim or lim == tuLast:
            d[lim].append(x)
            break

print ('\nOrderedDictionary , after:\n%s'
       % '\n'.join('key:%3s   value:%s'  % item for item in d.items()),'\n')

# One line per span: the (lower, upper) pair and how many numbers it received
# (the list length minus the leading pair).
print( '\n'.join('%-15s%s' % (v[0],len(v)-1) for v in d.values()) )


raw_data :
82   68   86   94   89   63   77   76   84    89
75   78   81   82   76   99   80   84   89    88
60   83   72   83   85   56   86   68   75   100
90   84   75   86   74   77   95   63   80    76
100   43  76   81   79   74   96   52   69    86

tu :
[0, 60, 70, 80, 90, 100] 

OrderedDictionary , before:
key: 60   value:[(0, 60)]
key: 70   value:[(60, 70)]
key: 80   value:[(70, 80)]
key: 90   value:[(80, 90)]
key:100   value:[(90, 100)]

OrderedDictionary , after:
key: 60   value:[(0, 60), 56, 43, 52]
key: 70   value:[(60, 70), 68, 63, 60, 68, 63, 69]
key: 80   value:[(70, 80), 77, 76, 75, 78, 76, 72, 75, 75, 74, 77, 76, 76, 79, 74]
key: 90   value:[(80, 90), 82, 86, 89, 84, 89, 81, 82, 80, 84, 89, 88, 83, 83, 85, 86, 84, 86, 80, 81, 86]
key:100   value:[(90, 100), 94, 99, 100, 90, 95, 100, 96] 

(0, 60)        3
(60, 70)       6
(70, 80)       14
(80, 90)       20
(90, 100)      7


from bisect import bisect
from collections import defaultdict
from itertools import chain, imap

82   68   86   94   89   63   77   76   84    89
75   78   81   82   76   99   80   84   89    88
60   83   72   83   85   56   86   68   75   100
90   84   75   86   74   77   95   63   80    76
100   43  76   81   79   74   96   52   69    86""".lstrip()

# Make data into iterable of ints: split every line of raw_data on
# whitespace and convert each token. Builtin map works on both Python 2
# and Python 3.
data = chain.from_iterable(
    map(int, line.split())
    for line in raw_data.splitlines()
)

# Use 101 instead of 100 to cater for inclusion of 100
limits = [0, 60, 70, 80, 90, 101]

# Counter keyed by the position bisect() returns for each number:
# 1 -> [0,60[, 2 -> [60,70[, ..., 5 -> [90,100].
# Key 0 collects numbers below 0 and key 6 numbers above 100.
count = defaultdict(int)
for num in data:
    count[bisect(limits, num)] += 1




import itertools
from collections import Counter

# 'kwargs' lets you implement more options
def cut ( data, breaks, **kwargs ):
    """Count how many elements of *data* fall into each interval of *breaks*.

    Consecutive break values form the intervals. Every interval contains its
    lower bound and excludes its upper bound, except the last one, which also
    contains its upper bound (so with breaks ending in 100, the value 100 is
    counted in the last interval).

    Args:
        data: iterable of numbers to classify.
        breaks: iterable of interval limits in increasing order.
        **kwargs: accepted for future options; currently ignored.

    Returns:
        A Counter mapping (lower, upper) tuples to the number of elements in
        that interval. Elements outside the first/last break are not counted,
        and intervals that received no element are absent.
    """
    counts = Counter()
    limits = list(breaks)
    if len(limits) < 2:
        # Fewer than two breaks define no interval at all.
        return counts

    # Build the pairs once as a list: a one-shot iterator would be exhausted
    # after the first element and every later element would be dropped.
    intervals = list(zip(limits, limits[1:]))
    top = limits[-1]
    for element in data:
        for lower, upper in intervals:
            if lower <= element < upper or element == upper == top:
                counts[(lower, upper)] += 1
                break
    return counts

# Call 'cut' on the numbers contained in raw_data. The text is split on
# whitespace and converted to ints first: iterating over the string itself
# would hand 'cut' single characters instead of numbers.
cut( map(int, raw_data.split()), (0,60,70,80,90,100) )


