有 Java 编程相关的问题?

你可以在下面搜索框中键入要查询的问题!

并集和交集的java缓慢实现

我正试图找到一种更好的方法来实现这些方法,因为在非常大的集合上,它们需要很长的时间,有什么想法吗

import java.util.HashMap;
import java.util.HashSet;

public class Multiset<E> extends HashSet<E> {

    private static final long serialVersionUID = -9013417064272046980L;
    private HashMap<E, Integer> multiplicities = new HashMap<E, Integer>();

    @Override
    public boolean add(E element){
        if(multiplicities.containsKey(element)){
            int x = (int) multiplicities.get(element);
            multiplicities.put(element, ++x);
        }else{
            multiplicities.put(element, 1);
        }
        return super.add(element);    
    }

/**
 * Adds all of the elements of another multiset to this one. 
 * This method allows the preservation of multiplicities
 * which would not occur using the superclass's addAll().
 * @param elements
 * @return true if all elements were successfully added
 */
public boolean addAll(Multiset<E> elements) {
    boolean flag = false;
    for(E element : elements){
        for(int i = 0; i < elements.multiplicity(element); i++)
            flag = add(element);
    }
    return flag;
}

/**
 * The set-view of a multiset is the ordinary set of all 
 * elements with multiplicity >= 1.
 * @return all elements that have multiplicity >= 1
 */
public Multiset<E> setView(){
    Multiset<E> set = new Multiset<E>();
    for(E o : multiplicities.keySet()){
        set.add(o);
    }
    return set;
}

/**
 * provides a union of two multisets whereby the multiplicity of each
 * element is the larger of the two
 * @param second
 * @return
 */
public Multiset<E> union(Multiset<E> second){
    Multiset<E> union = new Multiset<E>();
    Multiset<E> join = new Multiset<E>();
    join.addAll(this);
    join.addAll(second);

    for(E o : join){
        int i = this.multiplicity(o); 
        int j = second.multiplicity(o);
        i = i > j ? i : j;
        for(int c = 0; c < i; c++){
            union.add(o);
        }
    }

    return union;
}

/**
 * provides an intersection of two multisets whereby 
 * the multiplicity of each element is the smaller of the two
 * @param second
 * @return The multiset containing the intersection of two multisets
 */
public Multiset<E> intersect(Multiset<E> second){    

    Multiset<E> intersection = new Multiset<E>();
    for(E o : this.setView()){
        if (second.setView().contains(o)) {
            int i = this.multiplicity(o); 
            int j = second.multiplicity(o);
            i = i < j ? i : j;
            for(int c = 0; c < i; c++){
                intersection.add(o);
            }
        }
    }

    return intersection;        
}

/**
 * The Multiplicity is the number of occurrences of an object 
 * in the multiset
 * @param o
 * @return number of occurrences of o
 */
public int multiplicity(E o){

    return (multiplicities.containsKey(o)) ? multiplicities.get(o) : 0;
}

public int cardinality(){
    int card = 0;
    for(Integer i : multiplicities.values()){
        card += i;
    }

    return card;    
 }

/**
 * Measures the similarity between two multisets
 * @param A
 * @param B
 * @return the cardinality of the difference of A and B 
 */
public int similarityOfMultisets(Multiset<E> second){

    Multiset<E> union, intersection; 
    int difference;

    union = union(second);
    intersection = intersect(second);
    difference = union.cardinality() - intersection.cardinality();

    return difference;

}
}

编辑:

我相信我已经找到了一种更快的方法来计算多重集方法的相似性:

public int similarityOfMultisets(Multiset<E> second){
    int c = 0;
    for(E elem: this.setView()){
        c += Math.min(this.multiplicity(elem), second.multiplicity(elem));
    }   
    Multiset<E> union = this.union(second);
    return union.cardinality() - c;     
}

共 (5) 个答案

  1. # 1 楼答案

    下面是该类的重构。不一定更快——除了不在循环中重新运行setView()——但在某些方面可能更干净。FWIW

    import java.util.HashMap;
    import java.util.HashSet;
    
    public class Multiset<E> extends HashSet<E> {
        private static final long           serialVersionUID    = -9013417064272046980L;
        private final HashMap<E, Integer>   multiplicities      = new HashMap<E, Integer>();
    
        public boolean add(E element) {
            return add(element, 1);
        }
    
        private boolean add(E element, int copies) {
            if (!contains(element))
                multiplicities.put(element, 0);
            int n = multiplicities.get(element);
            multiplicities.put(element, n + copies);
            return super.add(element);
        }
    
        /**
         * Adds all of the elements of another multiset to this one. This method allows the preservation of multiplicities which would not occur
         * using the superclass's addAll().
         * 
         * @param that
         * @return true if all elements were successfully added
         */
        public boolean addAll(Multiset<E> that) {
            boolean flag = false;
            for (E element : that)
                flag = add(element, that.multiplicity(element));
            return flag;
        }
    
        /**
         * The set-view of a multiset is the ordinary set of all elements with multiplicity >= 1.
         * 
         * @return all elements that have multiplicity >= 1
         */
        public Multiset<E> setView() {
            Multiset<E> set = new Multiset<E>();
            for (E o : multiplicities.keySet())
                set.add(o);
            return set;
        }
    
        /**
         * provides a union of two multisets whereby the multiplicity of each element is the larger of the two
         * 
         * @param that
         * @return
         */
        public Multiset<E> union(Multiset<E> that) {
            HashSet<E> both = new HashSet<E>();
            both.addAll(this);
            both.addAll(that);
            Multiset<E> union = new Multiset<E>();
            for (E element : both)
                union.add(element, Math.max(this.multiplicity(element), that.multiplicity(element)));
            return union;
        }
    
        /**
         * provides an intersection of two multisets whereby the multiplicity of each element is the smaller of the two
         * 
         * @param that
         * @return The multiset containing the intersection of two multisets
         */
        public Multiset<E> intersect(Multiset<E> that) {
            Multiset<E> intersection = new Multiset<E>();
            final Multiset<E> other = that.setView();
            for (E element : this.setView())
                if (other.contains(element))
                    intersection.add(element, Math.min(this.multiplicity(element), that.multiplicity(element)));
            return intersection;
        }
    
        /**
         * The Multiplicity is the number of occurrences of an object in the multiset
         * 
         * @param element
         * @return number of occurrences of o
         */
        public int multiplicity(E element) {
            return contains(element) ? multiplicities.get(element) : 0;
        }
    
        public int cardinality() {
            int card = 0;
            for (Integer n : multiplicities.values())
                card += n;
            return card;
        }
    
        /**
         * Measures the similarity between two multisets
         * 
         * @param that
         * @return the cardinality of the difference of A and B
         */
        public int similarityOfMultisets(Multiset<E> that) {
            return union(that).cardinality() - intersect(that).cardinality();
        }
    }
    
  2. # 2 楼答案

    我不明白setView方法的目的。。。看起来你只是在返回你自己的一个副本,但是每个密钥的多重性都设置为1

    对于union,请尝试以下内容(可能无法编译):

    public Multiset<E> union(Multiset<E> second) {
        Multiset<E> union = new Multiset<E>();
        union.addAll(this);
        union.addAll(second);
    
        for(E o : union){
            int multiplicity = Math.max (this.multiplicity(o), second.multiplicity(o));
            union.multiplicities.put (o, multiplicity);
        }
    
        return union;
    }
    
  3. # 3 楼答案

    我认为问题是你在调用第二个。setView()-重新创建该集合-用于此集合中的每个元素。setView()。试试这个:

    /**
     * provides an intersection of two multisets whereby 
     * the multiplicity of each element is the smaller of the two
     * @param second
     * @return The multiset containing the intersection of two multisets
     */
    public Multiset<E> intersect(Multiset<E> second){    
    
        Multiset<E> intersection = new Multiset<E>();
        Set<E> other = second.setView();
        for(E o : this.setView()){
            if (other.contains(o)) {
                int i = this.multiplicity(o); 
                int j = second.multiplicity(o);
                i = i < j ? i : j;
                for(int c = 0; c < i; c++){
                    intersection.add(o);
                }
            }
        }
    
        return intersection;        
    }
    
  4. # 4 楼答案

    我们算法的第一个变量的性能测试结果:

    Robert-Union: 2263374 us
    Robert-Intersection: 603134 us
    Robert-Similarity: 2926389 us
    Carl-Union: 3372 us
    Carl-Intersection: 5097 us
    Carl-Similarity: 6913 us
    David-Union: 5182 us
    David-Intersection: 2527 us
    David-Similarity: 5270 us
    

    卡尔的联盟打败了我的联盟

    测试代码here。不过,我没有验证计算结果的正确性

    测试代码2的各种集合大小和方差here(JDK 7b59)。结果xslx/ods

  5. # 5 楼答案

    这就是我提出的G-C:

    import com.google.common.collect.Multiset;
    import com.google.common.collect.Multisets;
    import com.google.common.collect.Multiset.Entry;
    public class MultisetOp {
        public static void main(String[] args) {
            Multiset<Integer> ms1 = Multisets.newHashMultiset(1, 1, 2, 3, 4, 4, 4);
            Multiset<Integer> ms2 = Multisets.newHashMultiset(1, 2, 3, 3,
                4, 5, 5, 5);
            Multiset<Integer> mu = Multisets.newHashMultiset();
            Multiset<Integer> mi = Multisets.newHashMultiset();
            // -------- UNION START -----------
            for (Entry<Integer> e : ms1.entrySet()) {
                int j = ms2.count(e.getElement());
                mu.add(e.getElement(), Math.max(e.getCount(), j));
            }
            for (Entry<Integer> e : ms2.entrySet()) {
                int j = ms1.count(e.getElement());
                if (j == 0) {
                    mu.add(e.getElement(), e.getCount());
                }
            }
            // -------- UNION END -----------
    
            // -------- INTERSECT START -----------
            for (Entry<Integer> e : ms1.entrySet()) {
                int j = ms2.count(e.getElement());
                if (j > 0) {
                    mi.add(e.getElement(), Math.min(e.getCount(), j));
                }
            }
            // -------- INTERSECT END -----------
    
            System.out.printf("Union: %s%n", mu);
            System.out.printf("Intersection: %s%n", mi);
            System.out.printf("Cardinality: %d%n", mu.size() - mi.size());
    
        }
    }
    

    结果:

    [1 x 2, 2, 3 x 2, 4 x 3, 5 x 3]
    [1, 2, 3, 4]

    没有基准。 看来,你的基数可以用两次而不是三次遍历来计算