HHeLiBeXの日記 正道編

日々の記憶の記録とメモ‥

Collection#iterator() vs Collection#contains()

こんなコードを見かけることがある。

private boolean search(List<Xxx> list, Xxx target) {
    for (Xxx xxx : list) {
        if (xxx.equals(target)) {
            return true;
        }
    }
    return false;
}

こんなことをするなら、Object#equals()やObject#hashCode()を適切に実装して、

private boolean search(List<Xxx> list, Xxx target) {
    return list.contains(target);
}

でいいじゃない、と。
前者だと、無駄なIteratorオブジェクトが生成されるわけだし。

import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Collection;
import java.util.Iterator;

import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestIteration {

    private static NumberFormat nf = new DecimalFormat("0.000");

    private static final int n = 100;
    private static final int nloop = 1000000;
    private static Collection<String> COLLECTION = new java.util.ArrayList<String>();
//    private static Collection<String> COLLECTION = new java.util.LinkedList<String>();
//    private static Collection<String> COLLECTION = new java.util.HashSet<String>();
//    private static Collection<String> COLLECTION = new java.util.LinkedHashSet<String>();
//    private static Collection<String> COLLECTION = new java.util.TreeSet<String>();

    private static String first;
    private static String mid;
    private static String last;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        for (int i = 0; i < n; ++i) {
            COLLECTION.add(String.valueOf((i / 1000)) + String.valueOf((i % 1000) / 100) + String.valueOf((i % 100) / 10) + String.valueOf(i % 10));
        }

        Iterator<String> it = COLLECTION.iterator();
        for (int i = 0; it.hasNext(); ++i) {
            String s = it.next();
            if (i == 0) {
                first = s;
            } else if (i == n - 1) {
                last = s;
            } else if (i == ((n + 1) / 2 - 1)) {
                mid = s;
            }
        }
        System.out.printf("%s %s %s\n", first, mid, last);
    }

    private long start;
    private long end;
    private String name;

    @Before
    public void setUp() throws Exception {
        start = System.currentTimeMillis();
    }

    @After
    public void tearDown() throws Exception {
        end = System.currentTimeMillis();
        System.out.printf("%-24s %-10s %7s %7s %24s\n", name, COLLECTION.getClass().getSimpleName(), n, nloop, nf.format((end - start) / 1000.0) + " sec");
    }

    @Test
    public void test_iterator_First() {
        name = "test_iterator_First";
        test_iterator(COLLECTION, first);
    }

    @Test
    public void test_iterator_Mid() {
        name = "test_iterator_Mid";
        test_iterator(COLLECTION, mid);
    }

    @Test
    public void test_iterator_Last() {
        name = "test_iterator_Last";
        test_iterator(COLLECTION, last);
    }

    @Test
    public void test_contains_First() {
        name = "test_contains_First";
        test_contains(COLLECTION, first);
    }

    @Test
    public void test_contains_Mid() {
        name = "test_contains_Mid";
        test_contains(COLLECTION, mid);
    }

    @Test
    public void test_contains_Last() {
        name = "test_contains_Last";
        test_contains(COLLECTION, last);
    }

    private void test_iterator(Collection<String> collection, String target) {
        for (int i = 0; i < nloop; ++i) {
            search_iterator(collection, target);
        }
    }
    private void test_contains(Collection<String> collection, String target) {
        for (int i = 0; i < nloop; ++i) {
            search_contains(collection, target);
        }
    }
    private static boolean search_iterator(Collection<String> collection, String target) {
        for (String s : collection) {
            if (s.equals(target)) {
                return true;
            }
        }
        return false;
    }
    private static boolean search_contains(Collection<String> collection, String target) {
        return collection.contains(target);
    }

}

これで、いくつかのケース(ArrayList、LinkedList、HashSet、LinkedHashSet、TreeSet)について処理時間を計測してみる。

test_iterator_First      ArrayList            100 1000000                0.197 sec
test_iterator_Mid        ArrayList            100 1000000                6.285 sec
test_iterator_Last       ArrayList            100 1000000               12.159 sec
test_contains_First      ArrayList            100 1000000                0.048 sec
test_contains_Mid        ArrayList            100 1000000                3.020 sec
test_contains_Last       ArrayList            100 1000000                5.966 sec
test_iterator_First      LinkedList           100 1000000                0.168 sec
test_iterator_Mid        LinkedList           100 1000000                4.231 sec
test_iterator_Last       LinkedList           100 1000000                8.290 sec
test_contains_First      LinkedList           100 1000000                0.054 sec
test_contains_Mid        LinkedList           100 1000000                3.145 sec
test_contains_Last       LinkedList           100 1000000                6.271 sec
test_iterator_First      HashSet              100 1000000                0.216 sec
test_iterator_Mid        HashSet              100 1000000                7.649 sec
test_iterator_Last       HashSet              100 1000000               14.528 sec
test_contains_First      HashSet              100 1000000                0.070 sec
test_contains_Mid        HashSet              100 1000000                0.067 sec
test_contains_Last       HashSet              100 1000000                0.069 sec
test_iterator_First      LinkedHashSet        100 1000000                0.193 sec
test_iterator_Mid        LinkedHashSet        100 1000000                4.612 sec
test_iterator_Last       LinkedHashSet        100 1000000                8.967 sec
test_contains_First      LinkedHashSet        100 1000000                0.083 sec
test_contains_Mid        LinkedHashSet        100 1000000                0.072 sec
test_contains_Last       LinkedHashSet        100 1000000                0.067 sec
test_iterator_First      TreeSet              100 1000000                0.240 sec
test_iterator_Mid        TreeSet              100 1000000                6.487 sec
test_iterator_Last       TreeSet              100 1000000               12.536 sec
test_contains_First      TreeSet              100 1000000                0.377 sec
test_contains_Mid        TreeSet              100 1000000                0.388 sec
test_contains_Last       TreeSet              100 1000000                0.620 sec

ArrayListはiterationが苦手なので、iteratorを使った処理ではLinkedListより余計に時間がかかる。
List系では、処理時間が線形に増加していくのはまぁ当然。
でも、指定した要素が含まれているかどうか(equals()がtrueを返すかどうか)だけなら、contains()を使ったほうが、処理時間もIteratorオブジェクト生成によるヒープメモリI/Oも節約できるはず。
...
さて、早速あのだめソース(何)を直すかな(ぉ