Добавил:
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:

Beginning Algorithms (2006)

.pdf
Скачиваний:
252
Добавлен:
17.08.2013
Размер:
9.67 Mб
Скачать

Chapter 12

Follow those with the list set class itself:

package com.wrox.algorithms.sets;

import com.wrox.algorithms.iteration.Iterator; import com.wrox.algorithms.lists.LinkedList; import com.wrox.algorithms.lists.List;

public class ListSet implements Set {

private final List _values = new LinkedList();

public boolean contains(Object value) { return _values.contains(value);

}

public boolean add(Object value) { if (contains(value)) {

return false;

}

_values.add(value); return true;

}

public boolean delete(Object value) { return _values.delete(value);

}

public void clear() { _values.clear();

}

public int size() { return _values.size();

}

public boolean isEmpty() { return _values.isEmpty();

}

public Iterator iterator() { return _values.iterator();

}

}

How It Works

The ListSetTest class simply extends AbstractSetTestCase, and in doing so inherits all the test cases. The only other thing you did was implement the createSet() method to return an instance of the ListSet class to be used by the test cases themselves.

Implementing the ListSet class itself is fairly straightforward. For the most part, you delegate the methods on the Set interface directly to equivalent methods on the underlying list.

304

Sets

A linked list is used as the underlying storage mechanism, though technically any list implementation will suffice. Almost all the methods are one-liners, delegating directly to methods on the underlying list — the exception, of course, being, add().

The add() method first determines whether the value to be added already exists in the underlying list. If it does, false is returned to indicate that the set has not been changed; otherwise, the new value is added:

public boolean add(Object value) { if (contains(value)) {

return false;

}

_values.add(value); return true;

}

As you can see, a list-based set is very simple. The add(), delete(), and contains() methods all perform in O(N) time, which is probably sufficient for handling small numbers of values.

A Hash Set

If you are storing a relatively large number of values and ordering is not important, then a set implementation based on hash tables (covered in Chapter 11) is a good choice. In the next Try It Out section, you implement a hash set, so you may wish to briefly go over hashing again to familiarize yourself with the concepts, especially the implementation of hash tables that use buckets.

Try It Out

Testing and Implementing a Hash Set

Start by creating the test class:

package com.wrox.algorithms.sets;

public class HashSetTest extends AbstractSetTestCase { protected Set createSet() {

return new HashSet();

}

}

Then create the hash set class:

package com.wrox.algorithms.sets;

import com.wrox.algorithms.hashing.HashtableIterator; import com.wrox.algorithms.iteration.ArrayIterator; import com.wrox.algorithms.iteration.Iterator;

public class HashSet implements Set {

public static final int DEFAULT_CAPACITY = 17;

public static final float DEFAULT_LOAD_FACTOR = 0.75f;

305

Chapter 12

private final int _initialCapacity; private final float _loadFactor; private ListSet[] _buckets; private int _size;

public HashSet() {

this(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR);

}

public HashSet(int initialCapacity) { this(initialCapacity, DEFAULT_LOAD_FACTOR);

}

public HashSet(int initialCapacity, float loadFactor) {

assert initialCapacity > 0 : “initialCapacity can’t be < 1”; assert loadFactor > 0 : “loadFactor can’t be <= 0”;

_initialCapacity = initialCapacity; _loadFactor = loadFactor;

clear();

}

public boolean contains(Object value) {

ListSet bucket = _buckets[bucketIndexFor(value)]; return bucket != null && bucket.contains(value);

}

public boolean add(Object value) { ListSet bucket = bucketFor(value);

if (bucket.add(value)) { ++_size; maintainLoad(); return true;

}

return false;

}

public boolean delete(Object value) {

int bucketIndex = bucketIndexFor(value); ListSet bucket = _buckets[bucketIndex];

if (bucket != null && bucket.delete(value)) { --_size;

if (bucket.isEmpty()) { _buckets[bucketIndex] = null;

}

return true;

}

return false;

}

public Iterator iterator() {

return new HashtableIterator(new ArrayIterator(_buckets));

306

Sets

}

public void clear() {

_buckets = new ListSet[_initialCapacity]; _size = 0;

}

public int size() { return _size;

}

public boolean isEmpty() { return size() == 0;

}

private ListSet bucketFor(Object value) { int bucketIndex = bucketIndexFor(value);

ListSet bucket = _buckets[bucketIndex]; if (bucket == null) {

bucket = new ListSet(); _buckets[bucketIndex] = bucket;

}

return bucket;

}

private int bucketIndexFor(Object value) { assert value != null : “value can’t be null”;

return Math.abs(value.hashCode() % _buckets.length);

}

private void maintainLoad() { if (loadFactorExceeded()) {

resize();

}

}

private boolean loadFactorExceeded() {

return size() > _buckets.length * _loadFactor;

}

private void resize() {

HashSet copy = new HashSet(_buckets.length * 2, _loadFactor);

for (int i = 0; i < _buckets.length; ++i) { if (_buckets[i] != null) {

copy.addAll(_buckets[i].iterator());

}

}

_buckets = copy._buckets;

}

307

Chapter 12

private void addAll(Iterator values) {

assert values != null : “values can’t be null”;

for (values.first(); !values.isDone(); values.next()) {

add(values.current());

}

}

}

How It Works

Once again, the HashSetTest class extends AbstractSetTestCase, and you implement the createSet() method to return an instance of a HashSet to be tested.

For the most part, the implementation of HashSet is a direct copy of the BucketingHashtable code from Chapter 11, so we confine the discussion of the code to only the differences between it and the original BucketingHashtable implementation, which are required to fulfill the Set interface.

Besides actually implementing the Set interface, the first major difference between HashSet and BucketingHashtable is that instead of using a List for the buckets, you’ve instead used a ListSet. In a sense, a bucket really is a set — it cannot contain duplicate values — and the hash set merely distributes values among the different sets (based on the hash code) in order to reduce lookup times. Therefore, by using a set instead of a list for your buckets, you not only simplify the code, but more importantly, you clarify the overall intent of the code. This is reflected in the add() method by removing the need to call contains() on the bucket before adding the new value:

public boolean add(Object value) { ListSet bucket = bucketFor(value);

if (bucket.add(value)) { ++_size; maintainLoad(); return true;

}

return false;

}

The next difference is that you’ve added a delete() method, as required by the Set interface. Again, as with add(), you can take advantage of the fact that the buckets are themselves sets, so that once the appropriate bucket has been found, a simple call to delete() on the bucket is all that is needed to remove the value:

public boolean delete(Object value) {

int bucketIndex = bucketIndexFor(value); ListSet bucket = _buckets[bucketIndex];

if (bucket != null && bucket.delete(value)) { --_size;

if (bucket.isEmpty()) { _buckets[bucketIndex] = null;

}

return true;

308

Sets

}

return false;

}

Lastly, you’ve implemented the iterator() method to allow traversal of all the contained values. Here, you’ve used the HashtableIterator from Chapter 11. Note that this was possible because HashtableIterator is based on the Iterable interface, rather than a List.

Other than that, the only other thing you’ve done is add some convenience constructors for usability, but HashSet is pretty much a carbon copy of BucketingHashtable from Chapter 11.

Given the use of a hash table based on buckets, and assuming a good hash function in the form of the hashCode method on the values being stored, you can expect to achieve fairly close to O(1) performance. Of course, as noted at the start of this section, the use of hashing necessarily precludes any notion of ordering, so an iterator will appear to return the values randomly.

A Tree Set

Sets don’t usually guarantee an ordering of the data. Sometimes, though, you may need a predictable iteration order — for example, when displaying options from which a user selects, or maybe in an alphabetical list of names from an address book, all while maintaining set semantics. Binary search trees (see Chapter 10) provide exactly the data structure you need.

Before proceeding with the implementation of tree-based sets, we recommend that you revisit binary search trees to refresh your understanding of the core concepts and code because the discussion

will again be limited to only the differences between the TreeSet code presented and the original

BinarySearchTree code.

Try It Out

Testing and Implementing a Tree Map

Start by creating the TreeSetTest class as follows:

package com.wrox.algorithms.sets;

public class TreeSetTest extends AbstractSetTestCase { protected Set createSet() {

return new TreeSet();

}

}

Follow the class with the tree set implementation:

package com.wrox.algorithms.sets;

import com.wrox.algorithms.iteration.Iterator;

import com.wrox.algorithms.iteration.IteratorOutOfBoundsException; import com.wrox.algorithms.sorting.Comparator;

import com.wrox.algorithms.sorting.NaturalComparator;

309

Chapter 12

public class TreeSet implements Set { private final Comparator _comparator; private Node _root;

private int _size;

public TreeSet() { this(NaturalComparator.INSTANCE);

}

public TreeSet(Comparator comparator) {

assert comparator != null : “comparator can’t be null”; _comparator = comparator;

}

public boolean contains(Object value) { return search(value) != null;

}

public boolean add(Object value) { Node parent = null;

Node node = _root; int cmp = 0;

while (node != null) { parent = node;

cmp = _comparator.compare(value, node.getValue()); if (cmp == 0) {

return false;

}

node = cmp < 0 ? node.getSmaller() : node.getLarger();

}

Node inserted = new Node(parent, value);

if (parent == null) { _root = inserted;

}else if (cmp < 0) { parent.setSmaller(inserted);

}else { parent.setLarger(inserted);

}

++_size; return true;

}

public boolean delete(Object value) { Node node = search(value);

if (node == null) { return false;

}

Node deleted = node.getSmaller() != null && node.getLarger() != null ? node.successor() : node;

310

Sets

assert deleted != null : “deleted can’t be null”;

Node replacement = deleted.getSmaller() != null ? deleted.getSmaller() : deleted.getLarger();

if (replacement != null) { replacement.setParent(deleted.getParent());

}

if (deleted == _root) { _root = replacement;

}else if (deleted.isSmaller()) { deleted.getParent().setSmaller(replacement);

}else {

deleted.getParent().setLarger(replacement);

}

if (deleted != node) {

Object deletedValue = node.getValue(); node.setValue(deleted.getValue()); deleted.setValue(deletedValue);

}

--_size; return true;

}

public Iterator iterator() { return new ValueIterator();

}

public void clear() { _root = null; _size = 0;

}

public int size() { return _size;

}

public boolean isEmpty() { return _root == null;

}

private Node search(Object value) {

assert value != null : “value can’t be null”;

Node node = _root;

while (node != null) {

int cmp = _comparator.compare(value, node.getValue()); if (cmp == 0) {

break;

}

311

Chapter 12

node = cmp < 0 ? node.getSmaller() : node.getLarger();

}

return node;

}

private static final class Node { private Object _value; private Node _parent; private Node _smaller; private Node _larger;

public Node(Node parent, Object value) { setParent(parent);

setValue(value);

}

public Object getValue() { return _value;

}

public void setValue(Object value) {

assert value != null : “value can’t be null”; _value = value;

}

public Node getParent() { return _parent;

}

public void setParent(Node parent) { _parent = parent;

}

public Node getSmaller() { return _smaller;

}

public void setSmaller(Node node) {

assert node != getLarger() : “smaller can’t be the same as larger”; _smaller = node;

}

public Node getLarger() { return _larger;

}

public void setLarger(Node node) {

assert node != getSmaller() : “larger can’t be the same as smaller”; _larger = node;

}

public boolean isSmaller() {

312

Sets

return getParent() != null && this == getParent().getSmaller();

}

public boolean isLarger() {

return getParent() != null && this == getParent().getLarger();

}

public Node minimum() { Node node = this;

while (node.getSmaller() != null) { node = node.getSmaller();

}

return node;

}

public Node maximum() { Node node = this;

while (node.getLarger() != null) { node = node.getLarger();

}

return node;

}

public Node successor() {

if (getLarger() != null) { return getLarger().minimum();

}

Node node = this;

while (node.isLarger()) { node = node.getParent();

}

return node.getParent();

}

public Node predecessor() {

if (getSmaller() != null) { return getSmaller().maximum();

}

Node node = this;

while (node.isSmaller()) { node = node.getParent();

}

return node.getParent();

}

313