Path: utzoo!utgpu!news-server.csri.toronto.edu!mailrus!cs.utexas.edu!samsung!usc!apple!voder!pyramid!infmx!briand From: briand@infmx.UUCP (brian donat) Newsgroups: comp.lang.c++ Subject: Beginner's Corner [Hash Table w. X(const X&) constuctor bug] Keywords: Con-found-it-any-way! Message-ID: <4366@infmx.UUCP> Date: 31 May 90 22:21:09 GMT Organization: Informix Software Inc., Menlo Park, CA. Lines: 446 Hello again, and welcome back to Beginner's Corner.... where an absolute novice beginner at C++ programming (me), displays his meager attempts at the language for your purusal and criticism, thereby hoping to wring some greater insight out of what's gotta be a new and exciting way to build software tools. This week, your host (me), has expanded his previous code, to create a hash-table class. Two unique problems presented themselves beyond the simple implementation of methods for the hash-table class. First, constructors of the form X(const X&) were explored; these, it was hoped, would allow assignments of the form: Objtype NewObj = OldObj such that automatic memberwise initialization is defeated in those cases where pointers are part of the class definition. A very nasty C++ shortcoming was found here when dealing with a pointer to an array of type Obj. Secondly, a method was explored for pulling nodes (intact) out of one list, and sticking them in another, without having to copy them. If you figure you know already what I'm talking about here, don't just punch out, at least read far enough to see what my beginner questions and doubts are. Share your wisdom on these matters if you can. If you saved my previous code, add-in or edit-in the following changes to upgrade it to reflect the current discussion: ------------------------------ CUT HERE ---------------------------------- /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ // \\ // vlist.h Version 3.00 \\ // A linked-list object: May 30, 1990, Brian L. Donat \\ // \\ ... [ everything stays the same here up to here ...] ////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\ // VLIST is the actual doubly linked list class // class VLIST { int listsize; _VLIST_NODE *head, *tail; _VLIST_NODE *cnode; // current _VLIST_NODE or copy _VLIST_NODE. public: VLIST(); // add in the X(const X&) constructor. VLIST(const VLIST&); ~VLIST(); .... // Redefine these member functions with default initializations // as shown here. virtual void insert(ELEMENT E = 0, _VLIST_NODE *newptr = 0); virtual void add(ELEMENT E = 0, _VLIST_NODE *newptr = 0) { cnode = tail; insert(E, newptr); } virtual _VLIST_NODE *remove(int mode = 0); .... }; ------------------------------ CUT HERE ---------------------------------- As you can see, besides providing an explicit X(const X&) constructor for VLIST, insert() has been altered so that it can except either an ELEMENT or a _VLIST_NODE as input. This is in turn propagated to add(). remove() has been altered to that it can either delete the current node or optionally, extract it. Of course, once you pull a node out, you can always put it back, but in the meantime, can remember it's address external to the list, and in this way, maintain more (as one person said) bookmarks on the list. This should at least suggest a better way of obtaining such bookmarks, since removing and re-inserting isn't the most efficient way of doing that sort of thing. Note that I nulled the pointers on the node if it's extracted. Otherwise, it might still point, dangerously, into the list. Here're the changes to vlist.c. The other files for stacks, queues and lists remain unaltered. ------------------------------ CUT HERE ---------------------------------- /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ // \\ // vlist.c Version 3.00 \\ // VLIST class member functions: May 30, 1990, Brian L. Donat \\ // \\ /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ #include #include "vlist.h" #define FALSE 0 #define TRUE 1 int listError = FALSE; ... [ No changes till here ] // X(const X&) to duplicate a list. VLIST::VLIST(const VLIST &cList) : listsize(cList.listsize), head(new _VLIST_NODE), tail(new _VLIST_NODE), cnode(head) { // Build an empty list to start. if(!head || !tail) abort(); // No Memory?! head->previous = tail->next = 0; head->next = tail; tail->previous = head; // Add each node from cList. int i = listsize; listsize = 0; for(_VLIST_NODE *nptr = cList.head->next; i > 0; i--, nptr = nptr->next) { add(nptr->data); } } VLIST::~VLIST() { ... // The above kinda looks like VLIST::VLIST(), but look again! // In order to copy node per node, one VLIST object to assign // it to another, completely duplicating it, not just sharing // a pointer, we have to // // 1. Get the size of the former. // 2. Start out the new VLIST object. // 3. Sequentially duplicate each node in the former. // // When we're all done, we can do the following: // // VLIST newOBJ = oldOBJ; // // and get a new object with identical list nodes as the first. // Changes in one, will not be reflected in the other. // .... [ no more changes til ...] // Now here, we can insert either a new ELEMENT or a // new _VLIST_NODE as follows: // // _VLIST_NODE *vp = intList.remove(1); // ..... // intList.insert(0, vp); // // My only headache here, is that I must use syntax // which includes both the ELEMENT and the _VLIST_NODE // when inserting a _VLIST_NODE. I can't just do // intList.insert(vp); // not without rewriting a completely new overloaded // version of insert(). // // Any body know of a way around this? void VLIST::insert(ELEMENT E, _VLIST_NODE *newptr) { // If at the head of an empty list, fake an add(). _VLIST_NODE *pptr = cnode->previous; if(!listsize && !cnode->previous) { cnode = tail; pptr = head; } // build new _VLIST_NODE if((int)(void *)newptr != 0) ; else { newptr = new _VLIST_NODE; newptr->data = E; } newptr->next = cnode; newptr->previous = cnode->previous; // Update Adjacent Nodes pptr->next = newptr; cnode->previous = newptr; cnode = newptr; listsize++; } _VLIST_NODE *VLIST::remove(int mode) { listError = FALSE; _VLIST_NODE *rptr = 0; if(!listsize) listError = TRUE; else if(!cnode->previous) listError = TRUE; else if(!cnode->next) listError = TRUE; else { _VLIST_NODE *pptr = cnode->previous; _VLIST_NODE *nptr = cnode->next; // let the previous and next _VLIST_NODEs reference each other. pptr->next = cnode->next; nptr->previous = cnode->previous; // remove or take the current node. if(mode) { rptr = cnode; rptr->next = rptr->previous = 0; } else delete cnode; // If the next pointer is not tail, make it current, // else make the previous pointer the current _VLIST_NODE. cnode = (nptr->next) ? nptr : pptr; listsize--; } return rptr; } ... [ no more changes till end of vlist.c] ELEMENT VLIST::getval(void) const { listError = FALSE; if(!cnode->previous || !cnode->next) listError = TRUE; return cnode->data; } // end ------------------------------ CUT HERE ---------------------------------- Here's the new hash table class. It features a function to deliver a prime number >= to the desired table size, because hash tables work best when their size == prime. There is also a function to generate a hashvalue which more or less gives a fairly good distribution over the table for both random and ordered insertion sequences. The particularly nasty part of this object is X(const X&). htable starts out a pointer, but is used as representing the table itself as an array. You'll find, if you experiment a bit with this, that you can't use X(X&). You must use X(const X&). And ... You can't change the index of the array inside of X(const X&) because X is qualified as const. This is apparently the case with all array types declared 'const'. Therefore when you say HASH newhash = oldhash; You never get an exact copy. Only a new table with at best, the same size. Intuitively, you find yourself thinking that changing the array index has nothing to do with disturbing the array contents. But remember, htable is a pointer. 'It' must remain constant. ------------------------------ CUT HERE ---------------------------------- /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ // \\ // hash.h Version 1.00 \\ // HASH class derived from VLIST: May 29, 1990, Brian L. Donat \\ // \\ /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ #define MAXHASH 20 #define hashError listError extern int listError; extern class VLIST; class HASH : private VLIST { int size; VLIST *htable; int prime(int&); int hashval(int); public: HASH(); HASH(const HASH&); HASH(int); ~HASH(); void store(ELEMENT); void update(ELEMENT, ELEMENT); void kill(ELEMENT); ELEMENT retrieve(ELEMENT); void hdump(); }; ------------------------------ CUT HERE ---------------------------------- Here are the members of HASH. I've left some commented out code, a for-loop in the X(const X&) constructor so that anybody willing to can experiment along that same lines that I was, trying to get the thing to do a list per list, node per node duplication on assignment. Also, pay careful attention to the comments above hashval(). I haven't yet decided on how to get this function to support genericity, as you can see. Operation of the hash table methods should be obvious enough. You might want to add some random find '*' and next '*' type methods, if you don't implement this object in such a way that its contents are remembered externally. Remember, you can say HASH hashobj; or HASH hashobj = ; or HASH hashobj(size); ------------------------------ CUT HERE ---------------------------------- /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ // \\ // hash.c Version 1.00 \\ // HASH member functions : May 29, 1990, Brian L. Donat \\ // \\ /////////////////////////////////////\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ #define TRUE 1 #define FALSE 0 #define MINHASH 11 #include #include "vlist.h" #include "hash.h" const ELEMENT EDUMMY = 0; // Convert requested table size to a prime number >= size. // Hash Tables work better with prime number table sizes. int HASH::prime(int &size) { if(size < MINHASH) size = MINHASH; // Assure a minimum table size. for(int i = 2; i <= size; i++) { for(int j = 2; i % j; j++) ; if(j < size && i >= size) { size++; continue; } // prime >= size. } return (HASH::size = --i); } // Very Simple Hashing Function. // Careful! Notice that ELEMENT is currently 'int'. // There's a definite need for conversion functions // here, if genericity is to be supported. int HASH::hashval(ELEMENT E) { return ((E ^= 0xf0) << 4) % size; } // Hash Table uses array of VLIST Objets of size = prime(size); HASH::HASH() : size(MAXHASH), htable(new VLIST[prime(size)]) {} HASH::HASH(int size) : htable(new VLIST[prime(size)]) {} HASH::HASH(const HASH &cHT) : size(cHT.size), htable(new VLIST[prime(size)]) { /* for(int i = 0; i < size; i++) { cHT[i].headend(); for(int j = cHT[i].getsize(); j > 0; j--) { htable[i].add(cHT[i].getval()); cHT[i].step(); } } */ } HASH::~HASH() { // You've got to delete the lists first. for(int i = 0; i < size; i++) { VLIST *vp = &htable[i]; vp->VLIST::~VLIST(); } delete [size] htable; } void HASH::store(ELEMENT E) { htable[hashval(E)].insert(E); } void HASH::kill(ELEMENT E) { hashError = FALSE; if(htable[hashval(E)].find(E)) htable[hashval(E)].remove(); else hashError = TRUE; } void HASH::update(ELEMENT E, ELEMENT F) { hashError = FALSE; kill(E); if(!hashError) store(F); } ELEMENT HASH::retrieve(ELEMENT E) { hashError = FALSE; if(htable[hashval(E)].find(E)) return htable[hashval(E)].getval(); else { hashError = TRUE; return EDUMMY; } } // This is more of a diagnostic than // anything else, but its structure // should indicate a means for checking // the table for forgotten entries. void HASH::hdump() { hashError = FALSE; for(int i = 0; i < size; i++) { htable[i].headend(); cout << "\nList #" << i << ": "; for(int j = htable[i].getsize(); j > 0; j--) { ELEMENT E = htable[i].getval(); if(hashError) { cout << " End of List" << flush; hashError = FALSE; break; } cout << E; if(j > 1) cout << ","; htable[i].step(); } } } ------------------------------ CUT HERE ---------------------------------- --infmx!briand@pyramid