跳表和散列

最新推荐文章于 2024-07-23 17:03:30 发布

月光迟暮

最新推荐文章于 2024-07-23 17:03:30 发布

阅读量574

点赞数

CC 4.0 BY-SA版权

分类专栏：数据结构与算法应用C++ 文章标签：链表 c++ 数据结构

本文链接：https://blog.csdn.net/weixin_43572802/article/details/113103626

数据结构与算法应用C++ 专栏收录该内容

14 篇文章

订阅专栏

本文深入探讨了三种高效的数据结构：跳表、散列和字典。跳表通过随机化增加向前指针以实现快速查找，平均时间复杂度为O(logn)，但最坏情况下为O(n)。散列提供O(1)的平均查找、插入和删除速度，但在最坏情况下为O(n)。字典是键值对的集合，文中给出了模板类实现，包括查找、插入和删除操作。此外，还介绍了线性探查法解决散列冲突的方法。这些数据结构在实际应用中有着广泛的应用，例如文本压缩算法LZW。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

增加了额外的向前指针的链表叫做跳表。它采用随机技术来决定链表的哪些节点应增加向前指针，以及增加多少个指针。跳表的查找、插入、删除的平均时间复杂度为 $O(\log n)$，然而最坏情况下的时间复杂度却为 $O(n)$。
散列是用来查找、插入、删除的另一种随机方法。与跳表相比，它把操作的平均时间降低到 $O(1)$，但最坏情况下的时间复杂度仍为 $O(n)$。

字典

字典是由一些形如(k,v)的数对组成的集合，其中k是关键字，v是与关键字k对应的值。任意两个数对，其关键字都不等。有关字典的操作有：

确定字典是否为空
确定字典有多少数对
寻找一个指定了关键字的数对
插入一个数对
删除一个指定了关键字的数对

// Abstract interface of a dictionary: a collection of (key, value) pairs.
//   K - key type
//   E - type of the value stored with each key
template<class K,class E>
class dictionary
{
	public:
		virtual ~dictionary(){}

		// Return true if and only if the dictionary holds no pairs.
		virtual bool empty() const = 0;

		// Return the number of pairs in the dictionary.
		virtual int size() const = 0;

		// Return a pointer to the pair whose key matches the argument.
		virtual std::pair<const K, E>* find(const K&) const = 0;

		// Remove the pair whose key matches the argument.
		virtual void erase(const K&) = 0;

		// Insert a pair into the dictionary.
		virtual void insert(const std::pair<const K, E>&) = 0;
};

字典可以存储在线性表 $(p_0,p_1,\ldots)$ 中，其中各 $p_i$ 是字典中的数对，按关键字递增次序排列。为了适应这种表示方式，可以定义两个类 sortedArrayList 和 sortedChain：前者用数组描述线性表，后者用链表描述。

template<class K, class E>
pair<const K,E>* sortedChain<K,E>::find(const K& theKey) const
{// Look up theKey in the chain.
 // Returns a pointer to the matching pair, or NULL when no node matches.
	pairNode<K,E>* node = firstNode;

	// Step along the chain until it ends or a node's key stops comparing
	// unequal to theKey.
	while (node != NULL)
	{
		if (!(node->element.first != theKey))
			break;
		node = node->next;
	}

	// Ran off the end of the chain: no match.
	if (node == NULL)
		return NULL;

	// The scan only used !=, so confirm with == before returning the pair.
	if (node->element.first == theKey)
		return &node->element;
	return NULL;
}

template<class K,class E>
void sortedChain<K,E>::insert(const pair<const K, E>& thePair)
{// Add thePair to the dictionary; when its key is already present,
 // overwrite the value of the existing pair instead.
	// Advance until cursor is the first node whose key is not less than the
	// new key; previous trails one node behind (NULL while cursor is still
	// the first node).
	pairNode<K,E> *previous = NULL;
	pairNode<K,E> *cursor = firstNode;
	for (; cursor != NULL && cursor->element.first < thePair.first;
	       cursor = cursor->next)
		previous = cursor;

	// Same key already stored: replace the old value and stop.
	const bool keyPresent =
		(cursor != NULL && cursor->element.first == thePair.first);
	if (keyPresent)
	{
		cursor->element.second = thePair.second;
		return;
	}

	// No match: create a node from thePair and cursor (pairNode is defined
	// elsewhere; presumably cursor becomes the new node's successor).
	pairNode<K,E> *freshNode = new pairNode<K,E>(thePair, cursor);

	// Hook the node in after previous, or at the front of the chain.
	if (previous != NULL)
		previous->next = freshNode;
	else
		firstNode = freshNode;

	++dSize;
}
template<class K,class E>
void sortedChain<K,E>::erase(const K& theKey)
{// Remove the pair whose key equals theKey; do nothing when no pair matches.
	pairNode<K,E> *p = firstNode,   // node being examined
				  *tp = NULL;       // node just before p (NULL while p is first)

	// Skip every node whose key is smaller than theKey.
	while(p != NULL && p->element.first < theKey)
	{
		tp = p;
		p = p->next;
	}

	// p is now the only candidate; check whether it really matches.
	if(p != NULL && p->element.first == theKey)
	{// found the pair: unlink p from the chain
		if(tp == NULL)firstNode = p->next;   // p is the first node
		else tp->next = p->next;

		delete p;
		dSize--;
	}
}

跳表

增加了额外的向前指针的链表叫做跳表
在这里插入图片描述
在这种结构中有 $n/2^i$ 个元素为 i 级链元素，所以在进行插入时应尽量逼近这种结构。在进行插入时，新元素属于 i 级链的概率为 $1/2^i$。在确定新元素的级时，应考虑各种可能的情况，因此，把新元素作为 i 级链元素的可能性为 $p^i$。
跳表的头结点需要有足够的指针域，以满足最大链表级数的构建需要，而尾结点不需要指针域。

// Node of a skip list: one dictionary pair plus an array of forward pointers.
//   K - key type
//   E - type of the value stored with each key
template <class K,class E>
struct skipNode
{
	typedef std::pair<const K,E> pairType;

	pairType element;        // the (key, value) pair stored in this node
	skipNode<K,E> **next;    // array of forward pointers, one entry per level

	// Build a node holding thePair with room for `size` forward pointers,
	// all of which start out null.
	skipNode(const pairType& thePair, int size)
			:element(thePair), next(new skipNode<K,E>* [size]()) {}

	// Release the pointer array; the nodes it points to are not deleted here.
	~skipNode() {delete [] next;}

	private:
		// Not copyable: a copy would share `next` and delete it twice.
		skipNode(const skipNode&);
		skipNode& operator=(const skipNode&);
};

// skip list data structure, implements dictionary

#ifndef skipList_
#define skipList_



#include <iostream>
#include <math.h>
#include <sstream>
#include <string>
#include "dictionary.h"
#include "skipNode.h"
#include "myExceptions.h"

using namespace std;

// Skip-list implementation of the dictionary interface.
//   K - key type
//   E - type of the value stored with each key
// NOTE(review): the class holds raw node pointers and declares a destructor
// but no copy constructor or copy assignment.
template<class K, class E>
class skipList : public dictionary<K,E> 
{
   public:
      // largeKey: a key larger than every key that will be stored.
      // maxPairs: upper bound on the number of pairs.
      // prob:     probability used when choosing the level of a new node.
      skipList(K largeKey, int maxPairs = 10000, float prob = 0.5);
      ~skipList();

      // Queries.
      bool empty() const {return 0 == dSize;}
      int size() const {return dSize;}
      pair<const K, E>* find(const K& theKey) const;

      // Updates.
      void erase(const K& theKey);
      void insert(const pair<const K, E>& thePair);

      // Write the stored pairs to out.
      void output(ostream& out) const;

   protected:
      // Helpers.
      int level() const;                              // pick a random level number
      skipNode<K,E>* search(const K& theKey) const;   // search, recording the last node seen per level

      // State.
      float cutOff;               // threshold used to decide the level
      int levels;                 // highest non-empty level chain
      int dSize;                  // number of pairs in the dictionary
      int maxLevel;               // highest level chain allowed
      K tailKey;                  // a large key
      skipNode<K,E>* headerNode;  // pointer to the header node
      skipNode<K,E>* tailNode;    // pointer to the tail node
      skipNode<K,E>** last;       // last[i] is the last node seen on level i
};

template<class K, class E>
skipList<K,E>::skipList(K largeKey, int maxPairs, float prob)
{// Build an empty skip list.
 //   largeKey - every stored key must be smaller than this value
 //   maxPairs - largest number of pairs expected; must be >= 1
 //   prob     - level probability; must satisfy 0 < prob < 1
 // Throws illegalParameterValue when maxPairs or prob is out of range.
   // Out-of-range arguments would make the level computation below
   // meaningless (log of a non-positive number, division by log(1) == 0).
   if (maxPairs < 1 || !(prob > 0 && prob < 1))
      throw illegalParameterValue(
               string("skipList requires maxPairs >= 1 and 0 < prob < 1"));

   cutOff = prob * RAND_MAX;
   maxLevel = (int) ceil(logf((float) maxPairs) / logf(1/prob)) - 1;
   if (maxLevel < 0)
      maxLevel = 0;  // maxPairs == 1 yields -1; level 0 must always exist
   levels = 0;  // only level 0 is in use initially
   dSize = 0;
   tailKey = largeKey;

   // create the header node, the tail node and the array last
   pair<K,E> tailPair;
   tailPair.first = tailKey;
   headerNode = new skipNode<K,E> (tailPair, maxLevel + 1);
   tailNode = new skipNode<K,E> (tailPair, 0);
   last = new skipNode<K,E> *[maxLevel+1];

   // the list is empty: on every level the header points at the tail
   for (int i = 0; i <= maxLevel; i++)
       headerNode->next[i] = tailNode;
}

template<class K, class E>
skipList<K,E>::~skipList()
{// Delete every node on the level 0 chain, then the tail node and the array last.
   // headerNode itself is advanced along the level 0 chain; each node's
   // successor is read before the node is deleted.
   for (skipNode<K,E> *following = NULL; headerNode != tailNode;
        headerNode = following)
   {
      following = headerNode->next[0];
      delete headerNode;
   }

   delete tailNode;
   delete [] last;
}

template<class K, class E>
pair<const K,E>* skipList<K,E>::find(const K& theKey) const
{// Return a pointer to the pair whose key is theKey, or NULL if there is none.
   // Keys at or above tailKey can never be stored.
   if (theKey >= tailKey)
      return NULL;

   // Descend from the highest chain in use to chain 0. On each chain, move
   // cursor right while the next node's key is still smaller than theKey.
   skipNode<K,E>* cursor = headerNode;
   int chain = levels;
   while (chain >= 0)
   {
      for (skipNode<K,E>* ahead = cursor->next[chain];
           ahead->element.first < theKey;
           ahead = cursor->next[chain])
         cursor = ahead;
      --chain;
   }

   // The node right after cursor on chain 0 is the only possible match.
   skipNode<K,E>* candidate = cursor->next[0];
   if (candidate->element.first == theKey)
      return &candidate->element;

   return NULL;
}

template<class K, class E>
int skipList<K,E>::level() const
{// Return a random level number that is never larger than maxLevel.
   int lev = 0;
   // Each successful draw (rand() <= cutOff) raises the level by one.
   // Stopping at maxLevel keeps the loop finite even when cutOff is so
   // large that every draw succeeds.
   while (lev < maxLevel && rand() <= cutOff)
      lev++;
   return (lev <= maxLevel) ? lev : maxLevel;
}

template<class K, class E>
skipNode<K,E>* skipList<K,E>::search(const K& theKey) const
{// Search for theKey, storing in last[i] the last node visited on chain i.
 // Returns the node that follows the final position on chain 0; the caller
 // checks whether that node actually holds theKey.
   skipNode<K,E>* cursor = headerNode;
   int chain = levels;
   while (chain >= 0)
   {
      // Move right while the next key on this chain is smaller than theKey.
      while (cursor->next[chain]->element.first < theKey)
         cursor = cursor->next[chain];

      last[chain] = cursor;  // rightmost node visited on this chain
      --chain;
   }
   return cursor->next[0];
}

template<class K, class E>
void skipList<K,E>::insert(const pair<const K, E>& thePair)
{// Insert thePair; if a pair with the same key exists, overwrite its value.
 // Throws illegalParameterValue when the key is not smaller than tailKey.
   const K& newKey = thePair.first;
   if (newKey >= tailKey)
   {// key too large
      ostringstream message;
      message << "Key = " << newKey << " Must be < " << tailKey;
      throw illegalParameterValue(message.str());
   }

   // search() also fills last[] with the predecessor on every chain in use.
   skipNode<K,E>* hit = search(newKey);
   if (hit->element.first == newKey)
   {// key already present: just update the value
      hit->element.second = thePair.second;
      return;
   }

   // Key is new: choose its level, growing the list by at most one level.
   int nodeLevel = level();
   if (nodeLevel > levels)
   {
      levels += 1;
      nodeLevel = levels;
      last[nodeLevel] = headerNode;  // on the new chain the header is the predecessor
   }

   // Splice the new node into chains 0..nodeLevel, right after last[chain].
   skipNode<K,E>* fresh = new skipNode<K,E>(thePair, nodeLevel + 1);
   int chain = 0;
   while (chain <= nodeLevel)
   {
      fresh->next[chain] = last[chain]->next[chain];
      last[chain]->next[chain] = fresh;
      ++chain;
   }

   ++dSize;
}

template<class K, class E>
void skipList<K,E>::erase(const K& theKey)
{// Remove the pair whose key is theKey; do nothing when it is absent.
   // Keys at or above tailKey are never stored.
   if (theKey >= tailKey)
      return;

   // search() records in last[] the predecessor on every chain in use.
   skipNode<K,E>* victim = search(theKey);
   if (victim->element.first != theKey)
      return;  // no pair with this key

   // Unlink the node chain by chain, from chain 0 upward, for as long as
   // the recorded predecessor points at it.
   int chain = 0;
   while (chain <= levels && last[chain]->next[chain] == victim)
   {
      last[chain]->next[chain] = victim->next[chain];
      ++chain;
   }

   // Lower levels while the top chain is empty (header points straight at tail).
   for (; levels > 0 && headerNode->next[levels] == tailNode; --levels)
   {
   }

   delete victim;
   --dSize;
}

template<class K, class E>
void skipList<K,E>::output(ostream& out) const
{// Write every pair to out by walking the level 0 chain from the node after
 // the header up to (not including) the tail node.
   skipNode<K,E>* node = headerNode->next[0];
   while (node != tailNode)
   {
      // key, one space, value, two spaces
      out << node->element.first << " " << node->element.second << "  ";
      node = node->next[0];
   }
}

// Stream insertion for skipList: delegates to x.output(out) and returns the
// stream so that insertions can be chained.
template <class K, class E>
ostream& operator<<(ostream& out, const skipList<K,E>& x)
{
   x.output(out);
   return out;
}

#endif

散列表

字典的另一种表示方法是散列（hashing），它用一个散列函数（也称哈希函数）把数对映射到一个散列表的具体位置。如果数对p的关键字是k，散列函数为f，那么在理想状况下，p在散列表中的位置为 $f (k)$ 。

散列函数和散列表

桶和起始桶

当关键字的范围太大，不能用理想方法表示时，可以采用并不理想的散列表和散列函数：散列表位置的数量比关键字的个数少，散列函数把若干个不同的关键字映射到散列表的同一个位置。散列表的每一个位置叫一个桶；对关键字为k的数对， $f (k)$ 是起始桶；桶的数量等于散列表的长度或大小。

除法散列函数

在多种散列函数中最常用的是除法散列函数，它的形式如下：
$f(k)=k\%D$
其中k是关键字，D是散列表的长度(即桶的数量)，%为求模操作符。散列表的位置索引从0到D-1，D=11时，与关键字3、22、27、40、80和96分别对应的起始桶是3,0,5,7,3,8。

冲突和溢出

当两个不同的关键字所对应的起始桶相同时，就是冲突发生了；如果存储桶没有空间存储一个新数对，就是溢出发生了。溢出处理方法中最常见的是线性探查法。

良好的散列函数

除非一个桶可以容纳无限多个数对，否则插入时的溢出就不是那么容易解决的问题了，当映射到散列表中任何一个桶里的关键字数量大致相等时，冲突和溢出的平均数最少。均匀散列函数便是这样的函数。

除法和非整型关键字

把一个字符串转换为一个不唯一的整数

int stringToInt(std::string s)
{// Convert s into a non-negative integer that depends on every character of s.
 // Different strings may map to the same integer.
	int length = (int) s.length();
	int answer = 0;
	if ( length % 2 == 1)
	{// odd length: start from the last character so the rest pairs up evenly
		answer = s.at(length - 1);
		length--;
	}
	// length is now even
	for(int i = 0; i < length;i += 2)
	{// fold in two characters at a time: low byte, then high byte
		answer += s.at(i);
		answer += ((int) s.at(i+1)) << 8;
	}
	return (answer < 0) ? -answer : answer;
}

线性探查

线性探查用来解决溢出，最简单的方法是找到下一个可用的桶。线性探查的搜索方法为（把散列表当成环表）：假设要查找关键字为 k 的数对，首先搜索起始桶 $f(k)$，然后把散列表当做环表继续搜索下一个桶，直到以下情况之一发生为止：1）存有关键字 k 的桶已经找到；2）到达一个空桶；3）又回到起始桶 $f(k)$。

// Data members of the hash table
pair<const K,E>** table;       // the hash table: an array of pointers to pairs (NULL marks an empty bucket)
hash<K> hash;                  // maps a key of type K to a value that search() casts to int
                               // NOTE(review): the original comment said "non-integer";
                               // presumably "non-negative integer" was meant - confirm
int dSize;                     // number of pairs in the dictionary
int divisor;				   // divisor of the hash function
// Constructor

template<class K,class E>
hashTable<K,E>::hashTable(int theDivisor)
{// Create an empty hash table with theDivisor buckets.
	dSize = 0;
	divisor = theDivisor;

	// Allocate the bucket array and mark every bucket as empty.
	table = new pair<const K, E>* [divisor];
	pair<const K, E>** const pastLast = table + divisor;
	for(pair<const K, E>** bucket = table; bucket != pastLast; ++bucket)
		*bucket = NULL;
}

template<class K, class E>
int hashTable<K,E>::search(const K& theKey) const
{// Search an open-addressed hash table for the pair with key theKey.
 // Returns the bucket of the matching pair if there is one; otherwise, when
 // the table is not full, the bucket where such a pair may be inserted.
 // When the table is full and holds no match, the home bucket is returned.

   int i = (int) hash(theKey) % divisor;  // home bucket
   // The cast to int (or a negative hash value) can make the remainder
   // negative; shift it back into 0 .. divisor-1 so it is a valid index.
   if (i < 0)
      i += divisor;

   int j = i;    // start probing at the home bucket
   do
   {
      if (table[j] == NULL || table[j]->first == theKey)
         return j;
      j = (j + 1) % divisor;  // next bucket, wrapping around
   } while (j != i);          // stop once back at the home bucket

   return j;  // table is full
}

template<class K, class E>
pair<const K,E>* hashTable<K,E>::find(const K& theKey) const
{// Return a pointer to the pair whose key is theKey, or NULL if there is none.
   // search() yields the bucket that holds the pair or would receive it.
   const int slot = search(theKey);
   pair<const K,E>* occupant = table[slot];

   // A hit requires an occupied bucket whose key does not differ from theKey.
   if (occupant != NULL && !(occupant->first != theKey))
      return occupant;

   return NULL;  // empty bucket or a different key
}

template<class K, class E>
void hashTable<K,E>::insert(const pair<const K, E>& thePair)
{// Insert thePair; if a pair with the same key exists, overwrite its value.
 // Throws hashTableFull when the table has no room for a new key.
   const int slot = search(thePair.first);

   // Empty bucket: the key is new and there is room for it.
   if (table[slot] == NULL)
   {
      table[slot] = new pair<const K,E> (thePair);
      dSize++;
      return;
   }

   // Occupied bucket with the same key: replace the stored value.
   if (table[slot]->first == thePair.first)
   {
      table[slot]->second = thePair.second;
      return;
   }

   // Occupied bucket with a different key: the table is full.
   throw hashTableFull();
}

设 b 为散列表的桶数，D 为散列函数的除数，且 b=D。令 $U_n$ 和 $S_n$ 分别表示一次不成功搜索和一次成功搜索中平均搜索的桶数。
$\begin{cases} U_n\approx\frac{1}{2}(1+\frac{1}{(1-\alpha)^2})\\ S_n\approx\frac{1}{2}(1+\frac{1}{(1-\alpha)}) \end{cases}$
$\alpha=n/b$ 为负载因子。一般情况下设置 $\alpha\le0.75$