日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

基于哈夫曼编码完成的文件压缩及解压

發布時間:2024/7/19 编程问答 44 豆豆
生活随笔 收集整理的這篇文章主要介紹了 基于哈夫曼编码完成的文件压缩及解压 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

這幾天在較為認真的研究基于哈夫曼編碼的文件壓縮及解壓,費了點時間,在這分享一下:


這里用鏈式結構,非順序表結構;


文件壓縮:

1.獲取文件信息(這里采用TXT格式文本);

2.壓縮文件;

3.寫配置文件(便于解壓時用,無非就是存放原文件的索引之類的,比如說,文件中某個字符出現的個數,記錄下來)

4.解壓縮,使用壓縮后的文件和配置文件解壓文件;

5.用比對軟件,比對解壓后的文件和源文件是否相同;


下面慢慢解析:

先看一個文件信息類:

typedef long long LongType; struct FileInfo {unsigned char _ch; //字符LongType _count; //字符出現次數string _code; //字符對應的哈夫曼編碼 FileInfo(unsigned char ch = 0):_ch(ch),_count(0){}FileInfo operator+(const FileInfo& x){FileInfo tmp;tmp._count = this->_count + x._count;return tmp;}bool operator !=(const FileInfo& x) const{return this->_count != x._count;} };bool operator<(const FileInfo info1,const FileInfo info2) {return info1._count < info2._count; }此為一個文件信息的類結構,包含字符,字符對應出現的次數,以及這個字符對應的哈夫曼編碼(能看到這篇博客的星弟,對哈夫曼編碼不會陌生,這里不再強調)

除了統計字符出現的次數及哈夫曼編碼,還完成了幾個運算符的重載


要獲取哈夫曼編碼,就得建立哈夫曼樹,建立哈夫曼樹用最小堆取操作,以下是最小堆建立過程

// 小堆 template<class T> struct Less {bool operator() (const T& l, const T& r){return l < r; // operator<}};template<class T> struct Greater {bool operator() (const T& l, const T& r){return l > r; // operator<} };template<class T, class Compare = Less<T>> class Heap { public:Heap(){}Heap(const T* a, size_t size){for (size_t i = 0; i < size; ++i){_arrays.push_back(a[i]);}// 建堆for(int i = (_arrays.size()-2)/2; i >= 0; --i){AdjustDown(i);}}void Push(const T& x){_arrays.push_back(x);AdjustUp(_arrays.size()-1);}void Pop(){assert(_arrays.size() > 0);swap(_arrays[0], _arrays[_arrays.size() - 1]);_arrays.pop_back();AdjustDown(0);}T& Top(){assert(_arrays.size() > 0);return _arrays[0];}bool Empty(){return _arrays.empty();}int Size(){return _arrays.size();}void AdjustDown(int root){int child = root*2 + 1;// Compare com;while (child < _arrays.size()){// 比較出左右孩子中小的那個if (child+1<_arrays.size() &&*_arrays[child+1] < _arrays[child])//if(child+1<_arrays.size() &&// com(_arrays[child+1],_arrays[child])){++child;}if(*_arrays[child] < _arrays[root])//if(com(_arrays[child],_arrays[root])){swap(_arrays[child], _arrays[root]);root = child;child = 2*root+1;}else{break;}}}void AdjustUp(int child){int parent = (child-1)/2;//while (parent >= 0)while (child > 0){if (*_arrays[child] < _arrays[parent]){swap(_arrays[parent], _arrays[child]);child = parent;parent = (child-1)/2;}else{break;}}}public:vector<T> _arrays; };最小堆里也完成了很多接口,包括push ?pop等

然后就是幾個壓縮和解壓的函數接口


1.根據哈夫曼樹獲取哈夫曼變慢:

void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root){if (root == nullptr){return;}_GenerateHuffmanCode(root->_left);_GenerateHuffmanCode(root->_right);//當前節點為葉子節點為空 才生成哈夫曼編碼if (root->_left == nullptr && root->_right == nullptr){HuffmanTreeNode<FileInfo>* cur = root;HuffmanTreeNode<FileInfo>* parent = cur->_parent;string& code = _infos[cur->_weight._ch]._code;while (parent){if (parent->_left == cur){code += '1';}else if (parent->_right == cur){code += '0';}cur = parent;parent = cur->_parent;}reverse(code.begin(), code.end());}}


2.根據最小堆建立哈夫曼樹;

void CreateTree(T *a, size_t size, const T& invalid){assert(a);Heap<HuffmanTreeNode<T>*> s1; //草 終于發現問題 在這里 (堆里放的是指針,類型一定要對)//找兩個最小的元素for (size_t i = 0; i < size; ++i){if (a[i] != invalid){HuffmanTreeNode<T>* node = new HuffmanTreeNode<T>(a[i]);s1.Push(node);}}while (s1.Size() > 1){HuffmanTreeNode<T>* left = s1.Top();s1.Pop();HuffmanTreeNode<T>* right = s1.Top();s1.Pop();HuffmanTreeNode<T>* parent = new HuffmanTreeNode<T>(left->_weight + right->_weight);parent->_left = left;parent->_right = right;left->_parent = parent;right->_parent = parent;s1.Push(parent);}_root = s1.Top();s1.Pop();}


3.讀取文本文件中的一行:

bool _ReadLine(FILE *fOutLogFile, string& line){char ch = fgetc(fOutLogFile);if (feof(fOutLogFile))return false;else{if (ch == '\n'){line += ch;ch = fgetc(fOutLogFile);}while (ch != '\n'){line += ch;ch = fgetc(fOutLogFile);}return true;}}
4.文件壓縮

//文件壓縮bool Compress(const char* filename){//1.打開一個文件,統計文件字符出現的次數//2.生成對應的哈弗曼編碼//3.壓縮文件//4.寫配置文件,方便解壓縮assert(filename);FILE *fOut = fopen(filename, "rb");assert(fOut);//統計文件字符出現的次數unsigned char ch = fgetc(fOut);while (!feof(fOut)) //文件結束{_infos[ch]._count++;ch = fgetc(fOut);}HuffmanTree<FileInfo> ht;FileInfo invalid;ht.CreateTree(_infos, 256, invalid);//哈夫曼編碼_GenerateHuffmanCode(ht.GetRoot());string compressFile = filename;compressFile += ".huf";//壓縮后的文件名 后綴為《輸入文件名+.huf》FILE *finCompress = fopen(compressFile.c_str(), "wb"); //獲取string中的C字符串assert(finCompress);fseek(fOut, 0, SEEK_SET);//將文件指針移到開頭char cha = fgetc(fOut);unsigned char inch = 0;int index = 0; //一個字節的八位while (!feof(fOut)){string& code = _infos[(unsigned char)cha]._code;for (size_t i = 0; i < code.size(); ++i){inch <<= 1; //低位向高位進if (code[i] == '1'){inch |= 1;}if (++index == 8){fputc(inch, finCompress); //夠8位,裝進文件index = 0; //重新一輪開始inch = 0;}}cha = fgetc(fOut);}fclose(fOut);//如果index = 0 說明 上邊8位剛好存滿 不等 下一個自己又出來了if (index != 0) //處理最后一個字符不夠的問題{inch <<= (8 - index); //最高位必須裝上 后邊的浪費掉fputc(inch, finCompress);}fclose(finCompress);}
5.寫配置文件:

string logFile = filename;logFile += ".log";FILE *Log = fopen(logFile.c_str(), "wb");assert(Log);string chInfo;char str[128] = {0}; //沒空間 不可以for (size_t i = 1; i < 256; ++i){if (_infos[i]._count > 0){chInfo += _infos[i]._ch;chInfo += ',';chInfo += _itoa(_infos[i]._count,str,10);chInfo += '\n';fputs(chInfo.c_str(), Log);chInfo.clear();}}fclose(Log);
6.最后的文件解壓:

//重構文件void _RestoreFiles(HuffmanTreeNode<FileInfo> *root, const char* Fileneme,long long size){assert(root);//原壓縮文件string name = Fileneme;name += ".huf";FILE* Out = fopen(name.c_str(),"rb");assert(Out);string restorefilename = Fileneme;restorefilename += ".over";FILE *over = fopen(restorefilename.c_str(),"wb");assert(over);int pos = 8;long long poss = size;unsigned char chz = fgetc(Out);while (poss>0){HuffmanTreeNode<FileInfo>* cur = nullptr;cur = root;while (cur->_left != nullptr || cur->_right != nullptr){pos--;unsigned char temp = chz >> pos;int ch = 1 & temp;if (ch == 0){cur = cur->_right;}else if (ch == 1){cur = cur->_left;}if (pos == 0){chz = fgetc(Out);pos = 8;}}fputc(cur->_weight._ch, over);poss--;}fclose(Out);fclose(over);}void UnCompress(const char* Fileneme)//解壓縮{//1.打開日志文件//2.根據信息還原哈夫曼樹//3.還原信息;string UnCompressneme = Fileneme;UnCompressneme += ".log";FILE *fOutLogFile = fopen(UnCompressneme.c_str(), "rb");assert(fOutLogFile);string line;while (_ReadLine(fOutLogFile, line)){unsigned char ch = line[0];_infos[ch]._count = atoi(line.substr(2).c_str());line.clear();} HuffmanTree<FileInfo> f;FileInfo invalid;f.CreateTree(_infos, 256, invalid);//根據重建的哈夫曼樹 還原文件;long long size = f.GetRoot()->_weight._count;_RestoreFiles(f.GetRoot(), Fileneme,size);}到此,此項目基本完成;如遇問題,希望留言,隨時解答,如有見解,跪求賜教!






轉載于:https://www.cnblogs.com/li-ning/p/9490022.html

總結

以上是生活随笔為你收集整理的基于哈夫曼编码完成的文件压缩及解压的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。