日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程语言 > c/c++ >内容正文

c/c++

Trie实现(C++)

發布時間:2024/4/17 c/c++ 33 豆豆
生活随笔 收集整理的這篇文章主要介紹了 Trie实现(C++) 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

參考了本文:http://www.cnblogs.com/xulb597/archive/2012/07/05/2578562.html

  • 支持模糊搜索,比如,【bkmh】可以匹配【BuKaManHua】;
  • 支持優先級,首字母、大寫字母有更高的優先級。

亟需解決的問題:

  • 目前搜索結果與關鍵詞中字母的順序無關,即【buk】可以匹配【BKManHua】
  • 若條目中重複出現關鍵詞裡的任意一個字母,即使不包含關鍵詞的其他字母,仍然會被匹配上來
  • 內存的占用:因為接觸C++不久,內存管理一竅不通,一個1.65MB的文件(312964個單詞),索引之後程序(VS2013編譯,Release版本)占用的內存高達68,860KB。

trie.hpp

#ifndef TRIE #define TRIE#include "LinkedList.hpp" #include "MatchInfo.hpp"#include <stdlib.h> #include <iostream> #include <string>#define BRANCH_SIZE 28 #define START_CHAR 'a'#define INDEX(x) (x == '?'? 27 : (x == '*'? 26 : x - START_CHAR))class Trie { public:Trie(){rootNode = new Trie_Node(0);memset(nodeMap, NULL, sizeof(nodeMap));memset(indexList, NULL, sizeof(indexList));}~Trie(){//delete rootNode; }void insert(const char *data, const int i){bool flag_start = false,flag_capital = false;Trie_Node *location = rootNode;int pos = 0;while (*data){char c = *data;// check wether it's capital and convert to lowwer case if so.if(c > 'A'-1 && c < 'Z'+1){flag_capital = true;c += 32;}else{flag_capital = false;}// map the char value to int which starts from 0int index = INDEX(c);// skip invalid charsif(index < 0){data++;pos++;continue;}// find nextif(location->next[index] == NULL){location->next[index] = getNode(index);}location = location->next[index];// build MatchInfo and add it to the trie node's indexListMatchInfo *info = new MatchInfo();info->itemindex = i;info->position = pos; // position of the char in the stringinfo->priority = 1;// intial or capital char has a higher priorityif(!flag_start){flag_start = true;info->priority++;}if(flag_capital)info->priority++;if(indexList[index] == NULL)indexList[index] = new LinkedList<MatchInfo>();indexList[index]->add(info);data++;pos++;}// end character has a higher priority//location->indexList->getCurrent()->value->priority++; }/*int match(const char *data){Trie_Node *location = rootNode;while (*data && location){location = location->next[INDEX(*data)];data++;}return (location != NULL);}*//*void fuzzy_match(const char *data, int* indexMap, size_t indexMapLength){predicateIndexMap(data, indexMap, indexMapLength);int index;Trie_Node *location = rootNode;while (*data && location){index = INDEX(*data);location = location->next[INDEX(*data)];if(location != NULL){fillIndexArray(indexMap, index);}data++;}}*/void fuzzy_match(const 
char *data, int* indexMap, size_t indexMapLength){predicateIndexMap(data, indexMap, indexMapLength);int index;Trie_Node *location = nodeMap[INDEX(*data)];do{index = INDEX(*data);if(location != NULL){fillIndexArray(indexMap, index);}else{break;}data++;} while ((*data) && (location = nodeMap[index]));}/*void print(){print(rootNode);}*/private://// a list to record matche info of each char in indexed words.// it's for priority and fuzzy seaching.// LinkedList<MatchInfo>* indexList[BRANCH_SIZE];struct Trie_Node{//int index;Trie_Node *next[BRANCH_SIZE];Trie_Node(int _index){//index = _index;memset(next, NULL, sizeof(next));};~Trie_Node(){//delete indexList;for (int i = 0; i < BRANCH_SIZE; i++){if(next[i])delete next[i];}}};Trie_Node *rootNode;//// a map to hold all created Trie_Node.// Trie_Node *nodeMap[BRANCH_SIZE];//// /*get a trie node from map.*/// return a new Trie_Node;// index: (char - 'a')// Trie_Node *getNode(int index){//return new Trie_Node(index);Trie_Node *tempNode = nodeMap[index];if(tempNode == NULL){tempNode = new Trie_Node(index);nodeMap[index] = tempNode;}return tempNode;}//// fill [indexMap] with priority of char at [index]// void fillIndexArray(int* indexMap, int index){if(indexList[index] == NULL)indexList[index] = new LinkedList<MatchInfo>();LinkedList<MatchInfo> *list = indexList[index];Node<MatchInfo> *node = list->getRoot();while (node){int itemIndex = node->value->itemindex;if(indexMap[itemIndex] != -1)indexMap[itemIndex] += node->value->priority;node = node->next;}}//// keep moving node to next until it's itemindex in value has been changed.// node will set to NULL if reaches the end.// void moveToNextItemIndex(Node<MatchInfo> **node){int index = (*node)->value->itemindex;if((*node)->next == NULL)(*node) = NULL;else{while ((*node)->value->itemindex == index){(*node)=(*node)->next;if((*node) == NULL)break;}}}//// predicate whether an index in indexMap is impossiable to be matched.// It will be set to -1 if so.// void predicateIndexMap(const 
char* keyword, int* indexMap, size_t indexMapLength){int *indexesMatched = new int[indexMapLength];int keywordLength = strlen(keyword);unsigned int keywordRecords[BRANCH_SIZE];size_t size = indexMapLength * sizeof(int);memset(indexesMatched, 0, size);memset(indexMap, -1, size);LinkedList<MatchInfo> *list;Node<MatchInfo> *match_node;int charIndex, index = 0;while (*keyword){charIndex = INDEX(*keyword);if(keywordRecords[charIndex] == 1){keyword++;continue;}keywordRecords[charIndex] = 1;list = indexList[charIndex];if(list != NULL){match_node = list->getRoot();while (match_node != NULL){indexesMatched[match_node->value->itemindex]++;match_node = match_node->next;//moveToNextItemIndex(&match_node); }}keyword++;}for (int i = 0; i < indexMapLength; i++){if(indexesMatched[i] >= keywordLength)indexMap[i] = 0;}delete indexesMatched;}/*void print(Trie_Node* node){char c;for (int i = 0; i < BRANCH_SIZE; i++){if(node->next[i] != NULL)\n{c = node->index + 'a';printf("%c-", c);print(node->next[i]);}}}*/ }; #endif // TRIE View Code

LinkedList.hpp

#ifndef LINKEDLIST
#define LINKEDLIST

#include <stdlib.h>
#include <iostream>

//
// One element of a LinkedList.
// The node owns `value` and deletes it in its destructor.
//
template <class T>
struct Node
{
    T *value;   // owned payload
    int index;  // position of the node inside its list, starting at 0
    Node *next; // following node, or NULL for the last one

    Node(T *_value, int _index)
        : value(_value), index(_index), next(nullptr)
    {
    }

    ~Node()
    {
        delete value;
    }

    // A copy would delete `value` twice.
    Node(const Node &) = delete;
    Node &operator=(const Node &) = delete;
};

//
// Append-only singly linked list that owns the values added to it.
//
template <class T>
class LinkedList
{
public:
    // Number of elements currently stored.
    int length;

    LinkedList()
        : length(0), root(nullptr), current(nullptr)
    {
    }

    // Deletes every node, and through the nodes every stored value.
    ~LinkedList()
    {
        Node<T> *node = root;
        while (node != nullptr)
        {
            Node<T> *following = node->next;
            delete node;
            node = following;
        }
    }

    // A copy would share, and later free twice, the same nodes.
    LinkedList(const LinkedList &) = delete;
    LinkedList &operator=(const LinkedList &) = delete;

    // Appends `value` at the end; the list takes ownership of it.
    void add(T *value)
    {
        Node<T> *node = new Node<T>(value, length);
        if (root == nullptr)
            root = node;
        else
            current->next = node;
        current = node;
        length++;
    }

    // Returns the node whose index equals `index`, or NULL if there is none.
    Node<T> *getAt(int index)
    {
        for (Node<T> *node = root; node != nullptr; node = node->next)
        {
            if (node->index == index)
                return node;
        }
        return nullptr;
    }

    // First node, or NULL while the list is empty.
    Node<T> *getRoot()
    {
        return root;
    }

    // Last node, or NULL while the list is empty.
    Node<T> *getCurrent()
    {
        return current;
    }

private:
    Node<T> *root, *current;
};

#endif // LINKEDLIST

MatchInfo.hpp

#ifndef DEFINE_MatchInfo
#define DEFINE_MatchInfo

//
// Match information of a single character.
//
struct MatchInfo
{
    int itemindex; // sequence number of the entry the character belongs to
    int position;  // position of the character
    int priority;  // priority
};

#endif

SortBiTree.hpp

#ifndef DEFINE_SortBiTree
#define DEFINE_SortBiTree

#include <stdlib.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

//
// Node of a SortBiTree: a sort key (`index`) plus the payload (`value`).
//
template <class T>
struct BTNode
{
    int index;
    T value;
    BTNode<T> *left, *right;

    BTNode(int _i, T _v)
        : index(_i), value(_v), left(nullptr), right(nullptr)
    {
    }
};

//
// Unbalanced binary search tree ordered by descending key:
// a key greater than the node's key goes to the LEFT subtree, every other
// key (including an equal one) goes to the RIGHT subtree. A left-node-right
// traversal therefore visits the keys from largest to smallest.
//
// The tree owns its nodes; copies are deep copies.
//
template <class T>
class SortBiTree
{
public:
    SortBiTree()
        : root(nullptr), minNode(nullptr), maxNode(nullptr)
    {
    }

    SortBiTree(const SortBiTree &other)
        : root(nullptr), minNode(nullptr), maxNode(nullptr)
    {
        try
        {
            cloneFrom(other);
        }
        catch (...)
        {
            // The destructor does not run for a half-built object.
            destroy(root);
            throw;
        }
    }

    // Copy-and-swap: `other` is a private copy whose destructor releases
    // the nodes this tree held before the assignment.
    SortBiTree &operator=(SortBiTree other)
    {
        swap(other);
        return *this;
    }

    ~SortBiTree()
    {
        destroy(root);
    }

    // Exchanges the contents of two trees without copying nodes.
    void swap(SortBiTree &other)
    {
        std::swap(root, other.root);
        std::swap(minNode, other.minNode);
        std::swap(maxNode, other.maxNode);
    }

    // Inserts `value` under the key `index`. Iterative, so a degenerate
    // (list-shaped) tree cannot exhaust the call stack.
    void add(int index, T value)
    {
        BTNode<T> *fresh = new BTNode<T>(index, value);

        if (root == nullptr)
        {
            root = fresh;
        }
        else
        {
            BTNode<T> *node = root;
            for (;;)
            {
                BTNode<T> *&child = (index > node->index) ? node->left : node->right;
                if (child == nullptr)
                {
                    child = fresh;
                    break;
                }
                node = child;
            }
        }

        // The left-most node is the first one inserted with the largest key,
        // the right-most node is the last one inserted with the smallest key.
        if (maxNode == nullptr || index > maxNode->index)
            maxNode = fresh;
        if (minNode == nullptr || index <= minNode->index)
            minNode = fresh;
    }

    // Node holding the largest key (left-most node), or NULL if empty.
    BTNode<T> *getMaxNode()
    {
        return maxNode;
    }

    // Node holding the smallest key (right-most node), or NULL if empty.
    BTNode<T> *getMinNode()
    {
        return minNode;
    }

    // Root of the tree, or NULL if empty.
    BTNode<T> *getRootNode()
    {
        return root;
    }

private:
    BTNode<T> *root, *minNode, *maxNode;

    // Deletes a whole subtree without recursion and without extra memory:
    // left children are rotated up until the current node has none, then the
    // node is deleted and the walk continues with its right child.
    static void destroy(BTNode<T> *node)
    {
        while (node != nullptr)
        {
            if (node->left != nullptr)
            {
                BTNode<T> *left = node->left;
                node->left = left->right;
                left->right = node;
                node = left;
            }
            else
            {
                BTNode<T> *right = node->right;
                delete node;
                node = right;
            }
        }
    }

    // Rebuilds `other` node by node, preserving its shape. Must only be
    // called on an empty tree.
    void cloneFrom(const SortBiTree &other)
    {
        if (other.root == nullptr)
            return;

        typedef std::pair<const BTNode<T> *, BTNode<T> *> Link; // source, copy

        root = new BTNode<T>(other.root->index, other.root->value);
        std::vector<Link> pending;
        pending.push_back(Link(other.root, root));

        while (!pending.empty())
        {
            const Link link = pending.back();
            pending.pop_back();

            if (link.first->left != nullptr)
            {
                link.second->left = new BTNode<T>(link.first->left->index, link.first->left->value);
                pending.push_back(Link(link.first->left, link.second->left));
            }
            if (link.first->right != nullptr)
            {
                link.second->right = new BTNode<T>(link.first->right->index, link.first->right->value);
                pending.push_back(Link(link.first->right, link.second->right));
            }
        }

        maxNode = root;
        while (maxNode->left != nullptr)
            maxNode = maxNode->left;

        minNode = root;
        while (minNode->right != nullptr)
            minNode = minNode->right;
    }
};

#endif

Stopwatch.hpp

#ifndef STOPWATCH_DEF
#define STOPWATCH_DEF
#include <ctime>

//
// Minimal stopwatch based on clock().
//
// Typical use:
//     Stopwatch *sw = Stopwatch::StartNew();
//     ...
//     double seconds = sw->Stop();   // also deletes sw
//
class Stopwatch
{
public:
    // Starts timing immediately, so a directly constructed stopwatch is
    // usable as well (read it with Elapsed(), never with Stop()).
    Stopwatch()
        : startTime(clock())
    {
    }

    ~Stopwatch() {}

    // Creates a running stopwatch on the heap. The caller releases it by
    // calling Stop().
    static Stopwatch *StartNew()
    {
        return new Stopwatch();
    }

    // Seconds since the stopwatch was started, as measured by clock().
    // Does not modify or destroy the stopwatch.
    double Elapsed() const
    {
        return (double)(clock() - startTime) / CLOCKS_PER_SEC;
    }

    // Returns the elapsed seconds and DELETES this object.
    // Only call it on a stopwatch obtained from StartNew(), and do not touch
    // the pointer afterwards.
    double Stop()
    {
        const double seconds = Elapsed();
        delete this;
        return seconds;
    }

private:
    clock_t startTime;
};
#endif

main.cpp

#include <stdlib.h> #include <iostream> #include <string> #include <fstream> #include <sys/stat.h>#include "Stopwatch.hpp" #include "trie.hpp" #include "SortBiTree.hpp"using namespace std;int displayMaxItems = 10;unsigned long get_file_size(const char *path) {unsigned long filesize = -1; struct stat statbuff;if(stat(path, &statbuff) < 0){return filesize;}else{filesize = statbuff.st_size;}return filesize; }void readFromFile(const char* path, char** buff) {long length = get_file_size(path); // 取得文件大小if(length == -1){cerr << "content file is invalid!" << endl;system("quit");return;}FILE *f = fopen(path, "r");*buff = new char[length];fread(*buff, sizeof(char), length, f);fclose(f); }void readInlineChars(char* source, char** buff) {*buff = new char[strlen(source)];strcpy(*buff, source); }// // 用于顯示結果 // TODO:使用迭代器 // void inorder_traverse(BTNode<int> *node, char* words[], int* indexMap, int threshold) {if(displayMaxItems == 0)return;if (NULL != node->left) {inorder_traverse(node->left, words, indexMap, threshold);}int confidence = indexMap[node->value];if(confidence > threshold){printf("%i\t%i\t%s\n", node->value, confidence, words[node->value]);displayMaxItems--;}if (NULL != node->right) {inorder_traverse(node->right, words, indexMap, threshold);} }void main(char* argv){Trie t;SortBiTree<int> bt;char* cpy = new char[];//=====================================// 注意,大文件請自行修改,去掉words//=====================================char *words[256];printf("indexing...");//readFromFile("contents.txt", &cpy);readFromFile("app_list.txt", &cpy);//readInlineChars("PlayShangDian PhotoshopDesigner pho Pho BuKaManHua BKManHua", &cpy);//readInlineChars("abc acc caa acb aaa abb", &cpy);//readInlineChars("aac aaa", &cpy);char *tk = strtok(cpy, " ");size_t index = 0;Stopwatch *sw = Stopwatch::StartNew();t.insert(tk, index++);while (tk = strtok(NULL, " ")){words[index] = tk;t.insert(tk, index++);}printf("%i word(s) have been indexed. 
[%lf seconds]\n", index, sw->Stop()); label_enter:printf("I'm searching for:\n>");string input;const char *chars;while (true){cin >> input;chars = input.data();if(!isalpha(*chars) || (*chars <= 'Z' && *chars >= 'A')){printf("only lowwer character is accepted!\n>");}elsebreak;}int *indexMap = new int[index];sw = Stopwatch::StartNew();printf("searching...");t.fuzzy_match(chars, indexMap, index);printf("done. [%lf seconds]\n", sw->Stop()); label_change:bt = SortBiTree<int>();printf("please input the threshold:\n>");int threshold = 0;cin >> threshold;printf("calculating...");int count = 0;for (int i = 0; i < index; i++){int confidence = indexMap[i];if(confidence > threshold){count++;bt.add(confidence, i);}}printf(" [%i] words matched.\n", count);displayMaxItems = 10;if(count > displayMaxItems)printf("first 10 items are listed below.\n");goto label_display;label_display:printf("------------------------------------------\n");printf("index\tpriority\tcontent\n");printf("------------------------------------------\n");if(bt.getRootNode() != NULL)inorder_traverse(bt.getRootNode(), words, indexMap, threshold);elseprintf(" none \n");/*it = &bt.getIterator();while (it->hasNext()){int i = it->next();int confidence = indexMap[i];if(confidence > threshold){printf("%s\t%i\t%i\n",words[i], confidence, i);}}*/printf("------------------------------------------\n"); label_menu:printf("now you may want to : \n[1].See them all.\n[2].Change thresgold\n[3].Change keyword.\n[q].Exit\n>");char choise = 0;cin >> choise;switch (choise){case '1':displayMaxItems = -1;goto label_display;break;case '2':goto label_change;break;case '3':goto label_enter;break;case 'q':break;default:break;}//delete indexMap; } View Code

截圖:

轉載于:https://www.cnblogs.com/ornithopter/p/3732496.html

總結

以上是生活随笔為你收集整理的Trie实现(C++)的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。