【lua学习】3.字符串
【lua學習】3.字符串
- Lua字符串的概況
- 字符串實現
- 字符串結構TString
- 全局字符串表stringtable
- 新建字符串luaS_newlstr (先查表,再決定創建與否)
- 新建字符串 newlstr
- 重新設置全局字符串的大小 luaS_resize
- 全局字符串表的縮容
- 保留字是如何不被回收的
Lua字符串的概況
- Lua虛擬機中存在一個散列桶結構的全局字符串表來存放所有字符串
- 關於比較字符串:先比較hash,再比較長度,再逐字符比較。節省時間
- 同一個字符串在Lua虛擬機中僅有一個副本。節省空間
- 一旦創建則無法變更
- 變量存放的僅是字符串的引用
字符串實現
字符串結構TString
(lobject.h) TString
typedef union TString {L_Umaxalign dummy;//保證最大對其//見下文struct {CommonHeader;lu_byte reserved;//當(dāng)>0時(shí),其值-1表示保留字列表中的索引//見下文unsigned int hash;//字符串散列值,根據(jù)字符串長度和部分字符計(jì)算而來的值,見下文size_t len;//字符串長度} tsv; } TString;(llimits.h) L_Umaxalign
/* Type used to force maximum alignment; LUAI_USER_ALIGNMENT_T comes from luaconf.h. */
typedef LUAI_USER_ALIGNMENT_T L_Umaxalign;
/* ---- (luaconf.h) LUAI_USER_ALIGNMENT_T ---- */
/* Union of double, void* and long: its alignment is the strictest of the
   three (8 bytes on common platforms, per the original author's note). */
#define LUAI_USER_ALIGNMENT_T union { double u; void *s; long l; }
/* ---- Global string table: stringtable ---- */
(lstate.h) stringtable
typedef struct stringtable {GCObject** hash;//開散列結(jié)構(gòu)(和lua table的閉散列結(jié)構(gòu)是有區(qū)別的),指向一個(gè)數(shù)組,每個(gè)元素是桶(GCObject*類型),桶管理GCObject鏈表lu_int32 nuse;//存儲(chǔ)的字符串?dāng)?shù)量int size;//全局字符串表的最大容量(hash桶的最大數(shù)量) } stringtable;新建字符串luaS_newlstr (先查表,再?zèng)Q定創(chuàng)建與否)
(lstring.c) luaS_newlstr
/*
 * Return the string object for the len bytes at str: an existing entry
 * from the global string table when one matches, otherwise a new one made
 * by newlstr.
 */
TString *luaS_newlstr(lua_State *L, const char *str, size_t len) {
  /* the hash is seeded with the string length */
  unsigned int h = cast(unsigned int, len);
  /* stride between sampled characters: hashing every byte of a very long
     string would be too expensive */
  size_t step = (len >> 5) + 1;
  /* walk backwards from the last character, folding each sampled byte into h */
  for (size_t l1 = len; l1 >= step; l1 -= step) {
    h ^= ((h << 5) + (h >> 2) + cast(unsigned char, str[l1 - 1]));
  }
  /* reduce h modulo the bucket count to get the bucket index */
  unsigned int bucket_index = lmod(h, G(L)->strt.size);
  /* scan the bucket's chain for an equal string */
  for (GCObject *gco = G(L)->strt.hash[bucket_index];
       gco != NULL;
       gco = gco->gch.next) {
    TString *ts = rawgco2ts(gco);
    /* compare lengths first, then the bytes */
    if (ts->tsv.len == len && (memcmp(str, getstr(ts), len) == 0)) {
      /* if the string is about to be collected, flip it to the other white
         so that it is not collected */
      if (isdead(G(L), gco)) {
        changewhite(gco);
      }
      /* found: no need to create a new string */
      return ts;
    }
  }
  /* not found: create a new string */
  return newlstr(L, str, len, h);
}
/* ---- (llimits.h) cast macro ---- */
/* Convert exp to type t with a plain C cast. */
#define cast(t, exp) ((t)(exp))
/* ---- (lobject.h) lmod macro ---- */
/* s modulo size, optimized for a size that is a power of two
   (the power-of-two requirement is checked through check_exp). */
#define lmod(s,size) \
  (check_exp((size&(size-1))==0, (cast(int, (s) & ((size)-1)))))
/* ---- (lstate.h) G macro ---- */
/* Access the l_G member of the state pointed to by L. */
#define G(L) (L->l_G)
/* ---- (lstate.h) rawgco2ts macro ---- */
/* Get the TString* view of a GCObject*; check_exp verifies the object is tagged LUA_TSTRING. */
#define rawgco2ts(gco) check_exp((gco)->gch.tt == LUA_TSTRING, &((gco)->ts))
/* ---- (lobject.h) getstr macro ---- */
/* Get the address of the first character of a string from its TString*.
   The characters are not stored inside TString: they start at the byte
   right after the TString object, which is why TString has to be aligned. */
#define getstr(ts) cast(const char *, (ts) + 1)
/* ---- (lgc.h) isdead macro ---- */
/* Nonzero when gco is judged collectable in the current GC phase:
   its mark carries the white returned by otherwise(g)'s counterpart,
   otherwhite(g). Details are left for a later article (TODO in the original). */
#define isdead(g, gco) ((gco)->gch.marked & otherwhite(g) & WHITEBITS)
/* ---- (lgc.h) changewhite macro ---- */
/* Flip the white bits in the mark of gco (XOR with WHITEBITS).
   Details are left for a later article (TODO in the original). */
#define changewhite(gco) ((gco)->gch.marked ^= WHITEBITS)
/* ---- Creating a string: newlstr ---- */
(lstring.c) newlstr
/*
 * Allocate a new string object holding a copy of the len bytes at str,
 * link it into the global string table under hash h, and return it.
 * Raises an error through luaM_toobig when len is too large.
 */
static TString *newlstr(lua_State *L, const char *str, size_t len, unsigned int h) {
  /* reject lengths whose total allocation size would not fit in MAX_SIZET */
  if (len + 1 > (MAX_SIZET - sizeof(TString)) / sizeof(char)) {
    luaM_toobig(L);
  }
  /* one contiguous allocation: the TString header followed by the characters */
  TString *ts = cast(TString *, luaM_malloc(L, (len + 1) * sizeof(char) + sizeof(TString)));
  ts->tsv.len = len;
  ts->tsv.hash = h;
  ts->tsv.marked = luaC_white(G(L));
  ts->tsv.tt = LUA_TSTRING;  /* rawgco2ts checks this tag */
  ts->tsv.reserved = 0;
  /* the character data starts right after the header */
  char *pstr = (char *)(ts + 1);
  memcpy(pstr, str, len * sizeof(char));
  pstr[len] = '\0';  /* always NUL-terminated */
  /* insert at the head of the bucket chain in the global string table */
  stringtable *tb = &G(L)->strt;
  h = lmod(h, tb->size);
  ts->tsv.next = tb->hash[h];
  tb->hash[h] = obj2gco(ts);
  tb->nuse++;
  /* grow the table when strings outnumber buckets and doubling cannot overflow */
  if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT / 2) {
    luaS_resize(L, tb->size * 2);
  }
  return ts;
}
/* ---- (llimits.h) MAX_SIZET macro ---- */
/* Largest size_t value minus 2. */
#define MAX_SIZET ((size_t)(~(size_t)0)-2)
/* ---- (lmem.c) luaM_toobig: report that the requested allocation is too large ---- */
/*
 * Report that a requested memory block is too big by raising an error
 * through luaG_runerror (covered in a later article per the original TODO).
 * The NULL return only satisfies the declared return type.
 */
void *luaM_toobig(lua_State *L) {
  luaG_runerror(L, "memory allocation error: block too big");
  return NULL;
}
/* ---- (lmem.h) luaM_malloc macro ---- */
/* Request bytes_to_allocate bytes of fresh memory: a luaM_realloc_ call
   with no old block (NULL, size 0). */
#define luaM_malloc(L, bytes_to_allocate) luaM_realloc_(L, NULL, 0, (bytes_to_allocate))
/* ---- (lmem.c) luaM_realloc_: allocate memory ---- */
/*
 * Resize (or allocate / free) a block through the allocator stored in the
 * global state, and keep g->totalbytes in step.
 *
 *   address_to_free    old block, NULL when bytes_to_free is 0
 *   bytes_to_free      size of the old block
 *   bytes_to_allocate  requested new size
 *
 * Returns the new block. Throws LUA_ERRMEM via luaD_throw when the
 * allocator returns NULL for a non-zero request.
 * The name is luaM_realloc_, matching the luaM_malloc / luaM_reallocv
 * macros that call it.
 */
void *luaM_realloc_(lua_State *L, void *address_to_free, size_t bytes_to_free, size_t bytes_to_allocate) {
  /* lua_assert is defined as ((void)0) in llimits.h, i.e. it does nothing */
  lua_assert((bytes_to_free == 0) == (address_to_free == NULL));
  global_State *g = G(L);
  /* call the allocation function registered in the global state */
  void *address_to_allocate = (*g->frealloc)(g->ud, address_to_free, bytes_to_free, bytes_to_allocate);
  /* allocation failed for a non-zero request: raise a memory error */
  if (address_to_allocate == NULL && bytes_to_allocate > 0) {
    luaD_throw(L, LUA_ERRMEM);  /* lua.h: #define LUA_ERRMEM 4 */
  }
  lua_assert((bytes_to_allocate == 0) == (address_to_allocate == NULL));
  /* subtract before adding so the update never depends on the size_t
     difference wrapping around when the block shrinks */
  g->totalbytes = (g->totalbytes - bytes_to_free) + bytes_to_allocate;
  return address_to_allocate;
}
/* ---- (lgc.h) luaC_white: get the current GC white ---- */
/* Current GC white: the white bits of g->currentwhite, as a lu_byte.
   Details are left for a later article (TODO in the original). */
#define luaC_white(g) cast(lu_byte, (g)->currentwhite & WHITEBITS)
/* ---- (lstate.h) obj2gco macro ---- */
/* Cast an object pointer to GCObject*. */
#define obj2gco(v) (cast(GCObject *, (v)))
/* ---- (llimits.h) MAX_INT macro ---- */
/* INT_MAX minus 2. */
#define MAX_INT (INT_MAX-2)
/* ---- Resizing the global string table: luaS_resize ---- */
(lstring.c) luaS_resize
/*
 * Rebuild the global string table with newsize buckets, moving every
 * string into the bucket selected by its stored hash. Does nothing while
 * the GC is in the GCSsweepstring state.
 * newsize must be a power of two (lmod relies on it).
 */
void luaS_resize(lua_State *L, int newsize) {
  /* no resizing while the GC is sweeping strings */
  if (G(L)->gcstate == GCSsweepstring) {
    return;
  }
  /* allocate the new bucket array and start with every bucket empty */
  GCObject **newhash = luaM_newvector(L, newsize, GCObject *);
  for (int i = 0; i < newsize; i++) {
    newhash[i] = NULL;
  }
  stringtable *tb = &G(L)->strt;
  /* move every node of every old bucket into the new array */
  for (int i = 0; i < tb->size; i++) {
    GCObject *p = tb->hash[i];
    while (p) {
      GCObject *next = p->gch.next;  /* save the successor before relinking */
      /* new bucket index from the hash stored in the string */
      unsigned int oldh = gco2ts(p)->hash;
      int newh = lmod(oldh, newsize);
      lua_assert(cast_int(oldh % newsize) == lmod(oldh, newsize));
      /* insert at the head of the new bucket's chain */
      p->gch.next = newhash[newh];
      newhash[newh] = p;
      p = next;
    }
  }
  /* release the old bucket array and publish the new one */
  luaM_freearray(L, tb->hash, tb->size, TString *);
  tb->size = newsize;
  tb->hash = newhash;
}
/* ---- (lgc.h) GCSsweepstring macro ---- */
/* The GC phases. Details are left for a later article (TODO in the original). */
#define GCSpause        0
#define GCSpropagate    1
#define GCSsweepstring  2
#define GCSsweep        3
#define GCSfinalize     4
/* ---- (lmem.h) luaM_newvector macro ---- */
/* Allocate contiguous room for count_to_allocate elements of type datatype;
   the result is cast to datatype*. */
#define luaM_newvector(L,count_to_allocate,datatype) \
  cast(datatype*, luaM_reallocv(L, NULL, 0, count_to_allocate, sizeof(datatype)))
/* ---- (lmem.h) luaM_reallocv macro ---- */
/* Reallocate an array to count_to_allocate elements of singlebytes bytes each.
   When the byte count could exceed MAX_SIZET the request goes to luaM_toobig
   (which reports an error) instead of luaM_realloc_.
   The overflow guard tests count_to_allocate, the macro's own parameter. */
#define luaM_reallocv(L,address_to_free,count_to_free,count_to_allocate,singlebytes) \
  ((cast(size_t, (count_to_allocate)+1) <= MAX_SIZET/(singlebytes)) ? \
    luaM_realloc_(L, (address_to_free), (count_to_free)*(singlebytes), (count_to_allocate)*(singlebytes)) : \
    luaM_toobig(L))
/* ---- (lmem.h) luaM_freearray macro ---- */
/* Release the count_to_free elements of type datatype stored at
   address_to_free (a luaM_reallocv call with a new count of 0). */
#define luaM_freearray(L, address_to_free, count_to_free, datatype) \
  luaM_reallocv(L, (address_to_free), count_to_free, 0, sizeof(datatype))
/* ---- Shrinking the global string table ---- */
- 縮容的時機:垃圾回收的GCSsweep階段
- 縮容的原則:全局字符串表的字符串總數<桶的最大數量的四分之一 且 桶的最大數量>MINSTRTABSIZE*2 (llimits.h中#define MINSTRTABSIZE 32)
(lgc.c) 看checkSize
/*
 * Shrink the global string table when it is sparsely used.
 * Named checkSizes to match the call made from singlestep.
 * (Excerpt: the rest of the function is omitted in the article.)
 */
static void checkSizes(lua_State *L) {
  global_State *g = G(L);
  /* halve the table when the string count is below a quarter of the bucket
     count and the bucket count is above MINSTRTABSIZE*2 */
  if (g->strt.nuse < cast(lu_int32, g->strt.size / 4)
      && g->strt.size > MINSTRTABSIZE * 2) {
    luaS_resize(L, g->strt.size / 2);
  }
  /* ... unrelated code omitted ... */
}
/* ---- (lgc.c) see singlestep ---- */
//一次單步GC,todo后面再說 static l_mem singlestep(lua_State* L) {global_State* g = G(L);switch(g->gcstate){//...無關(guān)內(nèi)容省略case GCSweep:{lu_mem old = g->totalbytes;g->sweepgc = sweeplist(L, g->sweepgc, GCSWEEPMAX);if (*g->sweepgc == NULL){//包含有全局字符串表縮容的操作checkSizes(L);g->gcstate = GCSfinalize;}lua_assert(old >= g->totalbytes);g->estimate -= old - g->totalbytes;return GCSWEEPMAX*GCSWEEPCOST;}//...無關(guān)內(nèi)容省略}//...無關(guān)內(nèi)容省略 }保留字是如何不被回收的
- 不被回收的原則:被luaS_fix操作,其tsv.marked的FIXEDBIT位被置為1
(llex.c) luaX_init
/*
 * Create the string for each of the NUM_RESERVED reserved words, mark it
 * with luaS_fix so that it is not garbage collected, and record its
 * position in the reserved-word array.
 */
void luaX_init(lua_State *L) {
  for (int i = 0; i < NUM_RESERVED; i++) {
    /* create (or find) the string for this reserved word */
    TString *ts = luaS_new(L, luaX_tokens[i]);
    /* set FIXEDBIT in ts->tsv.marked so the string is never collected */
    luaS_fix(ts);
    lua_assert(strlen(luaX_tokens[i]) + 1 <= TOKEN_LEN);
    /* store (index in the reserved-word array) + 1 */
    ts->tsv.reserved = cast_byte(i + 1);
  }
}
/* ---- (llex.h) NUM_RESERVED macro ---- */
/* Number of reserved words: the count of enumerators from FIRST_RESERVED
   through TK_WHILE in enum RESERVED. */
#define NUM_RESERVED (cast(int, TK_WHILE-FIRST_RESERVED+1))
/* ---- (llex.h) RESERVED enum ---- */
/* Token codes; the order matches the luaX_tokens array. */
enum RESERVED {
  /* terminal symbols denoted by reserved words */
  TK_AND = FIRST_RESERVED, TK_BREAK,
  TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
  TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
  TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
  /* other terminal symbols */
  TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_NUMBER,
  TK_NAME, TK_STRING, TK_EOS
};
/* ---- (lstring.h) luaS_new macro ---- */
/* Create (or find) the string object for the NUL-terminated C string s. */
#define luaS_new(L, s) (luaS_newlstr(L, s, strlen(s)))
/* ---- (llex.c) luaX_tokens ---- */
/* Spellings of the terminal symbols: reserved words first, then the other
   tokens, terminated by NULL. */
const char *const luaX_tokens [] = {
  "and", "break", "do", "else", "elseif",
  "end", "false", "for", "function", "if",
  "in", "local", "nil", "not", "or", "repeat",
  "return", "then", "true", "until", "while",
  "..", "...", "==", ">=", "<=", "~=",
  "<number>", "<name>", "<string>", "<eof>",
  NULL
};
/* ---- (lstring.h) luaS_fix macro ---- */
/* Mark string s as not collectable by setting FIXEDBIT in its mark byte. */
#define luaS_fix(s) l_setbit((s)->tsv.marked, FIXEDBIT)
/* ---- (lgc.h) l_setbit macro ---- */
/* OR the mask m into x */
#define setbits(x,m) ((x) |= (m))
/* mask with only bit b set (bits counted from 0), i.e. 2 to the power b */
#define bitmask(b) (1<<(b))
/* mask with bits b1 and b2 set and all others clear */
#define bit2mask(b1,b2) (bitmask(b1) | bitmask(b2))
/* set bit b of x */
#define l_setbit(x,b) setbits(x, bitmask(b))
/* ---- (lgc.h) FIXEDBIT macro ---- */
/* Bit positions inside a GC object's mark byte.
   Details are left for a later article (TODO in the original). */
#define WHITE0BIT     0
#define WHITE1BIT     1
#define BLACKBIT      2
#define FINALIZEDBIT  3
#define KEYWEAKBIT    3
#define VALUEWEAKBIT  4
#define FIXEDBIT      5
#define SFIXEDBIT     6
/* mask covering both white bits */
#define WHITEBITS     bit2mask(WHITE0BIT, WHITE1BIT)
/* ---- (llex.h) TOKEN_LEN macro ---- */
/* Buffer length for a reserved word, sized from "function" (taken as the
   longest reserved word) including its terminating NUL. */
#define TOKEN_LEN (sizeof("function")/sizeof(char))
/* ---- Summary ---- */
以上是生活随笔為你收集整理的【lua学习】3.字符串的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: ddos源码下载(ddos安卓版源码)
- 下一篇: 【lua学习】4.表