
本文共 14931 字,大约阅读时间需要 49 分钟。
前言 - Simple Dynamic Strings
antirez 想统一 Redis,Disque,Hiredis 项目中 SDS 代码, 因此构建了这个项目
. 更多介绍的背景知识, 可以阅读 README.md.
sds 项目是 C 字符串数据结构在实际环境中一种取舍实现, 库本身是非线程安全的. 下面
来带大家手写相关代码, 透彻了解这个库的意图(antirez 注释的很棒).
#define SDS_MAX_PREALLOC (1024*1024)/* Note: sdshdr5 is never used, we just access the flags byte directly. * However is here to document the layout of type 5 SDS strings. */struct __attribute__ ((__packed__)) sdshdr5 { unsigned char flags; /* 3 lsb of type, and 5 msb of string length */ char buf[];};struct __attribute__ ((__packed__)) sdshdr8 { uint8_t len; /* used */ uint8_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[];};struct __attribute__ ((__packed__)) sdshdr16 { uint16_t len; /* used */ uint16_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[];};struct __attribute__ ((__packed__)) sdshdr32 { uint32_t len; /* used */ uint32_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[];};struct __attribute__ ((__packed__)) sdshdr64 { uint64_t len; /* used */ uint64_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; };#define SDS_TYPE_5 0#define SDS_TYPE_8 1#define SDS_TYPE_16 2#define SDS_TYPE_32 3#define SDS_TYPE_64 4#define SDS_TYPE_MASK 7#define SDS_TYPE_BITS 3#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)#define SDS_HDR(T, s) ((struct sdshdr##T *)((s) - (sizeof(struct sdshdr##T))))#define SDS_HDR_VAR(T, s) struct sdshdr##T * sh = SDS_HDR(T, s)
可以先分析 sdshdr5 sdshdr8 sdshdr16 sdshdr32 sdshdr64, 感受作者的意图.
首先这几个基本结构, 都有 len, alloc, flags, buf 4 个字段, 是不是. 有的同学会问, sdshdr5
中没有 alloc 和 len 字段呀, 这个啊 sdshdr5 结构比较特殊, 其 alloc 和 len 都隐含在 flags
中, 三者复用一个字段. 可以从函数宏 SDS_TYPE_5_LEN(f), 可以看出来. 因而 sdshdr5 表达的字符
串长度和字符串容量默认相同. 再考究一下 __attribute__ ((__packed__)) 意图(告诉编译器取消结
构在编译过程中的优化对齐, 按照实际占用字节数进行对齐). 对于取消结构内存编译对齐优化, 我的考究有
两点, 1 节约内存, 2 内存可移植变强.
随后多数是流水线代码, 非常好理解. 例如有段这样关系的代码 sdsalloc() = sdsavail() + sdslen()
inline size_t sdslen(const sds s) { unsigned char flags = s[-1]; switch (flags & SDS_TYPE_MASK) { case SDS_TYPE_5 : return SDS_TYPE_5_LEN(flags); case SDS_TYPE_8 : return SDS_HDR(8 , s)->len; case SDS_TYPE_16: return SDS_HDR(16, s)->len; case SDS_TYPE_32: return SDS_HDR(32, s)->len; case SDS_TYPE_64: return SDS_HDR(64, s)->len; } return 0;}inline size_t sdsavail(const sds s) { unsigned char flags = s[-1]; switch (flags & SDS_TYPE_MASK) { case SDS_TYPE_8 : { SDS_HDR_VAR(8 , s); return sh->alloc - sh->len; } case SDS_TYPE_16: { SDS_HDR_VAR(16, s); return sh->alloc - sh->len; } case SDS_TYPE_32: { SDS_HDR_VAR(32, s); return sh->alloc - sh->len; } case SDS_TYPE_64: { SDS_HDR_VAR(64, s); return sh->alloc - sh->len; } default: return 0; } }/* sdsalloc() = sdsavail() + sdslen() */inline size_t sdsalloc(const sds s) { unsigned char flags = s[-1]; switch (flags & SDS_TYPE_MASK) { case SDS_TYPE_5 : return SDS_TYPE_5_LEN(flags); case SDS_TYPE_8 : return SDS_HDR(8 , s)->alloc; case SDS_TYPE_16: return SDS_HDR(16, s)->alloc; case SDS_TYPE_32: return SDS_HDR(32, s)->alloc; case SDS_TYPE_64: return SDS_HDR(64, s)->alloc; } return 0;}
是不是一下就理解了 sdsalloc(), sdsavail(), sdslen() 是干什么的呢 ❤
正文 - 代码抽样讲解
1. 重复代码可以修的更好
/* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */#define SDS_LLSTR_SIZE 21int sdsll2str(char *s, long long value) { char *p, aux; unsigned long long v; size_t l; /* Generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); if (value < 0) *p++ = '-'; /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l;}/* Identical sdsll2str(), but for unsigned long long type. */int sdsull2str(char *s, unsigned long long v) { char *p, aux; size_t l; /* Generate the string representation, this method produces * an reversed string. */ p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l;}
long long or unsigned long long convert to char * 中, 功能很简单. 函数结尾处代码
重复不少, 是可以重构复用的.
inline int sdsreverse(char * s, char * p) { /* Compute length and add null term. */ size_t l = p - s; *p = '\0'; p--; while (s < p) { char aux = *s; *s = *p; *p = aux; s++; p--; } return (int)l;}/* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */#define SDS_LLSTR_SIZE 21int sdsll2str(char * s, long long value) { char * p; unsigned long long v; /* Generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0' + (v % 10); } while ((v /= 10)); if (value < 0) *p++ = '-'; return sdsreverse(s, p);}/* Identical sdsll2str(), but for unsigned long long type. */int sdsull2str(char * s, unsigned long long v) { /* Generate the string representation, this method produces * an reversed string. */ char * p = s; do { *p++ = '0' + (v % 10); } while ((v /= 10)); return sdsreverse(s, p);}
是不是显得老学究气质凸显了不少.
2. vsnprintf 用的太硬邦邦
/* Like sdscatprintf() but gets va_list instead of being variadic. */sds sdscatvprintf(sds s, const char *fmt, va_list ap) { va_list cpy; char staticbuf[1024], *buf = staticbuf, *t; size_t buflen = strlen(fmt)*2; /* We try to start using a static buffer for speed. * If not possible we revert to heap allocation. */ if (buflen > sizeof(staticbuf)) { buf = s_malloc(buflen); if (buf == NULL) return NULL; } else { buflen = sizeof(staticbuf); } /* Try with buffers two times bigger every time we fail to * fit the string in the current buffer size. */ while(1) { buf[buflen-2] = '\0'; va_copy(cpy,ap); vsnprintf(buf, buflen, fmt, cpy); va_end(cpy); if (buf[buflen-2] != '\0') { if (buf != staticbuf) s_free(buf); buflen *= 2; buf = s_malloc(buflen); if (buf == NULL) return NULL; continue; } break; } /* Finally concat the obtained string to the SDS string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t;}
由 while vsnprintf 来扩容, 这实在太暴力. 更好操作可以观看 man vsnprintf/ 这里可以看我的提交
/* Like sdscatprintf() but gets va_list instead of being variadic. */sds sdscatvprintf(sds s, const char * fmt, va_list ap) { int size; va_list cpy; char staticbuf[1024], * buf, * t; /* Determine required size */ va_copy(cpy, ap); size = vsnprintf(NULL, 0, fmt, cpy); va_end(cpy); if (size < 0) return NULL; /* For '\0' */ size++; /* We try to start using a static buffer for speed. * If not possible we revert to heap allocation. */ if (size > sizeof(staticbuf)) { buf = s_malloc(size); if (buf == NULL) return NULL; } else { buf = staticbuf; } va_copy(cpy, ap); size = vsnprintf(buf, size, fmt, cpy); va_end(ap); if (size < 0) { if (buf != staticbuf) s_free(buf); return NULL; } /* Finally concat the obtained string to the SDS string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t;}
别问, 问就是上天.
3. sdssplitargs 难以让人见色起意
/* Helper function for sdssplitargs() that returns non zero if 'c' * is a valid hex digit. */int is_hex_digit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');}/* Helper function for sdssplitargs() that converts a hex digit into an * integer from 0 to 15 */int hex_digit_to_int(char c) { switch(c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return 0; }}/* Split a line into arguments, where every argument can be in the * following programming-language REPL-alike form: * * foo bar "newline are supported\n" and "\xff\x00otherstuff" * * The number of arguments is stored into *argc, and an array * of sds is returned. * * The caller should free the resulting array of sds strings with * sdsfreesplitres(). * * Note that sdscatrepr() is able to convert back a string into * a quoted string in the same format sdssplitargs() is able to parse. * * The function returns the allocated tokens on success, even when the * input string is empty, or NULL if the input contains unbalanced * quotes or closed quotes followed by non space characters * as in: "foo"bar or "foo' */sds *sdssplitargs(const char *line, int *argc) { const char *p = line; char *current = NULL; char **vector = NULL; *argc = 0; while(1) { /* skip blanks */ while(*p && isspace(*p)) p++; if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ int insq=0; /* set to 1 if we are in 'single quotes' */ int done=0; if (current == NULL) current = sdsempty(); while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && is_hex_digit(*(p+2)) && is_hex_digit(*(p+3))) { unsigned char byte; byte = (hex_digit_to_int(*(p+2))*16)+ hex_digit_to_int(*(p+3)); current = sdscatlen(current,(char*)&byte,1); p += 3; } else if (*p == '\\' && *(p+1)) { char c; p++; switch(*p) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'b': c = '\b'; break; case 'a': c = '\a'; break; default: c = *p; break; } current = sdscatlen(current,&c,1); } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else if (insq) { if (*p == '\\' && *(p+1) == '\'') { p++; current = sdscatlen(current,"'",1); } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else { switch(*p) { case ' ': case '\n': case '\r': case '\t': case '\0': done=1; break; case '"': inq=1; break; case '\'': insq=1; break; default: current = sdscatlen(current,p,1); break; } } if (*p) p++; } /* add the token to the vector */ vector = s_realloc(vector,((*argc)+1)*sizeof(char*)); vector[*argc] = current; (*argc)++; current = NULL; } else { /* Even on empty input string return something not NULL. */ if (vector == NULL) vector = s_malloc(sizeof(void*)); return vector; } }err: while((*argc)--) sdsfree(vector[*argc]); s_free(vector); if (current) sdsfree(current); *argc = 0; return NULL;}/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */void sdsfreesplitres(sds *tokens, int count) { if (!tokens) return; while(count--) sdsfree(tokens[count]); s_free(tokens);}
sdssplitargs 函数开始不好理解, 不过我这写了个 demo 分享给大家
#include#include "sds.h"int main(int argc, char * argv[]) { int count; const char * line = " hset name \"name:filed\" \"value:field\" "; sds * tokens = sdssplitargs(line, &count); printf("line = [%s], count = [%d]\n", line, count); for (int i = 0; i < count; i++) { printf("tokens[%d] = [%s]\n", i, tokens[i]); } sdsfreesplitres(tokens, count); return 0;}
输出
line = [ hset name "name:filed" "value:field" ], count = [4]tokens[0] = [hset]tokens[1] = [name]tokens[2] = [name:filed]tokens[3] = [value:field]
是不是瞬间开悟 -> sdssplitargs 到底要闹那样!
整体写下来, 感受是 antirez sds 实现的很四平八稳, 没有什么花哨的地方.
感兴趣的朋友写写, 收获少不了 ~
后记 - 再接再厉
错误是难免, 欢迎交流吹水 ~
-
古朗月行(唐·李白)小时不识月,呼作白玉盘。又疑瑶台镜,飞在青云端。仙人垂两足,桂树作团团。白兔捣药成,问言与谁餐。蟾蜍蚀圆影,大明夜已残。羿昔落九乌,天人清且安。阴精此沦惑,去去不足观。忧来其如何,凄怆摧心肝。
发表评论
最新留言
关于作者
