C基础 带你手写 redis sds
发布日期:2021-05-09 04:33:26 浏览次数:15 分类:博客文章

本文共 14931 字,大约阅读时间需要 49 分钟。

前言 - Simple Dynamic Strings

   antirez 想统一 Redis,Disque,Hiredis 项目中 SDS 代码, 因此构建了这个项目

 . 更多介绍的背景知识, 可以阅读 README.md.  

  sds 项目是 C 字符串数据结构在实际环境中一种取舍实现, 库本身是非线程安全的. 下

来带大家手写相关代码, 透彻了解这个库的意图(antirez 注释的很棒). 

#define SDS_MAX_PREALLOC  (1024*1024)/* Note: sdshdr5 is never used, we just access the flags byte directly. * However is here to document the layout of type 5 SDS strings. */struct __attribute__ ((__packed__)) sdshdr5 {    unsigned char flags;    /* 3 lsb of type, and 5 msb of string length */    char buf[];};struct __attribute__ ((__packed__)) sdshdr8 {    uint8_t len;            /* used */    uint8_t alloc;          /* excluding the header and null terminator */    unsigned char flags;    /* 3 lsb of type, 5 unused bits */    char buf[];};struct __attribute__ ((__packed__)) sdshdr16 {    uint16_t len;           /* used */    uint16_t alloc;         /* excluding the header and null terminator */    unsigned char flags;    /* 3 lsb of type, 5 unused bits */    char buf[];};struct __attribute__ ((__packed__)) sdshdr32 {    uint32_t len;           /* used */    uint32_t alloc;         /* excluding the header and null terminator */    unsigned char flags;    /* 3 lsb of type, 5 unused bits */    char buf[];};struct __attribute__ ((__packed__)) sdshdr64 {    uint64_t len;           /* used */    uint64_t alloc;         /* excluding the header and null terminator */    unsigned char flags;    /* 3 lsb of type, 5 unused bits */    char buf[]; };#define SDS_TYPE_5    0#define SDS_TYPE_8    1#define SDS_TYPE_16   2#define SDS_TYPE_32   3#define SDS_TYPE_64   4#define SDS_TYPE_MASK 7#define SDS_TYPE_BITS 3#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)#define SDS_HDR(T, s)     ((struct sdshdr##T *)((s) - (sizeof(struct sdshdr##T))))#define SDS_HDR_VAR(T, s) struct sdshdr##T * sh = SDS_HDR(T, s)

可以先分析 sdshdr5 sdshdr8 sdshdr16 sdshdr32 sdshdr64, 感受作者的意图.  

首先这几个基本结构, 都有 len, alloc, flags, buf 4 个字段, 是不是. 有的同学会问, sdshdr5

中没有 alloc 和 len 字段呀,  这个啊 sdshdr5 结构比较特殊, 其 alloc 和 len 都隐含在 flags

中, 三者复用一个字段. 可以从函数宏 SDS_TYPE_5_LEN(f), 可以看出来.  因而 sdshdr5 表达的字符

串长度和字符串容量默认相同.  再考究一下 __attribute__ ((__packed__)) 意图(告诉编译器取消结

构在编译过程中的优化对齐, 按照实际占用字节数进行对齐). 对于取消结构内存编译对齐优化, 我的考究有

两点, 1 节约内存, 2 内存可移植变强.  

随后多数是流水线代码, 非常好理解. 例如有段这样关系的代码 sdsalloc() = sdsavail() + sdslen() 

inline size_t sdslen(const sds s) {    unsigned char flags = s[-1];    switch (flags & SDS_TYPE_MASK) {    case SDS_TYPE_5 :        return SDS_TYPE_5_LEN(flags);    case SDS_TYPE_8 :        return SDS_HDR(8 , s)->len;    case SDS_TYPE_16:        return SDS_HDR(16, s)->len;    case SDS_TYPE_32:        return SDS_HDR(32, s)->len;    case SDS_TYPE_64:        return SDS_HDR(64, s)->len;    }    return 0;}inline size_t sdsavail(const sds s) {    unsigned char flags = s[-1];    switch (flags & SDS_TYPE_MASK) {    case SDS_TYPE_8 : {        SDS_HDR_VAR(8 , s);        return sh->alloc - sh->len;           }    case SDS_TYPE_16: {        SDS_HDR_VAR(16, s);        return sh->alloc - sh->len;           }    case SDS_TYPE_32: {        SDS_HDR_VAR(32, s);        return sh->alloc - sh->len;           }    case SDS_TYPE_64: {        SDS_HDR_VAR(64, s);        return sh->alloc - sh->len;           }    default:        return 0;    }  }/* sdsalloc() = sdsavail() + sdslen() */inline size_t sdsalloc(const sds s) {    unsigned char flags = s[-1];    switch (flags & SDS_TYPE_MASK) {    case SDS_TYPE_5 :        return SDS_TYPE_5_LEN(flags);    case SDS_TYPE_8 :        return SDS_HDR(8 , s)->alloc;    case SDS_TYPE_16:        return SDS_HDR(16, s)->alloc;    case SDS_TYPE_32:        return SDS_HDR(32, s)->alloc;    case SDS_TYPE_64:        return SDS_HDR(64, s)->alloc;    }    return 0;}

是不是一下就理解了 sdsalloc(), sdsavail(), sdslen() 是干什么的呢 ❤

 

正文 - 代码抽样讲解

1. 重复代码可以修的更好

/* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */#define SDS_LLSTR_SIZE 21int sdsll2str(char *s, long long value) {    char *p, aux;    unsigned long long v;    size_t l;    /* Generate the string representation, this method produces     * an reversed string. */    v = (value < 0) ? -value : value;    p = s;    do {        *p++ = '0'+(v%10);        v /= 10;    } while(v);    if (value < 0) *p++ = '-';    /* Compute length and add null term. */    l = p-s;    *p = '\0';    /* Reverse the string. */    p--;    while(s < p) {        aux = *s;        *s = *p;        *p = aux;        s++;        p--;    }    return l;}/* Identical sdsll2str(), but for unsigned long long type. */int sdsull2str(char *s, unsigned long long v) {    char *p, aux;    size_t l;    /* Generate the string representation, this method produces     * an reversed string. */    p = s;    do {        *p++ = '0'+(v%10);        v /= 10;    } while(v);    /* Compute length and add null term. */    l = p-s;    *p = '\0';    /* Reverse the string. */    p--;    while(s < p) {        aux = *s;        *s = *p;        *p = aux;        s++;        p--;    }    return l;}

long long or unsigned long long convert to char * 中, 功能很简单. 函数结尾处代码

重复不少, 是可以重构复用的. 

inline int sdsreverse(char * s, char * p) {    /* Compute length and add null term. */    size_t l = p - s;    *p = '\0';    p--;    while (s < p) {        char aux = *s;        *s = *p;        *p = aux;        s++;        p--;    }    return (int)l;}/* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */#define SDS_LLSTR_SIZE 21int sdsll2str(char * s, long long value) {    char * p;    unsigned long long v;    /* Generate the string representation, this method produces     * an reversed string. */    v = (value < 0) ? -value : value;    p = s;    do {        *p++ = '0' + (v % 10);    } while ((v /= 10));    if (value < 0) *p++ = '-';    return sdsreverse(s, p);}/* Identical sdsll2str(), but for unsigned long long type. */int sdsull2str(char * s, unsigned long long v) {    /* Generate the string representation, this method produces     * an reversed string. */    char * p = s;    do {        *p++ = '0' + (v % 10);    } while ((v /= 10));    return sdsreverse(s, p);}

是不是显得老学究气质凸显了不少. 

2.  vsnprintf 用的太硬邦邦

/* Like sdscatprintf() but gets va_list instead of being variadic. */sds sdscatvprintf(sds s, const char *fmt, va_list ap) {    va_list cpy;    char staticbuf[1024], *buf = staticbuf, *t;    size_t buflen = strlen(fmt)*2;    /* We try to start using a static buffer for speed.     * If not possible we revert to heap allocation. */    if (buflen > sizeof(staticbuf)) {        buf = s_malloc(buflen);        if (buf == NULL) return NULL;    } else {        buflen = sizeof(staticbuf);    }    /* Try with buffers two times bigger every time we fail to     * fit the string in the current buffer size. */    while(1) {        buf[buflen-2] = '\0';        va_copy(cpy,ap);        vsnprintf(buf, buflen, fmt, cpy);        va_end(cpy);        if (buf[buflen-2] != '\0') {            if (buf != staticbuf) s_free(buf);            buflen *= 2;            buf = s_malloc(buflen);            if (buf == NULL) return NULL;            continue;        }        break;    }    /* Finally concat the obtained string to the SDS string and return it. */    t = sdscat(s, buf);    if (buf != staticbuf) s_free(buf);    return t;}

由 while vsnprintf 来扩容, 这实在太暴力. 更好操作可以观看 man vsnprintf/ 这里可以看我的提交

/* Like sdscatprintf() but gets va_list instead of being variadic. */sds sdscatvprintf(sds s, const char * fmt, va_list ap) {    int size;    va_list cpy;    char staticbuf[1024], * buf, * t;    /* Determine required size */    va_copy(cpy, ap);    size = vsnprintf(NULL, 0, fmt, cpy);    va_end(cpy);    if (size < 0) return NULL;    /* For '\0' */    size++;    /* We try to start using a static buffer for speed.     * If not possible we revert to heap allocation. */    if (size > sizeof(staticbuf)) {        buf = s_malloc(size);        if (buf == NULL) return NULL;            } else {        buf = staticbuf;    }    va_copy(cpy, ap);    size = vsnprintf(buf, size, fmt, cpy);    va_end(ap);    if (size < 0) {        if (buf != staticbuf) s_free(buf);        return NULL;    }    /* Finally concat the obtained string to the SDS string and return it. */    t = sdscat(s, buf);    if (buf != staticbuf) s_free(buf);    return t;}

别问, 问就是上天.

3. sdssplitargs 难以让人见色起意

/* Helper function for sdssplitargs() that returns non zero if 'c' * is a valid hex digit. */int is_hex_digit(char c) {    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||           (c >= 'A' && c <= 'F');}/* Helper function for sdssplitargs() that converts a hex digit into an * integer from 0 to 15 */int hex_digit_to_int(char c) {    switch(c) {    case '0': return 0;    case '1': return 1;    case '2': return 2;    case '3': return 3;    case '4': return 4;    case '5': return 5;    case '6': return 6;    case '7': return 7;    case '8': return 8;    case '9': return 9;    case 'a': case 'A': return 10;    case 'b': case 'B': return 11;    case 'c': case 'C': return 12;    case 'd': case 'D': return 13;    case 'e': case 'E': return 14;    case 'f': case 'F': return 15;    default: return 0;    }}/* Split a line into arguments, where every argument can be in the * following programming-language REPL-alike form: * * foo bar "newline are supported\n" and "\xff\x00otherstuff" * * The number of arguments is stored into *argc, and an array * of sds is returned. * * The caller should free the resulting array of sds strings with * sdsfreesplitres(). * * Note that sdscatrepr() is able to convert back a string into * a quoted string in the same format sdssplitargs() is able to parse. * * The function returns the allocated tokens on success, even when the * input string is empty, or NULL if the input contains unbalanced * quotes or closed quotes followed by non space characters * as in: "foo"bar or "foo' */sds *sdssplitargs(const char *line, int *argc) {    const char *p = line;    char *current = NULL;    char **vector = NULL;    *argc = 0;    while(1) {        /* skip blanks */        while(*p && isspace(*p)) p++;        if (*p) {            /* get a token */            int inq=0;  /* set to 1 if we are in "quotes" */            int insq=0; /* set to 1 if we are in 'single quotes' */            int done=0;            if (current == NULL) current = sdsempty();            while(!done) {                if (inq) {                    if (*p == '\\' && *(p+1) == 'x' &&                                             is_hex_digit(*(p+2)) &&                                             is_hex_digit(*(p+3)))                    {                        unsigned char byte;                        byte = (hex_digit_to_int(*(p+2))*16)+                                hex_digit_to_int(*(p+3));                        current = sdscatlen(current,(char*)&byte,1);                        p += 3;                    } else if (*p == '\\' && *(p+1)) {                        char c;                        p++;                        switch(*p) {                        case 'n': c = '\n'; break;                        case 'r': c = '\r'; break;                        case 't': c = '\t'; break;                        case 'b': c = '\b'; break;                        case 'a': c = '\a'; break;                        default: c = *p; break;                        }                        current = sdscatlen(current,&c,1);                    } else if (*p == '"') {                        /* closing quote must be followed by a space or                         * nothing at all. */                        if (*(p+1) && !isspace(*(p+1))) goto err;                        done=1;                    } else if (!*p) {                        /* unterminated quotes */                        goto err;                    } else {                        current = sdscatlen(current,p,1);                    }                } else if (insq) {                    if (*p == '\\' && *(p+1) == '\'') {                        p++;                        current = sdscatlen(current,"'",1);                    } else if (*p == '\'') {                        /* closing quote must be followed by a space or                         * nothing at all. */                        if (*(p+1) && !isspace(*(p+1))) goto err;                        done=1;                    } else if (!*p) {                        /* unterminated quotes */                        goto err;                    } else {                        current = sdscatlen(current,p,1);                    }                } else {                    switch(*p) {                    case ' ':                    case '\n':                    case '\r':                    case '\t':                    case '\0':                        done=1;                        break;                    case '"':                        inq=1;                        break;                    case '\'':                        insq=1;                        break;                    default:                        current = sdscatlen(current,p,1);                        break;                    }                }                if (*p) p++;            }            /* add the token to the vector */            vector = s_realloc(vector,((*argc)+1)*sizeof(char*));            vector[*argc] = current;            (*argc)++;            current = NULL;        } else {            /* Even on empty input string return something not NULL. */            if (vector == NULL) vector = s_malloc(sizeof(void*));            return vector;        }    }err:    while((*argc)--)        sdsfree(vector[*argc]);    s_free(vector);    if (current) sdsfree(current);    *argc = 0;    return NULL;}/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */void sdsfreesplitres(sds *tokens, int count) {    if (!tokens) return;    while(count--)        sdsfree(tokens[count]);    s_free(tokens);}

sdssplitargs 函数开始不好理解, 不过我这写了个 demo 分享给大家

#include 
#include "sds.h"int main(int argc, char * argv[]) { int count; const char * line = " hset name \"name:filed\" \"value:field\" "; sds * tokens = sdssplitargs(line, &count); printf("line = [%s], count = [%d]\n", line, count); for (int i = 0; i < count; i++) { printf("tokens[%d] = [%s]\n", i, tokens[i]); } sdsfreesplitres(tokens, count); return 0;}

输出 

line = [ hset name "name:filed" "value:field" ], count = [4]tokens[0] = [hset]tokens[1] = [name]tokens[2] = [name:filed]tokens[3] = [value:field]

是不是瞬间开悟 -> sdssplitargs 到底要闹那样! 

整体写下来, 感受是 antirez sds 实现的很四平八稳, 没有什么花哨的地方.

感兴趣的朋友写写, 收获少不了 ~

 

后记 - 再接再厉

错误是难免, 欢迎交流吹水 ~

  - 

古朗月行(唐·李白)小时不识月,呼作白玉盘。又疑瑶台镜,飞在青云端。仙人垂两足,桂树作团团。白兔捣药成,问言与谁餐。蟾蜍蚀圆影,大明夜已残。羿昔落九乌,天人清且安。阴精此沦惑,去去不足观。忧来其如何,凄怆摧心肝。
上一篇:C基础 带你手写 redis ae 事件驱动模型
下一篇:C基础 带你手写 redis adlist 双向链表

发表评论

最新留言

初次前来,多多关照!
[***.217.46.12]2025年04月07日 19时04分49秒