本文共 19470 字,大约阅读时间需要 64 分钟。
Python3源码分析
本文环境python3.5.2。参考书籍<>python官网
Python3的List对象
list对象是一个变长对象,在运行时动态调整其所维护的内存和元素,并且支持插入删除等操作,list的定义如下;
#define PyObject_VAR_HEAD PyVarObject ob_base;#define Py_INVALID_SIZE (Py_ssize_t)-1/* Nothing is actually declared to be a PyObject, but every pointer to * a Python object can be cast to a PyObject*. This is inheritance built * by hand. Similarly every pointer to a variable-size Python object can, * in addition, be cast to PyVarObject*. */typedef struct _object { _PyObject_HEAD_EXTRA Py_ssize_t ob_refcnt; // 引用计数值 struct _typeobject *ob_type; // 基本的类型 type} PyObject;typedef struct { PyObject ob_base; // 对象基本信息 Py_ssize_t ob_size; /* Number of items in variable part */ // 变长对象的元素个数} PyVarObject;...typedef struct { PyObject_VAR_HEAD // 变量头部信息 /* Vector of pointers to list elements. list[0] is ob_item[0], etc. */ PyObject **ob_item; // 元素指向具体指 /* ob_item contains space for 'allocated' elements. The number * currently in use is ob_size. * Invariants: * 0 <= ob_size <= allocated * len(list) == ob_size * ob_item == NULL implies ob_size == allocated == 0 * list.sort() temporarily sets allocated to -1 to detect mutations. * * Items must normally not be NULL, except during construction when * the list is not yet visible outside the function that builds it. */ Py_ssize_t allocated; // 当前空间} PyListObject;
其中,ob_size就是list实际拥有的元素数量,allocated指的是实际申请的内存空间,比如列表A实际申请了5元素的空间,但是此时A只包含了2个元素,则ob_size为2,allocated为5。
List的创建和初始化
在新建List的时候会调用BUILD_LIST指令新建List,BUILD_LIST对应的字节码指令是,
TARGET(BUILD_LIST) { PyObject *list = PyList_New(oparg); // 生成新的列表调用,传入初始化时传入的参数值 if (list == NULL) goto error; while (--oparg >= 0) { PyObject *item = POP(); PyList_SET_ITEM(list, oparg, item); // 将初始化的列表值设置到列表中 } PUSH(list); // 压入list DISPATCH(); // 执行下一条指令 }
此时查看PyList_New函数,
PyObject *PyList_New(Py_ssize_t size){ PyListObject *op; size_t nbytes;#ifdef SHOW_ALLOC_COUNT static int initialized = 0; if (!initialized) { Py_AtExit(show_alloc); initialized = 1; }#endif if (size < 0) { // 如果传入的大小小于0则报错 PyErr_BadInternalCall(); return NULL; } /* Check for overflow without an actual overflow, * which can cause compiler to optimise out */ if ((size_t)size > PY_SIZE_MAX / sizeof(PyObject *)) // 计算申请的大小是否超过数组最大的索引值,PY_SIZE_MAX如果是64位则是最大的有符号int64值 return PyErr_NoMemory(); nbytes = size * sizeof(PyObject *); // 需要申请的内存字节大小 if (numfree) { // 缓冲池可用 numfree--; op = free_list[numfree]; // 获取缓冲的对象 _Py_NewReference((PyObject *)op);#ifdef SHOW_ALLOC_COUNT count_reuse++;#endif } else { op = PyObject_GC_New(PyListObject, &PyList_Type); // 生成新的对象 if (op == NULL) return NULL;#ifdef SHOW_ALLOC_COUNT count_alloc++;#endif } if (size <= 0) // 传入小于等于0 op->ob_item = NULL; // 则list实例的ob_item为NULL else { op->ob_item = (PyObject **) PyMem_MALLOC(nbytes); // 申请nbytes字节大小的内存,并将申请内存的首地址设置到ob_item if (op->ob_item == NULL) { // 如果为空则报错 Py_DECREF(op); return PyErr_NoMemory(); } memset(op->ob_item, 0, nbytes); // 将对应nbytes字节的数据设置为0 } Py_SIZE(op) = size; // 设置ob->size 为size op->allocated = size; // 设置allocated值 _PyObject_GC_TRACK(op); // 加入GC列表 return (PyObject *) op;}
在生成新的list的时候会先检查缓冲池是否拥有,默认会维护80个PyListObject对象;
#ifndef PyList_MAXFREELIST#define PyList_MAXFREELIST 80#endifstatic PyListObject *free_list[PyList_MAXFREELIST];static int numfree = 0;
在调用list_dealloc函数释放列表对象时,会检查缓冲池是否已经满了,如果没有满则添加到缓冲池中,
static voidlist_dealloc(PyListObject *op){ Py_ssize_t i; PyObject_GC_UnTrack(op); Py_TRASHCAN_SAFE_BEGIN(op) if (op->ob_item != NULL) { /* Do it backwards, for Christian Tismer. There's a simple test case where somehow this reduces thrashing when a *very* large list is created and immediately deleted. */ i = Py_SIZE(op); while (--i >= 0) { Py_XDECREF(op->ob_item[i]); // 减少列表对象的引用计数 } PyMem_FREE(op->ob_item); // 释放ob_item申请的内存空间 } if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op)) // 如果缓冲池未满 free_list[numfree++] = op; // 添加到缓冲池列表中 else Py_TYPE(op)->tp_free((PyObject *)op); // 大于缓冲池大小则直接释放内存 Py_TRASHCAN_SAFE_END(op)}
当缓冲池没有可用list的时候,则新生成一个list,调用PyObject_GC_New(PyListObject, &PyList_Type)生成一个;
#define PyObject_GC_New(type, typeobj) \ ( (type *) _PyObject_GC_New(typeobj) ) // 调用_PyObject_GC_New生成...PyObject *_PyObject_GC_New(PyTypeObject *tp){ PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp)); // 申请内存空间 if (op != NULL) op = PyObject_INIT(op, tp); // 初始化op return op; // 返回}...#define PyObject_INIT(op, typeobj) \ ( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) ) // 设置ob_type,增加引用计数...#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) // 设置ob_type类型
此时新生成的list对象就基本初始化完成。
当调用给list设值时,会尝试如下字节码;
14 0 LOAD_CONST 0 (1) 3 LOAD_CONST 1 (2) 6 BUILD_LIST 2 9 STORE_NAME 0 (a) 15 12 LOAD_CONST 2 (10) 15 LOAD_NAME 0 (a) 18 LOAD_CONST 0 (1) 21 STORE_SUBSCR
其中,a初始化为包含1,2两个元素的列表,然后设置a[1]=10,此时调用STORE_SUBSCR来设置,
TARGET(STORE_SUBSCR) { PyObject *sub = TOP(); // 获取需要设置的key PyObject *container = SECOND(); // 获取list实例 PyObject *v = THIRD(); // 获取待设置的值 int err; STACKADJ(-3); // 栈移动三个值 /* v[w] = u */ err = PyObject_SetItem(container, sub, v); // 设置值 Py_DECREF(v); Py_DECREF(container); Py_DECREF(sub); if (err != 0) goto error; DISPATCH(); }
此时调用了PyObject_SetItem函数来将值设置到列表中,
intPyObject_SetItem(PyObject *o, PyObject *key, PyObject *value){ PyMappingMethods *m; if (o == NULL || key == NULL || value == NULL) { // 检查传入参数 null_error(); return -1; } m = o->ob_type->tp_as_mapping; // 获取类型的tp_as_mapping方法 if (m && m->mp_ass_subscript) return m->mp_ass_subscript(o, key, value); // 调用字典的设置方法 if (o->ob_type->tp_as_sequence) { // 调用tp_as_sequence方法 if (PyIndex_Check(key)) { // 检查key Py_ssize_t key_value; key_value = PyNumber_AsSsize_t(key, PyExc_IndexError); if (key_value == -1 && PyErr_Occurred()) return -1; return PySequence_SetItem(o, key_value, value); // 调用设置值 } else if (o->ob_type->tp_as_sequence->sq_ass_item) { type_error("sequence index must be " "integer, not '%.200s'", key); return -1; } } type_error("'%.200s' object does not support item assignment", o); return -1;}
由于此时,传入的list对象,此时的PyList_Type对应的tp_as_mapping为list_as_mapping;
static PyMappingMethods list_as_mapping = { (lenfunc)list_length, (binaryfunc)list_subscript, (objobjargproc)list_ass_subscript};
此时就是调用了list_ass_subscript方法来处理设值,
static intlist_ass_subscript(PyListObject* self, PyObject* item, PyObject* value){ if (PyIndex_Check(item)) { // item索引值检查 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); // 检查值是否合法,不合法返回PyExc_IndexError if (i == -1 && PyErr_Occurred()) // 如果检查不合法则返回错误 return -1; if (i < 0) // 如果小于0, 则转换成正值索引 i += PyList_GET_SIZE(self); return list_ass_item(self, i, value); // 设值值 } ...}
此时先检查索引值是否合法,然后将负值索引转换成正值索引,然后调用list_ass_item处理;
static intlist_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v){ PyObject *old_value; if (i < 0 || i >= Py_SIZE(a)) { // 检查索引值是否超出范围 PyErr_SetString(PyExc_IndexError, "list assignment index out of range"); return -1; } if (v == NULL) return list_ass_slice(a, i, i+1, v); // 如果v为NULL则删除对应i索引处的值 Py_INCREF(v); old_value = a->ob_item[i]; // 获取旧值 a->ob_item[i] = v; // 设值新值 Py_DECREF(old_value); return 0;}
至此一个List的设置过程就完成了。
List的添加和删除元素
a本来是[1,2]的列表,调用a.insert(5,105)时,对应的字节码如下;
18 45 LOAD_NAME 0 (a) 48 LOAD_ATTR 3 (insert) 51 LOAD_CONST 3 (5) 54 LOAD_CONST 4 (105) 57 CALL_FUNCTION 2 (2 positional, 0 keyword pair)
此时就会调用listinsert函数,
static PyObject *listinsert(PyListObject *self, PyObject *args){ Py_ssize_t i; PyObject *v; if (!PyArg_ParseTuple(args, "nO:insert", &i, &v)) // 解析参数和列表 return NULL; if (ins1(self, i, v) == 0) // 插入数据 Py_RETURN_NONE; return NULL;}
此时解析了相关参数为tuple后调用ins1插入数据,
static intins1(PyListObject *self, Py_ssize_t where, PyObject *v){ Py_ssize_t i, n = Py_SIZE(self); // 获取当前的列表大小 PyObject **items; if (v == NULL) { // 检查是否为空 PyErr_BadInternalCall(); return -1; } if (n == PY_SSIZE_T_MAX) { // 检查是否已经是列表允许的最大值了 PyErr_SetString(PyExc_OverflowError, "cannot add more objects to list"); return -1; } if (list_resize(self, n+1) == -1) // 重新扩招self大小 return -1; if (where < 0) { // 如果索引为负值,则转换成正值 where += n; // 计算出正值索引 if (where < 0) where = 0; // 如果转换后还是负值则置0 } if (where > n) where = n; // 如果where大于最大值则置为最大值n items = self->ob_item; for (i = n; --i >= where; ) // 将大于where索引值的元素移动后一位 items[i+1] = items[i]; Py_INCREF(v); items[where] = v; // 设置where为传入的v return 0;}
首先会调整列表的大小,调用list_resize函数,
static intlist_resize(PyListObject *self, Py_ssize_t newsize){ PyObject **items; size_t new_allocated; Py_ssize_t allocated = self->allocated; // 获取当前的申请内存的大小 /* Bypass realloc() when a previous overallocation is large enough to accommodate the newsize. If the newsize falls lower than half the allocated size, then proceed with the realloc() to shrink the list. */ if (allocated >= newsize && newsize >= (allocated >> 1)) { // 如果已经申请的大小大于newsize并且newsize>=已有空间的一半 assert(self->ob_item != NULL || newsize == 0); Py_SIZE(self) = newsize; // 设置ob_size为newsize return 0; } /* This over-allocates proportional to the list size, making room * for additional growth. The over-allocation is mild, but is * enough to give linear-time amortized behavior over a long * sequence of appends() in the presence of a poorly-performing * system realloc(). * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... */ new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6); // 否则就调整申请空间大小 /* check for integer overflow */ if (new_allocated > PY_SIZE_MAX - newsize) { // 如果新申请的大小最大的大小 PyErr_NoMemory(); // 则报错 return -1; } else { new_allocated += newsize; // 新增newsize } if (newsize == 0) new_allocated = 0; items = self->ob_item; if (new_allocated <= (PY_SIZE_MAX / sizeof(PyObject *))) // 检查新申请的大小是否在允许范围内 PyMem_RESIZE(items, PyObject *, new_allocated); // 重新调整ob_item的申请空间 else items = NULL; if (items == NULL) { PyErr_NoMemory(); return -1; } self->ob_item = items; // 重置item Py_SIZE(self) = newsize; // 重置size 为newsize self->allocated = new_allocated; // 重置申请的空间大小 return 0;}
在调整过列表大小后,列表会判断如果传入的负值索引比列表元素还大则设置在0位,如果转换的正值索引比列表长度还大则设置为最后一位,并将大于索引值的元素,往后移动一位。此时就是list的插入过程。
当列表a=[1,2,10,105],调用a.remove(10)时,对应的字节码如下;
20 71 LOAD_NAME 0 (a) 74 LOAD_ATTR 4 (remove) 77 LOAD_CONST 2 (10) 80 CALL_FUNCTION 1 (1 positional, 0 keyword pair) 83 POP_TOP
此时调用了list的remove属性,对应调用了listremove方法,
static PyObject *listremove(PyListObject *self, PyObject *v){ Py_ssize_t i; for (i = 0; i < Py_SIZE(self); i++) { // 循环遍历lise int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); // 比较列表中的值与传入值v进行比较 if (cmp > 0) { // 如果找到 if (list_ass_slice(self, i, i+1, (PyObject *)NULL) == 0) // 设置该位为NULL Py_RETURN_NONE; return NULL; } else if (cmp < 0) return NULL; } PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); // 如果没有找到则返回该错误 return NULL;}
其中调用了PyObject_RichCompareBool函数,进行比较;
intPyObject_RichCompareBool(PyObject *v, PyObject *w, int op){ PyObject *res; int ok; /* Quick result when objects are the same. Guarantees that identity implies equality. */ if (v == w) { // 快速检查,如果两个相同则返回 if (op == Py_EQ) return 1; else if (op == Py_NE) return 0; } res = PyObject_RichCompare(v, w, op); // 比较v,w if (res == NULL) return -1; if (PyBool_Check(res)) ok = (res == Py_True); else ok = PyObject_IsTrue(res); Py_DECREF(res); return ok; // 返回结果}
继续调用了PyObject_RichCompare函数进行比较,
PyObject *PyObject_RichCompare(PyObject *v, PyObject *w, int op){ ... res = do_richcompare(v, w, op); // 进行比较 ...}
查看do_richcompare函数比较可知,
/* Perform a rich comparison, raising TypeError when the requested comparison operator is not supported. */static PyObject *do_richcompare(PyObject *v, PyObject *w, int op){ richcmpfunc f; PyObject *res; int checked_reverse_op = 0; if (v->ob_type != w->ob_type && PyType_IsSubtype(w->ob_type, v->ob_type) && (f = w->ob_type->tp_richcompare) != NULL) { // 检查v,w的两个类型是否相同,检查是否w,v是否是子类型,检查w对应的tp_richcompare是否为空 checked_reverse_op = 1; res = (*f)(w, v, _Py_SwappedOp[op]); // 直接调用w的tp_richcompare方法进行比较 if (res != Py_NotImplemented) return res; // 返回结果 Py_DECREF(res); } if ((f = v->ob_type->tp_richcompare) != NULL) { // 检查v是否有tp_richcompare方法 res = (*f)(v, w, op); // 调用v的tp_richcompare进行比较 if (res != Py_NotImplemented) return res; // 返回结果 Py_DECREF(res); } if (!checked_reverse_op && (f = w->ob_type->tp_richcompare) != NULL) { // 如果checked_reverse_op为0,w的tp_richcompare不为空 res = (*f)(w, v, _Py_SwappedOp[op]); // 调用w的tp_richcompare方法进行比较 if (res != Py_NotImplemented) return res; // 返回结果 Py_DECREF(res); } /* If neither object implements it, provide a sensible default for == and !=, but raise an exception for ordering. */ switch (op) { // 如果都没有实现该方法则使用简易的默认方法 case Py_EQ: res = (v == w) ? Py_True : Py_False; // 判断是否相等 break; case Py_NE: res = (v != w) ? Py_True : Py_False; // 判断是否相等 break; default: /* XXX Special-case None so it doesn't show as NoneType() */ PyErr_Format(PyExc_TypeError, "unorderable types: %.100s() %s %.100s()", v->ob_type->tp_name, opstrings[op], w->ob_type->tp_name); return NULL; } Py_INCREF(res); return res; // 返回结果}
由于此时列表a中的元素是long型的元素,则调用了long_richcompare方法进行两个值之间的比较,
static PyObject *long_richcompare(PyObject *self, PyObject *other, int op){ int result; PyObject *v; CHECK_BINOP(self, other); if (self == other) // 如果两者相等 result = 0; // 直接返回 else result = long_compare((PyLongObject*)self, (PyLongObject*)other); // 比较两个long型 /* Convert the return value to a Boolean */ switch (op) { // 根据op 返回不同的结果 case Py_EQ: v = TEST_COND(result == 0); break; case Py_NE: v = TEST_COND(result != 0); break; case Py_LE: v = TEST_COND(result <= 0); break; case Py_GE: v = TEST_COND(result >= 0); break; case Py_LT: v = TEST_COND(result == -1); break; case Py_GT: v = TEST_COND(result == 1); break; default: PyErr_BadArgument(); return NULL; } Py_INCREF(v); return v;}
其中又调用了long_compare函数进行比较,
static intlong_compare(PyLongObject *a, PyLongObject *b){ Py_ssize_t sign; if (Py_SIZE(a) != Py_SIZE(b)) { // 比较大小是否相同 sign = Py_SIZE(a) - Py_SIZE(b); // 获取两个值的差值 } else { Py_ssize_t i = Py_ABS(Py_SIZE(a)); // 获取a值大小 while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) ; if (i < 0) sign = 0; else { sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i]; if (Py_SIZE(a) < 0) sign = -sign; } } return sign < 0 ? -1 : sign > 0 ? 1 : 0; // 根据sign的处理结果返回}
至此就比较出来了两个值的大小,如果相同则会继续执行listremove
中的list_ass_slice函数,将对应索引位置的值置空,该函数具体的执行流程大家可自行阅读,至此一个列表中的元素的删除过程已经分析完成。总结
列表的初始化和插入数据和删除数据基本上的流程分析完成,其中还有很多其他的细节在流程中没有涉及到,列表在元素增长或者减少的过程中,都会涉及到都申请得到内存进行相关操作,此时的大量细节还需要大家自行查看。
转载地址:https://blog.csdn.net/qq_33339479/article/details/81807096 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!