Skip to content

Latest commit

 

History

History
1240 lines (1003 loc) · 42.6 KB

File metadata and controls

1240 lines (1003 loc) · 42.6 KB

元类

PyType_Type 的定义:

// Objects/typeobject.c

PyTypeObject PyType_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "type",                                     /* tp_name */
    sizeof(PyHeapTypeObject),                   /* tp_basicsize */
    sizeof(PyMemberDef),                        /* tp_itemsize */
    (destructor)type_dealloc,                   /* tp_dealloc */
    offsetof(PyTypeObject, tp_vectorcall),      /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)type_repr,                        /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    (ternaryfunc)type_call,                     /* tp_call */
    0,                                          /* tp_str */
    (getattrofunc)type_getattro,                /* tp_getattro */
    (setattrofunc)type_setattro,                /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
    Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TYPE_SUBCLASS |
    Py_TPFLAGS_HAVE_VECTORCALL,                 /* tp_flags */
    type_doc,                                   /* tp_doc */
    (traverseproc)type_traverse,                /* tp_traverse */
    (inquiry)type_clear,                        /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyTypeObject, tp_weaklist),        /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    type_methods,                               /* tp_methods */
    type_members,                               /* tp_members */
    type_getsets,                               /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    offsetof(PyTypeObject, tp_dict),            /* tp_dictoffset */
    type_init,                                  /* tp_init */
    0,                                          /* tp_alloc */
    type_new,                                   /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
    (inquiry)type_is_gc,                        /* tp_is_gc */
};

PyType_Type 的 tp_type 是它自己.

注意, 虽然 PyType_Type 设置了 tp_vectorcall_offset, 但是其实 PyType_Type 的 tp_vectorcall 等于 NULL.

PyType_Type 的 tp_basicsize 和 tp_itemsize

tp_basicsize 等于 sizeof(PyHeapTypeObject), 而 tp_itemsize 等于 sizeof(PyMemberDef).

对于普通的类来说, tp_basicsize 表示该类的实例的大小, tp_itemsize 表示该类的实例变长元素列表中元素的大小. 例如 PyTuple_Type 的 tp_itemsize 等于 sizeof(PyObject *).

我们知道 PyType_Type 是元类, 普通类相当于是 PyType_Type 的实例, 因此, tp_basicsize 表示要创建的类的大小, tp_itemsize 表示用户定义字段(类成员)的大小, 这里就是 PyMemberDef 的大小. 当使用 PyType_Type 创建一个类时, 通过 tp_basicsize 和 tp_itemsize 便可以分配正确大小的内存.

PyType_Type 创建类的流程

调用流程:

PyType_Type.tp_call -> PyType_Type.tp_new -> PyType_Ready
             |       
             +-------> PyType_Type.tp_init

type_call

// Objects/typeobject.c

/* args: [name, bases, attrs]
   kwds: {}, 关键字参数, 来自于类的定义, 例如: `class C(a=1, b=2)`, kwds 会传递给父类的 __init_subclass__ 方法.
*/
static PyObject *
type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyObject *obj;
    PyThreadState *tstate = _PyThreadState_GET();

#ifdef Py_DEBUG
    /* type_call() must not be called with an exception set,
       because it can clear it (directly or indirectly) and so the
       caller loses its exception */
    assert(!_PyErr_Occurred(tstate));
#endif

    /* Special case: type(x) should return Py_TYPE(x) */
    /* We only want type itself to accept the one-argument form (#27157) */
    if (type == &PyType_Type) {
        assert(args != NULL && PyTuple_Check(args));
        assert(kwds == NULL || PyDict_Check(kwds));
        Py_ssize_t nargs = PyTuple_GET_SIZE(args);

        if (nargs == 1 && (kwds == NULL || !PyDict_GET_SIZE(kwds))) {
            obj = (PyObject *) Py_TYPE(PyTuple_GET_ITEM(args, 0));
            Py_INCREF(obj);
            return obj;
        }

        /* SF bug 475327 -- if that didn't trigger, we need 3
           arguments. But PyArg_ParseTuple in type_new may give
           a msg saying type() needs exactly 3. */
        if (nargs != 3) {
            PyErr_SetString(PyExc_TypeError,
                            "type() takes 1 or 3 arguments");
            return NULL;
        }
    }

    if (type->tp_new == NULL) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "cannot create '%.100s' instances",
                      type->tp_name);
        return NULL;
    }

    obj = type->tp_new(type, args, kwds);
    obj = _Py_CheckFunctionResult(tstate, (PyObject*)type, obj, NULL);
    if (obj == NULL)
        return NULL;

    /* If the returned object is not an instance of type,
       it won't be initialized. */
    if (!PyType_IsSubtype(Py_TYPE(obj), type))
        return obj;

    type = Py_TYPE(obj);
    if (type->tp_init != NULL) {
        int res = type->tp_init(obj, args, kwds);
        if (res < 0) {
            assert(_PyErr_Occurred(tstate));
            Py_DECREF(obj);
            obj = NULL;
        }
        else {
            assert(!_PyErr_Occurred(tstate));
        }
    }
    return obj;
}

type_new

type_new 的代码非常长, 这里就不复制粘贴了. type_new 的关键要点:

  • 检查基类列表.

    如果用户在定义类时没有指定基类, 那么使用 PyBaseObject_Type. 如果用户指定了基类, 那么通过 best_base 计算出最合适的作为类的父类(tp_base).

    best_base 的基本原理: 如果基类列表都是 Python 层面定义的类, 那么 best_base 返回 object. 如果基类列表中包含 CPython 层面的内部类, 那么情况比较复杂, 大体原则是返回最年轻的类.

    一个不常见的异常:

    class C(collections.OrderedDict, collections.defaultdict): 
        pass

    异常信息为: TypeError: multiple bases have instance lay-out conflict.

    OrderedDict 和 defaultdict 都是用 C 实现的, 都继承自 dict. best_baes 发现它们两者互不兼容, 所以报错.

    详细信息可以阅读源码.

  • 处理 __slots__.

    • 检查是否允许定义 __slots__. 如果 base->itemsize != 0, 那么就不允许定义 __slots__. 例如:

      class C(tuple):
          __slots__ = ['a', 'b']     

      tuple 不允许子类定义 __slots__.

    • 检查 __slots__ 中的字符串是否符合标识符命名规则.

    • 检查 __slots__ 中的字符串是否出现在类成员中, 如果是的话报错.

  • 调用元类的 tp_alloc 为类分配内存.

    type = (PyTypeObject *)metatype->tp_alloc(metatype, nslots);
  • 设置 ht_slots = slots.

  • 设置 tp_flags.

    type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE |
          Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC;
  • 设置 tp_as_xxx(魔术方法)

    type->tp_as_async = &et->as_async;
    type->tp_as_number = &et->as_number;
    type->tp_as_sequence = &et->as_sequence;
    type->tp_as_mapping = &et->as_mapping;
    type->tp_as_buffer = &et->as_buffer;
  • 设置基类

    type->tp_bases = bases;
    bases = NULL;
    Py_INCREF(base);
    type->tp_base = base;
  • 设置 tp_dict

    Py_INCREF(dict);
    type->tp_dict = dict;
  • __new__ 转换为 staticmethod. 这个我之前倒是不知道. 例如:

    class C:
        def __new__(cls):
            pass
    
    print(C.__dict__['__new__']) # <staticmethod object at 0x7ffbb5d9efa0>

    __new__ 用于创建实例, 详情参考 类型系统.

  • __init_subclass____class_getitem__ 转换为 classmethod.

  • 初始化 PyMemberDef 列表:

    /* Add descriptors for custom slots from __slots__, or for __dict__ */
    mp = PyHeapType_GET_MEMBERS(et);
    slotoffset = base->tp_basicsize;
    if (et->ht_slots != NULL) {
        for (i = 0; i < nslots; i++, mp++) {
            mp->name = PyUnicode_AsUTF8(
                PyTuple_GET_ITEM(et->ht_slots, i));
            if (mp->name == NULL)
                goto error;
            mp->type = T_OBJECT_EX;
            mp->offset = slotoffset;
    
            /* __dict__ and __weakref__ are already filtered out */
            assert(strcmp(mp->name, "__dict__") != 0);
            assert(strcmp(mp->name, "__weakref__") != 0);
    
            slotoffset += sizeof(PyObject *);
        }
    }

    slotoffset 指向实例(PyObject)内存的偏移量

    简单示意图:

    +------------------+-------------+-------------+-------------+
    | PyHeapTypeObject | PyMemberDef | PyMemberDef | PyMemberDef |
    +------------------+-------------+-------------+-------------+
                       ^
                       |
                       mp                   
             
             slotoffset         
               |       
               v        
    +----------+--------+--------+--------+
    | PyObject | slot 1 | slot 2 | slot 3 |
    +----------+--------+--------+--------+                    
    

    关于 PyMemberDef 的信息可以看这里

  • 设置 tp_dictoffset

    关于 tp_dictoffset 可以看 Python 的官方文档.

    tp_dictoffset 和 tp_dict 没有关系, tp_dict 是类自己的 __dict__, 而 tp_dictoffset 指的是该类的实例的 __dict__ 在 PyObject 的偏移量.

    通过 tp_dictoffset 便可以知道类的实例是否包含 __dict__.

    if (add_dict) {
        if (base->tp_itemsize)
            type->tp_dictoffset = -(long)sizeof(PyObject *);
        else
            type->tp_dictoffset = slotoffset;
        slotoffset += sizeof(PyObject *);
    }
  • 设置 tp_weaklistoffset

    if (add_weak) {
        assert(!base->tp_itemsize);
        type->tp_weaklistoffset = slotoffset;
        slotoffset += sizeof(PyObject *);
    }
  • 设置 tp_basicsize, tp_itemsize

    type->tp_basicsize = slotoffset;
    type->tp_itemsize = base->tp_itemsize;
  • 设置 tp_members(类成员)

    type->tp_members = PyHeapType_GET_MEMBERS(et);

    详情见这里

  • 设置 tp_getset

    if (type->tp_weaklistoffset && type->tp_dictoffset)
        type->tp_getset = subtype_getsets_full;
    else if (type->tp_weaklistoffset && !type->tp_dictoffset)
        type->tp_getset = subtype_getsets_weakref_only;
    else if (!type->tp_weaklistoffset && type->tp_dictoffset)
        type->tp_getset = subtype_getsets_dict_only;
    else
        type->tp_getset = NULL;
  • 设置 tp_getattro 和 tp_setattro

    if (type->tp_dictoffset != 0 || nslots > 0) {
        if (base->tp_getattr == NULL && base->tp_getattro == NULL)
            type->tp_getattro = PyObject_GenericGetAttr;
        if (base->tp_setattr == NULL && base->tp_setattro == NULL)
            type->tp_setattro = PyObject_GenericSetAttr;
    }
  • 设置 GC 相关的操作方法

    type->tp_dealloc = subtype_dealloc;
    
    /* Always override allocation strategy to use regular heap */
    type->tp_alloc = PyType_GenericAlloc;
    type->tp_free = PyObject_GC_Del;
    type->tp_traverse = subtype_traverse;
    type->tp_clear = subtype_clear;
  • 设置 __classcell__

    cell = _PyDict_GetItemIdWithError(dict, &PyId___classcell__);
    if (cell != NULL) {
        /* At least one method requires a reference to its defining class */
        if (!PyCell_Check(cell)) {
            PyErr_Format(PyExc_TypeError,
                          "__classcell__ must be a nonlocal cell, not %.200R",
                          Py_TYPE(cell));
            goto error;
        }
        PyCell_Set(cell, (PyObject *) type);
        if (_PyDict_DelItemId(dict, &PyId___classcell__) < 0) {
            goto error;
        }
    }
    else if (PyErr_Occurred()) {
        goto error;
    }

    详情见 classcell

  • PyType_Ready

    /* Initialize the rest */
    if (PyType_Ready(type) < 0)
        goto error;

    详情见 PyType_Ready

  • fixup_slot_dispatchers

    详情见 fixup_slot_dispatchers

  • set_names

    调用所有 descriptor 的 __set_name__ 方法.

  • init_subclass

    调用父类的 __init_subclass__ 方法, 详情见 init_subclass.

__classcell__

经过我的测试, 发现如下代码会出现 __classcell__:

class C:
    def __init__(self):
        print(__class__)
        return super().__init__()
C()

对应的字节码如下:

  4           0 LOAD_BUILD_CLASS
              2 LOAD_CONST               0 (<code object C at 0x7f9bd8b67a80, file "mydemo/class_demo.py", line 4>)
              4 LOAD_CONST               1 ('C')
              6 MAKE_FUNCTION            0
              8 LOAD_CONST               1 ('C')
             10 CALL_FUNCTION            2
             12 STORE_NAME               0 (C)

  8          14 LOAD_NAME                0 (C)
             16 CALL_FUNCTION            0
             18 POP_TOP
             20 LOAD_CONST               2 (None)
             22 RETURN_VALUE

Disassembly of <code object C at 0x7f9bd8b67a80, file "mydemo/class_demo.py", line 4>:
  4           0 LOAD_NAME                0 (__name__)
              2 STORE_NAME               1 (__module__)
              4 LOAD_CONST               0 ('C')
              6 STORE_NAME               2 (__qualname__)

  5           8 LOAD_CLOSURE             0 (__class__)
             10 BUILD_TUPLE              1
             12 LOAD_CONST               1 (<code object __init__ at 0x7f9bd8b679d0, file "mydemo/class_demo.py", line 5>)
             14 LOAD_CONST               2 ('C.__init__')
             16 MAKE_FUNCTION            8 (closure)
             18 STORE_NAME               3 (__init__)
             20 LOAD_CLOSURE             0 (__class__)
             22 DUP_TOP
             24 STORE_NAME               4 (__classcell__)
             26 RETURN_VALUE

Disassembly of <code object __init__ at 0x7f9bd8b679d0, file "mydemo/class_demo.py", line 5>:
  6           0 LOAD_GLOBAL              0 (print)
              2 LOAD_DEREF               0 (__class__)
              4 CALL_FUNCTION            1
              6 POP_TOP

  7           8 LOAD_GLOBAL              1 (super)
             10 CALL_FUNCTION            0
             12 LOAD_METHOD              2 (__init__)
             14 CALL_METHOD              0
             16 RETURN_VALUE

当代码中出现 super() 时, 会产生 __classcell____class__.

注意, 类 C 对应的字节码对象的 co_cellvars 的长度为 1, 其中保存的就是 __class__.

在上面的字节码中可以看出, __init__ 方法是一个闭包函数, 在 __init__ 的代码中可以通过 __class__ 变量访问当前所在的类.

__classcell__ 保存的就是 __class__, 当元类在创建类时, 如果发现 dict 包含 __classcell__, 那么会设置 __classcell__ 为当前想要创建的类.

__classcell____class__ 都是 PyCellObject, PyCellObject 其实就是一个非常简单的对象, 其中的内部字段 ob_ref 指向实际的值. PyCellObject 起到一个类似 "惰性求值" 的作用. 当执行类 C 的字节码时 __classcell____class__ 中的 ob_ref 都是 NULL, 直到元类创建类对象时, 发现 __classcell__ 然后设置 ob_ref 指向当前创建的类.

一些链接:

PyType_GenericAlloc

// Objects/typeobject.c

/* 为类分配内存
   type 一般是 PyType_Type
   nitems 一般等于 nslots(排除了 __dict__ 和 __weakref__)
*/
PyObject *
PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems)
{
    PyObject *obj;
    // 可以简单认为 type 就是 PyType_Type, PyType_Type 的 tp_basicsize 等于 sizeof(PyHeapTypeObject), tp_itemsize 等于 sizeof(PyMemberDef)
    // 那么 size = sizeof(PyHeapTypeObject) + (nitems + 1) * sizeof(PyMemberDef)
    const size_t size = _PyObject_VAR_SIZE(type, nitems+1);
    /* note that we need to add one, for the sentinel */

    if (_PyType_IS_GC(type)) {
        obj = _PyObject_GC_Malloc(size);
    }
    else {
        obj = (PyObject *)PyObject_MALLOC(size);
    }

    if (obj == NULL) {
        return PyErr_NoMemory();
    }

    // '\0' 就等于 0 啊, 这里为什么要写 '\0' 呢? 这行代码看起来是非常久之前写的, 难道那时候编译器或者 memset 对 '\0' 和 0 的处理不一样.
    memset(obj, '\0', size);

    // 设置 obj->ob_type 和 obj->ob_size 
    if (type->tp_itemsize == 0) {
        (void)PyObject_INIT(obj, type);
    }
    else {
        (void) PyObject_INIT_VAR((PyVarObject *)obj, type, nitems);
    }

    if (_PyType_IS_GC(type)) {
        _PyObject_GC_TRACK(obj);
    }
    return obj;
}

PyType_GenericAlloc 的核心功能就是分配内存.

如果 type->tp_itemsize != 0, 那么分配的内存除了 PyHeapTypeObject, 还会在尾部添加额外的内存区域, 用于保存 __slots__. 同时在 ob_size 保存 __slots__ 的数量.

简单的内存结构示意图:

+------------------+-------------+-------------+-------------+
| PyHeapTypeObject | PyMemberDef | PyMemberDef | PyMemberDef |
+------------------+-------------+-------------+-------------+
                   | <------     __slots__             ------> 

通过 ob_size 便知道尾部是否保存有 __slots__.

PyType_Ready

工作流程:

  • 如果 type 已经初始化, 则马上退出(return 0).

    if (type->tp_flags & Py_TPFLAGS_READY) {
        assert(_PyType_CheckConsistency(type));
        return 0;
    }
  • 其它一些检查工作, 具体查看源码.

  • 设置 tp_base 和 tp_bases. 如果 tp_base 等于 NULL, 那么将其设置为 PyBaseObject_Type. 如果 tp_bases 等于 NULL, 那么将其设置为 (tp_base, ).

  • 设置 tp_dict. 如果 tp_dict 等于 NULL, 就设置为一个全新的空的 dict.

  • add_operators

    往 tp_dict 中添加 __xxx__. 详情见 add_operators

  • add_methods

    add_methods 负责将 tp_methods 中的方法添加到 tp_dict 中. 详情见 add_methods

  • add_members

    对于内部类, tp_members 指向一个静态的 PyMemberDef 数组.

    对于用户定义的类, 如果定义了 __slots__ 的话, tp_members 指向 PyHeapTypeObject 的结尾. 用户.

    add_members 将 PyMemberDef 转换为 PyMemberDescrObject, 然后保存在 tp_dict 中.

    详情见 https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_members

  • add_getset

  • mro_internal

    /* Calculate method resolution order, 忽略细节的话, 主要功能可以看 mro_implementation 和 pmerge 函数 */
    if (mro_internal(type, NULL) < 0)
        goto error;

    详情见 类型系统之 MRO.

  • inherit_special

    视情况继承 base 的:

    • tp_traverse
    • tp_clear
    • tp_new
    • tp_basicsize
    • tp_itemsize
    • tp_weaklistoffset
    • tp_dictoffset

    以及设置 tp_flags

    /* Inherit special flags from dominant base */
    if (type->tp_base != NULL)
        inherit_special(type, type->tp_base);
  • inherit_slots

    简单来说, 如果 type 没有定义 tp_xxx, 但是 base 定义了, 那么将其复制到 type.

    #define SLOTDEFINED(SLOT) \
      (base->SLOT != 0 && \
       (basebase == NULL || base->SLOT != basebase->SLOT))
    
    #define COPYSLOT(SLOT) \
      if (!type->SLOT && SLOTDEFINED(SLOT)) type->SLOT = base->SLOT
    

    继承的内容非常多, 具体看 inherit_slots 的代码.

    /* Initialize tp_dict properly */
    bases = type->tp_mro;
    assert(bases != NULL);
    assert(PyTuple_Check(bases));
    n = PyTuple_GET_SIZE(bases);
    for (i = 1; i < n; i++) {
        PyObject *b = PyTuple_GET_ITEM(bases, i);
        if (PyType_Check(b))
            inherit_slots(type, (PyTypeObject *)b);
    }
  • 继承基类的 tp_as_xxx.

    /* Some more special stuff */
    base = type->tp_base;
    if (base != NULL) {
        if (type->tp_as_async == NULL)
            type->tp_as_async = base->tp_as_async;
        if (type->tp_as_number == NULL)
            type->tp_as_number = base->tp_as_number;
        if (type->tp_as_sequence == NULL)
            type->tp_as_sequence = base->tp_as_sequence;
        if (type->tp_as_mapping == NULL)
            type->tp_as_mapping = base->tp_as_mapping;
        if (type->tp_as_buffer == NULL)
            type->tp_as_buffer = base->tp_as_buffer;
    }
  • 更新基类的 tp_subclasses

    /* Link into each base class's list of subclasses */
    bases = type->tp_bases;
    n = PyTuple_GET_SIZE(bases);
    for (i = 0; i < n; i++) {
        PyObject *b = PyTuple_GET_ITEM(bases, i);
        if (PyType_Check(b) &&
            add_subclass((PyTypeObject *)b, type) < 0)
            goto error;
    }
  • 更新 tp_flags

    /* All done -- set the ready flag */
    type->tp_flags =
        (type->tp_flags & ~Py_TPFLAGS_READYING) | Py_TPFLAGS_READY;

fixup_slot_dispatchers

来自代码的一段注释:

/* Common code for update_slots_callback() and fixup_slot_dispatchers().
 *
 * This is meant to set a "slot" like type->tp_repr or
 * type->tp_as_sequence->sq_concat by looking up special methods like
 * __repr__ or __add__. The opposite (adding special methods from slots) is
 * done by add_operators(), called from PyType_Ready(). Since update_one_slot()
 * calls PyType_Ready() if needed, the special methods are already in place.
 *
 * The special methods corresponding to each slot are defined in the "slotdef"
 * array. Note that one slot may correspond to multiple special methods and vice
 * versa. For example, tp_richcompare uses 6 methods __lt__, ..., __ge__ and
 * tp_as_number->nb_add uses __add__ and __radd__. In the other direction,
 * __add__ is used by the number and sequence protocols and __getitem__ by the
 * sequence and mapping protocols. This causes a lot of complications.
 *
 * In detail, update_one_slot() does the following:
 *
 * First of all, if the slot in question does not exist, return immediately.
 * This can happen for example if it's tp_as_number->nb_add but tp_as_number
 * is NULL.
 *
 * For the given slot, we loop over all the special methods with a name
 * corresponding to that slot (for example, for tp_descr_set, this would be
 * __set__ and __delete__) and we look up these names in the MRO of the type.
 * If we don't find any special method, the slot is set to NULL (regardless of
 * what was in the slot before).
 *
 * Suppose that we find exactly one special method. If it's a wrapper_descriptor
 * (i.e. a special method calling a slot, for example str.__repr__ which calls
 * the tp_repr for the 'str' class) with the correct name ("__repr__" for
 * tp_repr), for the right class, calling the right wrapper C function (like
 * wrap_unaryfunc for tp_repr), then the slot is set to the slot that the
 * wrapper_descriptor originally wrapped. For example, a class inheriting
 * from 'str' and not redefining __repr__ will have tp_repr set to the tp_repr
 * of 'str'.
 * In all other cases where the special method exists, the slot is set to a
 * wrapper calling the special method. There is one exception: if the special
 * method is a wrapper_descriptor with the correct name but the type has
 * precisely one slot set for that name and that slot is not the one that we
 * are updating, then NULL is put in the slot (this exception is the only place
 * in update_one_slot() where the *existing* slots matter).
 *
 * When there are multiple special methods for the same slot, the above is
 * applied for each special method. As long as the results agree, the common
 * resulting slot is applied. If the results disagree, then a wrapper for
 * the special methods is installed. This is always safe, but less efficient
 * because it uses method lookup instead of direct C calls.
 *
 * There are some further special cases for specific slots, like supporting
 * __hash__ = None for tp_hash and special code for tp_new.
 *
 * When done, return a pointer to the next slotdef with a different offset,
 * because that's convenient for fixup_slot_dispatchers(). This function never
 * sets an exception: if an internal error happens (unlikely), it's ignored. */

fixup_slot_dispatchers 和 add_operators 可以看作是两个相反的操作, 彼此之间联系紧密, 两者一起阅读有助于理解.

fixup_slot_dispatchers 根据 slotdefs 设置 tp_xxx. add_operators 根据 slotdefs 往 tp_dict 中添加 __xxx__ 方法.

static slotdef *
update_one_slot(PyTypeObject *type, slotdef *p)
{
    PyObject *descr;
    PyWrapperDescrObject *d;
    void *generic = NULL, *specific = NULL;
    int use_generic = 0;
    int offset = p->offset;
    int error;
    void **ptr = slotptr(type, offset);
    // int debug = PySys_GetObject("_debug") != NULL ? 1 : 0;
    
    // if(debug){
    //     printf("update_one_slot: %s\n", p->name);
    //     if(ptr == NULL){
    //         printf("update_one_slot: ptr == NULL\n");
    //     }
    // }
    if (ptr == NULL) {
        // 跳过相邻的 offset 相同的 slot
        do {
            ++p;
        } while (p->offset == offset);
        return p;
    }
    /* We may end up clearing live exceptions below, so make sure it's ours. */
    assert(!PyErr_Occurred());
    do {
        /* Use faster uncached lookup as we won't get any cache hits during type setup. */
        descr = find_name_in_mro(type, p->name_strobj, &error);
        // if(debug){
        //     printf("update_one_slot: descr = %p\n", descr);
        // }
        if (descr == NULL) {
            if (error == -1) {
                /* It is unlikely but not impossible that there has been an exception
                   during lookup. Since this function originally expected no errors,
                   we ignore them here in order to keep up the interface. */
                PyErr_Clear();
            }
            // 针对 tp_iternext(__next__) 做特殊处理
            if (ptr == (void**)&type->tp_iternext) {
                specific = (void *)_PyObject_NextNotImplemented;
            }
            // 继续下一条 slotdef, 如果 offset 不相等的话退出循环.
            continue;
        }
        
        if (Py_IS_TYPE(descr, &PyWrapperDescr_Type) &&
            ((PyWrapperDescrObject *)descr)->d_base->name_strobj == p->name_strobj) {
            // 如果 descr 是 PyWrapperDescr_Type 而且名字和 slotdef 相同
            void **tptr = resolve_slotdups(type, p->name_strobj);
            // tptr == NULL 说明 type 中定义了多个和 p->name_strobj 对应的方法
            if (tptr == NULL || tptr == ptr)
                generic = p->function;
            d = (PyWrapperDescrObject *)descr;
            // if(debug){
            //     printf("update_one_slot: tptr = %p, descr = %s %p %p %d\n", tptr, d->d_base->name, d->d_base->wrapper, p->wrapper, PyType_IsSubtype(type, PyDescr_TYPE(d)));
            // }
            if ((specific == NULL || specific == d->d_wrapped) &&
                d->d_base->wrapper == p->wrapper &&
                PyType_IsSubtype(type, PyDescr_TYPE(d)))
            {
                specific = d->d_wrapped;
                // if(debug){
                //     printf("update_one_slot: specific 1\n");
                // }
            }
            else {
                /* We cannot use the specific slot function because either
                   - it is not unique: there are multiple methods for this
                     slot and they conflict
                   - the signature is wrong (as checked by the ->wrapper
                     comparison above)
                   - it's wrapping the wrong class
                 */
                use_generic = 1;
            }
        }
        else if (Py_IS_TYPE(descr, &PyCFunction_Type) &&
                 PyCFunction_GET_FUNCTION(descr) ==
                 (PyCFunction)(void(*)(void))tp_new_wrapper &&
                 ptr == (void**)&type->tp_new)
        {
            /* The __new__ wrapper is not a wrapper descriptor,
               so must be special-cased differently.
               If we don't do this, creating an instance will
               always use slot_tp_new which will look up
               __new__ in the MRO which will call tp_new_wrapper
               which will look through the base classes looking
               for a static base and call its tp_new (usually
               PyType_GenericNew), after performing various
               sanity checks and constructing a new argument
               list.  Cut all that nonsense short -- this speeds
               up instance creation tremendously. */
            specific = (void *)type->tp_new;
            /* XXX I'm not 100% sure that there isn't a hole
               in this reasoning that requires additional
               sanity checks.  I'll buy the first person to
               point out a bug in this reasoning a beer. */
            // if(debug){
            //     printf("update_one_slot: specific 2 \n");
            // }
        }
        else if (descr == Py_None &&
                 ptr == (void**)&type->tp_hash) {
            /* We specifically allow __hash__ to be set to None
               to prevent inheritance of the default
               implementation from object.__hash__ */
            specific = (void *)PyObject_HashNotImplemented;
            // if(debug){
            //     printf("update_one_slot: specific 3 \n");
            // }
        }
        else {
            use_generic = 1;
            generic = p->function;
            // if(debug){
            //     printf("update_one_slot: generic\n");
            // }
        }
    } while ((++p)->offset == offset);
    // if(debug){
    //     printf("update_one_slot: specific = %p, generic = %p, use_generic = %d\n", specific, generic, use_generic);
    // }
    if (specific && !use_generic)
        *ptr = specific;
    else
        *ptr = generic;
    return p;
}

static void
fixup_slot_dispatchers(PyTypeObject *type)
{
    slotdef *p;

    assert(slotdefs_initialized);
    for (p = slotdefs; p->name; )
        p = update_one_slot(type, p);
}

工作流程:

  • 如果 slotdef 的 offset 对应的 ptr 等于 NULL, 说明 slotdef 的 offset 指向的是 tp_as_xxx 内部, 而 tp_as_xxx 等于 NULL. 在这种情况下, 跳过接下来相邻的 offset 相同的 slotdef.

  • 调用 find_name_in_mro, 从 tp_mro 的各个基类的 tp_dcit 中查找 slotdef->name_strobj 方法. 如果没有找到则继续下一个 slotdef.

    如果找到了, 则判断该方法的类型, 根据情况设置 special 或者 generic.

  • 最终将 special 或者 generic 设置给 *ptr.

special 可能等于 descr->d_wrapped, generic 可能等于 slotdef->function.

我的一些观察

对于 CPython 内部定义的类来说, 在 PyType_Ready 中调用 add_operators 函数, add_operators 函数根据 slotdefs 添加对应的 __xxx__ 方法到 tp_dict 中. 之后不会再调用 fixup_slot_dispatchers.

对于用户定义的类来说, 在 type_new 中先调用 PyType_Ready, 接着调用 fixup_slot_dispatchers. add_operators 设置了 tp_dict, inherit_slots 继承了基类的 tp_xxx 方法, 接着 fixup_slot_dispatchers 又反过来设置 tp_xxx 方法.

为什么要调用 fixup_slot_dispatchers 呢?

如果用户没有在类中自定义 __xxx__ 方法, 我觉得是没必要调用 fixup_slot_dispatchers 的.

如果用户定义了 __xxx__ 方法, 那么该方法的优先级肯定是最高的, fixup_slot_dispatchers 此时会将 *ptr 设置为 slotdef->function(general). slotdef->function 会调用自定义的 __xxx__ 方法.

__init__ 为例, 如果用户自定义了 __init__ 方法, 那么 tp_init 会被设置为 slot_tp_init.

slotdef:

    FLSLOT("__init__", tp_init, slot_tp_init, (wrapperfunc)(void(*)(void))wrap_init,
           "__init__($self, /, *args, **kwargs)\n--\n\n"
           "Initialize self.  See help(type(self)) for accurate signature.",
           PyWrapperFlag_KEYWORDS),

slot_tp_init:

static int
slot_tp_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    PyThreadState *tstate = _PyThreadState_GET();

    _Py_IDENTIFIER(__init__);
    int unbound;
    PyObject *meth = lookup_method(self, &PyId___init__, &unbound);
    if (meth == NULL) {
        return -1;
    }

    PyObject *res;
    if (unbound) {
        res = _PyObject_Call_Prepend(tstate, meth, self, args, kwds);
    }
    else {
        res = _PyObject_Call(tstate, meth, args, kwds);
    }
    Py_DECREF(meth);
    if (res == NULL)
        return -1;
    if (res != Py_None) {
        PyErr_Format(PyExc_TypeError,
                     "__init__() should return None, not '%.200s'",
                     Py_TYPE(res)->tp_name);
        Py_DECREF(res);
        return -1;
    }
    Py_DECREF(res);
    return 0;
}

add_operators

add_operators 的主要功能是往 tp_dict 中添加 __xxx__ 方法.

遍历 slotdefs 数组, 如果 slotdef->offset 在 type 对应的 slot 存在, 而且slotdef->name_strobj 不存在于 tp_dict, 那么向 tp_dict 中添加一个 PyWrapperDescrObject.

如果用户自定义了 __xxx__ 方法, 那么 add_operators 不会往 tp_dict 中添加东西. 其实 add_operators 主要针对 CPython 内部的类.

//Objects/typeobject.c

/* This function is called by PyType_Ready() to populate the type's
   dictionary with method descriptors for function slots.  For each
   function slot (like tp_repr) that's defined in the type, one or more
   corresponding descriptors are added in the type's tp_dict dictionary
   under the appropriate name (like __repr__).  Some function slots
   cause more than one descriptor to be added (for example, the nb_add
   slot adds both __add__ and __radd__ descriptors) and some function
   slots compete for the same descriptor (for example both sq_item and
   mp_subscript generate a __getitem__ descriptor).

   In the latter case, the first slotdef entry encountered wins.  Since
   slotdef entries are sorted by the offset of the slot in the
   PyHeapTypeObject, this gives us some control over disambiguating
   between competing slots: the members of PyHeapTypeObject are listed
   from most general to least general, so the most general slot is
   preferred.  In particular, because as_mapping comes before as_sequence,
   for a type that defines both mp_subscript and sq_item, mp_subscript
   wins.

   This only adds new descriptors and doesn't overwrite entries in
   tp_dict that were previously defined.  The descriptors contain a
   reference to the C function they must call, so that it's safe if they
   are copied into a subtype's __dict__ and the subtype has a different
   C function in its slot -- calling the method defined by the
   descriptor will call the C function that was used to create it,
   rather than the C function present in the slot when it is called.
   (This is important because a subtype may have a C function in the
   slot that calls the method from the dictionary, and we want to avoid
   infinite recursion here.) */

static int
add_operators(PyTypeObject *type)
{
    PyObject *dict = type->tp_dict;
    slotdef *p;
    PyObject *descr;
    void **ptr;

    assert(slotdefs_initialized);
    for (p = slotdefs; p->name; p++) {
        if (p->wrapper == NULL)
            continue;
        ptr = slotptr(type, p->offset);
        if (!ptr || !*ptr)
            continue;
        // 如果在 tp_dict 中已经存在, 那么继续下一个 slotdef
        if (PyDict_GetItemWithError(dict, p->name_strobj))
            continue;
        if (PyErr_Occurred()) {
            return -1;
        }
        if (*ptr == (void *)PyObject_HashNotImplemented) {
            /* Classes may prevent the inheritance of the tp_hash
               slot by storing PyObject_HashNotImplemented in it. Make it
               visible as a None value for the __hash__ attribute. */
            if (PyDict_SetItem(dict, p->name_strobj, Py_None) < 0)
                return -1;
        }
        else {
            descr = PyDescr_NewWrapper(type, p, *ptr);
            if (descr == NULL)
                return -1;
            if (PyDict_SetItem(dict, p->name_strobj, descr) < 0) {
                Py_DECREF(descr);
                return -1;
            }
            Py_DECREF(descr);
        }
    }
    if (type->tp_new != NULL) {
        if (add_tp_new_wrapper(type) < 0)
            return -1;
    }
    return 0;
}

添加完成后 tp_dict 和 PyWrapperDescrObject 的示意图:

                            +---------+
                       +--->| getattr |<-------------------------------------------+
                       |    +---------+                                            |
                       |                                                           |
+--------------------+ |       +-------------+         +----------------------+    |
|                    | |  +--->|    dict     |    +--->| PyWrapperDescrObject |    |
|  PyHeapTypeObject  | |  |    +-------------+    |    +----------------------+    |    +---------------+
|                    | |  |    |             |    |    |       d_common       |----+--->| PyDescrObject |
+--------------------+ |  |    +-------------+    |    +----------------------+    |    +---------------+
|      tp_call       |-+  |    |  __call__   |----+    |       d_base         |    |    |   d_type      +--+
+--------------------+    |    +-------------+         +----------------------|    |    +---------------+  |
|      tp_dict       |----+    |             |         |       d_wrapped      +----+    |   d_name      |  |
+--------------------+         +-------------+         +----------------------+         +---------------+  |
|       ...          |         |             |                                                             |
+--------------------+         +-------------+                                                             |
         ^                                                                                                 | 
         |                                                                                                 |
         +-------------------------------------------------------------------------------------------------+

以 tuple 为例, add_operators 会添加以下方法:

add_operators: __repr__
add_operators: __hash__
add_operators: __getattribute__
add_operators: __lt__
add_operators: __le__
add_operators: __eq__
add_operators: __ne__
add_operators: __gt__
add_operators: __ge__
add_operators: __iter__
add_operators: __len__
add_operators: __getitem__
add_operators: __add__
add_operators: __mul__
add_operators: __rmul__
add_operators: __contains__

以 tuple 的 __hash__ 为例, 当调用 __hash__ 方法时, Python 首先找到的是 PyWrapperDescrObject, 接着的调用链为: PY_TYPE(descr)->tp_call -> wrapperdescr_call -> descr->d_base->wrapper -> descr->d_wrapped.

关于 PyDescr_NewWrapper 的信息可以查看这里.

add_methods

add_methods 负责将 tp_methods 中的方法添加到 tp_dict 中, add_methods 只对 CPython 内部定义的类有效(当然前提是 tp_methods 不等于 NULL), 因为用户定义的类的 tp_methods 等于 NULL.

从 ml_flags 可以知道该方法属于什么类型, 常见的类型有:

  • METH_VARARGS
  • METH_KEYWORDS
  • METH_NOARGS
  • METH_O
  • METH_CLASS
  • METH_STATIC

add_methods 的代码如下:

//Objects/typeobject.c

/* Add the methods from tp_methods to the __dict__ in a type object */
static int add_methods(PyTypeObject *type, PyMethodDef *meth) {
    PyObject *dict = type->tp_dict;
    PyObject *name;

    for (; meth->ml_name != NULL; meth++) {
        PyObject *descr;
        int err;
        int isdescr = 1;
        if (meth->ml_flags & METH_CLASS) {
            if (meth->ml_flags & METH_STATIC) {
                PyErr_SetString(PyExc_ValueError,
                                "method cannot be both class and static");
                return -1;
            }
            descr = PyDescr_NewClassMethod(type, meth);
        } else if (meth->ml_flags & METH_STATIC) {
            PyObject *cfunc = PyCFunction_NewEx(meth, (PyObject *)type, NULL);
            if (cfunc == NULL) return -1;
            descr = PyStaticMethod_New(cfunc);
            isdescr = 0;  // PyStaticMethod is not PyDescrObject
            Py_DECREF(cfunc);
        } else {
            descr = PyDescr_NewMethod(type, meth);
        }
        if (descr == NULL) return -1;

        if (isdescr) {
            name = PyDescr_NAME(descr);
        } else {
            name = PyUnicode_FromString(meth->ml_name);
            if (name == NULL) {
                Py_DECREF(descr);
                return -1;
            }
        }

        if (!(meth->ml_flags & METH_COEXIST)) {
            if (PyDict_GetItemWithError(dict, name)) {
                if (!isdescr) {
                    Py_DECREF(name);
                }
                Py_DECREF(descr);
                continue;
            } else if (PyErr_Occurred()) {
                if (!isdescr) {
                    Py_DECREF(name);
                }
                return -1;
            }
        }
        err = PyDict_SetItem(dict, name, descr);
        if (!isdescr) {
            Py_DECREF(name);
        }
        Py_DECREF(descr);
        if (err < 0) return -1;
    }
    return 0;
}

实例

一个普通的类

class C:
    pass
  • C.__basicsize__ = 32

  • C.__itemsize__ = 0

  • C.__dictoffset__ = 16

  • C.__weakrefoffset__ = 24

C 的实例的内存布局:

+-----------+---------+----------+-------------+
| ob_refcnt | ob_type | __dict__ | __weakref__ |
+-----------+---------+----------+-------------+
      8          8         8             8

定义了 slots 的类

class C:
    __slots__ = ('a', 'b', '__dict__')
  • C.__basicsize__ = 40

  • C.__itemsize__ = 0

  • C.__dictoffset__ = 32

  • C.__weakrefoffset__ = 24

C 的实例的内存布局:

+-----------+---------+---+---+----------+-------------+
| ob_refcnt | ob_type | a | b | __dict__ | __weakref__ |
+-----------+---------+---+---+----------+-------------+
      8          8      8   8      8            8