Python 虚拟机

涉及文件:

Python/ceval.c
Include/internal/pycore_pystate.h
Include/pystate.h

一些重要的结构体

_PyRuntimeState

/* Runtime state shared by the whole process.  A single global instance,
   _PyRuntime, is defined right after the struct. */
typedef struct pyruntimestate {
    /* Is running Py_PreInitialize()? */
    int preinitializing;

    /* Is Python preinitialized? Set to 1 by Py_PreInitialize() */
    int preinitialized;

    /* Is Python core initialized? Set to 1 by _Py_InitializeCore() */
    int core_initialized;

    /* Is Python fully initialized? Set to 1 by Py_Initialize() */
    int initialized;

    /* Set by Py_FinalizeEx(). Only reset to NULL if Py_Initialize()
       is called again. */
    PyThreadState *finalizing;

    /* Bookkeeping for the interpreters living in this process. */
    struct pyinterpreters {
        /* NOTE(review): presumably guards the fields below -- confirm. */
        PyThread_type_lock mutex;
        /* NOTE(review): presumably the first interpreter of the list that
           is chained through PyInterpreterState.next -- confirm. */
        PyInterpreterState *head;
        /* The main interpreter (its ID is always 0, see below). */
        PyInterpreterState *main;
        /* next_id (called _next_interp_id in older versions of this
           comment) is an auto-numbered sequence of small integers.  It
           gets initialized in _PyInterpreterState_Init(), which is called
           in Py_Initialize(), and used in PyInterpreterState_New().  A
           negative interpreter ID indicates an error occurred.  The main
           interpreter will always have an ID of 0.  Overflow results in a
           RuntimeError.  If that becomes a problem later then we can
           adjust, e.g. by using a Python int. */
        int64_t next_id;
    } interpreters;

    unsigned long main_thread;

    /* Fixed-capacity table holding at most NEXITFUNCS callbacks of type
       void (*)(void).
       NOTE(review): nexitfuncs is presumably the number of slots in use --
       confirm. */
#define NEXITFUNCS 32
    void (*exitfuncs[NEXITFUNCS])(void);
    int nexitfuncs;

    /* Sub-states for the garbage collector, the eval loop and the GIL
       state API, respectively (going by the struct names). */
    struct _gc_runtime_state gc;
    struct _ceval_runtime_state ceval;
    struct _gilstate_runtime_state gilstate;
} _PyRuntimeState;

/* The one global runtime object, statically initialized. */
_PyRuntimeState _PyRuntime = _PyRuntimeState_INIT;

_PyRuntime 是一个全局变量, 保存着许多和解释器运行状态相关的信息.

PyInterpreterState

//  Include/internal/pycore_pystate.h
/* Per-interpreter state.  This is an abridged excerpt: each `...` line
   stands for fields that were left out. */
struct _is {
    ...
    /* Next interpreter; the interpreters form a singly linked list. */
    struct _is *next;
    /* Head of the list of thread states attached to this interpreter. */
    struct _ts *tstate_head;
    /* NOTE(review): the names suggest the module table, the sys module
       dict, the builtins dict and the importlib module -- confirm against
       the full header. */
    PyObject *modules;
    PyObject *modules_by_index;
    PyObject *sysdict;
    PyObject *builtins;
    PyObject *importlib;
    ...
};

// Include/pystate.h
/* struct _is is defined in internal/pycore_pystate.h */
typedef struct _is PyInterpreterState;

PyInterpreterState 通过 next 指针形成一个单向链表, tstate_head 指向线程链表.

PyThreadState

// Include/cpython/pystate.h
/* Per-thread state.  Thread states are chained through prev/next into a
   doubly linked list. */
struct _ts {
    /* See Python/ceval.c for comments explaining most fields */

    /* Neighbouring thread states in the doubly linked list. */
    struct _ts *prev;
    struct _ts *next;
    /* NOTE(review): presumably the interpreter this thread state belongs
       to -- confirm. */
    PyInterpreterState *interp;

    /* NOTE(review): presumably the frame this thread is currently
       executing; older frames are reachable through f_back -- confirm
       against Python/ceval.c. */
    struct _frame *frame;
    int recursion_depth;
    char overflowed; /* The stack has overflowed. Allow 50 more calls
                        to handle the runtime error. */
    char recursion_critical; /* The current calls must not cause
                                a stack overflow. */
    int stackcheck_counter;

    /* 'tracing' keeps track of the execution depth when tracing/profiling.
       This is to prevent the actual trace/profile code from being recorded in
       the trace/profile. */
    int tracing;
    /* Non-zero when a trace function or a profile function is installed
       (maintained by PyEval_SetTrace() and PyEval_SetProfile()). */
    int use_tracing;

    /* C-level profile/trace callbacks and the object stored alongside
       each of them (set by PyEval_SetProfile() / PyEval_SetTrace()). */
    Py_tracefunc c_profilefunc;
    Py_tracefunc c_tracefunc;
    PyObject *c_profileobj;
    PyObject *c_traceobj;

    /* The exception currently being raised */
    PyObject *curexc_type;
    PyObject *curexc_value;
    PyObject *curexc_traceback;

    /* The exception currently being handled, if no coroutines/generators
     * are present. Always last element on the stack referred to by exc_info.
     */
    _PyErr_StackItem exc_state;

    /* Pointer to the top of the stack of the exceptions currently
     * being handled */
    _PyErr_StackItem *exc_info;

    PyObject *dict;  /* Stores per-thread state */

    int gilstate_counter;

    PyObject *async_exc; /* Asynchronous exception to raise */
    unsigned long thread_id; /* Thread id where this tstate was created */

    int trash_delete_nesting;
    PyObject *trash_delete_later;

    /* Called when a thread state is deleted normally, but not when it
     * is destroyed after fork().
     * Pain:  to prevent rare but fatal shutdown errors (issue 18808),
     * Thread.join() must wait for the join'ed thread's tstate to be unlinked
     * from the tstate chain.  That happens at the end of a thread's life,
     * in pystate.c.
     * The obvious way doesn't quite work:  create a lock which the tstate
     * unlinking code releases, and have Thread.join() wait to acquire that
     * lock.  The problem is that we _are_ at the end of the thread's life:
     * if the thread holds the last reference to the lock, decref'ing the
     * lock will delete the lock, and that may trigger arbitrary Python code
     * if there's a weakref, with a callback, to the lock.  But by this time
     * _PyRuntime.gilstate.tstate_current is already NULL, so only the simplest
     * of C code can be allowed to run (in particular it must not be possible to
     * release the GIL).
     * So instead of holding the lock directly, the tstate holds a weakref to
     * the lock:  that's the value of on_delete_data below.  Decref'ing a
     * weakref is harmless.
     * on_delete points to _threadmodule.c's static release_sentinel() function.
     * After the tstate is unlinked, release_sentinel is called with the
     * weakref-to-lock (on_delete_data) argument, and release_sentinel releases
     * the indirectly held lock.
     */
    void (*on_delete)(void *);
    void *on_delete_data;

    int coroutine_origin_tracking_depth;

    PyObject *async_gen_firstiter;
    PyObject *async_gen_finalizer;

    PyObject *context;
    uint64_t context_ver;

    /* Unique thread state id. */
    uint64_t id;

    /* XXX signal handlers should also be here */
};

/* struct _ts is defined in cpython/pystate.h */
typedef struct _ts PyThreadState;

PyThreadState 保存了和线程相关的信息, 通过 next 和 prev 形成一个双向链表. frame 指向该线程当前正在执行的栈帧对象(也就是调用栈最顶层的帧), 更早的栈帧通过 f_back 依次回溯.

PyFrameObject

/* Frame object: ties a code object to the namespaces (builtins, globals,
   locals) and the value stack it uses.  Frames are chained through
   f_back. */
typedef struct _frame {
    PyObject_VAR_HEAD
    struct _frame *f_back;      /* previous frame, or NULL */
    PyCodeObject *f_code;       /* code segment */
    PyObject *f_builtins;       /* builtin symbol table (PyDictObject) */
    PyObject *f_globals;        /* global symbol table (PyDictObject) */
    PyObject *f_locals;         /* local symbol table (any mapping) */
    PyObject **f_valuestack;    /* points after the last local */
    /* Next free slot in f_valuestack.  Frame creation sets to f_valuestack.
       Frame evaluation usually NULLs it, but a frame that yields sets it
       to the current stack top. */
    PyObject **f_stacktop;
    PyObject *f_trace;          /* Trace function */
    char f_trace_lines;         /* Emit per-line trace events? */
    char f_trace_opcodes;       /* Emit per-opcode trace events? */

    /* Borrowed reference to a generator, or NULL */
    PyObject *f_gen;

    int f_lasti;                /* Last instruction if called */
    /* Call PyFrame_GetLineNumber() instead of reading this field
       directly.  As of 2.3 f_lineno is only valid when tracing is
       active (i.e. when f_trace is set).  At other times we use
       PyCode_Addr2Line to calculate the line from the current
       bytecode index. */
    int f_lineno;               /* Current line number */
    int f_iblock;               /* index in f_blockstack */
    char f_executing;           /* whether the frame is still executing */
    PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
    /* Declared with one element but dynamically sized: holds the locals
       followed by the value stack. */
    PyObject *f_localsplus[1];  /* locals+stack, dynamically sized */
} PyFrameObject;

PyInterpreterState, PyThreadState 和 frame 的关系图:

+---------------------+
|  PyInterpreterState |
+---------------------+
        |
        |
        v
+----------------+        +----------------+        +----------------+    
|  PyThreadState |  <-->  |  PyThreadState |  <-->  |  PyThreadState |   ... 
+----------------+        +----------------+        +----------------+    
    |
    |
    v
+--------+        +--------+        +--------+
|  frame |  -->   |  frame |  -->   |  frame |
+--------+        +--------+        +--------+

注意 frame 的指针方向: tstate->frame 指向当前正在执行的栈帧, 每个栈帧再通过 frame->f_back 指向调用它的上一个栈帧, 最外层栈帧的 f_back 为 NULL.

ceval

ceval 循环 switch case 的结构:

/* Simplified sketch of the evaluation loop's switch/case structure.
   This is pseudo-code, not compilable as-is. */
for(;;){
    // ...
fast_next_opcode:
    // Fetch the next instruction
    NEXTOPARG();

    switch(opcode){
    case xxx:
        // ...
        FAST_DISPATCH() // goto fast_next_opcode
    case yyy:
        // ...
        DISPATCH() // continue
    default:
        // ...
        goto error;
    }

error:
    // Error handling

    /* Log traceback info. */
    PyTraceBack_Here(f);
}

sys.settrace 和 sys.setprofile

// Python/sysmodule.c
/* Invoke the Python-level `callback` as callback(frame, event, arg), where
   `event` is whatstrings[what] and a NULL `arg` is passed as None.

   Returns the callback's result, or NULL on failure.  When the callback
   itself fails, a traceback entry is recorded for `frame`. */
static PyObject *
call_trampoline(PyObject* callback,
                PyFrameObject *frame, int what, PyObject *arg)
{
    /* Give up early if PyFrame_FastToLocalsWithError() reports an error. */
    if (PyFrame_FastToLocalsWithError(frame) < 0) {
        return NULL;
    }

    /* Positional arguments handed to the callback. */
    PyObject *call_args[3] = {
        (PyObject *)frame,
        whatstrings[what],
        arg ? arg : Py_None,
    };

    /* call the Python-level function */
    PyObject *outcome = _PyObject_FastCall(callback, call_args, 3);

    /* Done unconditionally, whether or not the call succeeded. */
    PyFrame_LocalsToFast(frame, 1);

    if (!outcome) {
        PyTraceBack_Here(frame);
    }
    return outcome;
}

/* C-level profile hook that forwards the event to the Python-level
   profile function `self`.

   Returns 0 on success.  If the Python function fails, profiling is
   switched off via PyEval_SetProfile(NULL, NULL) and -1 is returned. */
static int
profile_trampoline(PyObject *self, PyFrameObject *frame,
                   int what, PyObject *arg)
{
    /* A missing event argument is reported to Python as None. */
    PyObject *event_arg = (arg != NULL) ? arg : Py_None;
    PyObject *outcome = call_trampoline(self, frame, what, event_arg);

    if (outcome != NULL) {
        /* The return value of a profile function is not used. */
        Py_DECREF(outcome);
        return 0;
    }

    PyEval_SetProfile(NULL, NULL);
    return -1;
}

/* C-level trace hook that forwards the event to a Python-level trace
   function.

   For PyTrace_CALL events the global trace function (`self`) is used; for
   every other event the frame's own f_trace is used, and nothing happens
   if there is none.  A non-None result replaces frame->f_trace.

   Returns 0 on success.  If the Python function fails, tracing is switched
   off, frame->f_trace is cleared and -1 is returned. */
static int
trace_trampoline(PyObject *self, PyFrameObject *frame,
                 int what, PyObject *arg)
{
    /* NOTE: the name `self` is misleading.  It is really the trace function
       installed by the user, i.e. tstate->c_traceobj. */
    PyObject *handler = (what == PyTrace_CALL) ? self : frame->f_trace;
    if (handler == NULL) {
        return 0;
    }

    PyObject *outcome = call_trampoline(handler, frame, what, arg);
    if (outcome == NULL) {
        PyEval_SetTrace(NULL, NULL);
        Py_CLEAR(frame->f_trace);
        return -1;
    }

    if (outcome == Py_None) {
        /* None: leave frame->f_trace as it is. */
        Py_DECREF(outcome);
    }
    else {
        /* The returned object becomes the frame's local trace function. */
        Py_XSETREF(frame->f_trace, outcome);
    }
    return 0;
}

/* Implementation of sys.settrace(tracefunc).

   `self` is the sys module itself; `args` is the tracefunc object passed
   in from Python.  Passing None removes the trace function.  Otherwise
   trace_trampoline is installed with `args` as its object, so the eventual
   call has the form trace_trampoline(args, frame, what, arg).

   Returns None, or NULL if trace_init() fails. */
static PyObject *
sys_settrace(PyObject *self, PyObject *args)
{
    if (trace_init() == -1) {
        return NULL;
    }

    if (args != Py_None) {
        PyEval_SetTrace(trace_trampoline, args);
    }
    else {
        PyEval_SetTrace(NULL, NULL);
    }
    Py_RETURN_NONE;
}

/* Implementation of sys.setprofile(profilefunc).

   `args` is the profile function object passed in from Python.  Passing
   None removes the profile function; otherwise profile_trampoline is
   installed with `args` as its object.

   Returns None, or NULL if trace_init() fails. */
static PyObject *
sys_setprofile(PyObject *self, PyObject *args)
{
    if (trace_init() == -1) {
        return NULL;
    }

    if (args == Py_None) {
        /* Uninstall. */
        PyEval_SetProfile(NULL, NULL);
        Py_RETURN_NONE;
    }

    PyEval_SetProfile(profile_trampoline, args);
    Py_RETURN_NONE;
}

// Python/ceval.c

/* Install `func` as the C-level trace function of the thread state
   returned by _PyRuntimeState_GetThreadState(), with `arg` stored next to
   it as c_traceobj.  Passing func == NULL removes the trace function.

   Returns without changing anything if the "sys.settrace" audit call
   reports an error (PySys_Audit() < 0). */
void
PyEval_SetTrace(Py_tracefunc func, PyObject *arg)
{
    if (PySys_Audit("sys.settrace", NULL) < 0) {
        return;
    }

    _PyRuntimeState *runtime = &_PyRuntime;
    PyThreadState *tstate = _PyRuntimeState_GetThreadState(runtime);
    /* Keep the old trace object so it can be released further down. */
    PyObject *temp = tstate->c_traceobj;
    /* +1 when a trace function is installed where there was none, -1 when
       one is removed, 0 when one replaces another. */
    runtime->ceval.tracing_possible += (func != NULL) - (tstate->c_tracefunc != NULL);
    /* Take the new reference before dropping the old one. */
    Py_XINCREF(arg);
    /* Clear the slots before the Py_XDECREF() below. */
    tstate->c_tracefunc = NULL;
    tstate->c_traceobj = NULL;
    /* Must make sure that profiling is not ignored if 'temp' is freed */
    tstate->use_tracing = tstate->c_profilefunc != NULL;
    Py_XDECREF(temp);
    tstate->c_tracefunc = func;
    tstate->c_traceobj = arg;
    /* Flag that tracing or profiling is turned on */
    tstate->use_tracing = ((func != NULL)
                           || (tstate->c_profilefunc != NULL));
}

/* Install `func` as the C-level profile function of the thread state
   returned by _PyThreadState_GET(), with `arg` stored next to it as
   c_profileobj.  Passing func == NULL removes the profile function.

   Returns without changing anything if the "sys.setprofile" audit call
   reports an error (PySys_Audit() < 0). */
void
PyEval_SetProfile(Py_tracefunc func, PyObject *arg)
{
    if (PySys_Audit("sys.setprofile", NULL) < 0) {
        return;
    }

    PyThreadState *tstate = _PyThreadState_GET();
    /* Keep the old profile object so it can be released further down. */
    PyObject *temp = tstate->c_profileobj;
    /* Take the new reference before dropping the old one. */
    Py_XINCREF(arg);
    /* Clear the slots before the Py_XDECREF() below. */
    tstate->c_profilefunc = NULL;
    tstate->c_profileobj = NULL;
    /* Must make sure that tracing is not ignored if 'temp' is freed */
    tstate->use_tracing = tstate->c_tracefunc != NULL;
    Py_XDECREF(temp);
    tstate->c_profilefunc = func;
    tstate->c_profileobj = arg;
    /* Flag that tracing or profiling is turned on */
    tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL);
}

c_tracefunc 和 c_profilefunc 在进入一个 code block 时会被调用.

// Python/ceval.c

/* Fragment of the frame evaluation function: report a PyTrace_CALL event
   to the trace function and then to the profile function, if installed.
   If either call fails, control jumps to exit_eval_frame (a label outside
   this excerpt). */
if (tstate->use_tracing) {
    if (tstate->c_tracefunc != NULL) {
        /* tstate->c_tracefunc, if defined, is a
            function that will be called on *every* entry
            to a code block.  Its return value, if not
            None, is a function that will be called at
            the start of each executed line of code.
            (Actually, the function must return itself
            in order to continue tracing.)  The trace
            functions are called with three arguments:
            a pointer to the current frame, a string
            indicating why the function is called, and
            an argument which depends on the situation.
            The global trace function is also called
            whenever an exception is detected. */
        if (call_trace_protected(tstate->c_tracefunc,
                                  tstate->c_traceobj,
                                  tstate, f, PyTrace_CALL, Py_None)) {
            /* Trace function raised an error */
            goto exit_eval_frame;
        }
    }
    if (tstate->c_profilefunc != NULL) {
        /* Similar for c_profilefunc, except it needn't
            return itself and isn't called for "line" events */
        if (call_trace_protected(tstate->c_profilefunc,
                                  tstate->c_profileobj,
                                  tstate, f, PyTrace_CALL, Py_None)) {
            /* Profile function raised an error */
            goto exit_eval_frame;
        }
    }
}

具体的用法查看文档: Python 标准库 sys 模块文档中的 sys.settrace 和 sys.setprofile.

其它

sys 模块增加了很多设置 hook 的方法, 具体可以查看 sys 文档.

优化

使用 "threaded code" (基于 computed goto) 优化指令分派效率, 详情见 Python/ceval.c 中的源代码注释和 GCC 文档的 "Labels as Values" 一节.

GCC 文档说:

Such an array of label values serves a purpose much like that of the switch statement. The switch statement is cleaner, so use that rather than an array unless the problem does not fit a switch statement very well.

Another use of label values is in an interpreter for threaded code. The labels within the interpreter function can be stored in the threaded code for super-fast dispatching.

预测下一条指令提高性能

/* OpCode prediction macros
    Some opcodes tend to come in pairs thus making it possible to
    predict the second code when the first is run.  For example,
    COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.

    Verifying the prediction costs a single high-speed test of a register
    variable against a constant.  If the pairing was good, then the
    processor's own internal branch prediction has a high likelihood of
    success, resulting in a nearly zero-overhead transition to the
    next opcode.  A successful prediction saves a trip through the eval-loop
    including its unpredictable switch-case branch.  Combined with the
    processor's internal branch prediction, a successful PREDICT has the
    effect of making the two opcodes run as if they were a single new opcode
    with the bodies combined.

    If collecting opcode statistics, your choices are to either keep the
    predictions turned-on and interpret the results as if some opcodes
    had been combined or turn-off predictions so that the opcode frequency
    counter updates for both opcodes.

    Opcode prediction is disabled with threaded code, since the latter allows
    the CPU to record separate branch prediction information for each
    opcode.

*/

/* Name of the label that marks the predicted entry point of `op`:
   PRED_<op>. */
#define PREDICT_ID(op)          PRED_##op

#if defined(DYNAMIC_EXECUTION_PROFILE) || USE_COMPUTED_GOTOS
/* Prediction disabled: the goto sits behind `if (0)` and is never taken.
   NOTE(review): presumably kept so the PRED_<op> label is still
   referenced -- confirm. */
#define PREDICT(op)             if (0) goto PREDICT_ID(op)
#else
/* Peek at the next code unit; if its opcode is `op`, load oparg, advance
   next_instr and jump straight to the PRED_<op> label.  Note that `opcode`
   is overwritten even when the prediction misses. */
#define PREDICT(op) \
    do { \
        _Py_CODEUNIT word = *next_instr; \
        opcode = _Py_OPCODE(word); \
        if (opcode == op) { \
            oparg = _Py_OPARG(word); \
            next_instr++; \
            goto PREDICT_ID(op); \
        } \
    } while(0)
#endif
/* Defines the PRED_<op> label that PREDICT(op) jumps to. */
#define PREDICTED(op)           PREDICT_ID(op):

总结

一个用 Python 写的 Python 解释器:

a-python-interpreter-written-in-python

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Python 虚拟机

一些重要的结构体

ceval

sys.settrace 和 sys.setprofile

其它

优化

总结

FilesExpand file tree

虚拟机.md

Latest commit

History

虚拟机.md

File metadata and controls

Python 虚拟机

一些重要的结构体

ceval

sys.settrace 和 sys.setprofile

其它

优化

总结