You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
805 lines
32 KiB
805 lines
32 KiB
#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
|
|
/*
|
|
pybind11/detail/internals.h: Internal data structure and related functions
|
|
|
|
Copyright (c) 2017 Wenzel Jakob <wenzel.jakob@epfl.ch>
|
|
|
|
All rights reserved. Use of this source code is governed by a
|
|
BSD-style license that can be found in the LICENSE file.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <pybind11/conduit/pybind11_platform_abi_id.h>
|
|
#include <pybind11/gil_simple.h>
|
|
#include <pybind11/pytypes.h>
|
|
#include <pybind11/trampoline_self_life_support.h>
|
|
|
|
#include "common.h"
|
|
#include "struct_smart_holder.h"
|
|
|
|
#include <atomic>
|
|
#include <cstdint>
|
|
#include <exception>
|
|
#include <limits>
|
|
#include <mutex>
|
|
#include <thread>
|
|
|
|
/// Tracks the `internals` and `type_info` ABI version independent of the main library version.
|
|
///
|
|
/// Some portions of the code use an ABI that is conditional depending on this
|
|
/// version number. That allows ABI-breaking changes to be "pre-implemented".
|
|
/// Once the default version number is incremented, the conditional logic that
|
|
/// no longer applies can be removed. Additionally, users that need not
|
|
/// maintain ABI compatibility can increase the version number in order to take
|
|
/// advantage of any functionality/efficiency improvements that depend on the
|
|
/// newer ABI.
|
|
///
|
|
/// WARNING: If you choose to manually increase the ABI version, note that
|
|
/// pybind11 may not be tested as thoroughly with a non-default ABI version, and
|
|
/// further ABI-incompatible changes may be made before the ABI is officially
|
|
/// changed to the new version.
|
|
#if !defined(PYBIND11_INTERNALS_VERSION)
// Default ABI version; only applied when the macro was not already defined by the build.
#    define PYBIND11_INTERNALS_VERSION 11
#endif

// Anything older than version 11 is rejected at compile time.
#if !(PYBIND11_INTERNALS_VERSION >= 11)
#    error "PYBIND11_INTERNALS_VERSION 11 is the minimum for all platforms for pybind11v3."
#endif
|
|
|
|
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
|
|
|
/// Signature of an exception translator: a plain function pointer that receives the in-flight
/// exception as a `std::exception_ptr` and returns nothing. Translators of this type are kept in
/// the `registered_exception_translators` lists of `internals` and `local_internals`.
using ExceptionTranslator = void (*)(std::exception_ptr);
|
|
|
|
// Python 3.7 deprecated the old Thread Local Storage (TLS) API in favor of the new Thread
// Specific Storage (TSS) API; the helper macros below wrap the TSS functions.
// The `Py_tss_t` key is held by value (avoiding an unnecessary allocation), since we cannot use
// `Py_LIMITED_API` anyway.
#define PYBIND11_TLS_KEY_REF Py_tss_t &

// PYBIND11_TLS_KEY_INIT(var) declares `Py_tss_t var = Py_tss_NEEDS_INIT;`. For clang and GCC the
// declaration is wrapped in pragmas that suppress -Wmissing-field-initializers.
#if defined(__clang__)
#    define PYBIND11_TLS_KEY_INIT(var)                                                            \
        _Pragma("clang diagnostic push") /**/                                                     \
        _Pragma("clang diagnostic ignored \"-Wmissing-field-initializers\"") /**/                 \
        Py_tss_t var = Py_tss_NEEDS_INIT;                                                         \
        _Pragma("clang diagnostic pop")
#elif defined(__GNUC__) && !defined(__INTEL_COMPILER)
#    define PYBIND11_TLS_KEY_INIT(var)                                                            \
        _Pragma("GCC diagnostic push") /**/                                                       \
        _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/                   \
        Py_tss_t var = Py_tss_NEEDS_INIT;                                                         \
        _Pragma("GCC diagnostic pop")
#else
#    define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT;
#endif

// Evaluates to true when the key was created successfully.
#define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0)
// Reads the calling thread's value for `key`.
#define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key))
// Stores `value` as the calling thread's value for `key`.
#define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value))
// Clears the calling thread's value by storing nullptr.
#define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr)
// Destroys the key itself.
#define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key))
|
|
|
|
/// A smart-pointer-like wrapper around a thread-specific value. get/set of the pointer applies to
|
|
/// the current thread only.
|
|
template <typename T>
|
|
class thread_specific_storage {
|
|
public:
|
|
thread_specific_storage() {
|
|
// NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
|
|
if (!PYBIND11_TLS_KEY_CREATE(key_)) {
|
|
pybind11_fail(
|
|
"thread_specific_storage constructor: could not initialize the TSS key!");
|
|
}
|
|
}
|
|
|
|
~thread_specific_storage() {
|
|
// This destructor is often called *after* Py_Finalize(). That *SHOULD BE* fine on most
|
|
// platforms. The following details what happens when PyThread_tss_free is called in
|
|
// CPython. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does
|
|
// nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
|
|
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX).
|
|
// Neither of those have anything to do with CPython internals. PyMem_RawFree *requires*
|
|
// that the `key` be allocated with the CPython allocator (as it is by
|
|
// PyThread_tss_create).
|
|
// However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call
|
|
// requires a living Python interpreter.
|
|
#ifdef GRAALVM_PYTHON
|
|
if (!Py_IsInitialized() || _Py_IsFinalizing()) {
|
|
return;
|
|
}
|
|
#endif
|
|
PYBIND11_TLS_FREE(key_);
|
|
}
|
|
|
|
thread_specific_storage(thread_specific_storage const &) = delete;
|
|
thread_specific_storage(thread_specific_storage &&) = delete;
|
|
thread_specific_storage &operator=(thread_specific_storage const &) = delete;
|
|
thread_specific_storage &operator=(thread_specific_storage &&) = delete;
|
|
|
|
T *get() const { return reinterpret_cast<T *>(PYBIND11_TLS_GET_VALUE(key_)); }
|
|
|
|
T &operator*() const { return *get(); }
|
|
explicit operator T *() const { return get(); }
|
|
explicit operator bool() const { return get() != nullptr; }
|
|
|
|
void set(T *val) { PYBIND11_TLS_REPLACE_VALUE(key_, reinterpret_cast<void *>(val)); }
|
|
void reset(T *p = nullptr) { set(p); }
|
|
thread_specific_storage &operator=(T *pval) {
|
|
set(pval);
|
|
return *this;
|
|
}
|
|
|
|
private:
|
|
PYBIND11_TLS_KEY_INIT(mutable key_)
|
|
};
|
|
|
|
PYBIND11_NAMESPACE_BEGIN(detail)
|
|
|
|
// This does NOT actually exist as a module.
|
|
#define PYBIND11_DUMMY_MODULE_NAME "pybind11_builtins"
|
|
|
|
// Forward declarations
|
|
inline PyTypeObject *make_static_property_type();
|
|
inline PyTypeObject *make_default_metaclass();
|
|
inline PyObject *make_object_base_type(PyTypeObject *metaclass);
|
|
inline void translate_exception(std::exception_ptr p);
|
|
|
|
// Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly
// other STLs, this means `typeid(A)` from one module won't equal `typeid(A)` from another module
// even when `A` is the same, non-hidden-visibility type (e.g. from a common include). Under
// libstdc++, this doesn't happen: equality and the type_index hash are based on the type name,
// which works. For libc++ we therefore provide our own hash and equality functions that are
// based on the type name.
#if defined(_LIBCPP_VERSION)

/// Name-based type identity check.
inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) {
    const char *const lhs_name = lhs.name();
    const char *const rhs_name = rhs.name();
    // Identical pointers make the string comparison unnecessary.
    if (lhs_name == rhs_name) {
        return true;
    }
    return std::strcmp(lhs_name, rhs_name) == 0;
}

/// Hashes a std::type_index by the characters of its name.
struct type_hash {
    size_t operator()(const std::type_index &t) const {
        size_t hash = 5381;
        for (const char *cursor = t.name(); *cursor != '\0'; ++cursor) {
            hash = (hash * 33) ^ static_cast<unsigned char>(*cursor);
        }
        return hash;
    }
};

/// Name-based equality for std::type_index keys.
struct type_equal_to {
    bool operator()(const std::type_index &lhs, const std::type_index &rhs) const {
        const char *const lhs_name = lhs.name();
        const char *const rhs_name = rhs.name();
        if (lhs_name == rhs_name) {
            return true;
        }
        return std::strcmp(lhs_name, rhs_name) == 0;
    }
};

#else

// Every other standard library: rely on the library's own comparison and hashing.
inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) { return lhs == rhs; }
using type_hash = std::hash<std::type_index>;
using type_equal_to = std::equal_to<std::type_index>;

#endif
|
|
|
|
template <typename value_type>
|
|
using type_map = std::unordered_map<std::type_index, value_type, type_hash, type_equal_to>;
|
|
|
|
struct override_hash {
|
|
inline size_t operator()(const std::pair<const PyObject *, const char *> &v) const {
|
|
size_t value = std::hash<const void *>()(v.first);
|
|
value ^= std::hash<const void *>()(v.second) + 0x9e3779b9 + (value << 6) + (value >> 2);
|
|
return value;
|
|
}
|
|
};
|
|
|
|
/// Maps an object address (`const void *`) to registered `instance` pointers. It is a multimap,
/// so more than one instance can be stored under the same address.
using instance_map = std::unordered_multimap<const void *, instance *>;
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
// Wrapper around PyMutex to provide BasicLockable semantics
|
|
class pymutex {
|
|
PyMutex mutex;
|
|
|
|
public:
|
|
pymutex() : mutex({}) {}
|
|
void lock() { PyMutex_Lock(&mutex); }
|
|
void unlock() { PyMutex_Unlock(&mutex); }
|
|
};
|
|
|
|
// Instance map shards are used to reduce mutex contention in free-threaded Python.
|
|
struct instance_map_shard {
|
|
instance_map registered_instances;
|
|
pymutex mutex;
|
|
// alignas(64) would be better, but causes compile errors in macOS before 10.14 (see #5200)
|
|
char padding[64 - (sizeof(instance_map) + sizeof(pymutex)) % 64];
|
|
};
|
|
|
|
static_assert(sizeof(instance_map_shard) % 64 == 0,
|
|
"instance_map_shard size is not a multiple of 64 bytes");
|
|
|
|
/// Rounds `x` up to the nearest power of two (values that already are a power of two are
/// returned unchanged).
///
/// `x == 0` yields 1, the smallest power of two; the plain bit trick would wrap around and
/// return 0 here. If the result does not fit into 64 bits (x > 2^63) the function returns 0.
///
/// See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
inline uint64_t round_up_to_next_pow2(uint64_t x) {
    if (x == 0) {
        return 1;
    }
    --x;
    // Smear the highest set bit into every lower position ...
    for (unsigned shift = 1; shift < 64; shift <<= 1) {
        x |= (x >> shift);
    }
    // ... so that adding one produces the next power of two.
    return x + 1;
}
|
|
#endif
|
|
|
|
// Forward declaration; in this header the type is only used as the pointee of the
// `loader_life_support_tls` thread-specific slot in `internals`.
class loader_life_support;
|
|
|
|
/// Internal data structure used to track registered instances and types.
|
|
/// Whenever binary incompatible changes are made to this structure,
|
|
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
|
|
struct internals {
|
|
#ifdef Py_GIL_DISABLED
|
|
pymutex mutex;
|
|
pymutex exception_translator_mutex;
|
|
#endif
|
|
// std::type_index -> pybind11's type information
|
|
type_map<type_info *> registered_types_cpp;
|
|
// PyTypeObject* -> base type_info(s)
|
|
std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py;
|
|
#ifdef Py_GIL_DISABLED
|
|
std::unique_ptr<instance_map_shard[]> instance_shards; // void * -> instance*
|
|
size_t instance_shards_mask = 0;
|
|
#else
|
|
instance_map registered_instances; // void * -> instance*
|
|
#endif
|
|
std::unordered_set<std::pair<const PyObject *, const char *>, override_hash>
|
|
inactive_override_cache;
|
|
type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
|
|
std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
|
|
std::forward_list<ExceptionTranslator> registered_exception_translators;
|
|
std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across
|
|
// extensions
|
|
std::forward_list<std::string> static_strings; // Stores the std::strings backing
|
|
// detail::c_str()
|
|
PyTypeObject *static_property_type = nullptr;
|
|
PyTypeObject *default_metaclass = nullptr;
|
|
PyObject *instance_base = nullptr;
|
|
// Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
|
|
thread_specific_storage<PyThreadState> tstate;
|
|
thread_specific_storage<loader_life_support> loader_life_support_tls;
|
|
// Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
|
|
PyInterpreterState *istate = nullptr;
|
|
|
|
type_map<PyObject *> native_enum_type_map;
|
|
|
|
internals()
|
|
: static_property_type(make_static_property_type()),
|
|
default_metaclass(make_default_metaclass()) {
|
|
PyThreadState *cur_tstate = PyThreadState_Get();
|
|
tstate = cur_tstate;
|
|
|
|
istate = cur_tstate->interp;
|
|
registered_exception_translators.push_front(&translate_exception);
|
|
#ifdef Py_GIL_DISABLED
|
|
// Scale proportional to the number of cores. 2x is a heuristic to reduce contention.
|
|
// Make sure the number isn't unreasonable by limiting it to 16 bits (65K)
|
|
auto num_shards = static_cast<std::uint16_t>(
|
|
std::min<std::size_t>(round_up_to_next_pow2(2 * std::thread::hardware_concurrency()),
|
|
std::numeric_limits<std::uint16_t>::max()));
|
|
if (num_shards == 0) {
|
|
num_shards = 1;
|
|
}
|
|
instance_shards.reset(new instance_map_shard[num_shards]);
|
|
instance_shards_mask = num_shards - 1;
|
|
#endif
|
|
}
|
|
internals(const internals &other) = delete;
|
|
internals(internals &&other) = delete;
|
|
internals &operator=(const internals &other) = delete;
|
|
internals &operator=(internals &&other) = delete;
|
|
~internals() = default;
|
|
};
|
|
|
|
// the internals struct (above) is shared between all the modules. local_internals are only
|
|
// for a single module. Any changes made to internals may require an update to
|
|
// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
|
|
// restricted to a single module. Whether a module has local internals or not should not
|
|
// impact any other modules, because the only things accessing the local internals is the
|
|
// module that contains them.
|
|
struct local_internals {
|
|
type_map<type_info *> registered_types_cpp;
|
|
std::forward_list<ExceptionTranslator> registered_exception_translators;
|
|
PyTypeObject *function_record_py_type = nullptr;
|
|
};
|
|
|
|
/// Identifies which kind of holder a registered type uses.
enum class holder_enum_t : uint8_t {
    undefined = 0,
    std_unique_ptr = 1, // Default, lacking interop with std::shared_ptr.
    std_shared_ptr = 2, // Lacking interop with std::unique_ptr.
    smart_holder = 3,   // Full std::unique_ptr / std::shared_ptr interop.
    custom_holder = 4,
};
|
|
|
|
/// Additional type information which does not fit into the PyTypeObject.
|
|
/// Changes to this struct also require bumping `PYBIND11_INTERNALS_VERSION`.
|
|
struct type_info {
|
|
PyTypeObject *type;
|
|
const std::type_info *cpptype;
|
|
size_t type_size, type_align, holder_size_in_ptrs;
|
|
void *(*operator_new)(size_t);
|
|
void (*init_instance)(instance *, const void *);
|
|
void (*dealloc)(value_and_holder &v_h);
|
|
|
|
// Cross-DSO-safe function pointers, to sidestep cross-DSO RTTI issues
|
|
// on platforms like macOS (see PR #5728 for details):
|
|
memory::get_guarded_delete_fn get_memory_guarded_delete = memory::get_guarded_delete;
|
|
get_trampoline_self_life_support_fn get_trampoline_self_life_support = nullptr;
|
|
|
|
std::vector<PyObject *(*) (PyObject *, PyTypeObject *)> implicit_conversions;
|
|
std::vector<std::pair<const std::type_info *, void *(*) (void *)>> implicit_casts;
|
|
std::vector<bool (*)(PyObject *, void *&)> *direct_conversions;
|
|
buffer_info *(*get_buffer)(PyObject *, void *) = nullptr;
|
|
void *get_buffer_data = nullptr;
|
|
void *(*module_local_load)(PyObject *, const type_info *) = nullptr;
|
|
holder_enum_t holder_enum_v = holder_enum_t::undefined;
|
|
/* A simple type never occurs as a (direct or indirect) parent
|
|
* of a class that makes use of multiple inheritance.
|
|
* A type can be simple even if it has non-simple ancestors as long as it has no descendants.
|
|
*/
|
|
bool simple_type : 1;
|
|
/* True if there is no multiple inheritance in this type's inheritance tree */
|
|
bool simple_ancestors : 1;
|
|
/* true if this is a type registered with py::module_local */
|
|
bool module_local : 1;
|
|
};
|
|
|
|
// Identifier string for the shared `internals`: a fixed prefix, the internals ABI version, the
// compiler-type / platform ABI tag and a trailing "__".
#define PYBIND11_INTERNALS_ID                                                                     \
    "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                        \
        PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID "__"

// Same layout with a different prefix; used as the base of the per-module `local_internals`
// identifier.
#define PYBIND11_MODULE_LOCAL_ID                                                                  \
    "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                     \
        PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID "__"
|
|
|
|
/// Returns the current thread's PyThreadState using whichever "unchecked" accessor the target
/// Python implementation / version provides.
inline PyThreadState *get_thread_state_unchecked() {
#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON)
    return PyThreadState_GET();
#elif PY_VERSION_HEX >= 0x030D0000
    // Python 3.13 and newer
    return PyThreadState_GetUnchecked();
#else
    // Python older than 3.13
    return _PyThreadState_UncheckedGet();
#endif
}
|
|
|
|
/// Accessor for the counter that tells us whether multiple subinterpreters are, or ever have
/// been, active. This must never decrease while any interpreter may be running in any thread!
inline std::atomic<int> &get_num_interpreters_seen() {
    // Function-local static: every call returns the same counter, which starts at zero.
    static std::atomic<int> seen{0};
    return seen;
}
|
|
|
|
template <class T,
|
|
enable_if_t<std::is_same<std::nested_exception, remove_cvref_t<T>>::value, int> = 0>
|
|
bool handle_nested_exception(const T &exc, const std::exception_ptr &p) {
|
|
std::exception_ptr nested = exc.nested_ptr();
|
|
if (nested != nullptr && nested != p) {
|
|
translate_exception(nested);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template <class T,
|
|
enable_if_t<!std::is_same<std::nested_exception, remove_cvref_t<T>>::value, int> = 0>
|
|
bool handle_nested_exception(const T &exc, const std::exception_ptr &p) {
|
|
if (const auto *nep = dynamic_cast<const std::nested_exception *>(std::addressof(exc))) {
|
|
return handle_nested_exception(*nep, p);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Sets a Python error of type `exc_type` with message `msg`. If a Python error is already
/// pending, the new error is raised via raise_from() instead of set_error().
/// Returns true when an error was already pending, false otherwise.
inline bool raise_err(PyObject *exc_type, const char *msg) {
    const bool error_pending = PyErr_Occurred() != nullptr;
    if (error_pending) {
        raise_from(exc_type, msg);
    } else {
        set_error(exc_type, msg);
    }
    return error_pending;
}
|
|
|
|
inline void translate_exception(std::exception_ptr p) {
|
|
if (!p) {
|
|
return;
|
|
}
|
|
try {
|
|
std::rethrow_exception(p);
|
|
} catch (error_already_set &e) {
|
|
handle_nested_exception(e, p);
|
|
e.restore();
|
|
return;
|
|
} catch (const builtin_exception &e) {
|
|
// Could not use template since it's an abstract class.
|
|
if (const auto *nep = dynamic_cast<const std::nested_exception *>(std::addressof(e))) {
|
|
handle_nested_exception(*nep, p);
|
|
}
|
|
e.set_error();
|
|
return;
|
|
} catch (const std::bad_alloc &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_MemoryError, e.what());
|
|
return;
|
|
} catch (const std::domain_error &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_ValueError, e.what());
|
|
return;
|
|
} catch (const std::invalid_argument &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_ValueError, e.what());
|
|
return;
|
|
} catch (const std::length_error &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_ValueError, e.what());
|
|
return;
|
|
} catch (const std::out_of_range &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_IndexError, e.what());
|
|
return;
|
|
} catch (const std::range_error &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_ValueError, e.what());
|
|
return;
|
|
} catch (const std::overflow_error &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_OverflowError, e.what());
|
|
return;
|
|
} catch (const std::exception &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_RuntimeError, e.what());
|
|
return;
|
|
} catch (const std::nested_exception &e) {
|
|
handle_nested_exception(e, p);
|
|
raise_err(PyExc_RuntimeError, "Caught an unknown nested exception!");
|
|
return;
|
|
} catch (...) {
|
|
raise_err(PyExc_RuntimeError, "Caught an unknown exception!");
|
|
return;
|
|
}
|
|
}
|
|
|
|
#if !defined(__GLIBCXX__)
/// Translator that handles only `error_already_set` and `builtin_exception`. A null `p` is
/// ignored; any other exception type propagates to the caller.
inline void translate_local_exception(std::exception_ptr p) {
    if (!p) {
        return;
    }
    try {
        std::rethrow_exception(p);
    } catch (error_already_set &e) {
        e.restore();
    } catch (const builtin_exception &e) {
        e.set_error();
    }
}
#endif
|
|
|
|
/// Returns the dictionary in which pybind11 stores its per-interpreter state: the builtins
/// dict on PyPy / GraalPy, otherwise the dict of the current PyInterpreterState.
/// Throws error_already_set (after raising SystemError) if no dictionary could be obtained.
inline object get_python_state_dict() {
#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON)
    object state_dict = reinterpret_borrow<object>(PyEval_GetBuiltins());
#else
#    if PY_VERSION_HEX >= 0x03090000
    PyInterpreterState *const istate = PyInterpreterState_Get();
#    else
    PyInterpreterState *const istate = _PyInterpreterState_Get();
#    endif
    object state_dict;
    if (istate != nullptr) {
        state_dict = reinterpret_borrow<object>(PyInterpreterState_GetDict(istate));
    }
#endif
    if (state_dict) {
        return state_dict;
    }
    raise_from(PyExc_SystemError, "pybind11::detail::get_python_state_dict() FAILED");
    throw error_already_set();
}
|
|
|
|
template <typename InternalsType>
|
|
class internals_pp_manager {
|
|
public:
|
|
using on_fetch_function = void(InternalsType *);
|
|
internals_pp_manager(char const *id, on_fetch_function *on_fetch)
|
|
: holder_id_(id), on_fetch_(on_fetch) {}
|
|
|
|
/// Get the current pointer-to-pointer, allocating it if it does not already exist. May
|
|
/// acquire the GIL. Will never return nullptr.
|
|
std::unique_ptr<InternalsType> *get_pp() {
|
|
#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
|
|
if (get_num_interpreters_seen() > 1) {
|
|
// Whenever the interpreter changes on the current thread we need to invalidate the
|
|
// internals_pp so that it can be pulled from the interpreter's state dict. That is
|
|
// slow, so we use the current PyThreadState to check if it is necessary.
|
|
auto *tstate = get_thread_state_unchecked();
|
|
if (!tstate || tstate->interp != last_istate_.get()) {
|
|
gil_scoped_acquire_simple gil;
|
|
if (!tstate) {
|
|
tstate = get_thread_state_unchecked();
|
|
}
|
|
last_istate_ = tstate->interp;
|
|
internals_tls_p_ = get_or_create_pp_in_state_dict();
|
|
}
|
|
return internals_tls_p_.get();
|
|
}
|
|
#endif
|
|
if (!internals_singleton_pp_) {
|
|
gil_scoped_acquire_simple gil;
|
|
internals_singleton_pp_ = get_or_create_pp_in_state_dict();
|
|
}
|
|
return internals_singleton_pp_;
|
|
}
|
|
|
|
/// Drop all the references we're currently holding.
|
|
void unref() {
|
|
#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
|
|
if (get_num_interpreters_seen() > 1) {
|
|
last_istate_.reset();
|
|
internals_tls_p_.reset();
|
|
return;
|
|
}
|
|
#endif
|
|
internals_singleton_pp_ = nullptr;
|
|
}
|
|
|
|
void destroy() {
|
|
#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
|
|
if (get_num_interpreters_seen() > 1) {
|
|
auto *tstate = get_thread_state_unchecked();
|
|
// this could be called without an active interpreter, just use what was cached
|
|
if (!tstate || tstate->interp == last_istate_.get()) {
|
|
auto tpp = internals_tls_p_.get();
|
|
if (tpp) {
|
|
delete tpp;
|
|
}
|
|
}
|
|
unref();
|
|
return;
|
|
}
|
|
#endif
|
|
delete internals_singleton_pp_;
|
|
unref();
|
|
}
|
|
|
|
private:
|
|
std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict() {
|
|
error_scope err_scope;
|
|
dict state_dict = get_python_state_dict();
|
|
auto internals_obj
|
|
= reinterpret_steal<object>(dict_getitemstringref(state_dict.ptr(), holder_id_));
|
|
std::unique_ptr<InternalsType> *pp = nullptr;
|
|
if (internals_obj) {
|
|
void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr);
|
|
if (!raw_ptr) {
|
|
raise_from(PyExc_SystemError,
|
|
"pybind11::detail::internals_pp_manager::get_pp_from_dict() FAILED");
|
|
throw error_already_set();
|
|
}
|
|
pp = reinterpret_cast<std::unique_ptr<InternalsType> *>(raw_ptr);
|
|
if (on_fetch_ && pp) {
|
|
on_fetch_(pp->get());
|
|
}
|
|
} else {
|
|
pp = new std::unique_ptr<InternalsType>;
|
|
// NOLINTNEXTLINE(bugprone-casting-through-void)
|
|
state_dict[holder_id_] = capsule(reinterpret_cast<void *>(pp));
|
|
}
|
|
return pp;
|
|
}
|
|
|
|
char const *holder_id_ = nullptr;
|
|
on_fetch_function *on_fetch_ = nullptr;
|
|
#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
|
|
thread_specific_storage<PyInterpreterState> last_istate_;
|
|
thread_specific_storage<std::unique_ptr<InternalsType>> internals_tls_p_;
|
|
#endif
|
|
std::unique_ptr<InternalsType> *internals_singleton_pp_;
|
|
};
|
|
|
|
// If we loaded the internals through `state_dict`, our `error_already_set`
// and `builtin_exception` may be different local classes than the ones set up in the
// initial exception translator, below, so add another for our local exception classes.
//
// libstdc++ doesn't require this (types there are identified only by name)
// libc++ with CPython doesn't require this (types are explicitly exported)
// libc++ with PyPy still need it, awaiting further investigation
#if !defined(__GLIBCXX__)
/// Makes sure `translate_local_exception` is registered in `internals_ptr` exactly once.
/// A null `internals_ptr` is ignored.
inline void check_internals_local_exception_translator(internals *internals_ptr) {
    if (!internals_ptr) {
        return;
    }
    auto &translators = internals_ptr->registered_exception_translators;
    for (const ExceptionTranslator registered : translators) {
        if (registered == &translate_local_exception) {
            // Already present, nothing to do.
            return;
        }
    }
    translators.push_front(&translate_local_exception);
}
#endif
|
|
|
|
/// Returns the manager for the shared `internals`, keyed by PYBIND11_INTERNALS_ID. Outside of
/// libstdc++ the manager is given `check_internals_local_exception_translator` as its on-fetch
/// callback; with libstdc++ no callback is installed.
inline internals_pp_manager<internals> &get_internals_pp_manager() {
#if defined(__GLIBCXX__)
    static internals_pp_manager<internals> manager(PYBIND11_INTERNALS_ID, nullptr);
#else
    static internals_pp_manager<internals> manager(PYBIND11_INTERNALS_ID,
                                                   &check_internals_local_exception_translator);
#endif
    return manager;
}
|
|
|
|
/// Return a reference to the current `internals` data
|
|
PYBIND11_NOINLINE internals &get_internals() {
|
|
auto &ppmgr = get_internals_pp_manager();
|
|
auto &internals_ptr = *ppmgr.get_pp();
|
|
if (!internals_ptr) {
|
|
// Slow path, something needs fetched from the state dict or created
|
|
gil_scoped_acquire_simple gil;
|
|
error_scope err_scope;
|
|
internals_ptr.reset(new internals());
|
|
|
|
if (!internals_ptr->instance_base) {
|
|
// This calls get_internals, so cannot be called from within the internals constructor
|
|
// called above because internals_ptr must be set before get_internals is called again
|
|
internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
|
|
}
|
|
}
|
|
return *internals_ptr;
|
|
}
|
|
|
|
/// Returns the manager for this module's `local_internals`.
inline internals_pp_manager<local_internals> &get_local_internals_pp_manager() {
    // The key is PYBIND11_MODULE_LOCAL_ID followed by the address of this very static, so that
    // the value is uniquely tied to where the module is loaded in memory.
    static const std::string module_key
        = PYBIND11_MODULE_LOCAL_ID + std::to_string(reinterpret_cast<uintptr_t>(&module_key));
    // The manager keeps the raw c_str() pointer; `module_key` is a static and never changes.
    static internals_pp_manager<local_internals> manager(module_key.c_str(), nullptr);
    return manager;
}
|
|
|
|
/// Works like `get_internals`, but for things which are locally registered.
|
|
inline local_internals &get_local_internals() {
|
|
auto &ppmgr = get_local_internals_pp_manager();
|
|
auto &internals_ptr = *ppmgr.get_pp();
|
|
if (!internals_ptr) {
|
|
internals_ptr.reset(new local_internals());
|
|
}
|
|
return *internals_ptr;
|
|
}
|
|
|
|
// PYBIND11_LOCK_INTERNALS(internals) declares a local `lock` holding `internals.mutex` when
// Py_GIL_DISABLED is defined; otherwise it expands to nothing.
#ifndef Py_GIL_DISABLED
#    define PYBIND11_LOCK_INTERNALS(internals)
#else
#    define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<pymutex> lock((internals).mutex)
#endif
|
|
|
|
template <typename F>
|
|
inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) {
|
|
auto &internals = get_internals();
|
|
PYBIND11_LOCK_INTERNALS(internals);
|
|
return cb(internals);
|
|
}
|
|
|
|
template <typename F>
|
|
inline auto with_exception_translators(const F &cb)
|
|
-> decltype(cb(get_internals().registered_exception_translators,
|
|
get_local_internals().registered_exception_translators)) {
|
|
auto &internals = get_internals();
|
|
#ifdef Py_GIL_DISABLED
|
|
std::unique_lock<pymutex> lock((internals).exception_translator_mutex);
|
|
#endif
|
|
auto &local_internals = get_local_internals();
|
|
return cb(internals.registered_exception_translators,
|
|
local_internals.registered_exception_translators);
|
|
}
|
|
|
|
/// 64-bit bit mixer: David Stafford's variant 13 of the MurmurHash3 finalizer, popularized
/// by the SplitMix PRNG.
/// https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html
inline std::uint64_t mix64(std::uint64_t z) {
    z ^= z >> 30;
    z *= 0xbf58476d1ce4e5b9;
    z ^= z >> 27;
    z *= 0x94d049bb133111eb;
    z ^= z >> 31;
    return z;
}
|
|
|
|
template <typename F>
|
|
inline auto with_instance_map(const void *ptr, const F &cb)
|
|
-> decltype(cb(std::declval<instance_map &>())) {
|
|
auto &internals = get_internals();
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
// Hash address to compute shard, but ignore low bits. We'd like allocations
|
|
// from the same thread/core to map to the same shard and allocations from
|
|
// other threads/cores to map to other shards. Using the high bits is a good
|
|
// heuristic because memory allocators often have a per-thread
|
|
// arena/superblock/segment from which smaller allocations are served.
|
|
auto addr = reinterpret_cast<std::uintptr_t>(ptr);
|
|
auto hash = mix64(static_cast<std::uint64_t>(addr >> 20));
|
|
auto idx = static_cast<size_t>(hash & internals.instance_shards_mask);
|
|
|
|
auto &shard = internals.instance_shards[idx];
|
|
std::unique_lock<pymutex> lock(shard.mutex);
|
|
return cb(shard.registered_instances);
|
|
#else
|
|
(void) ptr;
|
|
return cb(internals.registered_instances);
|
|
#endif
|
|
}
|
|
|
|
// Returns the number of registered instances for testing purposes. The result may not be
|
|
// consistent if other threads are registering or unregistering instances concurrently.
|
|
inline size_t num_registered_instances() {
|
|
auto &internals = get_internals();
|
|
#ifdef Py_GIL_DISABLED
|
|
size_t count = 0;
|
|
for (size_t i = 0; i <= internals.instance_shards_mask; ++i) {
|
|
auto &shard = internals.instance_shards[i];
|
|
std::unique_lock<pymutex> lock(shard.mutex);
|
|
count += shard.registered_instances.size();
|
|
}
|
|
return count;
|
|
#else
|
|
return internals.registered_instances.size();
|
|
#endif
|
|
}
|
|
|
|
/// Constructs a std::string with the given arguments, stores it in `internals`, and returns its
|
|
/// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only
|
|
/// cleared when the program exits or after interpreter shutdown (when embedding), and so are
|
|
/// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name).
|
|
template <typename... Args>
|
|
const char *c_str(Args &&...args) {
|
|
// GCC 4.8 doesn't like parameter unpack within lambda capture, so use
|
|
// PYBIND11_LOCK_INTERNALS.
|
|
auto &internals = get_internals();
|
|
PYBIND11_LOCK_INTERNALS(internals);
|
|
auto &strings = internals.static_strings;
|
|
strings.emplace_front(std::forward<Args>(args)...);
|
|
return strings.front().c_str();
|
|
}
|
|
|
|
PYBIND11_NAMESPACE_END(detail)
|
|
|
|
/// Returns a named pointer that is shared among all extension modules (using the same
|
|
/// pybind11 version) running in the current interpreter. Names starting with underscores
|
|
/// are reserved for internal usage. Returns `nullptr` if no matching entry was found.
|
|
PYBIND11_NOINLINE void *get_shared_data(const std::string &name) {
|
|
return detail::with_internals([&](detail::internals &internals) {
|
|
auto it = internals.shared_data.find(name);
|
|
return it != internals.shared_data.end() ? it->second : nullptr;
|
|
});
|
|
}
|
|
|
|
/// Set the shared data that can be later recovered by `get_shared_data()`.
|
|
PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) {
|
|
return detail::with_internals([&](detail::internals &internals) {
|
|
internals.shared_data[name] = data;
|
|
return data;
|
|
});
|
|
}
|
|
|
|
/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
|
|
/// such entry exists. Otherwise, a new object of default-constructible type `T` is
|
|
/// added to the shared data under the given name and a reference to it is returned.
|
|
template <typename T>
|
|
T &get_or_create_shared_data(const std::string &name) {
|
|
return *detail::with_internals([&](detail::internals &internals) {
|
|
auto it = internals.shared_data.find(name);
|
|
T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr);
|
|
if (!ptr) {
|
|
ptr = new T();
|
|
internals.shared_data[name] = ptr;
|
|
}
|
|
return ptr;
|
|
});
|
|
}
|
|
|
|
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
|
|
|
|
#else
|
|
#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
|
|
#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
|