/* Generated by Cython 0.24 */ #define PY_SSIZE_T_CLEAN #include "Python.h" #ifndef Py_PYTHON_H #error Python headers needed to compile C extensions, please install development version of Python. #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) #error Cython requires Python 2.6+ or Python 3.2+. #else #define CYTHON_ABI "0_24" #include #ifndef offsetof #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) #endif #if !defined(WIN32) && !defined(MS_WINDOWS) #ifndef __stdcall #define __stdcall #endif #ifndef __cdecl #define __cdecl #endif #ifndef __fastcall #define __fastcall #endif #endif #ifndef DL_IMPORT #define DL_IMPORT(t) t #endif #ifndef DL_EXPORT #define DL_EXPORT(t) t #endif #ifndef PY_LONG_LONG #define PY_LONG_LONG LONG_LONG #endif #ifndef Py_HUGE_VAL #define Py_HUGE_VAL HUGE_VAL #endif #ifdef PYPY_VERSION #define CYTHON_COMPILING_IN_PYPY 1 #define CYTHON_COMPILING_IN_CPYTHON 0 #else #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_CPYTHON 1 #endif #if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000 #define CYTHON_USE_PYLONG_INTERNALS 1 #endif #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" #undef SHIFT #undef BASE #undef MASK #endif #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) #define Py_OptimizeFlag 0 #endif #define __PYX_BUILD_PY_SSIZE_T "n" #define CYTHON_FORMAT_SSIZE_T "z" #if PY_MAJOR_VERSION < 3 #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define __Pyx_DefaultClassType PyClass_Type #else #define __Pyx_BUILTIN_MODULE_NAME "builtins" #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define 
__Pyx_DefaultClassType PyType_Type #endif #ifndef Py_TPFLAGS_CHECKTYPES #define Py_TPFLAGS_CHECKTYPES 0 #endif #ifndef Py_TPFLAGS_HAVE_INDEX #define Py_TPFLAGS_HAVE_INDEX 0 #endif #ifndef Py_TPFLAGS_HAVE_NEWBUFFER #define Py_TPFLAGS_HAVE_NEWBUFFER 0 #endif #ifndef Py_TPFLAGS_HAVE_FINALIZE #define Py_TPFLAGS_HAVE_FINALIZE 0 #endif #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #define CYTHON_PEP393_ENABLED 1 #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ 0 : _PyUnicode_Ready((PyObject *)(op))) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) #else #define CYTHON_PEP393_ENABLED 0 #define __Pyx_PyUnicode_READY(op) (0) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) #endif #if CYTHON_COMPILING_IN_PYPY #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) #else #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) #endif #if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) #endif #if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format) #define 
PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) #endif #if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) #define PyObject_Malloc(s) PyMem_Malloc(s) #define PyObject_Free(p) PyMem_Free(p) #define PyObject_Realloc(p) PyMem_Realloc(p) #endif #define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) #define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) #else #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) #endif #if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) #define PyObject_ASCII(o) PyObject_Repr(o) #endif #if PY_MAJOR_VERSION >= 3 #define PyBaseString_Type PyUnicode_Type #define PyStringObject PyUnicodeObject #define PyString_Type PyUnicode_Type #define PyString_Check PyUnicode_Check #define PyString_CheckExact PyUnicode_CheckExact #endif #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) #else #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) #endif #ifndef PySet_CheckExact #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) #endif #define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) #if PY_MAJOR_VERSION >= 3 #define PyIntObject PyLongObject #define PyInt_Type PyLong_Type #define PyInt_Check(op) PyLong_Check(op) #define PyInt_CheckExact(op) PyLong_CheckExact(op) #define PyInt_FromString PyLong_FromString #define PyInt_FromUnicode PyLong_FromUnicode #define PyInt_FromLong PyLong_FromLong #define PyInt_FromSize_t PyLong_FromSize_t #define PyInt_FromSsize_t PyLong_FromSsize_t #define PyInt_AsLong PyLong_AsLong #define PyInt_AS_LONG 
PyLong_AS_LONG #define PyInt_AsSsize_t PyLong_AsSsize_t #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask #define PyNumber_Int PyNumber_Long #endif #if PY_MAJOR_VERSION >= 3 #define PyBoolObject PyLongObject #endif #if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY #ifndef PyUnicode_InternFromString #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) #endif #endif #if PY_VERSION_HEX < 0x030200A4 typedef long Py_hash_t; #define __Pyx_PyInt_FromHash_t PyInt_FromLong #define __Pyx_PyInt_AsHash_t PyInt_AsLong #else #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t #endif #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) #else #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) #endif #if PY_VERSION_HEX >= 0x030500B1 #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) #elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 typedef struct { unaryfunc am_await; unaryfunc am_aiter; unaryfunc am_anext; } __Pyx_PyAsyncMethodsStruct; #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) #else #define __Pyx_PyType_AsAsync(obj) NULL #endif #ifndef CYTHON_RESTRICT #if defined(__GNUC__) #define CYTHON_RESTRICT __restrict__ #elif defined(_MSC_VER) && _MSC_VER >= 1400 #define CYTHON_RESTRICT __restrict #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define CYTHON_RESTRICT restrict #else #define CYTHON_RESTRICT #endif #endif #define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) #ifndef __cplusplus #error "Cython files generated with the C++ option must be compiled with a C++ compiler." 
#endif #ifndef CYTHON_INLINE #define CYTHON_INLINE inline #endif template void __Pyx_call_destructor(T& x) { x.~T(); } template class __Pyx_FakeReference { public: __Pyx_FakeReference() : ptr(NULL) { } __Pyx_FakeReference(const T& ref) : ptr(const_cast(&ref)) { } T *operator->() { return ptr; } operator T&() { return *ptr; } private: T *ptr; }; #if defined(WIN32) || defined(MS_WINDOWS) #define _USE_MATH_DEFINES #endif #include #ifdef NAN #define __PYX_NAN() ((float) NAN) #else static CYTHON_INLINE float __PYX_NAN() { float value; memset(&value, 0xFF, sizeof(value)); return value; } #endif #define __PYX_ERR(f_index, lineno, Ln_error) \ { \ __pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \ } #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) #else #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) #endif #ifndef __PYX_EXTERN_C #ifdef __cplusplus #define __PYX_EXTERN_C extern "C" #else #define __PYX_EXTERN_C extern #endif #endif #define __PYX_HAVE__gumbocy #define __PYX_HAVE_API__gumbocy #include "gumbo.h" #include "string.h" #include #include "ios" #include "new" #include "stdexcept" #include "typeinfo" #include "re2/stringpiece.h" #include "re2/re2.h" #include #include #include #include #include "stdio.h" #ifdef _OPENMP #include #endif /* _OPENMP */ #ifdef PYREX_WITHOUT_ASSERTIONS #define CYTHON_WITHOUT_ASSERTIONS #endif #ifndef CYTHON_UNUSED # if defined(__GNUC__) # if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) # define CYTHON_UNUSED __attribute__ ((__unused__)) # else # define CYTHON_UNUSED # endif # elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) # define CYTHON_UNUSED __attribute__ ((__unused__)) # else # define CYTHON_UNUSED # endif #endif #ifndef CYTHON_NCP_UNUSED 
# if CYTHON_COMPILING_IN_CPYTHON # define CYTHON_NCP_UNUSED # else # define CYTHON_NCP_UNUSED CYTHON_UNUSED # endif #endif typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; #define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 #define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 #define __PYX_DEFAULT_STRING_ENCODING "" #define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString #define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize #define __Pyx_uchar_cast(c) ((unsigned char)c) #define __Pyx_long_cast(x) ((long)x) #define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ (sizeof(type) < sizeof(Py_ssize_t)) ||\ (sizeof(type) > sizeof(Py_ssize_t) &&\ likely(v < (type)PY_SSIZE_T_MAX ||\ v == (type)PY_SSIZE_T_MAX) &&\ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ v == (type)PY_SSIZE_T_MIN))) ||\ (sizeof(type) == sizeof(Py_ssize_t) &&\ (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ v == (type)PY_SSIZE_T_MAX))) ) #if defined (__cplusplus) && __cplusplus >= 201103L #include #define __Pyx_sst_abs(value) std::abs(value) #elif SIZEOF_INT >= SIZEOF_SIZE_T #define __Pyx_sst_abs(value) abs(value) #elif SIZEOF_LONG >= SIZEOF_SIZE_T #define __Pyx_sst_abs(value) labs(value) #elif defined (_MSC_VER) && defined (_M_X64) #define __Pyx_sst_abs(value) _abs64(value) #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define __Pyx_sst_abs(value) llabs(value) #elif defined (__GNUC__) #define __Pyx_sst_abs(value) __builtin_llabs(value) #else #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) #endif static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); #define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) #define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) #define __Pyx_PyBytes_FromString PyBytes_FromString #define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); #if PY_MAJOR_VERSION < 3 #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize #else #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize #endif #define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) #define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) #define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) #define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) #define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) #if PY_MAJOR_VERSION < 3 static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { const Py_UNICODE *u_end = u; while (*u_end++) ; return (size_t)(u_end - u - 1); } #else #define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen #endif #define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) #define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode #define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode #define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) #define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) #define 
__Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); #if CYTHON_COMPILING_IN_CPYTHON #define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) #else #define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) #endif #define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) #else #define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) #endif #define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x)) #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII static int __Pyx_sys_getdefaultencoding_not_ascii; static int __Pyx_init_sys_getdefaultencoding_params(void) { PyObject* sys; PyObject* default_encoding = NULL; PyObject* ascii_chars_u = NULL; PyObject* ascii_chars_b = NULL; const char* default_encoding_c; sys = PyImport_ImportModule("sys"); if (!sys) goto bad; default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); Py_DECREF(sys); if (!default_encoding) goto bad; default_encoding_c = PyBytes_AsString(default_encoding); if (!default_encoding_c) goto bad; if (strcmp(default_encoding_c, "ascii") == 0) { __Pyx_sys_getdefaultencoding_not_ascii = 0; } else { char ascii_chars[128]; int c; for (c = 0; c < 128; c++) { ascii_chars[c] = c; } __Pyx_sys_getdefaultencoding_not_ascii = 1; ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); if (!ascii_chars_u) goto bad; ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || 
memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { PyErr_Format( PyExc_ValueError, "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", default_encoding_c); goto bad; } Py_DECREF(ascii_chars_u); Py_DECREF(ascii_chars_b); } Py_DECREF(default_encoding); return 0; bad: Py_XDECREF(default_encoding); Py_XDECREF(ascii_chars_u); Py_XDECREF(ascii_chars_b); return -1; } #endif #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) #else #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT static char* __PYX_DEFAULT_STRING_ENCODING; static int __Pyx_init_sys_getdefaultencoding_params(void) { PyObject* sys; PyObject* default_encoding = NULL; char* default_encoding_c; sys = PyImport_ImportModule("sys"); if (!sys) goto bad; default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); Py_DECREF(sys); if (!default_encoding) goto bad; default_encoding_c = PyBytes_AsString(default_encoding); if (!default_encoding_c) goto bad; __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c)); if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); Py_DECREF(default_encoding); return 0; bad: Py_XDECREF(default_encoding); return -1; } #endif #endif /* Test for GCC > 2.95 */ #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #else /* !__GNUC__ or GCC < 2.95 */ #define likely(x) (x) #define unlikely(x) (x) #endif /* __GNUC__ */ static PyObject *__pyx_m; static PyObject *__pyx_d; static PyObject *__pyx_b; static PyObject *__pyx_empty_tuple; static PyObject *__pyx_empty_bytes; 
static PyObject *__pyx_empty_unicode; static int __pyx_lineno; static int __pyx_clineno = 0; static const char * __pyx_cfilenm= __FILE__; static const char *__pyx_filename; static const char *__pyx_f[] = { "gumbocy.pyx", }; /*--- Type declarations ---*/ struct __pyx_obj_7gumbocy_Attributes; struct __pyx_obj_7gumbocy_HTMLParser; /* "re2cy.pxd":3 * from libcpp.string cimport string * * ctypedef Arg* ArgPtr # <<<<<<<<<<<<<< * * */ typedef RE2::Arg *__pyx_t_5re2cy_ArgPtr; /* "gumbocy.pyx":26 * _RE_SPLIT_WHITESPACE = re.compile(r"\s+") * * ctypedef enum AttributeNames: # <<<<<<<<<<<<<< * ATTR_ID, * ATTR_ROLE, */ enum __pyx_t_7gumbocy_AttributeNames { __pyx_e_7gumbocy_ATTR_ID, __pyx_e_7gumbocy_ATTR_ROLE, __pyx_e_7gumbocy_ATTR_HREF, __pyx_e_7gumbocy_ATTR_STYLE, __pyx_e_7gumbocy_ATTR_REL, __pyx_e_7gumbocy_ATTR_SRC, __pyx_e_7gumbocy_ATTR_ALT, __pyx_e_7gumbocy_ATTR_NAME, __pyx_e_7gumbocy_ATTR_PROPERTY, __pyx_e_7gumbocy_ATTR_CONTENT }; typedef enum __pyx_t_7gumbocy_AttributeNames __pyx_t_7gumbocy_AttributeNames; /* "gumbocy.pyx":55 * # map[AttributeNames, const char*] values * * cdef class Attributes: # <<<<<<<<<<<<<< * cdef int size_classes * cdef dict values */ struct __pyx_obj_7gumbocy_Attributes { PyObject_HEAD int size_classes; PyObject *values; PyObject *classes; int has_hidden; }; /* "gumbocy.pyx":66 * # ctypedef sAttributes Attributes * * cdef class HTMLParser: # <<<<<<<<<<<<<< * * # Global parser variables */ struct __pyx_obj_7gumbocy_HTMLParser { PyObject_HEAD struct __pyx_vtabstruct_7gumbocy_HTMLParser *__pyx_vtab; int nesting_limit; int head_only; int has_ids_ignore; int has_classes_ignore; int has_ids_hidden; int has_classes_hidden; int has_attributes_whitelist; int has_classes_boilerplate; int has_ids_boilerplate; int has_roles_boilerplate; int has_metas_whitelist; std::unordered_set tags_ignore; std::unordered_set tags_ignore_head_only; std::unordered_set tags_boilerplate; std::unordered_set tags_boilerplate_bypass; std::unordered_set tags_separators; re2::RE2 
*attributes_whitelist; re2::RE2 *metas_whitelist; re2::RE2 *classes_ignore; re2::RE2 *ids_ignore; re2::RE2 *classes_hidden; re2::RE2 *ids_hidden; re2::RE2 *classes_boilerplate; re2::RE2 *ids_boilerplate; re2::RE2 *roles_boilerplate; int analyze_internal_hyperlinks; int analyze_external_hyperlinks; int analyze_word_groups; PyObject *current_stack; int has_url; char *url; char *netloc; char *scheme; re2::RE2 *internal_netloc_search; PyObject *analysis; PyObject *current_word_group; PyObject *current_hyperlink; int has_output; GumboOutput *output; PyObject *nodes; }; struct __pyx_vtabstruct_7gumbocy_HTMLParser { int (*guess_node_hidden)(struct __pyx_obj_7gumbocy_HTMLParser *, GumboNode *, struct __pyx_obj_7gumbocy_Attributes *); int (*guess_node_boilerplate)(struct __pyx_obj_7gumbocy_HTMLParser *, GumboNode *, struct __pyx_obj_7gumbocy_Attributes *); struct __pyx_obj_7gumbocy_Attributes *(*get_attributes)(struct __pyx_obj_7gumbocy_HTMLParser *, GumboNode *); void (*close_word_group)(struct __pyx_obj_7gumbocy_HTMLParser *); void (*add_text)(struct __pyx_obj_7gumbocy_HTMLParser *, PyObject *); void (*add_hyperlink_text)(struct __pyx_obj_7gumbocy_HTMLParser *, PyObject *); void (*open_hyperlink)(struct __pyx_obj_7gumbocy_HTMLParser *, struct __pyx_obj_7gumbocy_Attributes *); void (*close_hyperlink)(struct __pyx_obj_7gumbocy_HTMLParser *); int (*_traverse_node)(struct __pyx_obj_7gumbocy_HTMLParser *, int, GumboNode *, int, int, int, int, int); int (*_traverse_node_simple)(struct __pyx_obj_7gumbocy_HTMLParser *, int, GumboNode *); PyObject *(*free)(struct __pyx_obj_7gumbocy_HTMLParser *); }; static struct __pyx_vtabstruct_7gumbocy_HTMLParser *__pyx_vtabptr_7gumbocy_HTMLParser; /* --- Runtime support code (head) --- */ /* Refnanny.proto */ #ifndef CYTHON_REFNANNY #define CYTHON_REFNANNY 0 #endif #if CYTHON_REFNANNY typedef struct { void (*INCREF)(void*, PyObject*, int); void (*DECREF)(void*, PyObject*, int); void (*GOTREF)(void*, PyObject*, int); void (*GIVEREF)(void*, 
PyObject*, int); void* (*SetupContext)(const char*, int, const char*); void (*FinishContext)(void**); } __Pyx_RefNannyAPIStruct; static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; #ifdef WITH_THREAD #define __Pyx_RefNannySetupContext(name, acquire_gil)\ if (acquire_gil) {\ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ PyGILState_Release(__pyx_gilstate_save);\ } else {\ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ } #else #define __Pyx_RefNannySetupContext(name, acquire_gil)\ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) #endif #define __Pyx_RefNannyFinishContext()\ __Pyx_RefNanny->FinishContext(&__pyx_refnanny) #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) #else #define __Pyx_RefNannyDeclarations #define __Pyx_RefNannySetupContext(name, acquire_gil) #define __Pyx_RefNannyFinishContext() #define __Pyx_INCREF(r) Py_INCREF(r) #define __Pyx_DECREF(r) Py_DECREF(r) #define __Pyx_GOTREF(r) #define __Pyx_GIVEREF(r) #define __Pyx_XINCREF(r) Py_XINCREF(r) #define __Pyx_XDECREF(r) Py_XDECREF(r) #define __Pyx_XGOTREF(r) #define __Pyx_XGIVEREF(r) #endif #define 
__Pyx_XDECREF_SET(r, v) do {\ PyObject *tmp = (PyObject *) r;\ r = v; __Pyx_XDECREF(tmp);\ } while (0) #define __Pyx_DECREF_SET(r, v) do {\ PyObject *tmp = (PyObject *) r;\ r = v; __Pyx_DECREF(tmp);\ } while (0) #define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) #define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) /* PyObjectGetAttrStr.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { PyTypeObject* tp = Py_TYPE(obj); if (likely(tp->tp_getattro)) return tp->tp_getattro(obj, attr_name); #if PY_MAJOR_VERSION < 3 if (likely(tp->tp_getattr)) return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); #endif return PyObject_GetAttr(obj, attr_name); } #else #define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) #endif /* GetBuiltinName.proto */ static PyObject *__Pyx_GetBuiltinName(PyObject *name); /* RaiseDoubleKeywords.proto */ static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /* ParseKeywords.proto */ static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ const char* function_name); /* RaiseArgTupleInvalid.proto */ static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /* ArgTypeTest.proto */ static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, const char *name, int exact); /* dict_getitem_default.proto */ static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value); /* pyfrozenset_new.proto */ static CYTHON_INLINE PyObject* __Pyx_PyFrozenSet_New(PyObject* it) { if (it) { PyObject* result; #if CYTHON_COMPILING_IN_PYPY PyObject* args; args = PyTuple_Pack(1, it); if (unlikely(!args)) 
return NULL; result = PyObject_Call((PyObject*)&PyFrozenSet_Type, args, NULL); Py_DECREF(args); return result; #else if (PyFrozenSet_CheckExact(it)) { Py_INCREF(it); return it; } result = PyFrozenSet_New(it); if (unlikely(!result)) return NULL; if (likely(PySet_GET_SIZE(result))) return result; Py_DECREF(result); #endif } #if CYTHON_COMPILING_IN_CPYTHON return PyFrozenSet_Type.tp_new(&PyFrozenSet_Type, __pyx_empty_tuple, NULL); #else return PyObject_Call((PyObject*)&PyFrozenSet_Type, __pyx_empty_tuple, NULL); #endif } /* StringJoin.proto */ #if PY_MAJOR_VERSION < 3 #define __Pyx_PyString_Join __Pyx_PyBytes_Join #define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v)) #else #define __Pyx_PyString_Join PyUnicode_Join #define __Pyx_PyBaseString_Join PyUnicode_Join #endif #if CYTHON_COMPILING_IN_CPYTHON #if PY_MAJOR_VERSION < 3 #define __Pyx_PyBytes_Join _PyString_Join #else #define __Pyx_PyBytes_Join _PyBytes_Join #endif #else static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); #endif /* DictGetItem.proto */ #if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) { PyObject *value; value = PyDict_GetItemWithError(d, key); if (unlikely(!value)) { if (!PyErr_Occurred()) { PyObject* args = PyTuple_Pack(1, key); if (likely(args)) PyErr_SetObject(PyExc_KeyError, args); Py_XDECREF(args); } return NULL; } Py_INCREF(value); return value; } #else #define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key) #endif /* PyThreadStateGet.proto */ #if CYTHON_COMPILING_IN_CPYTHON #define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; #define __Pyx_PyThreadState_assign __pyx_tstate = PyThreadState_GET(); #else #define __Pyx_PyThreadState_declare #define __Pyx_PyThreadState_assign #endif /* PyErrFetchRestore.proto */ #if CYTHON_COMPILING_IN_CPYTHON #define __Pyx_ErrRestoreWithState(type, value, tb) 
__Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) #define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) #define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) #define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); #else #define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) #define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) #define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) #define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) #endif /* WriteUnraisableException.proto */ static void __Pyx_WriteUnraisable(const char *name, int clineno, int lineno, const char *filename, int full_traceback, int nogil); /* PySequenceContains.proto */ static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* seq, int eq) { int result = PySequence_Contains(seq, item); return unlikely(result < 0) ? 
result : (result == (eq == Py_EQ)); } /* PyObjectCall.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); #else #define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) #endif /* IncludeStringH.proto */ #include /* BytesEquals.proto */ static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /* GetModuleGlobalName.proto */ static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); /* PyObjectCallMethO.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); #endif /* PyObjectCallOneArg.proto */ static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); /* PyObjectCallNoArg.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); #else #define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL) #endif /* ListAppend.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) { PyListObject* L = (PyListObject*) list; Py_ssize_t len = Py_SIZE(list); if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) { Py_INCREF(x); PyList_SET_ITEM(list, len, x); Py_SIZE(list) = len+1; return 0; } return PyList_Append(list, x); } #else #define __Pyx_PyList_Append(L,x) PyList_Append(L,x) #endif /* PyObjectCallMethod1.proto */ static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); /* append.proto */ static CYTHON_INLINE int __Pyx_PyObject_Append(PyObject* L, PyObject* x); /* GetItemInt.proto */ #define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ __Pyx_GetItemInt_Generic(o, to_py_func(i)))) #define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, int wraparound, int boundscheck); #define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, int wraparound, int boundscheck); static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, int wraparound, int boundscheck); /* SetItemInt.proto */ #define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, int wraparound, int boundscheck); /* SliceObject.proto */ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, PyObject** py_start, PyObject** py_stop, PyObject** py_slice, int has_cstart, int has_cstop, int wraparound); /* PyObjectCallMethod2.proto */ static PyObject* __Pyx_PyObject_CallMethod2(PyObject* obj, PyObject* method_name, PyObject* arg1, PyObject* arg2); /* dict_setdefault.proto */ static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value, int is_safe_type); /* PyDictContains.proto */ static CYTHON_INLINE int __Pyx_PyDict_ContainsTF(PyObject* item, PyObject* dict, int eq) { int result = PyDict_Contains(dict, item); return unlikely(result < 0) ? 
result : (result == (eq == Py_EQ)); } /* PyObjectCallMethod0.proto */ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); /* pop.proto */ static CYTHON_INLINE PyObject* __Pyx__PyObject_Pop(PyObject* L); #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L); #define __Pyx_PyObject_Pop(L) (likely(PyList_CheckExact(L)) ?\ __Pyx_PyList_Pop(L) : __Pyx__PyObject_Pop(L)) #else #define __Pyx_PyList_Pop(L) __Pyx__PyObject_Pop(L) #define __Pyx_PyObject_Pop(L) __Pyx__PyObject_Pop(L) #endif /* UnpackUnboundCMethod.proto */ typedef struct { PyObject *type; PyObject **method_name; PyCFunction func; PyObject *method; int flag; } __Pyx_CachedCFunction; /* CallUnboundCMethod0.proto */ static PyObject* __Pyx__CallUnboundCMethod0(__Pyx_CachedCFunction* cfunc, PyObject* self); #if CYTHON_COMPILING_IN_CPYTHON #define __Pyx_CallUnboundCMethod0(cfunc, self)\ ((likely((cfunc)->func)) ?\ (likely((cfunc)->flag == METH_NOARGS) ? (*((cfunc)->func))(self, NULL) :\ (likely((cfunc)->flag == (METH_VARARGS | METH_KEYWORDS)) ? ((*(PyCFunctionWithKeywords)(cfunc)->func)(self, __pyx_empty_tuple, NULL)) :\ ((cfunc)->flag == METH_VARARGS ? 
(*((cfunc)->func))(self, __pyx_empty_tuple) : __Pyx__CallUnboundCMethod0(cfunc, self)))) :\ __Pyx__CallUnboundCMethod0(cfunc, self)) #else #define __Pyx_CallUnboundCMethod0(cfunc, self) __Pyx__CallUnboundCMethod0(cfunc, self) #endif /* SetVTable.proto */ static int __Pyx_SetVtable(PyObject *dict, void *vtable); /* Import.proto */ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /* CodeObjectCache.proto */ typedef struct { PyCodeObject* code_object; int code_line; } __Pyx_CodeObjectCacheEntry; struct __Pyx_CodeObjectCache { int count; int max_count; __Pyx_CodeObjectCacheEntry* entries; }; static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); static PyCodeObject *__pyx_find_code_object(int code_line); static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); /* AddTraceback.proto */ static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename); /* None.proto */ #include /* CppExceptionConversion.proto */ #ifndef __Pyx_CppExn2PyErr #include #include #include #include static void __Pyx_CppExn2PyErr() { try { if (PyErr_Occurred()) ; // let the latest Python exn pass through and ignore the current one else throw; } catch (const std::bad_alloc& exn) { PyErr_SetString(PyExc_MemoryError, exn.what()); } catch (const std::bad_cast& exn) { PyErr_SetString(PyExc_TypeError, exn.what()); } catch (const std::domain_error& exn) { PyErr_SetString(PyExc_ValueError, exn.what()); } catch (const std::invalid_argument& exn) { PyErr_SetString(PyExc_ValueError, exn.what()); } catch (const std::ios_base::failure& exn) { PyErr_SetString(PyExc_IOError, exn.what()); } catch (const std::out_of_range& exn) { PyErr_SetString(PyExc_IndexError, exn.what()); } catch (const std::overflow_error& exn) { PyErr_SetString(PyExc_OverflowError, exn.what()); } catch (const std::range_error& exn) { 
PyErr_SetString(PyExc_ArithmeticError, exn.what()); } catch (const std::underflow_error& exn) { PyErr_SetString(PyExc_ArithmeticError, exn.what()); } catch (const std::exception& exn) { PyErr_SetString(PyExc_RuntimeError, exn.what()); } catch (...) { PyErr_SetString(PyExc_RuntimeError, "Unknown exception"); } } #endif /* CIntToPy.proto */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_t_7gumbocy_AttributeNames value); /* CIntToPy.proto */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); /* CIntToPy.proto */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); /* CIntToPy.proto */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); /* CIntFromPy.proto */ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); /* CIntFromPy.proto */ static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); /* CIntFromPy.proto */ static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); /* CheckBinaryVersion.proto */ static int __Pyx_check_binary_version(void); /* InitStrings.proto */ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); static int __pyx_f_7gumbocy_10HTMLParser_guess_node_hidden(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, CYTHON_UNUSED GumboNode *__pyx_v_node, struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs); /* proto*/ static int __pyx_f_7gumbocy_10HTMLParser_guess_node_boilerplate(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, GumboNode *__pyx_v_node, struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs); /* proto*/ static struct __pyx_obj_7gumbocy_Attributes *__pyx_f_7gumbocy_10HTMLParser_get_attributes(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, GumboNode *__pyx_v_node); /* proto*/ static void __pyx_f_7gumbocy_10HTMLParser_close_word_group(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self); /* proto*/ static void __pyx_f_7gumbocy_10HTMLParser_add_text(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject 
*__pyx_v_text); /* proto*/
/* Remaining prototypes of the C-level (cdef) methods of gumbocy.HTMLParser. */
static void __pyx_f_7gumbocy_10HTMLParser_add_hyperlink_text(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/
static void __pyx_f_7gumbocy_10HTMLParser_open_hyperlink(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs); /* proto*/
static void __pyx_f_7gumbocy_10HTMLParser_close_hyperlink(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self); /* proto*/
static int __pyx_f_7gumbocy_10HTMLParser__traverse_node(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, int __pyx_v_level, GumboNode *__pyx_v_node, int __pyx_v_is_head, int __pyx_v_is_hidden, int __pyx_v_is_boilerplate, int __pyx_v_is_boilerplate_bypassed, int __pyx_v_is_hyperlink); /* proto*/
static int __pyx_f_7gumbocy_10HTMLParser__traverse_node_simple(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, int __pyx_v_level, GumboNode *__pyx_v_node); /* proto*/
static PyObject *__pyx_f_7gumbocy_10HTMLParser_free(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self); /* proto*/

/* Module declarations from 'libc.string' */

/* Module declarations from 'libcpp.string' */

/* Module declarations from 're2cy' */

/* Module declarations from 'libcpp.utility' */

/* Module declarations from 'libcpp.unordered_set' */

/* Module declarations from 'libcpp.vector' */

/* Module declarations from 'libcpp.map' */

/* Module declarations from 'gumbocy' */
/* Type objects of the two extension types; both start out null. */
static PyTypeObject *__pyx_ptype_7gumbocy_Attributes = 0;
static PyTypeObject *__pyx_ptype_7gumbocy_HTMLParser = 0;
/* Module-level cdef variables: the RE2 argument vector, the pointer passed as
 * the argument array by re2_search, and three RE2 patterns held by pointer. */
static std::vector<__pyx_t_5re2cy_ArgPtr> *__pyx_v_7gumbocy_argp;
static __pyx_t_5re2cy_ArgPtr *__pyx_v_7gumbocy_empty_args;
static re2::RE2 *__pyx_v_7gumbocy__RE2_SEARCH_STYLE_HIDDEN;
static re2::RE2 *__pyx_v_7gumbocy__RE2_ABSOLUTE_HREF;
static re2::RE2 *__pyx_v_7gumbocy__RE2_IGNORED_HREF;
static int __pyx_f_7gumbocy_re2_search(char const *, re2::RE2 &); /*proto*/
#define __Pyx_MODULE_NAME "gumbocy"
int __pyx_module_is_main_gumbocy = 0;

/* Implementation of 'gumbocy' */
static PyObject *__pyx_builtin_range;
/* Raw C text of every string constant the module uses, one array per
 * constant.  NOTE(review): these are presumably turned into the PyObject*
 * slots declared after this table by __Pyx_InitStrings -- confirm in the
 * module init code. */
static const char __pyx_k_[] = "^(?:";
static const char __pyx_k_s[] = "\\s+";
static const char __pyx_k__2[] = "|";
static const char __pyx_k__3[] = ")$";
static const char __pyx_k__4[] = " ";
static const char __pyx_k__5[] = "";
static const char __pyx_k__6[] = "//";
static const char __pyx_k__8[] = ":";
static const char __pyx_k_id[] = "id";
static const char __pyx_k_re[] = "re";
static const char __pyx_k_alt[] = "alt";
static const char __pyx_k_get[] = "get";
static const char __pyx_k_pop[] = "pop";
static const char __pyx_k_rel[] = "rel";
static const char __pyx_k_src[] = "src";
static const char __pyx_k_url[] = "url";
static const char __pyx_k_href[] = "href";
static const char __pyx_k_join[] = "join";
static const char __pyx_k_main[] = "__main__";
static const char __pyx_k_name[] = "name";
static const char __pyx_k_role[] = "role";
static const char __pyx_k_test[] = "__test__";
static const char __pyx_k_true[] = "true";
static const char __pyx_k_class[] = "class";
static const char __pyx_k_lower[] = "lower";
static const char __pyx_k_range[] = "range";
static const char __pyx_k_split[] = "split";
static const char __pyx_k_strip[] = "strip";
static const char __pyx_k_style[] = "style";
static const char __pyx_k_title[] = "title";
static const char __pyx_k_append[] = "append";
static const char __pyx_k_escape[] = "escape";
static const char __pyx_k_hidden[] = "hidden";
static const char __pyx_k_import[] = "__import__";
static const char __pyx_k_netloc[] = "netloc";
static const char __pyx_k_scheme[] = "scheme";
static const char __pyx_k_article[] = "article";
static const char __pyx_k_compile[] = "compile";
static const char __pyx_k_content[] = "content";
static const char __pyx_k_options[] = "options";
static const char __pyx_k_base_url[] = "base_url";
static const char __pyx_k_http_s_s[] = "^http(?:s)?://%s";
static const char __pyx_k_property[] = "property";
static const char __pyx_k_urlparse[] = "urlparse";
static const char __pyx_k_head_only[] = "head_only";
static const char __pyx_k_head_links[] = "head_links";
static const char __pyx_k_head_metas[] = "head_metas";
static const char __pyx_k_ids_hidden[] = "ids_hidden";
static const char __pyx_k_ids_ignore[] = "ids_ignore";
static const char __pyx_k_pyx_vtable[] = "__pyx_vtable__";
static const char __pyx_k_setdefault[] = "setdefault";
static const char __pyx_k_startswith[] = "startswith";
static const char __pyx_k_aria_hidden[] = "aria-hidden";
static const char __pyx_k_tags_ignore[] = "tags_ignore";
static const char __pyx_k_word_groups[] = "word_groups";
static const char __pyx_k_nesting_limit[] = "nesting_limit";
static const char __pyx_k_classes_hidden[] = "classes_hidden";
static const char __pyx_k_classes_ignore[] = "classes_ignore";
static const char __pyx_k_ids_boilerplate[] = "ids_boilerplate";
static const char __pyx_k_metas_whitelist[] = "metas_whitelist";
static const char __pyx_k_tags_separators[] = "tags_separators";
static const char __pyx_k_tags_boilerplate[] = "tags_boilerplate";
static const char __pyx_k_roles_boilerplate[] = "roles_boilerplate";
static const char __pyx_k_RE_SPLIT_WHITESPACE[] = "_RE_SPLIT_WHITESPACE";
static const char __pyx_k_analyze_word_groups[] = "analyze_word_groups";
static const char __pyx_k_classes_boilerplate[] = "classes_boilerplate";
static const char __pyx_k_external_hyperlinks[] = "external_hyperlinks";
static const char __pyx_k_internal_hyperlinks[] = "internal_hyperlinks";
static const char __pyx_k_attributes_whitelist[] = "attributes_whitelist";
static const char __pyx_k_tags_boilerplate_bypass[] = "tags_boilerplate_bypass";
static const char __pyx_k_analyze_external_hyperlinks[] = "analyze_external_hyperlinks";
static const char __pyx_k_analyze_internal_hyperlinks[] = "analyze_internal_hyperlinks";
/* Python object slots for the string constants begin here and continue past
 * this point. */
static PyObject *__pyx_kp_s_;
static PyObject *__pyx_n_s_RE_SPLIT_WHITESPACE;
static PyObject *__pyx_kp_s__2;
static PyObject *__pyx_kp_s__3;
static
PyObject *__pyx_kp_s__4;
/* Python object slots for the module's string constants, in alphabetical
 * order of the constant's name.  NOTE(review): the `_s_` / `_b_` infix
 * presumably distinguishes str from bytes objects, and `n` / `kp` presumably
 * distinguishes identifier-like names from other literals -- confirm against
 * the string table in the module init code. */
static PyObject *__pyx_kp_b__5;
static PyObject *__pyx_kp_s__5;
static PyObject *__pyx_kp_s__6;
static PyObject *__pyx_kp_s__8;
static PyObject *__pyx_n_b_alt;
static PyObject *__pyx_n_s_analyze_external_hyperlinks;
static PyObject *__pyx_n_s_analyze_internal_hyperlinks;
static PyObject *__pyx_n_s_analyze_word_groups;
static PyObject *__pyx_n_s_append;
static PyObject *__pyx_kp_b_aria_hidden;
static PyObject *__pyx_n_s_article;
static PyObject *__pyx_n_s_attributes_whitelist;
static PyObject *__pyx_n_s_base_url;
static PyObject *__pyx_n_b_class;
static PyObject *__pyx_n_s_class;
static PyObject *__pyx_n_s_classes_boilerplate;
static PyObject *__pyx_n_s_classes_hidden;
static PyObject *__pyx_n_s_classes_ignore;
static PyObject *__pyx_n_s_compile;
static PyObject *__pyx_n_b_content;
static PyObject *__pyx_n_s_content;
static PyObject *__pyx_n_s_escape;
static PyObject *__pyx_n_s_external_hyperlinks;
static PyObject *__pyx_n_s_get;
static PyObject *__pyx_n_s_head_links;
static PyObject *__pyx_n_s_head_metas;
static PyObject *__pyx_n_s_head_only;
static PyObject *__pyx_n_b_hidden;
static PyObject *__pyx_n_b_href;
static PyObject *__pyx_n_s_href;
static PyObject *__pyx_kp_s_http_s_s;
static PyObject *__pyx_n_b_id;
static PyObject *__pyx_n_s_id;
static PyObject *__pyx_n_s_ids_boilerplate;
static PyObject *__pyx_n_s_ids_hidden;
static PyObject *__pyx_n_s_ids_ignore;
static PyObject *__pyx_n_s_import;
static PyObject *__pyx_n_s_internal_hyperlinks;
static PyObject *__pyx_n_s_join;
static PyObject *__pyx_n_s_lower;
static PyObject *__pyx_n_s_main;
static PyObject *__pyx_n_s_metas_whitelist;
static PyObject *__pyx_n_b_name;
static PyObject *__pyx_n_s_name;
static PyObject *__pyx_n_s_nesting_limit;
static PyObject *__pyx_n_s_netloc;
static PyObject *__pyx_n_s_options;
static PyObject *__pyx_n_s_pop;
static PyObject *__pyx_n_b_property;
static PyObject *__pyx_n_s_property;
static PyObject *__pyx_n_s_pyx_vtable;
static PyObject *__pyx_n_s_range;
static PyObject *__pyx_n_s_re;
static PyObject *__pyx_n_b_rel;
static PyObject *__pyx_n_s_rel;
static PyObject *__pyx_n_b_role;
static PyObject *__pyx_n_s_role;
static PyObject *__pyx_n_s_roles_boilerplate;
static PyObject *__pyx_kp_s_s;
static PyObject *__pyx_n_s_scheme;
static PyObject *__pyx_n_s_setdefault;
static PyObject *__pyx_n_s_split;
static PyObject *__pyx_n_b_src;
static PyObject *__pyx_n_s_startswith;
static PyObject *__pyx_n_s_strip;
static PyObject *__pyx_n_b_style;
static PyObject *__pyx_n_s_tags_boilerplate;
static PyObject *__pyx_n_s_tags_boilerplate_bypass;
static PyObject *__pyx_n_s_tags_ignore;
static PyObject *__pyx_n_s_tags_separators;
static PyObject *__pyx_n_s_test;
static PyObject *__pyx_n_s_title;
static PyObject *__pyx_n_b_true;
static PyObject *__pyx_n_s_url;
static PyObject *__pyx_n_s_urlparse;
static PyObject *__pyx_n_s_word_groups;
/* Implementation functions behind the Python-visible methods of HTMLParser
 * (__cinit__, parse, analyze, listnodes, __dealloc__). */
static int __pyx_pf_7gumbocy_10HTMLParser___cinit__(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_options); /* proto */
static PyObject *__pyx_pf_7gumbocy_10HTMLParser_2parse(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, char *__pyx_v_html); /* proto */
static PyObject *__pyx_pf_7gumbocy_10HTMLParser_4analyze(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_url); /* proto */
static PyObject *__pyx_pf_7gumbocy_10HTMLParser_6listnodes(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self); /* proto */
static void __pyx_pf_7gumbocy_10HTMLParser_8__dealloc__(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self); /* proto */
/* tp_new slots of the two extension types. */
static PyObject *__pyx_tp_new_7gumbocy_Attributes(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
static PyObject *__pyx_tp_new_7gumbocy_HTMLParser(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
/* Cache record for the unbound `pop` method: only the method-name slot is
 * pre-filled, every other field starts at zero. */
static __Pyx_CachedCFunction __pyx_umethod_PyList_Type_pop = {0, &__pyx_n_s_pop, 0, 0, 0};
/* Cached integer and tuple constants. */
static PyObject *__pyx_int_1;
static PyObject *__pyx_int_999;
static PyObject *__pyx_tuple__7;
static PyObject *__pyx_tuple__9;
static PyObject
*__pyx_tuple__10;

/* "gumbocy.pyx":17
 * cdef re2cy.ArgPtr *empty_args = &(deref(argp)[0])
 *
 * cdef bint re2_search(const char* s, re2cy.RE2 &pattern):             # <<<<<<<<<<<<<<
 *     return re2cy.RE2.PartialMatchN(s, pattern, empty_args, 0)
 *
 */

/*
 * C implementation of the cdef function `re2_search`.
 *
 * Runs re2::RE2::PartialMatchN on the C string `s` with `pattern`, passing the
 * module-level `empty_args` pointer and an argument count of 0, and returns
 * that call's result as an int.
 */
static int __pyx_f_7gumbocy_re2_search(char const *__pyx_v_s, re2::RE2 &__pyx_v_pattern) {
  int __pyx_r;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("re2_search", 0);

  /* "gumbocy.pyx":18
 *
 * cdef bint re2_search(const char* s, re2cy.RE2 &pattern):
 *     return re2cy.RE2.PartialMatchN(s, pattern, empty_args, 0)             # <<<<<<<<<<<<<<
 *
 * cdef re2cy.RE2 *_RE2_SEARCH_STYLE_HIDDEN = new re2cy.RE2(r"(display\s*\:\s*none)|(visibility\s*\:\s*hidden)")
 */
  __pyx_r = re2::RE2::PartialMatchN(__pyx_v_s, __pyx_v_pattern, __pyx_v_7gumbocy_empty_args, 0);
  goto __pyx_L0;

  /* "gumbocy.pyx":17
 * cdef re2cy.ArgPtr *empty_args = &(deref(argp)[0])
 *
 * cdef bint re2_search(const char* s, re2cy.RE2 &pattern):             # <<<<<<<<<<<<<<
 *     return re2cy.RE2.PartialMatchN(s, pattern, empty_args, 0)
 *
 */

  /* function exit code */
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "gumbocy.pyx":119
 *     cdef list nodes
 *
 *     def __cinit__(self, dict options=None):             # <<<<<<<<<<<<<<
 *
 *         options = options or {}
 */

/* Python wrapper */
/*
 * Argument-parsing wrapper for HTMLParser.__cinit__(self, dict options=None).
 *
 * Accepts at most one argument, positional or as the keyword `options`;
 * the default is None.  After unpacking, the value is checked against
 * PyDict_Type with __Pyx_ArgTypeTest and handed to the implementation
 * function.  Returns the implementation's result, or -1 when argument
 * parsing or the type check fails.
 */
static int __pyx_pw_7gumbocy_10HTMLParser_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static int __pyx_pw_7gumbocy_10HTMLParser_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_options = 0;
  int __pyx_r;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_options,0};
    PyObject* values[1] = {0};
    /* Default for `options` when the caller supplies nothing. */
    values[0] = ((PyObject*)Py_None);
    if (unlikely(__pyx_kwds)) {
      /* Keyword dict present: take positionals first, then look up what is
       * still missing by name. */
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        /* falls through to the `case 0` break */
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        /* `options` was not given positionally; try the keyword. */
        if (kw_args > 0) {
          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_options);
          if (value) { values[0] = value; kw_args--; }
        }
      }
      /* Keywords still unaccounted for are passed to the generic keyword
       * parser. */
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(0, 119, __pyx_L3_error)
      }
    } else {
      /* Positional-only call: zero or one argument allowed. */
      switch (PyTuple_GET_SIZE(__pyx_args)) {
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        /* falls through to the `case 0` break */
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
    }
    __pyx_v_options = ((PyObject*)values[0]);
  }
  goto __pyx_L4_argument_unpacking_done;
  /* Wrong number of positional arguments. */
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 119, __pyx_L3_error)
  /* Argument-parsing failure: record a traceback entry and report -1. */
  __pyx_L3_error:;
  __Pyx_AddTraceback("gumbocy.HTMLParser.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return -1;
  __pyx_L4_argument_unpacking_done:;
  /* NOTE(review): the `1` after &PyDict_Type is presumably the "None is
   * allowed" flag, matching the `options=None` default -- confirm against
   * __Pyx_ArgTypeTest. */
  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_options), (&PyDict_Type), 1, "options", 1))) __PYX_ERR(0, 119, __pyx_L1_error)
  __pyx_r = __pyx_pf_7gumbocy_10HTMLParser___cinit__(((struct __pyx_obj_7gumbocy_HTMLParser *)__pyx_v_self), __pyx_v_options);

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __pyx_r = -1;
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static int __pyx_pf_7gumbocy_10HTMLParser___cinit__(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_options) {
  PyObject *__pyx_v_attributes_whitelist = NULL;
  PyObject *__pyx_v_classes_ignore = NULL;
  PyObject *__pyx_v_ids_ignore = NULL;
  PyObject *__pyx_v_classes_hidden = NULL;
  PyObject *__pyx_v_ids_hidden = NULL;
  PyObject *__pyx_v_classes_boilerplate = NULL;
  PyObject *__pyx_v_ids_boilerplate = NULL;
  PyObject *__pyx_v_roles_boilerplate = NULL;
  PyObject *__pyx_v_metas_whitelist = NULL;
  PyObject *__pyx_v_tag_name = NULL;
  GumboTag __pyx_v_tag;
  int __pyx_r;
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  int __pyx_t_2;
  PyObject *__pyx_t_3 =
NULL; int __pyx_t_4; Py_ssize_t __pyx_t_5; char const *__pyx_t_6; re2::RE2 *__pyx_t_7; int __pyx_t_8; int __pyx_t_9; PyObject *(*__pyx_t_10)(PyObject *); char const *__pyx_t_11; __Pyx_RefNannySetupContext("__cinit__", 0); __Pyx_INCREF(__pyx_v_options); /* "gumbocy.pyx":121 * def __cinit__(self, dict options=None): * * options = options or {} # <<<<<<<<<<<<<< * * self.nesting_limit = options.get("nesting_limit", 999) */ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_options); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 121, __pyx_L1_error) if (!__pyx_t_2) { } else { __Pyx_INCREF(__pyx_v_options); __pyx_t_1 = __pyx_v_options; goto __pyx_L3_bool_binop_done; } __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 121, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_L3_bool_binop_done:; __Pyx_DECREF_SET(__pyx_v_options, ((PyObject*)__pyx_t_1)); __pyx_t_1 = 0; /* "gumbocy.pyx":123 * options = options or {} * * self.nesting_limit = options.get("nesting_limit", 999) # <<<<<<<<<<<<<< * self.head_only = options.get("head_only") * */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 123, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_nesting_limit, __pyx_int_999); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 123, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 123, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->nesting_limit = __pyx_t_4; /* "gumbocy.pyx":124 * * self.nesting_limit = options.get("nesting_limit", 999) * self.head_only = options.get("head_only") # <<<<<<<<<<<<<< * * self.analyze_external_hyperlinks = bool(options.get("analyze_external_hyperlinks", True)) */ if (unlikely(__pyx_v_options == Py_None)) { 
PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 124, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_head_only, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 124, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 124, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->head_only = __pyx_t_2; /* "gumbocy.pyx":126 * self.head_only = options.get("head_only") * * self.analyze_external_hyperlinks = bool(options.get("analyze_external_hyperlinks", True)) # <<<<<<<<<<<<<< * self.analyze_internal_hyperlinks = bool(options.get("analyze_internal_hyperlinks", True)) * self.analyze_word_groups = bool(options.get("analyze_word_groups", True)) */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 126, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_analyze_external_hyperlinks, Py_True); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 126, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 126, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->analyze_external_hyperlinks = (!(!__pyx_t_2)); /* "gumbocy.pyx":127 * * self.analyze_external_hyperlinks = bool(options.get("analyze_external_hyperlinks", True)) * self.analyze_internal_hyperlinks = bool(options.get("analyze_internal_hyperlinks", True)) # <<<<<<<<<<<<<< * self.analyze_word_groups = bool(options.get("analyze_word_groups", True)) * */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 127, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_analyze_internal_hyperlinks, 
Py_True); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 127, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 127, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->analyze_internal_hyperlinks = (!(!__pyx_t_2)); /* "gumbocy.pyx":128 * self.analyze_external_hyperlinks = bool(options.get("analyze_external_hyperlinks", True)) * self.analyze_internal_hyperlinks = bool(options.get("analyze_internal_hyperlinks", True)) * self.analyze_word_groups = bool(options.get("analyze_word_groups", True)) # <<<<<<<<<<<<<< * * attributes_whitelist = set(options.get("attributes_whitelist") or []) */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 128, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_analyze_word_groups, Py_True); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 128, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 128, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->analyze_word_groups = (!(!__pyx_t_2)); /* "gumbocy.pyx":130 * self.analyze_word_groups = bool(options.get("analyze_word_groups", True)) * * attributes_whitelist = set(options.get("attributes_whitelist") or []) # <<<<<<<<<<<<<< * * classes_ignore = frozenset(options.get("classes_ignore") or []) */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 130, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_attributes_whitelist, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 130, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 130, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_3); 
__pyx_t_3 = 0; } else { __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L5_bool_binop_done; } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 130, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_L5_bool_binop_done:; __pyx_t_3 = PySet_New(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 130, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_attributes_whitelist = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":132 * attributes_whitelist = set(options.get("attributes_whitelist") or []) * * classes_ignore = frozenset(options.get("classes_ignore") or []) # <<<<<<<<<<<<<< * if len(classes_ignore) > 0: * self.has_classes_ignore = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 132, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_classes_ignore, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 132, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 132, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L7_bool_binop_done; } __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 132, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_L7_bool_binop_done:; __pyx_t_1 = __Pyx_PyFrozenSet_New(__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 132, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_classes_ignore = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":133 * * classes_ignore = 
frozenset(options.get("classes_ignore") or []) * if len(classes_ignore) > 0: # <<<<<<<<<<<<<< * self.has_classes_ignore = True * self.classes_ignore = new re2cy.RE2("^(?:" + "|".join(classes_ignore) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_classes_ignore); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 133, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":134 * classes_ignore = frozenset(options.get("classes_ignore") or []) * if len(classes_ignore) > 0: * self.has_classes_ignore = True # <<<<<<<<<<<<<< * self.classes_ignore = new re2cy.RE2("^(?:" + "|".join(classes_ignore) + ")$") * attributes_whitelist.add("class") */ __pyx_v_self->has_classes_ignore = 1; /* "gumbocy.pyx":135 * if len(classes_ignore) > 0: * self.has_classes_ignore = True * self.classes_ignore = new re2cy.RE2("^(?:" + "|".join(classes_ignore) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("class") * */ __pyx_t_1 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_classes_ignore); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 135, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = PyNumber_Add(__pyx_kp_s_, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 135, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyNumber_Add(__pyx_t_3, __pyx_kp_s__3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 135, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 135, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 135, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->classes_ignore = __pyx_t_7; /* "gumbocy.pyx":136 * self.has_classes_ignore = True * self.classes_ignore = new re2cy.RE2("^(?:" + "|".join(classes_ignore) + ")$") * attributes_whitelist.add("class") # <<<<<<<<<<<<<< * * ids_ignore = frozenset(options.get("ids_ignore") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_class); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 136, __pyx_L1_error) /* "gumbocy.pyx":133 * * classes_ignore = frozenset(options.get("classes_ignore") or []) * if len(classes_ignore) > 0: # <<<<<<<<<<<<<< * self.has_classes_ignore = True * self.classes_ignore = new re2cy.RE2("^(?:" + "|".join(classes_ignore) + ")$") */ } /* "gumbocy.pyx":138 * attributes_whitelist.add("class") * * ids_ignore = frozenset(options.get("ids_ignore") or []) # <<<<<<<<<<<<<< * if len(ids_ignore) > 0: * self.has_ids_ignore = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 138, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_ids_ignore, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 138, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 138, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L10_bool_binop_done; } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 138, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_L10_bool_binop_done:; __pyx_t_3 = __Pyx_PyFrozenSet_New(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 138, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); 
__pyx_t_1 = 0; __pyx_v_ids_ignore = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":139 * * ids_ignore = frozenset(options.get("ids_ignore") or []) * if len(ids_ignore) > 0: # <<<<<<<<<<<<<< * self.has_ids_ignore = True * self.ids_ignore = new re2cy.RE2("^(?:" + "|".join(ids_ignore) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_ids_ignore); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 139, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":140 * ids_ignore = frozenset(options.get("ids_ignore") or []) * if len(ids_ignore) > 0: * self.has_ids_ignore = True # <<<<<<<<<<<<<< * self.ids_ignore = new re2cy.RE2("^(?:" + "|".join(ids_ignore) + ")$") * attributes_whitelist.add("id") */ __pyx_v_self->has_ids_ignore = 1; /* "gumbocy.pyx":141 * if len(ids_ignore) > 0: * self.has_ids_ignore = True * self.ids_ignore = new re2cy.RE2("^(?:" + "|".join(ids_ignore) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("id") * */ __pyx_t_3 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_ids_ignore); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 141, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = PyNumber_Add(__pyx_kp_s_, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 141, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_kp_s__3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 141, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 141, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 141, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_self->ids_ignore = __pyx_t_7; /* "gumbocy.pyx":142 * self.has_ids_ignore = True * self.ids_ignore = new re2cy.RE2("^(?:" + "|".join(ids_ignore) + ")$") * attributes_whitelist.add("id") # <<<<<<<<<<<<<< * * classes_hidden = frozenset(options.get("classes_hidden") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_id); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 142, __pyx_L1_error) /* "gumbocy.pyx":139 * * ids_ignore = frozenset(options.get("ids_ignore") or []) * if len(ids_ignore) > 0: # <<<<<<<<<<<<<< * self.has_ids_ignore = True * self.ids_ignore = new re2cy.RE2("^(?:" + "|".join(ids_ignore) + ")$") */ } /* "gumbocy.pyx":144 * attributes_whitelist.add("id") * * classes_hidden = frozenset(options.get("classes_hidden") or []) # <<<<<<<<<<<<<< * if len(classes_hidden) > 0: * self.has_classes_hidden = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 144, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_classes_hidden, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 144, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 144, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L13_bool_binop_done; } __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 144, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_L13_bool_binop_done:; __pyx_t_1 = __Pyx_PyFrozenSet_New(__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 144, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; 
__pyx_v_classes_hidden = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":145 * * classes_hidden = frozenset(options.get("classes_hidden") or []) * if len(classes_hidden) > 0: # <<<<<<<<<<<<<< * self.has_classes_hidden = True * self.classes_hidden = new re2cy.RE2("^(?:" + "|".join(classes_hidden) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_classes_hidden); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 145, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":146 * classes_hidden = frozenset(options.get("classes_hidden") or []) * if len(classes_hidden) > 0: * self.has_classes_hidden = True # <<<<<<<<<<<<<< * self.classes_hidden = new re2cy.RE2("^(?:" + "|".join(classes_hidden) + ")$") * attributes_whitelist.add("class") */ __pyx_v_self->has_classes_hidden = 1; /* "gumbocy.pyx":147 * if len(classes_hidden) > 0: * self.has_classes_hidden = True * self.classes_hidden = new re2cy.RE2("^(?:" + "|".join(classes_hidden) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("class") * */ __pyx_t_1 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_classes_hidden); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 147, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = PyNumber_Add(__pyx_kp_s_, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 147, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyNumber_Add(__pyx_t_3, __pyx_kp_s__3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 147, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 147, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 147, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->classes_hidden = __pyx_t_7; /* "gumbocy.pyx":148 * self.has_classes_hidden = True * self.classes_hidden = new re2cy.RE2("^(?:" + "|".join(classes_hidden) + ")$") * attributes_whitelist.add("class") # <<<<<<<<<<<<<< * * ids_hidden = frozenset(options.get("ids_hidden") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_class); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 148, __pyx_L1_error) /* "gumbocy.pyx":145 * * classes_hidden = frozenset(options.get("classes_hidden") or []) * if len(classes_hidden) > 0: # <<<<<<<<<<<<<< * self.has_classes_hidden = True * self.classes_hidden = new re2cy.RE2("^(?:" + "|".join(classes_hidden) + ")$") */ } /* "gumbocy.pyx":150 * attributes_whitelist.add("class") * * ids_hidden = frozenset(options.get("ids_hidden") or []) # <<<<<<<<<<<<<< * if len(ids_hidden) > 0: * self.has_ids_hidden = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 150, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_ids_hidden, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 150, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 150, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L16_bool_binop_done; } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 150, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_L16_bool_binop_done:; __pyx_t_3 = __Pyx_PyFrozenSet_New(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 150, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); 
__pyx_t_1 = 0; __pyx_v_ids_hidden = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":151 * * ids_hidden = frozenset(options.get("ids_hidden") or []) * if len(ids_hidden) > 0: # <<<<<<<<<<<<<< * self.has_ids_hidden = True * self.ids_hidden = new re2cy.RE2("^(?:" + "|".join(ids_hidden) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_ids_hidden); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 151, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":152 * ids_hidden = frozenset(options.get("ids_hidden") or []) * if len(ids_hidden) > 0: * self.has_ids_hidden = True # <<<<<<<<<<<<<< * self.ids_hidden = new re2cy.RE2("^(?:" + "|".join(ids_hidden) + ")$") * attributes_whitelist.add("id") */ __pyx_v_self->has_ids_hidden = 1; /* "gumbocy.pyx":153 * if len(ids_hidden) > 0: * self.has_ids_hidden = True * self.ids_hidden = new re2cy.RE2("^(?:" + "|".join(ids_hidden) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("id") * */ __pyx_t_3 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_ids_hidden); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 153, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = PyNumber_Add(__pyx_kp_s_, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 153, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_kp_s__3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 153, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 153, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 153, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_self->ids_hidden = __pyx_t_7; /* "gumbocy.pyx":154 * self.has_ids_hidden = True * self.ids_hidden = new re2cy.RE2("^(?:" + "|".join(ids_hidden) + ")$") * attributes_whitelist.add("id") # <<<<<<<<<<<<<< * * classes_boilerplate = frozenset(options.get("classes_boilerplate") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_id); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 154, __pyx_L1_error) /* "gumbocy.pyx":151 * * ids_hidden = frozenset(options.get("ids_hidden") or []) * if len(ids_hidden) > 0: # <<<<<<<<<<<<<< * self.has_ids_hidden = True * self.ids_hidden = new re2cy.RE2("^(?:" + "|".join(ids_hidden) + ")$") */ } /* "gumbocy.pyx":156 * attributes_whitelist.add("id") * * classes_boilerplate = frozenset(options.get("classes_boilerplate") or []) # <<<<<<<<<<<<<< * if len(classes_boilerplate) > 0: * self.has_classes_boilerplate = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 156, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_classes_boilerplate, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 156, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L19_bool_binop_done; } __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_L19_bool_binop_done:; __pyx_t_1 = __Pyx_PyFrozenSet_New(__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_classes_boilerplate = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":157 * * classes_boilerplate = frozenset(options.get("classes_boilerplate") or []) * if len(classes_boilerplate) > 0: # <<<<<<<<<<<<<< * self.has_classes_boilerplate = True * self.classes_boilerplate = new re2cy.RE2("^(?:" + "|".join(classes_boilerplate) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_classes_boilerplate); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 157, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":158 * classes_boilerplate = frozenset(options.get("classes_boilerplate") or []) * if len(classes_boilerplate) > 0: * self.has_classes_boilerplate = True # <<<<<<<<<<<<<< * self.classes_boilerplate = new re2cy.RE2("^(?:" + "|".join(classes_boilerplate) + ")$") * attributes_whitelist.add("class") */ __pyx_v_self->has_classes_boilerplate = 1; /* "gumbocy.pyx":159 * if len(classes_boilerplate) > 0: * self.has_classes_boilerplate = True * self.classes_boilerplate = new re2cy.RE2("^(?:" + "|".join(classes_boilerplate) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("class") * */ __pyx_t_1 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_classes_boilerplate); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 159, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = PyNumber_Add(__pyx_kp_s_, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 159, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyNumber_Add(__pyx_t_3, __pyx_kp_s__3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 159, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 159, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 159, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->classes_boilerplate = __pyx_t_7; /* "gumbocy.pyx":160 * self.has_classes_boilerplate = True * self.classes_boilerplate = new re2cy.RE2("^(?:" + "|".join(classes_boilerplate) + ")$") * attributes_whitelist.add("class") # <<<<<<<<<<<<<< * * ids_boilerplate = frozenset(options.get("ids_boilerplate") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_class); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 160, __pyx_L1_error) /* "gumbocy.pyx":157 * * classes_boilerplate = frozenset(options.get("classes_boilerplate") or []) * if len(classes_boilerplate) > 0: # <<<<<<<<<<<<<< * self.has_classes_boilerplate = True * self.classes_boilerplate = new re2cy.RE2("^(?:" + "|".join(classes_boilerplate) + ")$") */ } /* "gumbocy.pyx":162 * attributes_whitelist.add("class") * * ids_boilerplate = frozenset(options.get("ids_boilerplate") or []) # <<<<<<<<<<<<<< * if len(ids_boilerplate) > 0: * self.has_ids_boilerplate = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 162, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_ids_boilerplate, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 162, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 162, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L22_bool_binop_done; } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 162, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_L22_bool_binop_done:; __pyx_t_3 = __Pyx_PyFrozenSet_New(__pyx_t_1); if (unlikely(!__pyx_t_3)) 
__PYX_ERR(0, 162, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_ids_boilerplate = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":163 * * ids_boilerplate = frozenset(options.get("ids_boilerplate") or []) * if len(ids_boilerplate) > 0: # <<<<<<<<<<<<<< * self.has_ids_boilerplate = True * self.ids_boilerplate = new re2cy.RE2("^(?:" + "|".join(ids_boilerplate) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_ids_boilerplate); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 163, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":164 * ids_boilerplate = frozenset(options.get("ids_boilerplate") or []) * if len(ids_boilerplate) > 0: * self.has_ids_boilerplate = True # <<<<<<<<<<<<<< * self.ids_boilerplate = new re2cy.RE2("^(?:" + "|".join(ids_boilerplate) + ")$") * attributes_whitelist.add("id") */ __pyx_v_self->has_ids_boilerplate = 1; /* "gumbocy.pyx":165 * if len(ids_boilerplate) > 0: * self.has_ids_boilerplate = True * self.ids_boilerplate = new re2cy.RE2("^(?:" + "|".join(ids_boilerplate) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("id") * */ __pyx_t_3 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_ids_boilerplate); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = PyNumber_Add(__pyx_kp_s_, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_kp_s__3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 165, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 165, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_self->ids_boilerplate = __pyx_t_7; /* "gumbocy.pyx":166 * self.has_ids_boilerplate = True * self.ids_boilerplate = new re2cy.RE2("^(?:" + "|".join(ids_boilerplate) + ")$") * attributes_whitelist.add("id") # <<<<<<<<<<<<<< * * roles_boilerplate = frozenset(options.get("roles_boilerplate") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_id); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 166, __pyx_L1_error) /* "gumbocy.pyx":163 * * ids_boilerplate = frozenset(options.get("ids_boilerplate") or []) * if len(ids_boilerplate) > 0: # <<<<<<<<<<<<<< * self.has_ids_boilerplate = True * self.ids_boilerplate = new re2cy.RE2("^(?:" + "|".join(ids_boilerplate) + ")$") */ } /* "gumbocy.pyx":168 * attributes_whitelist.add("id") * * roles_boilerplate = frozenset(options.get("roles_boilerplate") or []) # <<<<<<<<<<<<<< * if len(roles_boilerplate) > 0: * self.has_roles_boilerplate = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 168, __pyx_L1_error) } __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_roles_boilerplate, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 168, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L25_bool_binop_done; } __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_1); __pyx_t_3 = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_L25_bool_binop_done:; __pyx_t_1 = __Pyx_PyFrozenSet_New(__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_roles_boilerplate = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":169 * * roles_boilerplate = frozenset(options.get("roles_boilerplate") or []) * if len(roles_boilerplate) > 0: # <<<<<<<<<<<<<< * self.has_roles_boilerplate = True * self.roles_boilerplate = new re2cy.RE2("^(?:" + "|".join(roles_boilerplate) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_roles_boilerplate); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 169, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":170 * roles_boilerplate = frozenset(options.get("roles_boilerplate") or []) * if len(roles_boilerplate) > 0: * self.has_roles_boilerplate = True # <<<<<<<<<<<<<< * self.roles_boilerplate = new re2cy.RE2("^(?:" + "|".join(roles_boilerplate) + ")$") * attributes_whitelist.add("role") */ __pyx_v_self->has_roles_boilerplate = 1; /* "gumbocy.pyx":171 * if len(roles_boilerplate) > 0: * self.has_roles_boilerplate = True * self.roles_boilerplate = new re2cy.RE2("^(?:" + "|".join(roles_boilerplate) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("role") * */ __pyx_t_1 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_roles_boilerplate); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 171, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = PyNumber_Add(__pyx_kp_s_, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 171, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyNumber_Add(__pyx_t_3, __pyx_kp_s__3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 171, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 171, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 171, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_self->roles_boilerplate = __pyx_t_7; /* "gumbocy.pyx":172 * self.has_roles_boilerplate = True * self.roles_boilerplate = new re2cy.RE2("^(?:" + "|".join(roles_boilerplate) + ")$") * attributes_whitelist.add("role") # <<<<<<<<<<<<<< * * metas_whitelist = frozenset(options.get("metas_whitelist") or []) */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_role); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 172, __pyx_L1_error) /* "gumbocy.pyx":169 * * roles_boilerplate = frozenset(options.get("roles_boilerplate") or []) * if len(roles_boilerplate) > 0: # <<<<<<<<<<<<<< * self.has_roles_boilerplate = True * self.roles_boilerplate = new re2cy.RE2("^(?:" + "|".join(roles_boilerplate) + ")$") */ } /* "gumbocy.pyx":174 * attributes_whitelist.add("role") * * metas_whitelist = frozenset(options.get("metas_whitelist") or []) # <<<<<<<<<<<<<< * if len(metas_whitelist) > 0: * self.has_metas_whitelist = True */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 174, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_metas_whitelist, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 174, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 174, __pyx_L1_error) if (!__pyx_t_2) { __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L28_bool_binop_done; } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 174, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = __pyx_t_3; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_L28_bool_binop_done:; __pyx_t_3 = __Pyx_PyFrozenSet_New(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 174, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_metas_whitelist = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":175 * * metas_whitelist = frozenset(options.get("metas_whitelist") or []) * if len(metas_whitelist) > 0: # <<<<<<<<<<<<<< * self.has_metas_whitelist = True * self.metas_whitelist = new re2cy.RE2("^(?:" + "|".join(metas_whitelist) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_metas_whitelist); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 175, __pyx_L1_error) __pyx_t_2 = ((__pyx_t_5 > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":176 * metas_whitelist = frozenset(options.get("metas_whitelist") or []) * if len(metas_whitelist) > 0: * self.has_metas_whitelist = True # <<<<<<<<<<<<<< * self.metas_whitelist = new re2cy.RE2("^(?:" + "|".join(metas_whitelist) + ")$") * attributes_whitelist.add("name") */ __pyx_v_self->has_metas_whitelist = 1; /* "gumbocy.pyx":177 * if len(metas_whitelist) > 0: * self.has_metas_whitelist = True * self.metas_whitelist = new re2cy.RE2("^(?:" + "|".join(metas_whitelist) + ")$") # <<<<<<<<<<<<<< * attributes_whitelist.add("name") * attributes_whitelist.add("property") */ __pyx_t_3 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_metas_whitelist); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 177, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = PyNumber_Add(__pyx_kp_s_, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 177, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_kp_s__3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 177, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 177, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 177, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_self->metas_whitelist = __pyx_t_7; /* "gumbocy.pyx":178 * self.has_metas_whitelist = True * self.metas_whitelist = new re2cy.RE2("^(?:" + "|".join(metas_whitelist) + ")$") * attributes_whitelist.add("name") # <<<<<<<<<<<<<< * attributes_whitelist.add("property") * attributes_whitelist.add("content") */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_name); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 178, __pyx_L1_error) /* "gumbocy.pyx":179 * self.metas_whitelist = new re2cy.RE2("^(?:" + "|".join(metas_whitelist) + ")$") * attributes_whitelist.add("name") * attributes_whitelist.add("property") # <<<<<<<<<<<<<< * attributes_whitelist.add("content") * */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_property); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 179, __pyx_L1_error) /* "gumbocy.pyx":180 * attributes_whitelist.add("name") * attributes_whitelist.add("property") * attributes_whitelist.add("content") # <<<<<<<<<<<<<< * * # Some options add attributes to the whitelist */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_content); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 180, __pyx_L1_error) /* "gumbocy.pyx":175 * * metas_whitelist = frozenset(options.get("metas_whitelist") or []) * if len(metas_whitelist) > 0: # <<<<<<<<<<<<<< * self.has_metas_whitelist = True * self.metas_whitelist = new re2cy.RE2("^(?:" + "|".join(metas_whitelist) + ")$") */ } /* "gumbocy.pyx":183 * * # Some options add attributes to the whitelist * if self.analyze_external_hyperlinks or self.analyze_internal_hyperlinks: # <<<<<<<<<<<<<< * attributes_whitelist.add("href") * attributes_whitelist.add("rel") */ __pyx_t_9 = (__pyx_v_self->analyze_external_hyperlinks != 0); if (!__pyx_t_9) { } else { __pyx_t_2 = __pyx_t_9; goto __pyx_L32_bool_binop_done; } __pyx_t_9 = (__pyx_v_self->analyze_internal_hyperlinks != 0); __pyx_t_2 = __pyx_t_9; 
__pyx_L32_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":184 * # Some options add attributes to the whitelist * if self.analyze_external_hyperlinks or self.analyze_internal_hyperlinks: * attributes_whitelist.add("href") # <<<<<<<<<<<<<< * attributes_whitelist.add("rel") * */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_href); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 184, __pyx_L1_error) /* "gumbocy.pyx":185 * if self.analyze_external_hyperlinks or self.analyze_internal_hyperlinks: * attributes_whitelist.add("href") * attributes_whitelist.add("rel") # <<<<<<<<<<<<<< * * # FInally, freeze the attributes whitelist */ __pyx_t_8 = PySet_Add(__pyx_v_attributes_whitelist, __pyx_n_s_rel); if (unlikely(__pyx_t_8 == -1)) __PYX_ERR(0, 185, __pyx_L1_error) /* "gumbocy.pyx":183 * * # Some options add attributes to the whitelist * if self.analyze_external_hyperlinks or self.analyze_internal_hyperlinks: # <<<<<<<<<<<<<< * attributes_whitelist.add("href") * attributes_whitelist.add("rel") */ } /* "gumbocy.pyx":188 * * # FInally, freeze the attributes whitelist * self.has_attributes_whitelist = len(attributes_whitelist) > 0 # <<<<<<<<<<<<<< * if self.has_attributes_whitelist: * self.attributes_whitelist = new re2cy.RE2("^(?:" + "|".join(attributes_whitelist) + ")$") */ __pyx_t_5 = PySet_GET_SIZE(__pyx_v_attributes_whitelist); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 188, __pyx_L1_error) __pyx_v_self->has_attributes_whitelist = (__pyx_t_5 > 0); /* "gumbocy.pyx":189 * # FInally, freeze the attributes whitelist * self.has_attributes_whitelist = len(attributes_whitelist) > 0 * if self.has_attributes_whitelist: # <<<<<<<<<<<<<< * self.attributes_whitelist = new re2cy.RE2("^(?:" + "|".join(attributes_whitelist) + ")$") * */ __pyx_t_2 = (__pyx_v_self->has_attributes_whitelist != 0); if (__pyx_t_2) { /* "gumbocy.pyx":190 * self.has_attributes_whitelist = len(attributes_whitelist) > 0 * if self.has_attributes_whitelist: * self.attributes_whitelist = new 
re2cy.RE2("^(?:" + "|".join(attributes_whitelist) + ")$") # <<<<<<<<<<<<<< * * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_BODY) */ __pyx_t_3 = __Pyx_PyString_Join(__pyx_kp_s__2, __pyx_v_attributes_whitelist); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 190, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = PyNumber_Add(__pyx_kp_s_, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 190, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_kp_s__3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 190, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 190, __pyx_L1_error) try { __pyx_t_7 = new re2::RE2(__pyx_t_6); } catch(...) { __Pyx_CppExn2PyErr(); __PYX_ERR(0, 190, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_self->attributes_whitelist = __pyx_t_7; /* "gumbocy.pyx":189 * # FInally, freeze the attributes whitelist * self.has_attributes_whitelist = len(attributes_whitelist) > 0 * if self.has_attributes_whitelist: # <<<<<<<<<<<<<< * self.attributes_whitelist = new re2cy.RE2("^(?:" + "|".join(attributes_whitelist) + ")$") * */ } /* "gumbocy.pyx":192 * self.attributes_whitelist = new re2cy.RE2("^(?:" + "|".join(attributes_whitelist) + ")$") * * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_BODY) # <<<<<<<<<<<<<< * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_P) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_DIV) */ __pyx_v_self->tags_ignore_head_only.insert(GUMBO_TAG_BODY); /* "gumbocy.pyx":193 * * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_BODY) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_P) # <<<<<<<<<<<<<< * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_DIV) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_SPAN) */ __pyx_v_self->tags_ignore_head_only.insert(GUMBO_TAG_P); /* "gumbocy.pyx":194 
* self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_BODY) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_P) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_DIV) # <<<<<<<<<<<<<< * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_SPAN) * */ __pyx_v_self->tags_ignore_head_only.insert(GUMBO_TAG_DIV); /* "gumbocy.pyx":195 * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_P) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_DIV) * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_SPAN) # <<<<<<<<<<<<<< * * for tag_name in options.get("tags_ignore", []): */ __pyx_v_self->tags_ignore_head_only.insert(GUMBO_TAG_SPAN); /* "gumbocy.pyx":197 * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_SPAN) * * for tag_name in options.get("tags_ignore", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 197, __pyx_L1_error) } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_tags_ignore, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0; __pyx_t_10 = NULL; } else { __pyx_t_5 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 197, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON 
__pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_10(__pyx_t_3); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 197, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_tag_name, __pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":198 * * for tag_name in options.get("tags_ignore", []): * tag = gumbocy.gumbo_tag_enum(tag_name) # <<<<<<<<<<<<<< * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_ignore.insert( gumbocy.gumbo_tag_enum(tag_name)) */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 198, __pyx_L1_error) __pyx_v_tag = gumbo_tag_enum(__pyx_t_11); /* "gumbocy.pyx":199 * for tag_name in options.get("tags_ignore", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_ignore.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ __pyx_t_2 = ((__pyx_v_tag != GUMBO_TAG_UNKNOWN) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":200 * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_ignore.insert( gumbocy.gumbo_tag_enum(tag_name)) # <<<<<<<<<<<<<< * 
* for tag_name in options.get("tags_boilerplate", []): */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 200, __pyx_L1_error) __pyx_v_self->tags_ignore.insert(((int)gumbo_tag_enum(__pyx_t_11))); /* "gumbocy.pyx":199 * for tag_name in options.get("tags_ignore", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_ignore.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ } /* "gumbocy.pyx":197 * self.tags_ignore_head_only.insert(gumbocy.GUMBO_TAG_SPAN) * * for tag_name in options.get("tags_ignore", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":202 * self.tags_ignore.insert( gumbocy.gumbo_tag_enum(tag_name)) * * for tag_name in options.get("tags_boilerplate", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 202, __pyx_L1_error) } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 202, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_tags_boilerplate, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 202, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0; __pyx_t_10 = NULL; } else { __pyx_t_5 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 202, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 202, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if 
(likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 202, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 202, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 202, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 202, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_10(__pyx_t_3); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 202, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_tag_name, __pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":203 * * for tag_name in options.get("tags_boilerplate", []): * tag = gumbocy.gumbo_tag_enum(tag_name) # <<<<<<<<<<<<<< * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_boilerplate.insert( gumbocy.gumbo_tag_enum(tag_name)) */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 203, __pyx_L1_error) __pyx_v_tag = gumbo_tag_enum(__pyx_t_11); /* "gumbocy.pyx":204 * for tag_name in options.get("tags_boilerplate", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_boilerplate.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ __pyx_t_2 = ((__pyx_v_tag != GUMBO_TAG_UNKNOWN) != 0); if (__pyx_t_2) { /* 
"gumbocy.pyx":205 * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_boilerplate.insert( gumbocy.gumbo_tag_enum(tag_name)) # <<<<<<<<<<<<<< * * for tag_name in options.get("tags_boilerplate_bypass", []): */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 205, __pyx_L1_error) __pyx_v_self->tags_boilerplate.insert(((int)gumbo_tag_enum(__pyx_t_11))); /* "gumbocy.pyx":204 * for tag_name in options.get("tags_boilerplate", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_boilerplate.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ } /* "gumbocy.pyx":202 * self.tags_ignore.insert( gumbocy.gumbo_tag_enum(tag_name)) * * for tag_name in options.get("tags_boilerplate", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":207 * self.tags_boilerplate.insert( gumbocy.gumbo_tag_enum(tag_name)) * * for tag_name in options.get("tags_boilerplate_bypass", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 207, __pyx_L1_error) } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 207, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_tags_boilerplate_bypass, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 207, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0; __pyx_t_10 = NULL; } else { __pyx_t_5 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) 
__PYX_ERR(0, 207, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 207, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 207, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 207, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 207, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 207, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_10(__pyx_t_3); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 207, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_tag_name, __pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":208 * * for tag_name in options.get("tags_boilerplate_bypass", []): * tag = gumbocy.gumbo_tag_enum(tag_name) # <<<<<<<<<<<<<< * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_boilerplate_bypass.insert( gumbocy.gumbo_tag_enum(tag_name)) */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 208, __pyx_L1_error) __pyx_v_tag = gumbo_tag_enum(__pyx_t_11); /* "gumbocy.pyx":209 * for tag_name in options.get("tags_boilerplate_bypass", []): * tag 
= gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_boilerplate_bypass.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ __pyx_t_2 = ((__pyx_v_tag != GUMBO_TAG_UNKNOWN) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":210 * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_boilerplate_bypass.insert( gumbocy.gumbo_tag_enum(tag_name)) # <<<<<<<<<<<<<< * * for tag_name in options.get("tags_separators", []): */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 210, __pyx_L1_error) __pyx_v_self->tags_boilerplate_bypass.insert(((int)gumbo_tag_enum(__pyx_t_11))); /* "gumbocy.pyx":209 * for tag_name in options.get("tags_boilerplate_bypass", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_boilerplate_bypass.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ } /* "gumbocy.pyx":207 * self.tags_boilerplate.insert( gumbocy.gumbo_tag_enum(tag_name)) * * for tag_name in options.get("tags_boilerplate_bypass", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":212 * self.tags_boilerplate_bypass.insert( gumbocy.gumbo_tag_enum(tag_name)) * * for tag_name in options.get("tags_separators", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ if (unlikely(__pyx_v_options == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 212, __pyx_L1_error) } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 212, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_options, __pyx_n_s_tags_separators, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 212, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); 
__pyx_t_3 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0; __pyx_t_10 = NULL; } else { __pyx_t_5 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 212, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 212, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 212, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 212, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 212, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 212, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_10(__pyx_t_3); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 212, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_tag_name, __pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":213 * * for tag_name in options.get("tags_separators", []): * tag = gumbocy.gumbo_tag_enum(tag_name) # <<<<<<<<<<<<<< * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_separators.insert( gumbocy.gumbo_tag_enum(tag_name)) */ __pyx_t_11 = 
__Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 213, __pyx_L1_error) __pyx_v_tag = gumbo_tag_enum(__pyx_t_11); /* "gumbocy.pyx":214 * for tag_name in options.get("tags_separators", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_separators.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ __pyx_t_2 = ((__pyx_v_tag != GUMBO_TAG_UNKNOWN) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":215 * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: * self.tags_separators.insert( gumbocy.gumbo_tag_enum(tag_name)) # <<<<<<<<<<<<<< * * self.tags_separators.insert(gumbocy.GUMBO_TAG_BODY) */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_tag_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 215, __pyx_L1_error) __pyx_v_self->tags_separators.insert(((int)gumbo_tag_enum(__pyx_t_11))); /* "gumbocy.pyx":214 * for tag_name in options.get("tags_separators", []): * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: # <<<<<<<<<<<<<< * self.tags_separators.insert( gumbocy.gumbo_tag_enum(tag_name)) * */ } /* "gumbocy.pyx":212 * self.tags_boilerplate_bypass.insert( gumbocy.gumbo_tag_enum(tag_name)) * * for tag_name in options.get("tags_separators", []): # <<<<<<<<<<<<<< * tag = gumbocy.gumbo_tag_enum(tag_name) * if tag != gumbocy.GUMBO_TAG_UNKNOWN: */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":217 * self.tags_separators.insert( gumbocy.gumbo_tag_enum(tag_name)) * * self.tags_separators.insert(gumbocy.GUMBO_TAG_BODY) # <<<<<<<<<<<<<< * * cdef bint guess_node_hidden(self, gumbocy.GumboNode* node, Attributes attrs): */ __pyx_v_self->tags_separators.insert(GUMBO_TAG_BODY); /* "gumbocy.pyx":119 * cdef list nodes * * def __cinit__(self, dict options=None): # <<<<<<<<<<<<<< * * options = options or {} */ /* function exit code */ __pyx_r = 0; goto __pyx_L0; __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); 
__Pyx_XDECREF(__pyx_t_3); __Pyx_AddTraceback("gumbocy.HTMLParser.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = -1; __pyx_L0:; __Pyx_XDECREF(__pyx_v_attributes_whitelist); __Pyx_XDECREF(__pyx_v_classes_ignore); __Pyx_XDECREF(__pyx_v_ids_ignore); __Pyx_XDECREF(__pyx_v_classes_hidden); __Pyx_XDECREF(__pyx_v_ids_hidden); __Pyx_XDECREF(__pyx_v_classes_boilerplate); __Pyx_XDECREF(__pyx_v_ids_boilerplate); __Pyx_XDECREF(__pyx_v_roles_boilerplate); __Pyx_XDECREF(__pyx_v_metas_whitelist); __Pyx_XDECREF(__pyx_v_tag_name); __Pyx_XDECREF(__pyx_v_options); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":219 * self.tags_separators.insert(gumbocy.GUMBO_TAG_BODY) * * cdef bint guess_node_hidden(self, gumbocy.GumboNode* node, Attributes attrs): # <<<<<<<<<<<<<< * """ Rough guess to check if the element is explicitly hidden. * */ /* guess_node_hidden: returns 1 when attrs marks the element as hidden, else 0. Checks run in this order: (1) returns 0 at once if self->has_attributes_whitelist is unset; (2) attrs->has_hidden; (3) the ATTR_ID value searched against *self->ids_hidden when self->has_ids_hidden is set; (4) each entry of attrs->classes searched against *self->classes_hidden when self->has_classes_hidden is set and attrs->size_classes > 0; (5) the ATTR_STYLE value searched against the global style pattern. __pyx_v_node is never read (it is marked CYTHON_UNUSED). Every failure jumps to __pyx_L1_error, which calls __Pyx_WriteUnraisable and returns 0, so a caller cannot tell an error apart from "not hidden". */ static int __pyx_f_7gumbocy_10HTMLParser_guess_node_hidden(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, CYTHON_UNUSED GumboNode *__pyx_v_node, struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs) { PyObject *__pyx_v_k = NULL; int __pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; int __pyx_t_2; PyObject *__pyx_t_3 = NULL; PyObject *__pyx_t_4 = NULL; char const *__pyx_t_5; Py_ssize_t __pyx_t_6; __Pyx_RefNannySetupContext("guess_node_hidden", 0); /* "gumbocy.pyx":225 * """ * * if not self.has_attributes_whitelist: # <<<<<<<<<<<<<< * return False * */ __pyx_t_1 = ((!(__pyx_v_self->has_attributes_whitelist != 0)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":226 * * if not self.has_attributes_whitelist: * return False # <<<<<<<<<<<<<< * * # From the HTML5 spec */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":225 * """ * * if not self.has_attributes_whitelist: # <<<<<<<<<<<<<< * return False * */ } /* "gumbocy.pyx":229 * * # From the HTML5 spec * if attrs.has_hidden: # <<<<<<<<<<<<<< * return True * */ __pyx_t_1 = (__pyx_v_attrs->has_hidden != 0); if (__pyx_t_1) { /* "gumbocy.pyx":230 * # From the HTML5 spec * if 
attrs.has_hidden: * return True # <<<<<<<<<<<<<< * * if self.has_ids_hidden and attrs.values.get(ATTR_ID): */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":229 * * # From the HTML5 spec * if attrs.has_hidden: # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":232 * return True * * if self.has_ids_hidden and attrs.values.get(ATTR_ID): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ID], deref(self.ids_hidden)): * return True */ /* Short-circuit "and": when has_ids_hidden is unset, control jumps past the dict lookup with the condition already false. */ __pyx_t_2 = (__pyx_v_self->has_ids_hidden != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L6_bool_binop_done; } if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 232, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_1 = __pyx_t_2; __pyx_L6_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":233 * * if self.has_ids_hidden and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_hidden)): # <<<<<<<<<<<<<< * return True * */ /* Second lookup of the same ATTR_ID key, this time through __Pyx_PyDict_GetItem rather than the defaulting getter used for the truth test. */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 233, __pyx_L1_error) } __pyx_t_4 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 233, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 233, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; /* NOTE(review): __pyx_t_5 presumably points into storage owned by __pyx_t_3, which would explain why __pyx_t_3 is only released after the search below - confirm against __Pyx_PyObject_AsString. */ __pyx_t_5 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 233, __pyx_L1_error) __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_5, (*__pyx_v_self->ids_hidden)) != 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_1) { /* "gumbocy.pyx":234 * if self.has_ids_hidden and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_hidden)): * return True # <<<<<<<<<<<<<< * * if self.has_classes_hidden and attrs.size_classes > 0: */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":233 * * if self.has_ids_hidden and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_hidden)): # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":232 * return True * * if self.has_ids_hidden and attrs.values.get(ATTR_ID): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ID], deref(self.ids_hidden)): * return True */ } /* "gumbocy.pyx":236 * return True * * if self.has_classes_hidden and attrs.size_classes > 0: # <<<<<<<<<<<<<< * for k in attrs.classes: * if re2_search(k, deref(self.classes_hidden)): */ __pyx_t_2 = (__pyx_v_self->has_classes_hidden != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L10_bool_binop_done; } __pyx_t_2 = ((__pyx_v_attrs->size_classes > 0) != 0); __pyx_t_1 = __pyx_t_2; __pyx_L10_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":237 * * if self.has_classes_hidden and attrs.size_classes > 0: * for k in attrs.classes: # <<<<<<<<<<<<<< * if re2_search(k, deref(self.classes_hidden)): * return True */ if (unlikely(__pyx_v_attrs->classes == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); __PYX_ERR(0, 237, __pyx_L1_error) } /* Take a separate reference to the classes list for the duration of the index-based loop; it is dropped after the loop or on the early return. */ __pyx_t_3 = __pyx_v_attrs->classes; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; for (;;) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_4 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); 
__Pyx_INCREF(__pyx_t_4); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 237, __pyx_L1_error) #else __pyx_t_4 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 237, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif __Pyx_XDECREF_SET(__pyx_v_k, __pyx_t_4); __pyx_t_4 = 0; /* "gumbocy.pyx":238 * if self.has_classes_hidden and attrs.size_classes > 0: * for k in attrs.classes: * if re2_search(k, deref(self.classes_hidden)): # <<<<<<<<<<<<<< * return True * */ __pyx_t_5 = __Pyx_PyObject_AsString(__pyx_v_k); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 238, __pyx_L1_error) __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_5, (*__pyx_v_self->classes_hidden)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":239 * for k in attrs.classes: * if re2_search(k, deref(self.classes_hidden)): * return True # <<<<<<<<<<<<<< * * if attrs.values.get(ATTR_STYLE): */ /* Early return from inside the loop: the list reference is released here because __pyx_L0 only releases __pyx_v_k. */ __pyx_r = 1; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L0; /* "gumbocy.pyx":238 * if self.has_classes_hidden and attrs.size_classes > 0: * for k in attrs.classes: * if re2_search(k, deref(self.classes_hidden)): # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":237 * * if self.has_classes_hidden and attrs.size_classes > 0: * for k in attrs.classes: # <<<<<<<<<<<<<< * if re2_search(k, deref(self.classes_hidden)): * return True */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":236 * return True * * if self.has_classes_hidden and attrs.size_classes > 0: # <<<<<<<<<<<<<< * for k in attrs.classes: * if re2_search(k, deref(self.classes_hidden)): */ } /* "gumbocy.pyx":241 * return True * * if attrs.values.get(ATTR_STYLE): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_STYLE], deref(_RE2_SEARCH_STYLE_HIDDEN)): * return True */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 241, __pyx_L1_error) } __pyx_t_3 = 
__Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_STYLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 241, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 241, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 241, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_1) { /* "gumbocy.pyx":242 * * if attrs.values.get(ATTR_STYLE): * if re2_search(attrs.values[ATTR_STYLE], deref(_RE2_SEARCH_STYLE_HIDDEN)): # <<<<<<<<<<<<<< * return True * */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 242, __pyx_L1_error) } __pyx_t_4 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_STYLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 242, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 242, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_5 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 242, __pyx_L1_error) /* Unlike the id and class checks, the pattern here is a name from outside this function rather than a field read through __pyx_v_self. */ __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_5, (*__pyx_v_7gumbocy__RE2_SEARCH_STYLE_HIDDEN)) != 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_1) { /* "gumbocy.pyx":243 * if attrs.values.get(ATTR_STYLE): * if re2_search(attrs.values[ATTR_STYLE], deref(_RE2_SEARCH_STYLE_HIDDEN)): * return True # <<<<<<<<<<<<<< * * return False */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":242 * * if attrs.values.get(ATTR_STYLE): * if re2_search(attrs.values[ATTR_STYLE], deref(_RE2_SEARCH_STYLE_HIDDEN)): # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":241 * return True * * if attrs.values.get(ATTR_STYLE): # 
<<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_STYLE], deref(_RE2_SEARCH_STYLE_HIDDEN)): * return True */ } /* "gumbocy.pyx":245 * return True * * return False # <<<<<<<<<<<<<< * * */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":219 * self.tags_separators.insert(gumbocy.GUMBO_TAG_BODY) * * cdef bint guess_node_hidden(self, gumbocy.GumboNode* node, Attributes attrs): # <<<<<<<<<<<<<< * """ Rough guess to check if the element is explicitly hidden. * */ /* function exit code */ /* Error path: release both object temporaries, hand the failure to __Pyx_WriteUnraisable, then fall through to __pyx_L0 with a result of 0. */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_4); __Pyx_WriteUnraisable("gumbocy.HTMLParser.guess_node_hidden", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0); __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF(__pyx_v_k); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":248 * * * cdef bint guess_node_boilerplate(self, gumbocy.GumboNode* node, Attributes attrs): # <<<<<<<<<<<<<< * """ Rough guess to check if the element is boilerplate """ * */ /* guess_node_boilerplate: returns 1 when the element is judged boilerplate, else 0. Checks run in this order: (1) the node tag is a member of self->tags_boilerplate; (2) the tag is GUMBO_TAG_ASIDE and "article" is not in self->current_stack; (3) any entry of attrs->classes matches *self->classes_boilerplate, when self->has_classes_boilerplate is set and attrs->size_classes > 0; (4) the ATTR_ID value matches *self->ids_boilerplate, when self->has_ids_boilerplate is set; (5) the ATTR_ROLE value matches *self->roles_boilerplate, when self->has_roles_boilerplate is set. Failures jump to __pyx_L1_error, which calls __Pyx_WriteUnraisable and returns 0. */ static int __pyx_f_7gumbocy_10HTMLParser_guess_node_boilerplate(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, GumboNode *__pyx_v_node, struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs) { PyObject *__pyx_v_k = NULL; int __pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; int __pyx_t_2; PyObject *__pyx_t_3 = NULL; Py_ssize_t __pyx_t_4; PyObject *__pyx_t_5 = NULL; char const *__pyx_t_6; __Pyx_RefNannySetupContext("guess_node_boilerplate", 0); /* "gumbocy.pyx":251 * """ Rough guess to check if the element is boilerplate """ * * if self.tags_boilerplate.count( node.v.element.tag): # <<<<<<<<<<<<<< * return True * */ __pyx_t_1 = (__pyx_v_self->tags_boilerplate.count(((int)__pyx_v_node->v.element.tag)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":252 * * if self.tags_boilerplate.count( node.v.element.tag): * return True # <<<<<<<<<<<<<< * * # http://html5doctor.com/understanding-aside/ */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":251 * """ Rough guess to check if the element is boilerplate """ * * if 
self.tags_boilerplate.count( node.v.element.tag): # <<<<<<<<<<<<<< * return True * */ } /* Remaining checks of guess_node_boilerplate, each returning 1 on a hit: a GUMBO_TAG_ASIDE element while "article" is absent from self->current_stack; a class matching *self->classes_boilerplate; the ATTR_ID value matching *self->ids_boilerplate; the ATTR_ROLE value matching *self->roles_boilerplate. If none hit, the result is 0. */ /* "gumbocy.pyx":255 * * # http://html5doctor.com/understanding-aside/ * if node.v.element.tag == gumbocy.GUMBO_TAG_ASIDE: # <<<<<<<<<<<<<< * if "article" not in self.current_stack: * return True */ __pyx_t_1 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_ASIDE) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":256 * # http://html5doctor.com/understanding-aside/ * if node.v.element.tag == gumbocy.GUMBO_TAG_ASIDE: * if "article" not in self.current_stack: # <<<<<<<<<<<<<< * return True * */ /* A negative result is treated as an error; Py_NE corresponds to the "not in" form of the quoted source line, so a positive result means "article" was not found. */ __pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_n_s_article, __pyx_v_self->current_stack, Py_NE)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 256, __pyx_L1_error) __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { /* "gumbocy.pyx":257 * if node.v.element.tag == gumbocy.GUMBO_TAG_ASIDE: * if "article" not in self.current_stack: * return True # <<<<<<<<<<<<<< * * if self.has_classes_boilerplate and attrs.size_classes > 0: */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":256 * # http://html5doctor.com/understanding-aside/ * if node.v.element.tag == gumbocy.GUMBO_TAG_ASIDE: * if "article" not in self.current_stack: # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":255 * * # http://html5doctor.com/understanding-aside/ * if node.v.element.tag == gumbocy.GUMBO_TAG_ASIDE: # <<<<<<<<<<<<<< * if "article" not in self.current_stack: * return True */ } /* "gumbocy.pyx":259 * return True * * if self.has_classes_boilerplate and attrs.size_classes > 0: # <<<<<<<<<<<<<< * for k in attrs.classes: * if re2_search(k, deref(self.classes_boilerplate)): */ __pyx_t_1 = (__pyx_v_self->has_classes_boilerplate != 0); if (__pyx_t_1) { } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L7_bool_binop_done; } __pyx_t_1 = ((__pyx_v_attrs->size_classes > 0) != 0); __pyx_t_2 = __pyx_t_1; __pyx_L7_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":260 * * if self.has_classes_boilerplate and attrs.size_classes > 0: * for k in attrs.classes: # 
<<<<<<<<<<<<<< * if re2_search(k, deref(self.classes_boilerplate)): * return True */ if (unlikely(__pyx_v_attrs->classes == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); __PYX_ERR(0, 260, __pyx_L1_error) } /* Take a separate reference to the classes list for the duration of the index-based loop; it is dropped after the loop or on the early return. */ __pyx_t_3 = __pyx_v_attrs->classes; __Pyx_INCREF(__pyx_t_3); __pyx_t_4 = 0; for (;;) { if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_5 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_4); __Pyx_INCREF(__pyx_t_5); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 260, __pyx_L1_error) #else __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 260, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif __Pyx_XDECREF_SET(__pyx_v_k, __pyx_t_5); __pyx_t_5 = 0; /* "gumbocy.pyx":261 * if self.has_classes_boilerplate and attrs.size_classes > 0: * for k in attrs.classes: * if re2_search(k, deref(self.classes_boilerplate)): # <<<<<<<<<<<<<< * return True * */ __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_v_k); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 261, __pyx_L1_error) __pyx_t_2 = (__pyx_f_7gumbocy_re2_search(__pyx_t_6, (*__pyx_v_self->classes_boilerplate)) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":262 * for k in attrs.classes: * if re2_search(k, deref(self.classes_boilerplate)): * return True # <<<<<<<<<<<<<< * * if self.has_ids_boilerplate and attrs.values.get(ATTR_ID): */ /* Early return from inside the loop: the list reference is released here because __pyx_L0 only releases __pyx_v_k. */ __pyx_r = 1; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L0; /* "gumbocy.pyx":261 * if self.has_classes_boilerplate and attrs.size_classes > 0: * for k in attrs.classes: * if re2_search(k, deref(self.classes_boilerplate)): # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":260 * * if self.has_classes_boilerplate and attrs.size_classes > 0: * for k in attrs.classes: # <<<<<<<<<<<<<< * if re2_search(k, deref(self.classes_boilerplate)): * return True */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":259 * return True * * if self.has_classes_boilerplate and 
attrs.size_classes > 0: # <<<<<<<<<<<<<< * for k in attrs.classes: * if re2_search(k, deref(self.classes_boilerplate)): */ } /* "gumbocy.pyx":264 * return True * * if self.has_ids_boilerplate and attrs.values.get(ATTR_ID): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ID], deref(self.ids_boilerplate)): * return True */ /* Short-circuit "and": when has_ids_boilerplate is unset, control jumps past the dict lookup with the condition already false. */ __pyx_t_1 = (__pyx_v_self->has_ids_boilerplate != 0); if (__pyx_t_1) { } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L13_bool_binop_done; } if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 264, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 264, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 264, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 264, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_2 = __pyx_t_1; __pyx_L13_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":265 * * if self.has_ids_boilerplate and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_boilerplate)): # <<<<<<<<<<<<<< * return True * */ /* Second lookup of the same ATTR_ID key, this time through __Pyx_PyDict_GetItem rather than the defaulting getter used for the truth test. */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 265, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 265, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 265, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); 
if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 265, __pyx_L1_error) __pyx_t_2 = (__pyx_f_7gumbocy_re2_search(__pyx_t_6, (*__pyx_v_self->ids_boilerplate)) != 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":266 * if self.has_ids_boilerplate and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_boilerplate)): * return True # <<<<<<<<<<<<<< * * if self.has_roles_boilerplate and attrs.values.get(ATTR_ROLE): */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":265 * * if self.has_ids_boilerplate and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_boilerplate)): # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":264 * return True * * if self.has_ids_boilerplate and attrs.values.get(ATTR_ID): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ID], deref(self.ids_boilerplate)): * return True */ } /* "gumbocy.pyx":268 * return True * * if self.has_roles_boilerplate and attrs.values.get(ATTR_ROLE): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ROLE], deref(self.roles_boilerplate)): * return True */ /* Same short-circuit shape as the id check above, keyed on ATTR_ROLE and guarded by has_roles_boilerplate. */ __pyx_t_1 = (__pyx_v_self->has_roles_boilerplate != 0); if (__pyx_t_1) { } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L17_bool_binop_done; } if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 268, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ROLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 268, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 268, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 268, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_2 = __pyx_t_1; 
__pyx_L17_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":269 * * if self.has_roles_boilerplate and attrs.values.get(ATTR_ROLE): * if re2_search(attrs.values[ATTR_ROLE], deref(self.roles_boilerplate)): # <<<<<<<<<<<<<< * return True * */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 269, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ROLE); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 269, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 269, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* NOTE(review): __pyx_t_6 presumably points into storage owned by __pyx_t_3, which would explain why __pyx_t_3 is only released after the search below - confirm against __Pyx_PyObject_AsString. */ __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 269, __pyx_L1_error) __pyx_t_2 = (__pyx_f_7gumbocy_re2_search(__pyx_t_6, (*__pyx_v_self->roles_boilerplate)) != 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":270 * if self.has_roles_boilerplate and attrs.values.get(ATTR_ROLE): * if re2_search(attrs.values[ATTR_ROLE], deref(self.roles_boilerplate)): * return True # <<<<<<<<<<<<<< * * return False */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":269 * * if self.has_roles_boilerplate and attrs.values.get(ATTR_ROLE): * if re2_search(attrs.values[ATTR_ROLE], deref(self.roles_boilerplate)): # <<<<<<<<<<<<<< * return True * */ } /* "gumbocy.pyx":268 * return True * * if self.has_roles_boilerplate and attrs.values.get(ATTR_ROLE): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ROLE], deref(self.roles_boilerplate)): * return True */ } /* "gumbocy.pyx":272 * return True * * return False # <<<<<<<<<<<<<< * * cdef Attributes get_attributes(self, gumbocy.GumboNode* node): */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":248 * * * cdef bint guess_node_boilerplate(self, gumbocy.GumboNode* node, Attributes attrs): # <<<<<<<<<<<<<< * 
""" Rough guess to check if the element is boilerplate """ * */ /* function exit code */ /* Error path: release both object temporaries, hand the failure to __Pyx_WriteUnraisable, then fall through to __pyx_L0 with a result of 0, so an error looks the same as "not boilerplate" to the caller. */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_5); __Pyx_WriteUnraisable("gumbocy.HTMLParser.guess_node_boilerplate", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0); __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF(__pyx_v_k); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":274 * return False * * cdef Attributes get_attributes(self, gumbocy.GumboNode* node): # <<<<<<<<<<<<<< * """ Build a dict with all the whitelisted attributes """ * */ static struct __pyx_obj_7gumbocy_Attributes *__pyx_f_7gumbocy_10HTMLParser_get_attributes(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, GumboNode *__pyx_v_node) { struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs = NULL; unsigned int __pyx_v_i; GumboAttribute *__pyx_v_attr; PyObject *__pyx_v_multiple_value = NULL; PyObject *__pyx_v_pystr = NULL; struct __pyx_obj_7gumbocy_Attributes *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; unsigned int __pyx_t_2; unsigned int __pyx_t_3; int __pyx_t_4; PyObject *__pyx_t_5 = NULL; PyObject *__pyx_t_6 = NULL; PyObject *__pyx_t_7 = NULL; PyObject *__pyx_t_8 = NULL; PyObject *__pyx_t_9 = NULL; Py_ssize_t __pyx_t_10; int __pyx_t_11; __Pyx_RefNannySetupContext("get_attributes", 0); /* "gumbocy.pyx":277 * """ Build a dict with all the whitelisted attributes """ * * attrs = Attributes() # <<<<<<<<<<<<<< * # cdef Attributes attrs * attrs.size_classes = 0 */ __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_ptype_7gumbocy_Attributes), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 277, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_attrs = ((struct __pyx_obj_7gumbocy_Attributes *)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":279 * attrs = Attributes() * # cdef Attributes attrs * attrs.size_classes = 0 # <<<<<<<<<<<<<< * attrs.has_hidden = 0 * # attrs.values = [""] * 10 */ __pyx_v_attrs->size_classes = 0; /* "gumbocy.pyx":280 * # cdef 
Attributes attrs * attrs.size_classes = 0 * attrs.has_hidden = 0 # <<<<<<<<<<<<<< * # attrs.values = [""] * 10 * # attrs.classes = [] */ __pyx_v_attrs->has_hidden = 0; /* "gumbocy.pyx":283 * # attrs.values = [""] * 10 * # attrs.classes = [] * attrs.values = {} # deref(new map[AttributeNames, const char*]()) # <<<<<<<<<<<<<< * # attrs.values[ATTR_ID] = "x" * # print dict(attrs.values) */ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 283, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_attrs->values); __Pyx_DECREF(__pyx_v_attrs->values); __pyx_v_attrs->values = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":287 * # print dict(attrs.values) * * for i in range(node.v.element.attributes.length): # <<<<<<<<<<<<<< * * attr = node.v.element.attributes.data[i] */ __pyx_t_2 = __pyx_v_node->v.element.attributes.length; for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; /* "gumbocy.pyx":289 * for i in range(node.v.element.attributes.length): * * attr = node.v.element.attributes.data[i] # <<<<<<<<<<<<<< * * if re2_search(attr.name, deref(self.attributes_whitelist)): */ __pyx_v_attr = ((GumboAttribute *)(__pyx_v_node->v.element.attributes.data[__pyx_v_i])); /* "gumbocy.pyx":291 * attr = node.v.element.attributes.data[i] * * if re2_search(attr.name, deref(self.attributes_whitelist)): # <<<<<<<<<<<<<< * * if attr.name == b"class": */ __pyx_t_4 = (__pyx_f_7gumbocy_re2_search(__pyx_v_attr->name, (*__pyx_v_self->attributes_whitelist)) != 0); if (__pyx_t_4) { /* "gumbocy.pyx":293 * if re2_search(attr.name, deref(self.attributes_whitelist)): * * if attr.name == b"class": # <<<<<<<<<<<<<< * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * attrs.size_classes = len(multiple_value) */ __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = 
(__Pyx_PyBytes_Equals(__pyx_t_1, __pyx_n_b_class, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":294 * * if attr.name == b"class": * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) # <<<<<<<<<<<<<< * attrs.size_classes = len(multiple_value) * if attrs.size_classes > 0: */ __pyx_t_5 = __Pyx_GetModuleGlobalName(__pyx_n_s_RE_SPLIT_WHITESPACE); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_split); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_8 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_strip); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_t_8 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_9))) { __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_9); if (likely(__pyx_t_8)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_9); __Pyx_INCREF(__pyx_t_8); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_9, function); } } if (__pyx_t_8) { __pyx_t_7 = __Pyx_PyObject_CallOneArg(__pyx_t_9, __pyx_t_8); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; } else { __pyx_t_7 = __Pyx_PyObject_CallNoArg(__pyx_t_9); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 294, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_lower); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = NULL; if (CYTHON_COMPILING_IN_CPYTHON 
&& likely(PyMethod_Check(__pyx_t_9))) { __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_9); if (likely(__pyx_t_7)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_9); __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_9, function); } } if (__pyx_t_7) { __pyx_t_5 = __Pyx_PyObject_CallOneArg(__pyx_t_9, __pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { __pyx_t_5 = __Pyx_PyObject_CallNoArg(__pyx_t_9); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 294, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_t_9 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_6))) { __pyx_t_9 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_9)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_9); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_6, function); } } if (!__pyx_t_9) { __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_1); } else { __pyx_t_7 = PyTuple_New(1+1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GIVEREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_9); __pyx_t_9 = NULL; __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_7, 0+1, __pyx_t_5); __pyx_t_5 = 0; __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = __Pyx_PyFrozenSet_New(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 294, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_multiple_value, ((PyObject*)__pyx_t_6)); __pyx_t_6 = 0; /* "gumbocy.pyx":295 * if attr.name == b"class": * multiple_value = 
frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * attrs.size_classes = len(multiple_value) # <<<<<<<<<<<<<< * if attrs.size_classes > 0: * attrs.classes = list(multiple_value) */ __pyx_t_10 = PySet_GET_SIZE(__pyx_v_multiple_value); if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 295, __pyx_L1_error) __pyx_v_attrs->size_classes = __pyx_t_10; /* "gumbocy.pyx":296 * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * attrs.size_classes = len(multiple_value) * if attrs.size_classes > 0: # <<<<<<<<<<<<<< * attrs.classes = list(multiple_value) * # for k in multiple_value: */ __pyx_t_4 = ((__pyx_v_attrs->size_classes > 0) != 0); if (__pyx_t_4) { /* "gumbocy.pyx":297 * attrs.size_classes = len(multiple_value) * if attrs.size_classes > 0: * attrs.classes = list(multiple_value) # <<<<<<<<<<<<<< * # for k in multiple_value: * # ck = k */ __pyx_t_6 = PySequence_List(__pyx_v_multiple_value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_GIVEREF(__pyx_t_6); __Pyx_GOTREF(__pyx_v_attrs->classes); __Pyx_DECREF(__pyx_v_attrs->classes); __pyx_v_attrs->classes = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":296 * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * attrs.size_classes = len(multiple_value) * if attrs.size_classes > 0: # <<<<<<<<<<<<<< * attrs.classes = list(multiple_value) * # for k in multiple_value: */ } /* "gumbocy.pyx":293 * if re2_search(attr.name, deref(self.attributes_whitelist)): * * if attr.name == b"class": # <<<<<<<<<<<<<< * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * attrs.size_classes = len(multiple_value) */ goto __pyx_L6; } /* "gumbocy.pyx":302 * # attrs.classes.push_back(ck) # = list(multiple_value) * * elif attr.name == b"id": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_ID] = pystr */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if 
(unlikely(!__pyx_t_6)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_id, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":303 * * elif attr.name == b"id": * pystr = str(attr.value).lower() # <<<<<<<<<<<<<< * attrs.values[ATTR_ID] = pystr * */ __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_lower); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_7); if (likely(__pyx_t_1)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_7, function); } } if (__pyx_t_1) { __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __pyx_t_6 = __Pyx_PyObject_CallNoArg(__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 303, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF_SET(__pyx_v_pystr, __pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":304 * elif attr.name == b"id": * pystr = str(attr.value).lower() * attrs.values[ATTR_ID] = pystr # 
<<<<<<<<<<<<<< * * elif attr.name == b"style": */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 304, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 304, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_6, __pyx_v_pystr) < 0)) __PYX_ERR(0, 304, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":302 * # attrs.classes.push_back(ck) # = list(multiple_value) * * elif attr.name == b"id": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_ID] = pystr */ goto __pyx_L6; } /* "gumbocy.pyx":306 * attrs.values[ATTR_ID] = pystr * * elif attr.name == b"style": # <<<<<<<<<<<<<< * attrs.values[ATTR_STYLE] = attr.value * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 306, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_style, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 306, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":307 * * elif attr.name == b"style": * attrs.values[ATTR_STYLE] = attr.value # <<<<<<<<<<<<<< * * elif attr.name == b"href": */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 307, __pyx_L1_error) } __pyx_t_7 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_STYLE); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_7, __pyx_t_6) < 0)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); 
__pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":306 * attrs.values[ATTR_ID] = pystr * * elif attr.name == b"style": # <<<<<<<<<<<<<< * attrs.values[ATTR_STYLE] = attr.value * */ goto __pyx_L6; } /* "gumbocy.pyx":309 * attrs.values[ATTR_STYLE] = attr.value * * elif attr.name == b"href": # <<<<<<<<<<<<<< * attrs.values[ATTR_HREF] = attr.value * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_href, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":310 * * elif attr.name == b"href": * attrs.values[ATTR_HREF] = attr.value # <<<<<<<<<<<<<< * * elif attr.name == b"role": */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 310, __pyx_L1_error) } __pyx_t_7 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_7, __pyx_t_6) < 0)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":309 * attrs.values[ATTR_STYLE] = attr.value * * elif attr.name == b"href": # <<<<<<<<<<<<<< * attrs.values[ATTR_HREF] = attr.value * */ goto __pyx_L6; } /* "gumbocy.pyx":312 * attrs.values[ATTR_HREF] = attr.value * * elif attr.name == b"role": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_ROLE] = pystr */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 312, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_role, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":313 * * elif attr.name == b"role": * pystr = str(attr.value).lower() # <<<<<<<<<<<<<< * attrs.values[ATTR_ROLE] = pystr * */ __pyx_t_7 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_lower); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_1); if (likely(__pyx_t_7)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_1, function); } } if (__pyx_t_7) { __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { __pyx_t_6 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 313, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_pystr, __pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":314 * elif attr.name == b"role": * pystr = str(attr.value).lower() * attrs.values[ATTR_ROLE] = pystr # <<<<<<<<<<<<<< * * elif attr.name == b"rel": */ if 
(unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 314, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ROLE); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 314, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_6, __pyx_v_pystr) < 0)) __PYX_ERR(0, 314, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":312 * attrs.values[ATTR_HREF] = attr.value * * elif attr.name == b"role": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_ROLE] = pystr */ goto __pyx_L6; } /* "gumbocy.pyx":316 * attrs.values[ATTR_ROLE] = pystr * * elif attr.name == b"rel": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_REL] = pystr */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_rel, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":317 * * elif attr.name == b"rel": * pystr = str(attr.value).lower() # <<<<<<<<<<<<<< * attrs.values[ATTR_REL] = pystr * */ __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_lower); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 317, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_7); if (likely(__pyx_t_1)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_7, function); } } if (__pyx_t_1) { __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __pyx_t_6 = __Pyx_PyObject_CallNoArg(__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 317, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF_SET(__pyx_v_pystr, __pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":318 * elif attr.name == b"rel": * pystr = str(attr.value).lower() * attrs.values[ATTR_REL] = pystr # <<<<<<<<<<<<<< * * elif attr.name == b"aria-hidden" and attr.value == b"true": */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 318, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_REL); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_6, __pyx_v_pystr) < 0)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":316 * attrs.values[ATTR_ROLE] = pystr * * elif attr.name == b"rel": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_REL] = pystr */ goto __pyx_L6; } /* "gumbocy.pyx":320 * attrs.values[ATTR_REL] = pystr * * elif attr.name == b"aria-hidden" and attr.value == b"true": # <<<<<<<<<<<<<< * attrs.has_hidden = 1 * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); 
__pyx_t_11 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_kp_b_aria_hidden, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_11) { } else { __pyx_t_4 = __pyx_t_11; goto __pyx_L8_bool_binop_done; } __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_11 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_true, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_4 = __pyx_t_11; __pyx_L8_bool_binop_done:; if (__pyx_t_4) { /* "gumbocy.pyx":321 * * elif attr.name == b"aria-hidden" and attr.value == b"true": * attrs.has_hidden = 1 # <<<<<<<<<<<<<< * * elif attr.name == b"hidden": */ __pyx_v_attrs->has_hidden = 1; /* "gumbocy.pyx":320 * attrs.values[ATTR_REL] = pystr * * elif attr.name == b"aria-hidden" and attr.value == b"true": # <<<<<<<<<<<<<< * attrs.has_hidden = 1 * */ goto __pyx_L6; } /* "gumbocy.pyx":323 * attrs.has_hidden = 1 * * elif attr.name == b"hidden": # <<<<<<<<<<<<<< * attrs.has_hidden = 1 * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 323, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_hidden, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 323, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":324 * * elif attr.name == b"hidden": * attrs.has_hidden = 1 # <<<<<<<<<<<<<< * * elif attr.name == b"alt": */ __pyx_v_attrs->has_hidden = 1; /* "gumbocy.pyx":323 * attrs.has_hidden = 1 * * elif attr.name == b"hidden": # <<<<<<<<<<<<<< * attrs.has_hidden = 1 * */ goto __pyx_L6; } /* "gumbocy.pyx":326 * attrs.has_hidden = 1 * * elif attr.name == b"alt": # <<<<<<<<<<<<<< * attrs.values[ATTR_ALT] = attr.value * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) 
__PYX_ERR(0, 326, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_alt, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 326, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":327 * * elif attr.name == b"alt": * attrs.values[ATTR_ALT] = attr.value # <<<<<<<<<<<<<< * * elif attr.name == b"src": */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 327, __pyx_L1_error) } __pyx_t_7 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ALT); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_7, __pyx_t_6) < 0)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":326 * attrs.has_hidden = 1 * * elif attr.name == b"alt": # <<<<<<<<<<<<<< * attrs.values[ATTR_ALT] = attr.value * */ goto __pyx_L6; } /* "gumbocy.pyx":329 * attrs.values[ATTR_ALT] = attr.value * * elif attr.name == b"src": # <<<<<<<<<<<<<< * attrs.values[ATTR_SRC] = attr.value * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_src, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":330 * * elif attr.name == b"src": * attrs.values[ATTR_SRC] = attr.value # <<<<<<<<<<<<<< * * elif attr.name == b"name": */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if 
(unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 330, __pyx_L1_error) } __pyx_t_7 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_SRC); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_7, __pyx_t_6) < 0)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":329 * attrs.values[ATTR_ALT] = attr.value * * elif attr.name == b"src": # <<<<<<<<<<<<<< * attrs.values[ATTR_SRC] = attr.value * */ goto __pyx_L6; } /* "gumbocy.pyx":332 * attrs.values[ATTR_SRC] = attr.value * * elif attr.name == b"name": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_NAME] = pystr */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 332, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_name, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 332, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":333 * * elif attr.name == b"name": * pystr = str(attr.value).lower() # <<<<<<<<<<<<<< * attrs.values[ATTR_NAME] = pystr * */ __pyx_t_7 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_lower); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 
333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_1); if (likely(__pyx_t_7)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_1, function); } } if (__pyx_t_7) { __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { __pyx_t_6 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 333, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_pystr, __pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":334 * elif attr.name == b"name": * pystr = str(attr.value).lower() * attrs.values[ATTR_NAME] = pystr # <<<<<<<<<<<<<< * * elif attr.name == b"property": */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 334, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_NAME); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_6, __pyx_v_pystr) < 0)) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":332 * attrs.values[ATTR_SRC] = attr.value * * elif attr.name == b"name": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_NAME] = pystr */ goto __pyx_L6; } /* "gumbocy.pyx":336 * attrs.values[ATTR_NAME] = pystr * * elif attr.name == b"property": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_PROPERTY] = pystr */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); 
__pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_property, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":337 * * elif attr.name == b"property": * pystr = str(attr.value).lower() # <<<<<<<<<<<<<< * attrs.values[ATTR_PROPERTY] = pystr * */ __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_lower); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_7); if (likely(__pyx_t_1)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_7, function); } } if (__pyx_t_1) { __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { __pyx_t_6 = __Pyx_PyObject_CallNoArg(__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 337, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF_SET(__pyx_v_pystr, __pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":338 * elif attr.name == b"property": * pystr = str(attr.value).lower() * attrs.values[ATTR_PROPERTY] = pystr # <<<<<<<<<<<<<< * * elif attr.name == b"content": */ if 
(unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 338, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_PROPERTY); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 338, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_6, __pyx_v_pystr) < 0)) __PYX_ERR(0, 338, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":336 * attrs.values[ATTR_NAME] = pystr * * elif attr.name == b"property": # <<<<<<<<<<<<<< * pystr = str(attr.value).lower() * attrs.values[ATTR_PROPERTY] = pystr */ goto __pyx_L6; } /* "gumbocy.pyx":340 * attrs.values[ATTR_PROPERTY] = pystr * * elif attr.name == b"content": # <<<<<<<<<<<<<< * attrs.values[ATTR_CONTENT] = attr.value * */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 340, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = (__Pyx_PyBytes_Equals(__pyx_t_6, __pyx_n_b_content, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 340, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { /* "gumbocy.pyx":341 * * elif attr.name == b"content": * attrs.values[ATTR_CONTENT] = attr.value # <<<<<<<<<<<<<< * * return attrs */ __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 341, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 341, __pyx_L1_error) } __pyx_t_7 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_CONTENT); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 341, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (unlikely(PyDict_SetItem(__pyx_v_attrs->values, __pyx_t_7, __pyx_t_6) < 0)) __PYX_ERR(0, 341, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; 
/* "gumbocy.pyx":340 * attrs.values[ATTR_PROPERTY] = pystr * * elif attr.name == b"content": # <<<<<<<<<<<<<< * attrs.values[ATTR_CONTENT] = attr.value * */ } __pyx_L6:; /* "gumbocy.pyx":291 * attr = node.v.element.attributes.data[i] * * if re2_search(attr.name, deref(self.attributes_whitelist)): # <<<<<<<<<<<<<< * * if attr.name == b"class": */ } } /* "gumbocy.pyx":343 * attrs.values[ATTR_CONTENT] = attr.value * * return attrs # <<<<<<<<<<<<<< * * cdef void close_word_group(self): */ __Pyx_XDECREF(((PyObject *)__pyx_r)); __Pyx_INCREF(((PyObject *)__pyx_v_attrs)); __pyx_r = __pyx_v_attrs; goto __pyx_L0; /* "gumbocy.pyx":274 * return False * * cdef Attributes get_attributes(self, gumbocy.GumboNode* node): # <<<<<<<<<<<<<< * """ Build a dict with all the whitelisted attributes """ * */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_5); __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_7); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_9); __Pyx_AddTraceback("gumbocy.HTMLParser.get_attributes", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF((PyObject *)__pyx_v_attrs); __Pyx_XDECREF(__pyx_v_multiple_value); __Pyx_XDECREF(__pyx_v_pystr); __Pyx_XGIVEREF((PyObject *)__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; }

/* "gumbocy.pyx":345
 *     cdef void close_word_group(self):             # <<<<<<<<<<<<<<
 *         """ Close the current word group """
 */

/*
 * C implementation of HTMLParser.close_word_group().
 *
 * When self->current_word_group is truthy, the function converts it to a
 * tuple with PySequence_Tuple(), appends that tuple to
 * self->analysis["word_groups"], and then rebinds self->current_word_group
 * to None.  When it is falsy the function does nothing.
 *
 * Parameters:
 *   __pyx_v_self  the HTMLParser instance; its ->current_word_group and
 *                 ->analysis members are read, ->current_word_group is
 *                 written.
 *
 * Returns: nothing (void).
 *
 * Errors: every failing step jumps to __pyx_L1_error, which releases the
 * temporaries that are still held and hands the failure to
 * __Pyx_WriteUnraisable(); no status is returned to the caller.  The
 * assignment of None is skipped on that path, so current_word_group keeps
 * its previous value.
 */
static void __pyx_f_7gumbocy_10HTMLParser_close_word_group(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self) {
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;                /* truth value of self->current_word_group */
  PyObject *__pyx_t_2 = NULL;   /* self->analysis["word_groups"] */
  PyObject *__pyx_t_3 = NULL;   /* tuple(self->current_word_group) */
  int __pyx_t_4;                /* status from __Pyx_PyObject_Append() */
  __Pyx_RefNannySetupContext("close_word_group", 0);

  /* "gumbocy.pyx":348:  if self.current_word_group: */
  /* A negative result means the truth test itself failed. */
  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_self->current_word_group); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 348, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "gumbocy.pyx":349:  self.analysis["word_groups"].append(tuple(self.current_word_group)) */
    /* The field may hold None; reject that with the TypeError a Python
     * subscript on None would give before doing the dict lookup. */
    if (unlikely(__pyx_v_self->analysis == Py_None)) {
      PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
      __PYX_ERR(0, 349, __pyx_L1_error)
    }
    __pyx_t_2 = __Pyx_PyDict_GetItem(__pyx_v_self->analysis, __pyx_n_s_word_groups); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 349, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_3 = PySequence_Tuple(__pyx_v_self->current_word_group); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_4 = __Pyx_PyObject_Append(__pyx_t_2, __pyx_t_3); if (unlikely(__pyx_t_4 == -1)) __PYX_ERR(0, 349, __pyx_L1_error)
    /* Both temporaries are released here and zeroed so that the XDECREFs
     * at __pyx_L1_error cannot release them a second time. */
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "gumbocy.pyx":350:  self.current_word_group = None */
    /* Take the reference to None first, then drop the old value and store
     * None in the field. */
    __Pyx_INCREF(Py_None);
    __Pyx_GIVEREF(Py_None);
    __Pyx_GOTREF(__pyx_v_self->current_word_group);
    __Pyx_DECREF(__pyx_v_self->current_word_group);
    __pyx_v_self->current_word_group = Py_None;

    /* end of "gumbocy.pyx":348 -- if self.current_word_group: */
  }

  /* end of "gumbocy.pyx":345 -- cdef void close_word_group(self) */

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser.close_word_group", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
}

/* "gumbocy.pyx":353
* * * cdef void add_text(self, text): # <<<<<<<<<<<<<< * """ Adds inner text to the current word group """ * */

/*
 * C implementation of HTMLParser.add_text(text).
 *
 * When self->current_word_group is falsy, a new two-item list
 * [text.strip(), self.current_stack[-1]] is built and stored in
 * self->current_word_group.  Otherwise item 0 of the existing
 * current_word_group is replaced by
 *     item0 += __pyx_kp_s__4 + text.strip()
 * where __pyx_kp_s__4 is a module string constant (the " " literal
 * according to the embedded .pyx excerpt; its definition is elsewhere in
 * this file).
 *
 * Parameters:
 *   __pyx_v_self  the HTMLParser instance (->current_word_group is read and
 *                 written, ->current_stack is read).
 *   __pyx_v_text  Python object whose "strip" attribute is looked up and
 *                 called without arguments.
 *
 * Returns: nothing (void).
 *
 * Errors: every failing step jumps to __pyx_L1_error, which releases the
 * temporaries that are still held and hands the failure to
 * __Pyx_WriteUnraisable(); no status is returned to the caller.
 */
static void __pyx_f_7gumbocy_10HTMLParser_add_text(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_text) {
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  int __pyx_t_2;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  PyObject *__pyx_t_5 = NULL;
  Py_ssize_t __pyx_t_6;
  PyObject *__pyx_t_7 = NULL;
  PyObject *__pyx_t_8 = NULL;
  __Pyx_RefNannySetupContext("add_text", 0);

  /* "gumbocy.pyx":356:  if not self.current_word_group: */
  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_self->current_word_group); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 356, __pyx_L1_error)
  __pyx_t_2 = ((!__pyx_t_1) != 0);
  if (__pyx_t_2) {

    /* "gumbocy.pyx":357:  self.current_word_group = [text.strip(), self.current_stack[-1]] */
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_text, __pyx_n_s_strip); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 357, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    /* If "strip" came back as a bound method with a non-NULL self, split it
     * into the underlying function (kept in __pyx_t_4) and self (kept in
     * __pyx_t_5) so the call below passes self as the single argument. */
    __pyx_t_5 = NULL;
    if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_4))) {
      __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
      if (likely(__pyx_t_5)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
        __Pyx_INCREF(__pyx_t_5);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_4, function);
      }
    }
    if (__pyx_t_5) {
      __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    } else {
      __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error)
    }
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    /* __pyx_t_3 now holds text.strip(); __pyx_t_4 is reused for
     * self.current_stack[-1]. */
    if (unlikely(__pyx_v_self->current_stack == Py_None)) {
      PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
      __PYX_ERR(0, 357, __pyx_L1_error)
    }
    __pyx_t_4 = __Pyx_GetItemInt_List(__pyx_v_self->current_stack, -1L, long, 1, __Pyx_PyInt_From_long, 1, 1, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 357, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_5 = PyList_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 357, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    /* The two items are placed in the list and their temporaries are zeroed
     * without a DECREF: from here on the list holds those references. */
    __Pyx_GIVEREF(__pyx_t_3);
    PyList_SET_ITEM(__pyx_t_5, 0, __pyx_t_3);
    __Pyx_GIVEREF(__pyx_t_4);
    PyList_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
    __pyx_t_3 = 0;
    __pyx_t_4 = 0;
    /* Replace the field: drop the old value, store the new list, and zero
     * the temporary because the field now holds that reference. */
    __Pyx_GIVEREF(__pyx_t_5);
    __Pyx_GOTREF(__pyx_v_self->current_word_group);
    __Pyx_DECREF(__pyx_v_self->current_word_group);
    __pyx_v_self->current_word_group = __pyx_t_5;
    __pyx_t_5 = 0;

    /* end of "gumbocy.pyx":356 -- if not self.current_word_group: */
    goto __pyx_L3;
  }

  /* "gumbocy.pyx":359:  self.current_word_group[0] += " " + text.strip() */
  /*else*/ {
    /* __pyx_t_5 keeps its own reference to the list for the whole
     * read-modify-write of item 0; __pyx_t_6 is the index (0). */
    __Pyx_INCREF(__pyx_v_self->current_word_group);
    __pyx_t_5 = __pyx_v_self->current_word_group;
    __pyx_t_6 = 0;
    __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_5, __pyx_t_6, Py_ssize_t, 1, PyInt_FromSsize_t, 0, 1, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 359, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_text, __pyx_n_s_strip); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 359, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    /* Same bound-method unpacking as in the branch above, using
     * __pyx_t_7 (function) and __pyx_t_8 (self). */
    __pyx_t_8 = NULL;
    if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) {
      __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_7);
      if (likely(__pyx_t_8)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
        __Pyx_INCREF(__pyx_t_8);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_7, function);
      }
    }
    if (__pyx_t_8) {
      __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_8); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 359, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    } else {
      __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 359, __pyx_L1_error)
    }
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    /* __pyx_t_7 = __pyx_kp_s__4 + text.strip() */
    __pyx_t_7 = PyNumber_Add(__pyx_kp_s__4, __pyx_t_3); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 359, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    /* __pyx_t_3 = (old item 0) += __pyx_t_7 */
    __pyx_t_3 = PyNumber_InPlaceAdd(__pyx_t_4, __pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 359, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    /* Store the result back at index 0; the temporary is still DECREF'd
     * afterwards. */
    if (unlikely(__Pyx_SetItemInt(__pyx_t_5, __pyx_t_6, __pyx_t_3, Py_ssize_t, 1, PyInt_FromSsize_t, 0, 1, 1) < 0)) __PYX_ERR(0, 359, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  }
  __pyx_L3:;

  /* end of "gumbocy.pyx":353 -- cdef void add_text(self, text) */

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser.add_text", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
}

/* "gumbocy.pyx":361
 *     cdef void add_hyperlink_text(self, text):             # <<<<<<<<<<<<<<
 *         """ Adds inner text to the currently open hyperlink """
 */

/*
 * C implementation of HTMLParser.add_hyperlink_text(text).
 *
 * When self->current_hyperlink is truthy, item 1 of it is replaced by
 * (item 1 += text), computed with PyNumber_InPlaceAdd().  When it is falsy
 * the function does nothing.
 *
 * Parameters:
 *   __pyx_v_self  the HTMLParser instance (->current_hyperlink is read and
 *                 its item 1 is updated).
 *   __pyx_v_text  Python object used as the right operand of the in-place
 *                 add.
 *
 * Returns: nothing (void).
 *
 * Errors: every failing step jumps to __pyx_L1_error, which releases the
 * temporaries that are still held and hands the failure to
 * __Pyx_WriteUnraisable(); no status is returned to the caller.
 */
static void __pyx_f_7gumbocy_10HTMLParser_add_hyperlink_text(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_text) {
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;                /* truth value of self->current_hyperlink */
  PyObject *__pyx_t_2 = NULL;   /* extra reference to self->current_hyperlink */
  Py_ssize_t __pyx_t_3;         /* index being updated (1) */
  PyObject *__pyx_t_4 = NULL;   /* old item 1 */
  PyObject *__pyx_t_5 = NULL;   /* result of the in-place add */
  __Pyx_RefNannySetupContext("add_hyperlink_text", 0);

  /* "gumbocy.pyx":364:  if self.current_hyperlink: */
  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_self->current_hyperlink); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 364, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "gumbocy.pyx":365:  self.current_hyperlink[1] += text */
    /* The container is pinned with its own reference for the duration of
     * the read-modify-write. */
    __Pyx_INCREF(__pyx_v_self->current_hyperlink);
    __pyx_t_2 = __pyx_v_self->current_hyperlink;
    __pyx_t_3 = 1;
    __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_2, __pyx_t_3, Py_ssize_t, 1, PyInt_FromSsize_t, 0, 1, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 365, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_5 = PyNumber_InPlaceAdd(__pyx_t_4, __pyx_v_text); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 365, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    if (unlikely(__Pyx_SetItemInt(__pyx_t_2, __pyx_t_3, __pyx_t_5, Py_ssize_t, 1, PyInt_FromSsize_t, 0, 1, 1) < 0)) __PYX_ERR(0, 365, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

    /* end of "gumbocy.pyx":364 -- if self.current_hyperlink: */
  }

  /* end of "gumbocy.pyx":361 -- cdef void add_hyperlink_text(self, text) */

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser.add_hyperlink_text", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
}

/* "gumbocy.pyx":367
 *     cdef void open_hyperlink(self, Attributes attrs):             # <<<<<<<<<<<<<<
 *         """ Opens a new hyperlink """
 */
static void
__pyx_f_7gumbocy_10HTMLParser_open_hyperlink(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs) {
  /*
   * C implementation of HTMLParser.open_hyperlink(attrs).
   *
   * Returns without doing anything when any of these holds:
   *   - both self->analyze_external_hyperlinks and
   *     self->analyze_internal_hyperlinks are zero;
   *   - attrs.values.get(ATTR_HREF) is falsy;
   *   - len(attrs.values[ATTR_HREF]) == 0;
   *   - re2_search(href, *_RE2_IGNORED_HREF) returns non-zero.
   * Otherwise it calls self.close_hyperlink() through the vtable and then
   * stores a new three-item list
   *     [attrs.values[ATTR_HREF], __pyx_kp_s__5, attrs.values.get(ATTR_REL)]
   * in self->current_hyperlink.  __pyx_kp_s__5 is a module string constant
   * (the "" literal according to the embedded .pyx excerpt).
   *
   * Parameters:
   *   __pyx_v_self   the HTMLParser instance.
   *   __pyx_v_attrs  Attributes object; only its ->values member is used,
   *                  and it is checked against None before every access.
   *
   * Returns: nothing (void).
   *
   * Errors: every failing step jumps to __pyx_L1_error, which releases the
   * temporaries that are still held and hands the failure to
   * __Pyx_WriteUnraisable(); no status is returned to the caller.
   */
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  int __pyx_t_2;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  Py_ssize_t __pyx_t_5;
  char const *__pyx_t_6;
  PyObject *__pyx_t_7 = NULL;
  __Pyx_RefNannySetupContext("open_hyperlink", 0);

  /* "gumbocy.pyx":370:  if not self.analyze_external_hyperlinks and not self.analyze_internal_hyperlinks: */
  /* Short-circuit "and": the second flag is only tested when the first
   * operand is true. */
  __pyx_t_2 = ((!(__pyx_v_self->analyze_external_hyperlinks != 0)) != 0);
  if (__pyx_t_2) {
  } else {
    __pyx_t_1 = __pyx_t_2;
    goto __pyx_L4_bool_binop_done;
  }
  __pyx_t_2 = ((!(__pyx_v_self->analyze_internal_hyperlinks != 0)) != 0);
  __pyx_t_1 = __pyx_t_2;
  __pyx_L4_bool_binop_done:;
  if (__pyx_t_1) {

    /* "gumbocy.pyx":371:  return */
    goto __pyx_L0;

    /* end of "gumbocy.pyx":370 */
  }

  /* "gumbocy.pyx":373:  if not attrs.values.get(ATTR_HREF): */
  if (unlikely(__pyx_v_attrs->values == Py_None)) {
    PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get");
    __PYX_ERR(0, 373, __pyx_L1_error)
  }
  /* The enum value is converted to a Python int to be used as the key. */
  __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 373, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 373, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 373, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_2 = ((!__pyx_t_1) != 0);
  if (__pyx_t_2) {

    /* "gumbocy.pyx":374:  return */
    goto __pyx_L0;

    /* end of "gumbocy.pyx":373 */
  }

  /* "gumbocy.pyx":376:  if len(attrs.values[ATTR_HREF]) == 0: */
  if (unlikely(__pyx_v_attrs->values == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
    __PYX_ERR(0, 376, __pyx_L1_error)
  }
  __pyx_t_4 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 376, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 376, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_5 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 376, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_2 = ((__pyx_t_5 == 0) != 0);
  if (__pyx_t_2) {

    /* "gumbocy.pyx":377:  return */
    goto __pyx_L0;

    /* end of "gumbocy.pyx":376 */
  }

  /* "gumbocy.pyx":379:  if re2_search(attrs.values[ATTR_HREF], deref(_RE2_IGNORED_HREF)): */
  if (unlikely(__pyx_v_attrs->values == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
    __PYX_ERR(0, 379, __pyx_L1_error)
  }
  __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 379, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 379, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  /* __pyx_t_6 is a char pointer obtained from __pyx_t_4; the object is
   * only released after re2_search() has been called with that pointer. */
  __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_t_4); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 379, __pyx_L1_error)
  __pyx_t_2 = (__pyx_f_7gumbocy_re2_search(__pyx_t_6, (*__pyx_v_7gumbocy__RE2_IGNORED_HREF)) != 0);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (__pyx_t_2) {

    /* "gumbocy.pyx":380:  return */
    goto __pyx_L0;

    /* end of "gumbocy.pyx":379 */
  }

  /* "gumbocy.pyx":382:  self.close_hyperlink() */
  /* Dispatched through the vtable; the callee returns void, so nothing is
   * checked here. */
  ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->close_hyperlink(__pyx_v_self);

  /* "gumbocy.pyx":385:  self.current_hyperlink = [attrs.values[ATTR_HREF], "", attrs.values.get(ATTR_REL)]   (# href, text, rel) */
  if (unlikely(__pyx_v_attrs->values == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
    __PYX_ERR(0, 385, __pyx_L1_error)
  }
  /* __pyx_t_3 = attrs.values[ATTR_HREF] */
  __pyx_t_4 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 385, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (unlikely(__pyx_v_attrs->values == Py_None)) {
    PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get");
    __PYX_ERR(0, 385, __pyx_L1_error)
  }
  /* __pyx_t_7 = attrs.values.get(ATTR_REL), with None as the default */
  __pyx_t_4 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_REL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 385, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_7 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_4, Py_None); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 385, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = PyList_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 385, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  /* Items 0 and 2 hand over the temporaries' references (they are zeroed
   * below without a DECREF); the constant at index 1 is INCREF'd first. */
  __Pyx_GIVEREF(__pyx_t_3);
  PyList_SET_ITEM(__pyx_t_4, 0, __pyx_t_3);
  __Pyx_INCREF(__pyx_kp_s__5);
  __Pyx_GIVEREF(__pyx_kp_s__5);
  PyList_SET_ITEM(__pyx_t_4, 1, __pyx_kp_s__5);
  __Pyx_GIVEREF(__pyx_t_7);
  PyList_SET_ITEM(__pyx_t_4, 2, __pyx_t_7);
  __pyx_t_3 = 0;
  __pyx_t_7 = 0;
  /* Replace the field: drop the old value, store the new list. */
  __Pyx_GIVEREF(__pyx_t_4);
  __Pyx_GOTREF(__pyx_v_self->current_hyperlink);
  __Pyx_DECREF(__pyx_v_self->current_hyperlink);
  __pyx_v_self->current_hyperlink = __pyx_t_4;
  __pyx_t_4 = 0;

  /* end of "gumbocy.pyx":367 -- cdef void open_hyperlink(self, Attributes attrs) */

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser.open_hyperlink", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
}

/* "gumbocy.pyx":387
 *     cdef void close_hyperlink(self):             # <<<<<<<<<<<<<<
 *         """ Closes the current hyperlink if any, and decides if it's an external or internal link """
 */

/*
 * C implementation of HTMLParser.close_hyperlink().
 *
 * Returns immediately when both self->analyze_external_hyperlinks and
 * self->analyze_internal_hyperlinks are zero.  Otherwise, when
 * self->current_hyperlink is truthy:
 *   1. href = current_hyperlink[0].
 *   2. If re2_search(href, *_RE2_ABSOLUTE_HREF) is non-zero, is_external is
 *      set to 1.  If additionally self->has_url is non-zero:
 *        - when href.startswith(...) is true (argument tuple
 *          __pyx_tuple__7, "//" according to the .pyx excerpt), href
 *          becomes self->scheme + __pyx_kp_s__8 + href;
 *        - when re2_search(href, *self->internal_netloc_search) is
 *          non-zero, is_external is reset to 0 and href becomes
 *          href.split(self->netloc, 1)[1].
 *   3. If is_external is set, (href, current_hyperlink[1],
 *      current_hyperlink[2]) is appended to
 *      self->analysis["external_hyperlinks"], but only when
 *      analyze_external_hyperlinks is non-zero.  If is_external is not set
 *      and analyze_internal_hyperlinks is non-zero, the same tuple is
 *      appended to self->analysis["internal_hyperlinks"].
 *   4. self->current_hyperlink is rebound to None.
 *
 * Parameters:
 *   __pyx_v_self  the HTMLParser instance.
 *
 * Returns: nothing (void).
 *
 * Errors: every failing step jumps to __pyx_L1_error, which releases the
 * temporaries that are still held and hands the failure to
 * __Pyx_WriteUnraisable(); no status is returned to the caller.  Step 4 is
 * skipped on that path, so current_hyperlink keeps its previous value.
 * The local href is released at __pyx_L0 on both paths.
 */
static void __pyx_f_7gumbocy_10HTMLParser_close_hyperlink(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self) {
  int __pyx_v_is_external;
  PyObject *__pyx_v_href = NULL;
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  int __pyx_t_2;
  PyObject *__pyx_t_3 = NULL;
  char const *__pyx_t_4;
  PyObject *__pyx_t_5 = NULL;
  PyObject *__pyx_t_6 = NULL;
  PyObject *__pyx_t_7 = NULL;
  Py_ssize_t __pyx_t_8;
  PyObject *__pyx_t_9 = NULL;
  int __pyx_t_10;
  __Pyx_RefNannySetupContext("close_hyperlink", 0);

  /* "gumbocy.pyx":390:  cdef bint is_external = 0 */
  __pyx_v_is_external = 0;

  /* "gumbocy.pyx":392:  if not self.analyze_external_hyperlinks and not self.analyze_internal_hyperlinks: */
  /* Short-circuit "and": the second flag is only tested when the first
   * operand is true. */
  __pyx_t_2 = ((!(__pyx_v_self->analyze_external_hyperlinks != 0)) != 0);
  if (__pyx_t_2) {
  } else {
    __pyx_t_1 = __pyx_t_2;
    goto __pyx_L4_bool_binop_done;
  }
  __pyx_t_2 = ((!(__pyx_v_self->analyze_internal_hyperlinks != 0)) != 0);
  __pyx_t_1 = __pyx_t_2;
  __pyx_L4_bool_binop_done:;
  if (__pyx_t_1) {

    /* "gumbocy.pyx":393:  return */
    goto __pyx_L0;

    /* end of "gumbocy.pyx":392 */
  }

  /* "gumbocy.pyx":395:  if self.current_hyperlink: */
  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_self->current_hyperlink); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 395, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "gumbocy.pyx":396:  href = self.current_hyperlink[0] */
    /* The temporary's reference is moved into the local href. */
    __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_self->current_hyperlink, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 396, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_v_href = __pyx_t_3;
    __pyx_t_3 = 0;

    /* "gumbocy.pyx":398:  if re2_search(href, deref(_RE2_ABSOLUTE_HREF)): */
    __pyx_t_4 = __Pyx_PyObject_AsString(__pyx_v_href); if (unlikely((!__pyx_t_4) && PyErr_Occurred())) __PYX_ERR(0, 398, __pyx_L1_error)
    __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_4, (*__pyx_v_7gumbocy__RE2_ABSOLUTE_HREF)) != 0);
    if (__pyx_t_1) {

      /* "gumbocy.pyx":399:  is_external = 1 */
      __pyx_v_is_external = 1;

      /* "gumbocy.pyx":401:  if self.has_url: */
      __pyx_t_1 = (__pyx_v_self->has_url != 0);
      if (__pyx_t_1) {

        /* "gumbocy.pyx":403:  if href.startswith("//"): */
        /* The argument tuple is the prebuilt constant __pyx_tuple__7. */
        __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_href, __pyx_n_s_startswith); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 403, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_3);
        __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 403, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_5);
        __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
        __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 403, __pyx_L1_error)
        __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
        if (__pyx_t_1) {

          /* "gumbocy.pyx":404:  href = self.scheme + ":" + href */
          /* self->scheme is turned into a bytes object first; the two
           * additions then build the new href, which replaces the old one
           * via __Pyx_DECREF_SET. */
          __pyx_t_5 = __Pyx_PyBytes_FromString(__pyx_v_self->scheme); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 404, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_5);
          __pyx_t_3 = PyNumber_Add(__pyx_t_5, __pyx_kp_s__8); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 404, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_3);
          __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
          __pyx_t_5 = PyNumber_Add(__pyx_t_3, __pyx_v_href); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 404, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_5);
          __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
          __Pyx_DECREF_SET(__pyx_v_href, __pyx_t_5);
          __pyx_t_5 = 0;

          /* end of "gumbocy.pyx":403 */
        }

        /* "gumbocy.pyx":407:  if re2_search(href, deref(self.internal_netloc_search)):
         *   (# This may be an absolute link but to the same domain) */
        /* The C string is fetched again because href may have been rebound
         * just above. */
        __pyx_t_4 = __Pyx_PyObject_AsString(__pyx_v_href); if (unlikely((!__pyx_t_4) && PyErr_Occurred())) __PYX_ERR(0, 407, __pyx_L1_error)
        __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_4, (*__pyx_v_self->internal_netloc_search)) != 0);
        if (__pyx_t_1) {

          /* "gumbocy.pyx":408:  is_external = 0 */
          __pyx_v_is_external = 0;

          /* "gumbocy.pyx":409:  href = href.split(self.netloc, 1)[1] */
          __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_href, __pyx_n_s_split); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 409, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_3);
          __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_self->netloc); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 409, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_6);
          /* If "split" is a bound method with a non-NULL self, unpack it
           * into function + self.  __pyx_t_8 becomes 1 in that case and
           * shifts the positional arguments one slot to the right in the
           * argument tuple built below. */
          __pyx_t_7 = NULL;
          __pyx_t_8 = 0;
          if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) {
            __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_3);
            if (likely(__pyx_t_7)) {
              PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
              __Pyx_INCREF(__pyx_t_7);
              __Pyx_INCREF(function);
              __Pyx_DECREF_SET(__pyx_t_3, function);
              __pyx_t_8 = 1;
            }
          }
          __pyx_t_9 = PyTuple_New(2+__pyx_t_8); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 409, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_9);
          if (__pyx_t_7) {
            __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_7); __pyx_t_7 = NULL;
          }
          __Pyx_GIVEREF(__pyx_t_6);
          PyTuple_SET_ITEM(__pyx_t_9, 0+__pyx_t_8, __pyx_t_6);
          __Pyx_INCREF(__pyx_int_1);
          __Pyx_GIVEREF(__pyx_int_1);
          PyTuple_SET_ITEM(__pyx_t_9, 1+__pyx_t_8, __pyx_int_1);
          __pyx_t_6 = 0;
          __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_9, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 409, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_5);
          __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
          __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
          /* Item 1 of the split result becomes the new href.  A failure of
           * this lookup goes to __pyx_L1_error like any other step. */
          __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_5, 1, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 409, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_3);
          __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
          __Pyx_DECREF_SET(__pyx_v_href, __pyx_t_3);
          __pyx_t_3 = 0;

          /* end of "gumbocy.pyx":407 */
        }

        /* end of "gumbocy.pyx":401 -- if self.has_url: */
      }

      /* end of "gumbocy.pyx":398 */
    }

    /* "gumbocy.pyx":411:  if is_external: */
    __pyx_t_1 = (__pyx_v_is_external != 0);
    if (__pyx_t_1) {

      /* "gumbocy.pyx":412:  if self.analyze_external_hyperlinks: */
      __pyx_t_1 = (__pyx_v_self->analyze_external_hyperlinks != 0);
      if (__pyx_t_1) {

        /* "gumbocy.pyx":413:  self.analysis["external_hyperlinks"].append( */
        if (unlikely(__pyx_v_self->analysis == Py_None)) {
          PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
          __PYX_ERR(0, 413, __pyx_L1_error)
        }
        __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_self->analysis, __pyx_n_s_external_hyperlinks); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 413, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_3);

        /* "gumbocy.pyx":414:  (href, self.current_hyperlink[1], self.current_hyperlink[2]) */
        __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_self->current_hyperlink, 1, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 414, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_5);
        __pyx_t_9 = __Pyx_GetItemInt(__pyx_v_self->current_hyperlink, 2, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 414, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_9);
        __pyx_t_6 = PyTuple_New(3); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 414, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_6);
        /* href is INCREF'd because the local keeps its own reference; the
         * two temporaries hand theirs to the tuple and are zeroed. */
        __Pyx_INCREF(__pyx_v_href);
        __Pyx_GIVEREF(__pyx_v_href);
        PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_href);
        __Pyx_GIVEREF(__pyx_t_5);
        PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_5);
        __Pyx_GIVEREF(__pyx_t_9);
        PyTuple_SET_ITEM(__pyx_t_6, 2, __pyx_t_9);
        __pyx_t_5 = 0;
        __pyx_t_9 = 0;

        /* "gumbocy.pyx":413:  self.analysis["external_hyperlinks"].append( ... ) -- the call itself */
        __pyx_t_10 = __Pyx_PyObject_Append(__pyx_t_3, __pyx_t_6); if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 413, __pyx_L1_error)
        __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
        __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

        /* end of "gumbocy.pyx":412 -- if self.analyze_external_hyperlinks: */
      }

      /* end of "gumbocy.pyx":411 -- if is_external: */
      goto __pyx_L11;
    }

    /* "gumbocy.pyx":417:  elif self.analyze_internal_hyperlinks: */
    __pyx_t_1 = (__pyx_v_self->analyze_internal_hyperlinks != 0);
    if (__pyx_t_1) {

      /* "gumbocy.pyx":418:  self.analysis["internal_hyperlinks"].append( */
      if (unlikely(__pyx_v_self->analysis == Py_None)) {
        PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
        __PYX_ERR(0, 418, __pyx_L1_error)
      }
      __pyx_t_6 = __Pyx_PyDict_GetItem(__pyx_v_self->analysis, __pyx_n_s_internal_hyperlinks); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 418, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_6);

      /* "gumbocy.pyx":419:  (href, self.current_hyperlink[1], self.current_hyperlink[2]) */
      __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_self->current_hyperlink, 1, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 419, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __pyx_t_9 = __Pyx_GetItemInt(__pyx_v_self->current_hyperlink, 2, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 419, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_9);
      __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 419, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_5);
      /* Same reference handling as in the external branch above. */
      __Pyx_INCREF(__pyx_v_href);
      __Pyx_GIVEREF(__pyx_v_href);
      PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_href);
      __Pyx_GIVEREF(__pyx_t_3);
      PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
      __Pyx_GIVEREF(__pyx_t_9);
      PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_9);
      __pyx_t_3 = 0;
      __pyx_t_9 = 0;

      /* "gumbocy.pyx":418:  self.analysis["internal_hyperlinks"].append( ... ) -- the call itself */
      __pyx_t_10 = __Pyx_PyObject_Append(__pyx_t_6, __pyx_t_5); if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 418, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;

      /* end of "gumbocy.pyx":417 -- elif self.analyze_internal_hyperlinks: */
    }
    __pyx_L11:;

    /* "gumbocy.pyx":422:  self.current_hyperlink = None */
    /* Take the reference to None first, then drop the old value and store
     * None in the field. */
    __Pyx_INCREF(Py_None);
    __Pyx_GIVEREF(Py_None);
    __Pyx_GOTREF(__pyx_v_self->current_hyperlink);
    __Pyx_DECREF(__pyx_v_self->current_hyperlink);
    __pyx_v_self->current_hyperlink = Py_None;

    /* end of "gumbocy.pyx":395 -- if self.current_hyperlink: */
  }

  /* end of "gumbocy.pyx":387 -- cdef void close_hyperlink(self) */

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_XDECREF(__pyx_t_9);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser.close_hyperlink", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_href);
  __Pyx_RefNannyFinishContext();
}

/* "gumbocy.pyx":424 * self.current_hyperlink = None * * cdef bint _traverse_node(self, int level, gumbocy.GumboNode* node, bint is_head, bint is_hidden, bint is_boilerplate, bint is_boilerplate_bypassed, bint is_hyperlink): # <<<<<<<<<<<<<< * """ Traverses the node tree.
Return 1 to stop at this level """ * */ static int __pyx_f_7gumbocy_10HTMLParser__traverse_node(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, int __pyx_v_level, GumboNode *__pyx_v_node, int __pyx_v_is_head, int __pyx_v_is_hidden, int __pyx_v_is_boilerplate, int __pyx_v_is_boilerplate_bypassed, int __pyx_v_is_hyperlink) { GumboStringPiece __pyx_v_gsp; char const *__pyx_v_tag_name; int __pyx_v_tag_n; PyObject *__pyx_v_py_tag_name = NULL; struct __pyx_obj_7gumbocy_Attributes *__pyx_v_attrs = NULL; PyObject *__pyx_v_v = NULL; GumboNode *__pyx_v_first_child; unsigned int __pyx_v_i; GumboNode *__pyx_v_child; int __pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; int __pyx_t_2; PyObject *__pyx_t_3 = NULL; GumboStringPiece __pyx_t_4; PyObject *__pyx_t_5 = NULL; PyObject *__pyx_t_6 = NULL; char const *__pyx_t_7; Py_ssize_t __pyx_t_8; char const *__pyx_t_9; int __pyx_t_10; PyObject *__pyx_t_11 = NULL; int __pyx_t_12; unsigned int __pyx_t_13; unsigned int __pyx_t_14; __Pyx_RefNannySetupContext("_traverse_node", 0); /* "gumbocy.pyx":431 * cdef int tag_n * * if level > self.nesting_limit: # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_1 = ((__pyx_v_level > __pyx_v_self->nesting_limit) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":432 * * if level > self.nesting_limit: * return 0 # <<<<<<<<<<<<<< * * if node.type == gumbocy.GUMBO_NODE_TEXT: */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":431 * cdef int tag_n * * if level > self.nesting_limit: # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":434 * return 0 * * if node.type == gumbocy.GUMBO_NODE_TEXT: # <<<<<<<<<<<<<< * * if (self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks) and is_hyperlink: */ switch (__pyx_v_node->type) { case GUMBO_NODE_TEXT: /* "gumbocy.pyx":436 * if node.type == gumbocy.GUMBO_NODE_TEXT: * * if (self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks) and is_hyperlink: # <<<<<<<<<<<<<< * self.add_hyperlink_text(node.v.text.text) * */ __pyx_t_2 = 
(__pyx_v_self->analyze_internal_hyperlinks != 0); if (!__pyx_t_2) { } else { goto __pyx_L6_next_and; } __pyx_t_2 = (__pyx_v_self->analyze_external_hyperlinks != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L5_bool_binop_done; } __pyx_L6_next_and:; __pyx_t_2 = (__pyx_v_is_hyperlink != 0); __pyx_t_1 = __pyx_t_2; __pyx_L5_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":437 * * if (self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks) and is_hyperlink: * self.add_hyperlink_text(node.v.text.text) # <<<<<<<<<<<<<< * * if self.analyze_word_groups and not is_head and not is_hidden and (not is_boilerplate or is_boilerplate_bypassed): */ __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_node->v.text.text); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->add_hyperlink_text(__pyx_v_self, __pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":436 * if node.type == gumbocy.GUMBO_NODE_TEXT: * * if (self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks) and is_hyperlink: # <<<<<<<<<<<<<< * self.add_hyperlink_text(node.v.text.text) * */ } /* "gumbocy.pyx":439 * self.add_hyperlink_text(node.v.text.text) * * if self.analyze_word_groups and not is_head and not is_hidden and (not is_boilerplate or is_boilerplate_bypassed): # <<<<<<<<<<<<<< * self.add_text(node.v.text.text) * */ __pyx_t_2 = (__pyx_v_self->analyze_word_groups != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L9_bool_binop_done; } __pyx_t_2 = ((!(__pyx_v_is_head != 0)) != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L9_bool_binop_done; } __pyx_t_2 = ((!(__pyx_v_is_hidden != 0)) != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L9_bool_binop_done; } __pyx_t_2 = ((!(__pyx_v_is_boilerplate != 0)) != 0); if (!__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L9_bool_binop_done; } __pyx_t_2 = 
(__pyx_v_is_boilerplate_bypassed != 0); __pyx_t_1 = __pyx_t_2; __pyx_L9_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":440 * * if self.analyze_word_groups and not is_head and not is_hidden and (not is_boilerplate or is_boilerplate_bypassed): * self.add_text(node.v.text.text) # <<<<<<<<<<<<<< * * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: */ __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_node->v.text.text); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->add_text(__pyx_v_self, __pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":439 * self.add_hyperlink_text(node.v.text.text) * * if self.analyze_word_groups and not is_head and not is_hidden and (not is_boilerplate or is_boilerplate_bypassed): # <<<<<<<<<<<<<< * self.add_text(node.v.text.text) * */ } /* "gumbocy.pyx":434 * return 0 * * if node.type == gumbocy.GUMBO_NODE_TEXT: # <<<<<<<<<<<<<< * * if (self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks) and is_hyperlink: */ break; /* "gumbocy.pyx":442 * self.add_text(node.v.text.text) * * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: # <<<<<<<<<<<<<< * * tag_n = node.v.element.tag */ case GUMBO_NODE_ELEMENT: /* "gumbocy.pyx":444 * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: * * tag_n = node.v.element.tag # <<<<<<<<<<<<<< * * if self.head_only and self.tags_ignore_head_only.count(tag_n): */ __pyx_v_tag_n = ((int)__pyx_v_node->v.element.tag); /* "gumbocy.pyx":446 * tag_n = node.v.element.tag * * if self.head_only and self.tags_ignore_head_only.count(tag_n): # <<<<<<<<<<<<<< * return 1 * */ __pyx_t_2 = (__pyx_v_self->head_only != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L15_bool_binop_done; } __pyx_t_2 = (__pyx_v_self->tags_ignore_head_only.count(__pyx_v_tag_n) != 0); __pyx_t_1 = __pyx_t_2; __pyx_L15_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":447 * * if self.head_only and 
self.tags_ignore_head_only.count(tag_n): * return 1 # <<<<<<<<<<<<<< * * if self.tags_ignore.count(tag_n): */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":446 * tag_n = node.v.element.tag * * if self.head_only and self.tags_ignore_head_only.count(tag_n): # <<<<<<<<<<<<<< * return 1 * */ } /* "gumbocy.pyx":449 * return 1 * * if self.tags_ignore.count(tag_n): # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_1 = (__pyx_v_self->tags_ignore.count(__pyx_v_tag_n) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":450 * * if self.tags_ignore.count(tag_n): * return 0 # <<<<<<<<<<<<<< * * tag_name = gumbocy.gumbo_normalized_tagname(node.v.element.tag) */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":449 * return 1 * * if self.tags_ignore.count(tag_n): # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":452 * return 0 * * tag_name = gumbocy.gumbo_normalized_tagname(node.v.element.tag) # <<<<<<<<<<<<<< * * # When we find an unknown tag, find its tag_name in the buffer */ __pyx_v_tag_name = gumbo_normalized_tagname(__pyx_v_node->v.element.tag); /* "gumbocy.pyx":455 * * # When we find an unknown tag, find its tag_name in the buffer * if tag_name == b"": # <<<<<<<<<<<<<< * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) */ __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_tag_name); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = (__Pyx_PyBytes_Equals(__pyx_t_3, __pyx_kp_b__5, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_1) { /* "gumbocy.pyx":456 * # When we find an unknown tag, find its tag_name in the buffer * if tag_name == b"": * gsp = node.v.element.original_tag # <<<<<<<<<<<<<< * gumbo_tag_from_original_text(&gsp) * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! 
*/ __pyx_t_4 = __pyx_v_node->v.element.original_tag; __pyx_v_gsp = __pyx_t_4; /* "gumbocy.pyx":457 * if tag_name == b"": * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) # <<<<<<<<<<<<<< * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! * tag_name = py_tag_name */ gumbo_tag_from_original_text((&__pyx_v_gsp)); /* "gumbocy.pyx":458 * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! # <<<<<<<<<<<<<< * tag_name = py_tag_name * */ __pyx_t_5 = __Pyx_PyBytes_FromString(__pyx_v_gsp.data); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 458, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 458, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_6, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 458, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = __Pyx_PyObject_GetSlice(__pyx_t_5, 0, __pyx_v_gsp.length, NULL, NULL, NULL, 1, 1, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 458, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_lower); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 458, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_5))) { __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); if (likely(__pyx_t_6)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_5, function); } } if (__pyx_t_6) { __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_6); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 458, __pyx_L1_error) 
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } else { __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 458, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_py_tag_name = __pyx_t_3; __pyx_t_3 = 0; /* "gumbocy.pyx":459 * gumbo_tag_from_original_text(&gsp) * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! * tag_name = py_tag_name # <<<<<<<<<<<<<< * * # if self.has_attributes_whitelist: */ __pyx_t_7 = __Pyx_PyObject_AsString(__pyx_v_py_tag_name); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) __PYX_ERR(0, 459, __pyx_L1_error) __pyx_v_tag_name = ((char const *)__pyx_t_7); /* "gumbocy.pyx":455 * * # When we find an unknown tag, find its tag_name in the buffer * if tag_name == b"": # <<<<<<<<<<<<<< * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) */ } /* "gumbocy.pyx":463 * # if self.has_attributes_whitelist: * * attrs = self.get_attributes(node) # <<<<<<<<<<<<<< * * if self.has_classes_ignore and attrs.size_classes > 0: */ __pyx_t_3 = ((PyObject *)((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->get_attributes(__pyx_v_self, __pyx_v_node)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 463, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_attrs = ((struct __pyx_obj_7gumbocy_Attributes *)__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":465 * attrs = self.get_attributes(node) * * if self.has_classes_ignore and attrs.size_classes > 0: # <<<<<<<<<<<<<< * for v in attrs.classes: * if re2_search(v, deref(self.classes_ignore)): */ __pyx_t_2 = (__pyx_v_self->has_classes_ignore != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L20_bool_binop_done; } __pyx_t_2 = ((__pyx_v_attrs->size_classes > 0) != 0); __pyx_t_1 = __pyx_t_2; __pyx_L20_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":466 * * if self.has_classes_ignore and attrs.size_classes > 0: * for v in attrs.classes: # <<<<<<<<<<<<<< * if re2_search(v, 
deref(self.classes_ignore)): * return 0 */ if (unlikely(__pyx_v_attrs->classes == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); __PYX_ERR(0, 466, __pyx_L1_error) } __pyx_t_3 = __pyx_v_attrs->classes; __Pyx_INCREF(__pyx_t_3); __pyx_t_8 = 0; for (;;) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_5 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 466, __pyx_L1_error) #else __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 466, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif __Pyx_XDECREF_SET(__pyx_v_v, __pyx_t_5); __pyx_t_5 = 0; /* "gumbocy.pyx":467 * if self.has_classes_ignore and attrs.size_classes > 0: * for v in attrs.classes: * if re2_search(v, deref(self.classes_ignore)): # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_v_v); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) __PYX_ERR(0, 467, __pyx_L1_error) __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_9, (*__pyx_v_self->classes_ignore)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":468 * for v in attrs.classes: * if re2_search(v, deref(self.classes_ignore)): * return 0 # <<<<<<<<<<<<<< * * if self.has_ids_ignore and attrs.values.get(ATTR_ID): */ __pyx_r = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L0; /* "gumbocy.pyx":467 * if self.has_classes_ignore and attrs.size_classes > 0: * for v in attrs.classes: * if re2_search(v, deref(self.classes_ignore)): # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":466 * * if self.has_classes_ignore and attrs.size_classes > 0: * for v in attrs.classes: # <<<<<<<<<<<<<< * if re2_search(v, deref(self.classes_ignore)): * return 0 */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":465 * attrs = self.get_attributes(node) * * if self.has_classes_ignore and attrs.size_classes > 0: # <<<<<<<<<<<<<< * for v in attrs.classes: * if 
re2_search(v, deref(self.classes_ignore)): */ } /* "gumbocy.pyx":470 * return 0 * * if self.has_ids_ignore and attrs.values.get(ATTR_ID): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ID], deref(self.ids_ignore)): * return 0 */ __pyx_t_2 = (__pyx_v_self->has_ids_ignore != 0); if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; goto __pyx_L26_bool_binop_done; } if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 470, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 470, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 470, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 470, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_1 = __pyx_t_2; __pyx_L26_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":471 * * if self.has_ids_ignore and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_ignore)): # <<<<<<<<<<<<<< * return 0 * */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 471, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ID); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 471, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 471, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) __PYX_ERR(0, 471, __pyx_L1_error) __pyx_t_1 = 
(__pyx_f_7gumbocy_re2_search(__pyx_t_9, (*__pyx_v_self->ids_ignore)) != 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_1) { /* "gumbocy.pyx":472 * if self.has_ids_ignore and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_ignore)): * return 0 # <<<<<<<<<<<<<< * * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":471 * * if self.has_ids_ignore and attrs.values.get(ATTR_ID): * if re2_search(attrs.values[ATTR_ID], deref(self.ids_ignore)): # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":470 * return 0 * * if self.has_ids_ignore and attrs.values.get(ATTR_ID): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_ID], deref(self.ids_ignore)): * return 0 */ } /* "gumbocy.pyx":474 * return 0 * * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: # <<<<<<<<<<<<<< * if not self.analysis.get("title"): * if node.v.element.children.length > 0: */ __pyx_t_1 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_TITLE) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":475 * * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: * if not self.analysis.get("title"): # <<<<<<<<<<<<<< * if node.v.element.children.length > 0: * first_child = node.v.element.children.data[0] */ if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 475, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->analysis, __pyx_n_s_title, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = ((!__pyx_t_1) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":476 * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: * if not self.analysis.get("title"): * if node.v.element.children.length > 0: # <<<<<<<<<<<<<< * first_child = 
node.v.element.children.data[0] * if first_child.type == gumbocy.GUMBO_NODE_TEXT: */ __pyx_t_2 = ((__pyx_v_node->v.element.children.length > 0) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":477 * if not self.analysis.get("title"): * if node.v.element.children.length > 0: * first_child = node.v.element.children.data[0] # <<<<<<<<<<<<<< * if first_child.type == gumbocy.GUMBO_NODE_TEXT: * self.analysis["title"] = first_child.v.text.text */ __pyx_v_first_child = ((GumboNode *)(__pyx_v_node->v.element.children.data[0])); /* "gumbocy.pyx":478 * if node.v.element.children.length > 0: * first_child = node.v.element.children.data[0] * if first_child.type == gumbocy.GUMBO_NODE_TEXT: # <<<<<<<<<<<<<< * self.analysis["title"] = first_child.v.text.text * return 0 */ __pyx_t_2 = ((__pyx_v_first_child->type == GUMBO_NODE_TEXT) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":479 * first_child = node.v.element.children.data[0] * if first_child.type == gumbocy.GUMBO_NODE_TEXT: * self.analysis["title"] = first_child.v.text.text # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_first_child->v.text.text); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 479, __pyx_L1_error) } if (unlikely(PyDict_SetItem(__pyx_v_self->analysis, __pyx_n_s_title, __pyx_t_3) < 0)) __PYX_ERR(0, 479, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":478 * if node.v.element.children.length > 0: * first_child = node.v.element.children.data[0] * if first_child.type == gumbocy.GUMBO_NODE_TEXT: # <<<<<<<<<<<<<< * self.analysis["title"] = first_child.v.text.text * return 0 */ } /* "gumbocy.pyx":476 * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: * if not self.analysis.get("title"): * if node.v.element.children.length > 0: # <<<<<<<<<<<<<< * first_child = node.v.element.children.data[0] * if 
first_child.type == gumbocy.GUMBO_NODE_TEXT: */ } /* "gumbocy.pyx":475 * * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: * if not self.analysis.get("title"): # <<<<<<<<<<<<<< * if node.v.element.children.length > 0: * first_child = node.v.element.children.data[0] */ } /* "gumbocy.pyx":480 * if first_child.type == gumbocy.GUMBO_NODE_TEXT: * self.analysis["title"] = first_child.v.text.text * return 0 # <<<<<<<<<<<<<< * * self.current_stack.append(tag_name) */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":474 * return 0 * * if node.v.element.tag == gumbocy.GUMBO_TAG_TITLE: # <<<<<<<<<<<<<< * if not self.analysis.get("title"): * if node.v.element.children.length > 0: */ } /* "gumbocy.pyx":482 * return 0 * * self.current_stack.append(tag_name) # <<<<<<<<<<<<<< * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: */ if (unlikely(__pyx_v_self->current_stack == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "append"); __PYX_ERR(0, 482, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_tag_name); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 482, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = __Pyx_PyList_Append(__pyx_v_self->current_stack, __pyx_t_3); if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 482, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":484 * self.current_stack.append(tag_name) * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: # <<<<<<<<<<<<<< * is_head = 1 * */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_HEAD) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":485 * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: * is_head = 1 # <<<<<<<<<<<<<< * * elif node.v.element.tag == gumbocy.GUMBO_TAG_A: */ __pyx_v_is_head = 1; /* "gumbocy.pyx":484 * self.current_stack.append(tag_name) * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: # <<<<<<<<<<<<<< * is_head = 1 * */ goto __pyx_L33; } /* "gumbocy.pyx":487 * is_head = 1 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_A: # 
<<<<<<<<<<<<<< * self.open_hyperlink(attrs) * is_hyperlink = 1 */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_A) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":488 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_A: * self.open_hyperlink(attrs) # <<<<<<<<<<<<<< * is_hyperlink = 1 * */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->open_hyperlink(__pyx_v_self, __pyx_v_attrs); /* "gumbocy.pyx":489 * elif node.v.element.tag == gumbocy.GUMBO_TAG_A: * self.open_hyperlink(attrs) * is_hyperlink = 1 # <<<<<<<<<<<<<< * * elif node.v.element.tag == gumbocy.GUMBO_TAG_IMG: */ __pyx_v_is_hyperlink = 1; /* "gumbocy.pyx":487 * is_head = 1 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_A: # <<<<<<<<<<<<<< * self.open_hyperlink(attrs) * is_hyperlink = 1 */ goto __pyx_L33; } /* "gumbocy.pyx":491 * is_hyperlink = 1 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_IMG: # <<<<<<<<<<<<<< * self.close_word_group() * if attrs.values.get(ATTR_ALT): */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_IMG) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":492 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_IMG: * self.close_word_group() # <<<<<<<<<<<<<< * if attrs.values.get(ATTR_ALT): * self.add_text(attrs.values[ATTR_ALT]) */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->close_word_group(__pyx_v_self); /* "gumbocy.pyx":493 * elif node.v.element.tag == gumbocy.GUMBO_TAG_IMG: * self.close_word_group() * if attrs.values.get(ATTR_ALT): # <<<<<<<<<<<<<< * self.add_text(attrs.values[ATTR_ALT]) * self.close_word_group() */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 493, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ALT); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 493, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, 
Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 493, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 493, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":494 * self.close_word_group() * if attrs.values.get(ATTR_ALT): * self.add_text(attrs.values[ATTR_ALT]) # <<<<<<<<<<<<<< * self.close_word_group() * */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 494, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_ALT); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 494, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 494, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->add_text(__pyx_v_self, __pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":495 * if attrs.values.get(ATTR_ALT): * self.add_text(attrs.values[ATTR_ALT]) * self.close_word_group() # <<<<<<<<<<<<<< * * # Text extraction from image filenames disabled for now */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->close_word_group(__pyx_v_self); /* "gumbocy.pyx":493 * elif node.v.element.tag == gumbocy.GUMBO_TAG_IMG: * self.close_word_group() * if attrs.values.get(ATTR_ALT): # <<<<<<<<<<<<<< * self.add_text(attrs.values[ATTR_ALT]) * self.close_word_group() */ } /* "gumbocy.pyx":491 * is_hyperlink = 1 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_IMG: # <<<<<<<<<<<<<< * self.close_word_group() * if attrs.values.get(ATTR_ALT): */ } __pyx_L33:; /* "gumbocy.pyx":504 * * * if is_head: # <<<<<<<<<<<<<< * if node.v.element.tag == gumbocy.GUMBO_TAG_LINK: * */ __pyx_t_2 = 
(__pyx_v_is_head != 0); if (__pyx_t_2) { /* "gumbocy.pyx":505 * * if is_head: * if node.v.element.tag == gumbocy.GUMBO_TAG_LINK: # <<<<<<<<<<<<<< * * # TODO: more properties */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_LINK) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":508 * * # TODO: more properties * if attrs.values.get(ATTR_REL) and attrs.values.get(ATTR_HREF): # <<<<<<<<<<<<<< * self.analysis.setdefault("head_links", []) * self.analysis["head_links"].append({"rel": attrs.values[ATTR_REL], "href": attrs.values[ATTR_HREF]}) */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 508, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_REL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_1) { } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L38_bool_binop_done; } if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 508, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_5, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 508, __pyx_L1_error) 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = __pyx_t_1; __pyx_L38_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":509 * # TODO: more properties * if attrs.values.get(ATTR_REL) and attrs.values.get(ATTR_HREF): * self.analysis.setdefault("head_links", []) # <<<<<<<<<<<<<< * self.analysis["head_links"].append({"rel": attrs.values[ATTR_REL], "href": attrs.values[ATTR_HREF]}) * */ if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "setdefault"); __PYX_ERR(0, 509, __pyx_L1_error) } __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_SetDefault(__pyx_v_self->analysis, __pyx_n_s_head_links, __pyx_t_3, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "gumbocy.pyx":510 * if attrs.values.get(ATTR_REL) and attrs.values.get(ATTR_HREF): * self.analysis.setdefault("head_links", []) * self.analysis["head_links"].append({"rel": attrs.values[ATTR_REL], "href": attrs.values[ATTR_HREF]}) # <<<<<<<<<<<<<< * * elif self.has_metas_whitelist and node.v.element.tag == gumbocy.GUMBO_TAG_META: */ if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 510, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyDict_GetItem(__pyx_v_self->analysis, __pyx_n_s_head_links); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 510, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_REL); if (unlikely(!__pyx_t_6)) 
__PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_11 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_6); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_rel, __pyx_t_11) < 0) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 510, __pyx_L1_error) } __pyx_t_11 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __pyx_t_6 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_11); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_href, __pyx_t_6) < 0) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_10 = __Pyx_PyObject_Append(__pyx_t_5, __pyx_t_3); if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":508 * * # TODO: more properties * if attrs.values.get(ATTR_REL) and attrs.values.get(ATTR_HREF): # <<<<<<<<<<<<<< * self.analysis.setdefault("head_links", []) * self.analysis["head_links"].append({"rel": attrs.values[ATTR_REL], "href": attrs.values[ATTR_HREF]}) */ } /* "gumbocy.pyx":505 * * if is_head: * if node.v.element.tag == gumbocy.GUMBO_TAG_LINK: # <<<<<<<<<<<<<< * * # TODO: more properties */ goto __pyx_L36; } /* "gumbocy.pyx":512 * self.analysis["head_links"].append({"rel": attrs.values[ATTR_REL], "href": attrs.values[ATTR_HREF]}) * * elif self.has_metas_whitelist and node.v.element.tag == gumbocy.GUMBO_TAG_META: # <<<<<<<<<<<<<< * * if attrs.values.get(ATTR_CONTENT): */ __pyx_t_1 = 
(__pyx_v_self->has_metas_whitelist != 0); if (__pyx_t_1) { } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L40_bool_binop_done; } __pyx_t_1 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_META) != 0); __pyx_t_2 = __pyx_t_1; __pyx_L40_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":514 * elif self.has_metas_whitelist and node.v.element.tag == gumbocy.GUMBO_TAG_META: * * if attrs.values.get(ATTR_CONTENT): # <<<<<<<<<<<<<< * * if attrs.values.get(ATTR_NAME): */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 514, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_CONTENT); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 514, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 514, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 514, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":516 * if attrs.values.get(ATTR_CONTENT): * * if attrs.values.get(ATTR_NAME): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_NAME], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 516, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_NAME); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_5, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_2 = 
__Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":517 * * if attrs.values.get(ATTR_NAME): * if re2_search(attrs.values[ATTR_NAME], deref(self.metas_whitelist)): # <<<<<<<<<<<<<< * self.analysis.setdefault("head_metas", {}) * self.analysis["head_metas"][attrs.values[ATTR_NAME]] = str(attrs.values[ATTR_CONTENT]).strip() */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 517, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_NAME); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_t_5); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) __PYX_ERR(0, 517, __pyx_L1_error) __pyx_t_2 = (__pyx_f_7gumbocy_re2_search(__pyx_t_9, (*__pyx_v_self->metas_whitelist)) != 0); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":518 * if attrs.values.get(ATTR_NAME): * if re2_search(attrs.values[ATTR_NAME], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) # <<<<<<<<<<<<<< * self.analysis["head_metas"][attrs.values[ATTR_NAME]] = str(attrs.values[ATTR_CONTENT]).strip() * */ if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "setdefault"); __PYX_ERR(0, 518, __pyx_L1_error) } __pyx_t_5 = PyDict_New(); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 518, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = __Pyx_PyDict_SetDefault(__pyx_v_self->analysis, __pyx_n_s_head_metas, __pyx_t_5, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 518, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":519 * if re2_search(attrs.values[ATTR_NAME], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) * self.analysis["head_metas"][attrs.values[ATTR_NAME]] = str(attrs.values[ATTR_CONTENT]).strip() # <<<<<<<<<<<<<< * * elif attrs.values.get(ATTR_PROPERTY): */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 519, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_CONTENT); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_5, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_strip); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_5))) { __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); if (likely(__pyx_t_6)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_5, function); } } if (__pyx_t_6) { __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_6); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); 
__pyx_t_6 = 0; } else { __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 519, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 519, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyDict_GetItem(__pyx_v_self->analysis, __pyx_n_s_head_metas); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 519, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_NAME); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_11 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_6); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (unlikely(PyObject_SetItem(__pyx_t_5, __pyx_t_11, __pyx_t_3) < 0)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gumbocy.pyx":517 * * if attrs.values.get(ATTR_NAME): * if re2_search(attrs.values[ATTR_NAME], deref(self.metas_whitelist)): # <<<<<<<<<<<<<< * self.analysis.setdefault("head_metas", {}) * self.analysis["head_metas"][attrs.values[ATTR_NAME]] = str(attrs.values[ATTR_CONTENT]).strip() */ } /* "gumbocy.pyx":516 * if attrs.values.get(ATTR_CONTENT): * * if attrs.values.get(ATTR_NAME): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_NAME], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) */ goto __pyx_L43; } /* "gumbocy.pyx":521 * self.analysis["head_metas"][attrs.values[ATTR_NAME]] = str(attrs.values[ATTR_CONTENT]).strip() * * elif attrs.values.get(ATTR_PROPERTY): # 
<<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_PROPERTY], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 521, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_PROPERTY); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 521, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_11 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_3, Py_None); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 521, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 521, __pyx_L1_error) __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (__pyx_t_2) { /* "gumbocy.pyx":522 * * elif attrs.values.get(ATTR_PROPERTY): * if re2_search(attrs.values[ATTR_PROPERTY], deref(self.metas_whitelist)): # <<<<<<<<<<<<<< * self.analysis.setdefault("head_metas", {}) * self.analysis["head_metas"][attrs.values[ATTR_PROPERTY]] = str(attrs.values[ATTR_CONTENT]).strip() */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 522, __pyx_L1_error) } __pyx_t_11 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_PROPERTY); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 522, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_11); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 522, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_t_3); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) __PYX_ERR(0, 522, __pyx_L1_error) __pyx_t_2 = (__pyx_f_7gumbocy_re2_search(__pyx_t_9, (*__pyx_v_self->metas_whitelist)) != 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_2) { /* 
"gumbocy.pyx":523 * elif attrs.values.get(ATTR_PROPERTY): * if re2_search(attrs.values[ATTR_PROPERTY], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) # <<<<<<<<<<<<<< * self.analysis["head_metas"][attrs.values[ATTR_PROPERTY]] = str(attrs.values[ATTR_CONTENT]).strip() * */ if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "setdefault"); __PYX_ERR(0, 523, __pyx_L1_error) } __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 523, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_11 = __Pyx_PyDict_SetDefault(__pyx_v_self->analysis, __pyx_n_s_head_metas, __pyx_t_3, 1); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 523, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; /* "gumbocy.pyx":524 * if re2_search(attrs.values[ATTR_PROPERTY], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) * self.analysis["head_metas"][attrs.values[ATTR_PROPERTY]] = str(attrs.values[ATTR_CONTENT]).strip() # <<<<<<<<<<<<<< * * elif node.v.element.tag == gumbocy.GUMBO_TAG_BASE: */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 524, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_CONTENT); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_3, NULL); if 
(unlikely(!__pyx_t_5)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_strip); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) { __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_5)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_3, function); } } if (__pyx_t_5) { __pyx_t_11 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } else { __pyx_t_11 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 524, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 524, __pyx_L1_error) } __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_self->analysis, __pyx_n_s_head_metas); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 524, __pyx_L1_error) } __pyx_t_5 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_PROPERTY); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (unlikely(PyObject_SetItem(__pyx_t_3, __pyx_t_6, __pyx_t_11) < 0)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; 
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; /* "gumbocy.pyx":522 * * elif attrs.values.get(ATTR_PROPERTY): * if re2_search(attrs.values[ATTR_PROPERTY], deref(self.metas_whitelist)): # <<<<<<<<<<<<<< * self.analysis.setdefault("head_metas", {}) * self.analysis["head_metas"][attrs.values[ATTR_PROPERTY]] = str(attrs.values[ATTR_CONTENT]).strip() */ } /* "gumbocy.pyx":521 * self.analysis["head_metas"][attrs.values[ATTR_NAME]] = str(attrs.values[ATTR_CONTENT]).strip() * * elif attrs.values.get(ATTR_PROPERTY): # <<<<<<<<<<<<<< * if re2_search(attrs.values[ATTR_PROPERTY], deref(self.metas_whitelist)): * self.analysis.setdefault("head_metas", {}) */ } __pyx_L43:; /* "gumbocy.pyx":514 * elif self.has_metas_whitelist and node.v.element.tag == gumbocy.GUMBO_TAG_META: * * if attrs.values.get(ATTR_CONTENT): # <<<<<<<<<<<<<< * * if attrs.values.get(ATTR_NAME): */ } /* "gumbocy.pyx":512 * self.analysis["head_links"].append({"rel": attrs.values[ATTR_REL], "href": attrs.values[ATTR_HREF]}) * * elif self.has_metas_whitelist and node.v.element.tag == gumbocy.GUMBO_TAG_META: # <<<<<<<<<<<<<< * * if attrs.values.get(ATTR_CONTENT): */ goto __pyx_L36; } /* "gumbocy.pyx":526 * self.analysis["head_metas"][attrs.values[ATTR_PROPERTY]] = str(attrs.values[ATTR_CONTENT]).strip() * * elif node.v.element.tag == gumbocy.GUMBO_TAG_BASE: # <<<<<<<<<<<<<< * if attrs.values.get(ATTR_HREF) and "base_url" not in self.analysis: * self.analysis["base_url"] = attrs.values[ATTR_HREF] */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_BASE) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":527 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_BASE: * if attrs.values.get(ATTR_HREF) and "base_url" not in self.analysis: # <<<<<<<<<<<<<< * self.analysis["base_url"] = attrs.values[ATTR_HREF] * */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "get"); __PYX_ERR(0, 527, __pyx_L1_error) } __pyx_t_11 = 
__Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 527, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __pyx_t_6 = __Pyx_PyDict_GetItemDefault(__pyx_v_attrs->values, __pyx_t_11, Py_None); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 527, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 527, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_1) { } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L47_bool_binop_done; } if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); __PYX_ERR(0, 527, __pyx_L1_error) } __pyx_t_1 = (__Pyx_PyDict_ContainsTF(__pyx_n_s_base_url, __pyx_v_self->analysis, Py_NE)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 527, __pyx_L1_error) __pyx_t_12 = (__pyx_t_1 != 0); __pyx_t_2 = __pyx_t_12; __pyx_L47_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":528 * elif node.v.element.tag == gumbocy.GUMBO_TAG_BASE: * if attrs.values.get(ATTR_HREF) and "base_url" not in self.analysis: * self.analysis["base_url"] = attrs.values[ATTR_HREF] # <<<<<<<<<<<<<< * * # TODO is_article */ if (unlikely(__pyx_v_attrs->values == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 528, __pyx_L1_error) } __pyx_t_6 = __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_e_7gumbocy_ATTR_HREF); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_11 = __Pyx_PyDict_GetItem(__pyx_v_attrs->values, __pyx_t_6); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 528, __pyx_L1_error) } if 
(unlikely(PyDict_SetItem(__pyx_v_self->analysis, __pyx_n_s_base_url, __pyx_t_11) < 0)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; /* "gumbocy.pyx":527 * * elif node.v.element.tag == gumbocy.GUMBO_TAG_BASE: * if attrs.values.get(ATTR_HREF) and "base_url" not in self.analysis: # <<<<<<<<<<<<<< * self.analysis["base_url"] = attrs.values[ATTR_HREF] * */ } /* "gumbocy.pyx":526 * self.analysis["head_metas"][attrs.values[ATTR_PROPERTY]] = str(attrs.values[ATTR_CONTENT]).strip() * * elif node.v.element.tag == gumbocy.GUMBO_TAG_BASE: # <<<<<<<<<<<<<< * if attrs.values.get(ATTR_HREF) and "base_url" not in self.analysis: * self.analysis["base_url"] = attrs.values[ATTR_HREF] */ } __pyx_L36:; /* "gumbocy.pyx":504 * * * if is_head: # <<<<<<<<<<<<<< * if node.v.element.tag == gumbocy.GUMBO_TAG_LINK: * */ } /* "gumbocy.pyx":532 * # TODO is_article * * if not is_hidden: # <<<<<<<<<<<<<< * is_hidden = self.guess_node_hidden(node, attrs) * */ __pyx_t_2 = ((!(__pyx_v_is_hidden != 0)) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":533 * * if not is_hidden: * is_hidden = self.guess_node_hidden(node, attrs) # <<<<<<<<<<<<<< * * if is_boilerplate and not is_boilerplate_bypassed: */ __pyx_v_is_hidden = ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->guess_node_hidden(__pyx_v_self, __pyx_v_node, __pyx_v_attrs); /* "gumbocy.pyx":532 * # TODO is_article * * if not is_hidden: # <<<<<<<<<<<<<< * is_hidden = self.guess_node_hidden(node, attrs) * */ } /* "gumbocy.pyx":535 * is_hidden = self.guess_node_hidden(node, attrs) * * if is_boilerplate and not is_boilerplate_bypassed: # <<<<<<<<<<<<<< * if self.tags_boilerplate_bypass.count(tag_n): * is_boilerplate_bypassed = True */ __pyx_t_12 = (__pyx_v_is_boilerplate != 0); if (__pyx_t_12) { } else { __pyx_t_2 = __pyx_t_12; goto __pyx_L51_bool_binop_done; } __pyx_t_12 = ((!(__pyx_v_is_boilerplate_bypassed != 0)) != 0); __pyx_t_2 = __pyx_t_12; __pyx_L51_bool_binop_done:; if (__pyx_t_2) { /* 
"gumbocy.pyx":536 * * if is_boilerplate and not is_boilerplate_bypassed: * if self.tags_boilerplate_bypass.count(tag_n): # <<<<<<<<<<<<<< * is_boilerplate_bypassed = True * */ __pyx_t_2 = (__pyx_v_self->tags_boilerplate_bypass.count(__pyx_v_tag_n) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":537 * if is_boilerplate and not is_boilerplate_bypassed: * if self.tags_boilerplate_bypass.count(tag_n): * is_boilerplate_bypassed = True # <<<<<<<<<<<<<< * * if not is_boilerplate: */ __pyx_v_is_boilerplate_bypassed = 1; /* "gumbocy.pyx":536 * * if is_boilerplate and not is_boilerplate_bypassed: * if self.tags_boilerplate_bypass.count(tag_n): # <<<<<<<<<<<<<< * is_boilerplate_bypassed = True * */ } /* "gumbocy.pyx":535 * is_hidden = self.guess_node_hidden(node, attrs) * * if is_boilerplate and not is_boilerplate_bypassed: # <<<<<<<<<<<<<< * if self.tags_boilerplate_bypass.count(tag_n): * is_boilerplate_bypassed = True */ } /* "gumbocy.pyx":539 * is_boilerplate_bypassed = True * * if not is_boilerplate: # <<<<<<<<<<<<<< * is_boilerplate = self.guess_node_boilerplate(node, attrs) * */ __pyx_t_2 = ((!(__pyx_v_is_boilerplate != 0)) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":540 * * if not is_boilerplate: * is_boilerplate = self.guess_node_boilerplate(node, attrs) # <<<<<<<<<<<<<< * * # print " " * level, "BOILER", tag_name, is_boilerplate, dict(attrs.values), attrs.classes */ __pyx_v_is_boilerplate = ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->guess_node_boilerplate(__pyx_v_self, __pyx_v_node, __pyx_v_attrs); /* "gumbocy.pyx":539 * is_boilerplate_bypassed = True * * if not is_boilerplate: # <<<<<<<<<<<<<< * is_boilerplate = self.guess_node_boilerplate(node, attrs) * */ } /* "gumbocy.pyx":545 * * # Close the word group * if self.tags_separators.count(tag_n): # <<<<<<<<<<<<<< * self.close_word_group() * */ __pyx_t_2 = (__pyx_v_self->tags_separators.count(__pyx_v_tag_n) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":546 * # Close the word group * if 
self.tags_separators.count(tag_n): * self.close_word_group() # <<<<<<<<<<<<<< * * # Call _traverse_node() recursively for each of the children */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->close_word_group(__pyx_v_self); /* "gumbocy.pyx":545 * * # Close the word group * if self.tags_separators.count(tag_n): # <<<<<<<<<<<<<< * self.close_word_group() * */ } /* "gumbocy.pyx":549 * * # Call _traverse_node() recursively for each of the children * for i in range(node.v.element.children.length): # <<<<<<<<<<<<<< * child = node.v.element.children.data[i] * if self._traverse_node(level + 1, child, is_head, is_hidden, is_boilerplate, is_boilerplate_bypassed, is_hyperlink) == 1: */ __pyx_t_13 = __pyx_v_node->v.element.children.length; for (__pyx_t_14 = 0; __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { __pyx_v_i = __pyx_t_14; /* "gumbocy.pyx":550 * # Call _traverse_node() recursively for each of the children * for i in range(node.v.element.children.length): * child = node.v.element.children.data[i] # <<<<<<<<<<<<<< * if self._traverse_node(level + 1, child, is_head, is_hidden, is_boilerplate, is_boilerplate_bypassed, is_hyperlink) == 1: * break */ __pyx_v_child = ((GumboNode *)(__pyx_v_node->v.element.children.data[__pyx_v_i])); /* "gumbocy.pyx":551 * for i in range(node.v.element.children.length): * child = node.v.element.children.data[i] * if self._traverse_node(level + 1, child, is_head, is_hidden, is_boilerplate, is_boilerplate_bypassed, is_hyperlink) == 1: # <<<<<<<<<<<<<< * break * */ __pyx_t_2 = ((((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->_traverse_node(__pyx_v_self, (__pyx_v_level + 1), __pyx_v_child, __pyx_v_is_head, __pyx_v_is_hidden, __pyx_v_is_boilerplate, __pyx_v_is_boilerplate_bypassed, __pyx_v_is_hyperlink) == 1) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":552 * child = node.v.element.children.data[i] * if self._traverse_node(level + 1, child, is_head, is_hidden, is_boilerplate, is_boilerplate_bypassed, 
is_hyperlink) == 1: * break # <<<<<<<<<<<<<< * * # Close the word group */ goto __pyx_L57_break; /* "gumbocy.pyx":551 * for i in range(node.v.element.children.length): * child = node.v.element.children.data[i] * if self._traverse_node(level + 1, child, is_head, is_hidden, is_boilerplate, is_boilerplate_bypassed, is_hyperlink) == 1: # <<<<<<<<<<<<<< * break * */ } } __pyx_L57_break:; /* "gumbocy.pyx":555 * * # Close the word group * if self.tags_separators.count(tag_n): # <<<<<<<<<<<<<< * self.close_word_group() * */ __pyx_t_2 = (__pyx_v_self->tags_separators.count(__pyx_v_tag_n) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":556 * # Close the word group * if self.tags_separators.count(tag_n): * self.close_word_group() # <<<<<<<<<<<<<< * * self.current_stack.pop() */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->close_word_group(__pyx_v_self); /* "gumbocy.pyx":555 * * # Close the word group * if self.tags_separators.count(tag_n): # <<<<<<<<<<<<<< * self.close_word_group() * */ } /* "gumbocy.pyx":558 * self.close_word_group() * * self.current_stack.pop() # <<<<<<<<<<<<<< * * if node.v.element.tag == gumbocy.GUMBO_TAG_A: */ if (unlikely(__pyx_v_self->current_stack == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "pop"); __PYX_ERR(0, 558, __pyx_L1_error) } __pyx_t_11 = __Pyx_PyList_Pop(__pyx_v_self->current_stack); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 558, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; /* "gumbocy.pyx":560 * self.current_stack.pop() * * if node.v.element.tag == gumbocy.GUMBO_TAG_A: # <<<<<<<<<<<<<< * self.close_hyperlink() * */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_A) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":561 * * if node.v.element.tag == gumbocy.GUMBO_TAG_A: * self.close_hyperlink() # <<<<<<<<<<<<<< * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser 
*)__pyx_v_self->__pyx_vtab)->close_hyperlink(__pyx_v_self); /* "gumbocy.pyx":560 * self.current_stack.pop() * * if node.v.element.tag == gumbocy.GUMBO_TAG_A: # <<<<<<<<<<<<<< * self.close_hyperlink() * */ } /* "gumbocy.pyx":563 * self.close_hyperlink() * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: # <<<<<<<<<<<<<< * if self.head_only: * return 1 */ __pyx_t_2 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_HEAD) != 0); if (__pyx_t_2) { /* "gumbocy.pyx":564 * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: * if self.head_only: # <<<<<<<<<<<<<< * return 1 * */ __pyx_t_2 = (__pyx_v_self->head_only != 0); if (__pyx_t_2) { /* "gumbocy.pyx":565 * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: * if self.head_only: * return 1 # <<<<<<<<<<<<<< * * return 0 */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":564 * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: * if self.head_only: # <<<<<<<<<<<<<< * return 1 * */ } /* "gumbocy.pyx":563 * self.close_hyperlink() * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD: # <<<<<<<<<<<<<< * if self.head_only: * return 1 */ } /* "gumbocy.pyx":442 * self.add_text(node.v.text.text) * * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: # <<<<<<<<<<<<<< * * tag_n = node.v.element.tag */ break; default: break; } /* "gumbocy.pyx":567 * return 1 * * return 0 # <<<<<<<<<<<<<< * * def parse(self, char* html): */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":424 * self.current_hyperlink = None * * cdef bint _traverse_node(self, int level, gumbocy.GumboNode* node, bint is_head, bint is_hidden, bint is_boilerplate, bint is_boilerplate_bypassed, bint is_hyperlink): # <<<<<<<<<<<<<< * """ Traverses the node tree. 
Return 1 to stop at this level """ * */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_5); __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_11); __Pyx_WriteUnraisable("gumbocy.HTMLParser._traverse_node", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0); __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF(__pyx_v_py_tag_name); __Pyx_XDECREF((PyObject *)__pyx_v_attrs); __Pyx_XDECREF(__pyx_v_v); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":569 * return 0 * * def parse(self, char* html): # <<<<<<<<<<<<<< * """ Do the actual parsing of the HTML with gumbo """ * */ /* Python wrapper */ /* Entry point for HTMLParser.parse(html). It converts the single argument object to a C string with __Pyx_PyObject_AsString and forwards self plus that char pointer to __pyx_pf_7gumbocy_10HTMLParser_2parse. Returns the implementation result, or NULL after __Pyx_AddTraceback when the conversion yields NULL with a Python error pending. */ static PyObject *__pyx_pw_7gumbocy_10HTMLParser_3parse(PyObject *__pyx_v_self, PyObject *__pyx_arg_html); /*proto*/ /* C string holding the docstring text of parse; it repeats the .pyx docstring echoed in the comment above. */ static char __pyx_doc_7gumbocy_10HTMLParser_2parse[] = " Do the actual parsing of the HTML with gumbo "; static PyObject *__pyx_pw_7gumbocy_10HTMLParser_3parse(PyObject *__pyx_v_self, PyObject *__pyx_arg_html) { char *__pyx_v_html; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("parse (wrapper)", 0); assert(__pyx_arg_html); /* Argument conversion: a NULL char pointer is treated as a failure only when PyErr_Occurred also reports a pending exception. */ { __pyx_v_html = __Pyx_PyObject_AsString(__pyx_arg_html); if (unlikely((!__pyx_v_html) && PyErr_Occurred())) __PYX_ERR(0, 569, __pyx_L3_error) } goto __pyx_L4_argument_unpacking_done; /* Conversion failure exit: add a traceback frame for gumbocy.HTMLParser.parse and return NULL. */ __pyx_L3_error:; __Pyx_AddTraceback("gumbocy.HTMLParser.parse", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; /* Normal path: hand self (cast to the HTMLParser object struct) and the char pointer to the implementation function. */ __pyx_r = __pyx_pf_7gumbocy_10HTMLParser_2parse(((struct __pyx_obj_7gumbocy_HTMLParser *)__pyx_v_self), ((char *)__pyx_v_html)); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } /* Implementation of HTMLParser.parse. It calls free() through the vtable of self, stores the result of gumbo_parse(html) in self->output, sets self->has_output to 1 and returns a new reference to None. If the free() call returns NULL it adds a traceback frame and returns NULL instead. */ static PyObject *__pyx_pf_7gumbocy_10HTMLParser_2parse(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, char *__pyx_v_html) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("parse", 0); /* "gumbocy.pyx":572 * """ Do the actual parsing of the HTML 
with gumbo """ * * self.free() # <<<<<<<<<<<<<< * self.output = gumbocy.gumbo_parse(html) * self.has_output = 1 */ /* Call free() through the vtable of self before parsing. The returned object is only checked for NULL and then released. */ __pyx_t_1 = ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->free(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 572, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":573 * * self.free() * self.output = gumbocy.gumbo_parse(html) # <<<<<<<<<<<<<< * self.has_output = 1 * */ /* The result of gumbo_parse is stored without a NULL check. NOTE(review): html is a char pointer obtained from the argument object by the wrapper, and no reference to that object is taken in this function. If the gumbo output keeps pointers into the input buffer, the caller has to keep that object alive for as long as self->output is used -- TODO confirm against the gumbo API. */ __pyx_v_self->output = gumbo_parse(__pyx_v_html); /* "gumbocy.pyx":574 * self.free() * self.output = gumbocy.gumbo_parse(html) * self.has_output = 1 # <<<<<<<<<<<<<< * * def analyze(self, url=None): */ __pyx_v_self->has_output = 1; /* "gumbocy.pyx":569 * return 0 * * def parse(self, char* html): # <<<<<<<<<<<<<< * """ Do the actual parsing of the HTML with gumbo """ * */ /* Success path: the return value is a new reference to None. */ /* function exit code */ __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; /* Error path: release the temporary if set, add a traceback frame and return NULL. */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_AddTraceback("gumbocy.HTMLParser.parse", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":576 * self.has_output = 1 * * def analyze(self, url=None): # <<<<<<<<<<<<<< * """ Traverse the parsed tree and return the results """ * */ /* Python wrapper */ /* Entry point for HTMLParser.analyze(url=None). It unpacks at most one argument named url, given positionally or by keyword and defaulting to None, then forwards self and url to __pyx_pf_7gumbocy_10HTMLParser_4analyze. On an argument error it adds a traceback frame and returns NULL. */ static PyObject *__pyx_pw_7gumbocy_10HTMLParser_5analyze(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ /* C string holding the docstring text of analyze; it repeats the .pyx docstring echoed in the comment above. */ static char __pyx_doc_7gumbocy_10HTMLParser_4analyze[] = " Traverse the parsed tree and return the results "; static PyObject *__pyx_pw_7gumbocy_10HTMLParser_5analyze(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_url = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("analyze (wrapper)", 0); { /* Zero-terminated list of accepted argument names, and the single value slot, which starts out as None. */ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_url,0}; PyObject* values[1] = {0}; values[0] = ((PyObject *)Py_None); if (unlikely(__pyx_kwds)) { 
Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); /* Keyword path: take url from position 0 when one positional argument was given; more than one positional argument jumps to the argtuple error. */ switch (pos_args) { case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); /* intentional fallthrough into case 0 */ case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); /* Look url up among the keywords only when it was not supplied positionally; a hit consumes one keyword from the count. */ switch (pos_args) { case 0: if (kw_args > 0) { PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_url); if (value) { values[0] = value; kw_args--; } } } /* Keywords still unaccounted for are handed to __Pyx_ParseOptionalKeywords; a negative return is treated as an error. */ if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "analyze") < 0)) __PYX_ERR(0, 576, __pyx_L3_error) } } else { /* No keyword dict: positional arguments only, with the same zero-or-one rule. */ switch (PyTuple_GET_SIZE(__pyx_args)) { case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); /* intentional fallthrough into case 0 */ case 0: break; default: goto __pyx_L5_argtuple_error; } } __pyx_v_url = values[0]; } goto __pyx_L4_argument_unpacking_done; /* Wrong positional count: raise through __Pyx_RaiseArgtupleInvalid, then continue into the shared error exit that follows. */ __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("analyze", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 576, __pyx_L3_error) /* Shared error exit: add a traceback frame for gumbocy.HTMLParser.analyze and return NULL. */ __pyx_L3_error:; __Pyx_AddTraceback("gumbocy.HTMLParser.analyze", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; /* Forward self and url to the implementation. The url value was stored in values[0] without an INCREF in this wrapper. */ __pyx_r = __pyx_pf_7gumbocy_10HTMLParser_4analyze(((struct __pyx_obj_7gumbocy_HTMLParser *)__pyx_v_self), __pyx_v_url); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_7gumbocy_10HTMLParser_4analyze(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, PyObject *__pyx_v_url) { PyObject *__pyx_v_parsed = NULL; PyObject *__pyx_v_netloc = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; int __pyx_t_3; char *__pyx_t_4; PyObject *__pyx_t_5 = NULL; PyObject *__pyx_t_6 = NULL; PyObject *__pyx_t_7 = NULL; PyObject *__pyx_t_8 = NULL; char const *__pyx_t_9; re2::RE2 *__pyx_t_10; __Pyx_RefNannySetupContext("analyze", 0); /* "gumbocy.pyx":579 * """ Traverse the parsed tree and return the results """ * * self.analysis = {} # <<<<<<<<<<<<<< * self.has_url = 0 * */ 
__pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 579, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_self->analysis); __Pyx_DECREF(__pyx_v_self->analysis); __pyx_v_self->analysis = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":580 * * self.analysis = {} * self.has_url = 0 # <<<<<<<<<<<<<< * * if self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks: */ __pyx_v_self->has_url = 0; /* "gumbocy.pyx":582 * self.has_url = 0 * * if self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks: # <<<<<<<<<<<<<< * * if url: */ __pyx_t_3 = (__pyx_v_self->analyze_internal_hyperlinks != 0); if (!__pyx_t_3) { } else { __pyx_t_2 = __pyx_t_3; goto __pyx_L4_bool_binop_done; } __pyx_t_3 = (__pyx_v_self->analyze_external_hyperlinks != 0); __pyx_t_2 = __pyx_t_3; __pyx_L4_bool_binop_done:; if (__pyx_t_2) { /* "gumbocy.pyx":584 * if self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks: * * if url: # <<<<<<<<<<<<<< * self.has_url = 1 * self.url = url */ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_url); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 584, __pyx_L1_error) if (__pyx_t_2) { /* "gumbocy.pyx":585 * * if url: * self.has_url = 1 # <<<<<<<<<<<<<< * self.url = url * parsed = urlparse.urlparse(url) */ __pyx_v_self->has_url = 1; /* "gumbocy.pyx":586 * if url: * self.has_url = 1 * self.url = url # <<<<<<<<<<<<<< * parsed = urlparse.urlparse(url) * netloc = parsed.netloc.lower() */ __pyx_t_4 = __Pyx_PyObject_AsString(__pyx_v_url); if (unlikely((!__pyx_t_4) && PyErr_Occurred())) __PYX_ERR(0, 586, __pyx_L1_error) __pyx_v_self->url = __pyx_t_4; /* "gumbocy.pyx":587 * self.has_url = 1 * self.url = url * parsed = urlparse.urlparse(url) # <<<<<<<<<<<<<< * netloc = parsed.netloc.lower() * self.netloc = netloc */ __pyx_t_5 = __Pyx_GetModuleGlobalName(__pyx_n_s_urlparse); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 587, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_urlparse); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 587, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_6))) { __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_5)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_6, function); } } if (!__pyx_t_5) { __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_url); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 587, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); } else { __pyx_t_7 = PyTuple_New(1+1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 587, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_5); __pyx_t_5 = NULL; __Pyx_INCREF(__pyx_v_url); __Pyx_GIVEREF(__pyx_v_url); PyTuple_SET_ITEM(__pyx_t_7, 0+1, __pyx_v_url); __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 587, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_parsed = __pyx_t_1; __pyx_t_1 = 0; /* "gumbocy.pyx":588 * self.url = url * parsed = urlparse.urlparse(url) * netloc = parsed.netloc.lower() # <<<<<<<<<<<<<< * self.netloc = netloc * self.scheme = parsed.scheme */ __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_parsed, __pyx_n_s_netloc); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 588, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_lower); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 588, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_7); if (likely(__pyx_t_6)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); 
__Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_7, function); } } if (__pyx_t_6) { __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 588, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } else { __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 588, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_v_netloc = __pyx_t_1; __pyx_t_1 = 0; /* "gumbocy.pyx":589 * parsed = urlparse.urlparse(url) * netloc = parsed.netloc.lower() * self.netloc = netloc # <<<<<<<<<<<<<< * self.scheme = parsed.scheme * self.internal_netloc_search = new re2cy.RE2("^http(?:s)?://%s" % re.escape(self.netloc)) */ __pyx_t_4 = __Pyx_PyObject_AsString(__pyx_v_netloc); if (unlikely((!__pyx_t_4) && PyErr_Occurred())) __PYX_ERR(0, 589, __pyx_L1_error) __pyx_v_self->netloc = __pyx_t_4; /* "gumbocy.pyx":590 * netloc = parsed.netloc.lower() * self.netloc = netloc * self.scheme = parsed.scheme # <<<<<<<<<<<<<< * self.internal_netloc_search = new re2cy.RE2("^http(?:s)?://%s" % re.escape(self.netloc)) * */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_parsed, __pyx_n_s_scheme); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 590, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_4) && PyErr_Occurred())) __PYX_ERR(0, 590, __pyx_L1_error) __pyx_v_self->scheme = __pyx_t_4; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":591 * self.netloc = netloc * self.scheme = parsed.scheme * self.internal_netloc_search = new re2cy.RE2("^http(?:s)?://%s" % re.escape(self.netloc)) # <<<<<<<<<<<<<< * * if self.analyze_internal_hyperlinks: */ __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_re); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 591, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_escape); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 591, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_7 = __Pyx_PyBytes_FromString(__pyx_v_self->netloc); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 591, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_5 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_6))) { __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_5)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_6, function); } } if (!__pyx_t_5) { __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 591, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_GOTREF(__pyx_t_1); } else { __pyx_t_8 = PyTuple_New(1+1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 591, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_5); __pyx_t_5 = NULL; __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_8, 0+1, __pyx_t_7); __pyx_t_7 = 0; __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_8, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 591, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = __Pyx_PyString_Format(__pyx_kp_s_http_s_s, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 591, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_t_6); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) __PYX_ERR(0, 591, __pyx_L1_error) try { __pyx_t_10 = new re2::RE2(__pyx_t_9); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 591, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_self->internal_netloc_search = __pyx_t_10; /* "gumbocy.pyx":584 * if self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks: * * if url: # <<<<<<<<<<<<<< * self.has_url = 1 * self.url = url */ } /* "gumbocy.pyx":593 * self.internal_netloc_search = new re2cy.RE2("^http(?:s)?://%s" % re.escape(self.netloc)) * * if self.analyze_internal_hyperlinks: # <<<<<<<<<<<<<< * self.analysis["internal_hyperlinks"] = [] * */ __pyx_t_2 = (__pyx_v_self->analyze_internal_hyperlinks != 0); if (__pyx_t_2) { /* "gumbocy.pyx":594 * * if self.analyze_internal_hyperlinks: * self.analysis["internal_hyperlinks"] = [] # <<<<<<<<<<<<<< * * if self.analyze_external_hyperlinks: */ __pyx_t_6 = PyList_New(0); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 594, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 594, __pyx_L1_error) } if (unlikely(PyDict_SetItem(__pyx_v_self->analysis, __pyx_n_s_internal_hyperlinks, __pyx_t_6) < 0)) __PYX_ERR(0, 594, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":593 * self.internal_netloc_search = new re2cy.RE2("^http(?:s)?://%s" % re.escape(self.netloc)) * * if self.analyze_internal_hyperlinks: # <<<<<<<<<<<<<< * self.analysis["internal_hyperlinks"] = [] * */ } /* "gumbocy.pyx":596 * self.analysis["internal_hyperlinks"] = [] * * if self.analyze_external_hyperlinks: # <<<<<<<<<<<<<< * self.analysis["external_hyperlinks"] = [] * */ __pyx_t_2 = (__pyx_v_self->analyze_external_hyperlinks != 0); if (__pyx_t_2) { /* "gumbocy.pyx":597 * * if self.analyze_external_hyperlinks: * self.analysis["external_hyperlinks"] = [] # <<<<<<<<<<<<<< * * if self.analyze_word_groups: */ __pyx_t_6 = PyList_New(0); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 597, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if 
(unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 597, __pyx_L1_error) } if (unlikely(PyDict_SetItem(__pyx_v_self->analysis, __pyx_n_s_external_hyperlinks, __pyx_t_6) < 0)) __PYX_ERR(0, 597, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":596 * self.analysis["internal_hyperlinks"] = [] * * if self.analyze_external_hyperlinks: # <<<<<<<<<<<<<< * self.analysis["external_hyperlinks"] = [] * */ } /* "gumbocy.pyx":582 * self.has_url = 0 * * if self.analyze_internal_hyperlinks or self.analyze_external_hyperlinks: # <<<<<<<<<<<<<< * * if url: */ } /* "gumbocy.pyx":599 * self.analysis["external_hyperlinks"] = [] * * if self.analyze_word_groups: # <<<<<<<<<<<<<< * self.analysis["word_groups"] = [] * */ __pyx_t_2 = (__pyx_v_self->analyze_word_groups != 0); if (__pyx_t_2) { /* "gumbocy.pyx":600 * * if self.analyze_word_groups: * self.analysis["word_groups"] = [] # <<<<<<<<<<<<<< * * self.current_stack = [] */ __pyx_t_6 = PyList_New(0); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 600, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); if (unlikely(__pyx_v_self->analysis == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); __PYX_ERR(0, 600, __pyx_L1_error) } if (unlikely(PyDict_SetItem(__pyx_v_self->analysis, __pyx_n_s_word_groups, __pyx_t_6) < 0)) __PYX_ERR(0, 600, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":599 * self.analysis["external_hyperlinks"] = [] * * if self.analyze_word_groups: # <<<<<<<<<<<<<< * self.analysis["word_groups"] = [] * */ } /* "gumbocy.pyx":602 * self.analysis["word_groups"] = [] * * self.current_stack = [] # <<<<<<<<<<<<<< * self.current_word_group = None * self.current_hyperlink = None */ __pyx_t_6 = PyList_New(0); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 602, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_GIVEREF(__pyx_t_6); __Pyx_GOTREF(__pyx_v_self->current_stack); 
__Pyx_DECREF(__pyx_v_self->current_stack); __pyx_v_self->current_stack = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; /* "gumbocy.pyx":603 * * self.current_stack = [] * self.current_word_group = None # <<<<<<<<<<<<<< * self.current_hyperlink = None * */ __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); __Pyx_GOTREF(__pyx_v_self->current_word_group); __Pyx_DECREF(__pyx_v_self->current_word_group); __pyx_v_self->current_word_group = Py_None; /* "gumbocy.pyx":604 * self.current_stack = [] * self.current_word_group = None * self.current_hyperlink = None # <<<<<<<<<<<<<< * * self._traverse_node(0, self.output.root, 0, 0, 0, 0, 0) */ __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); __Pyx_GOTREF(__pyx_v_self->current_hyperlink); __Pyx_DECREF(__pyx_v_self->current_hyperlink); __pyx_v_self->current_hyperlink = Py_None; /* "gumbocy.pyx":606 * self.current_hyperlink = None * * self._traverse_node(0, self.output.root, 0, 0, 0, 0, 0) # <<<<<<<<<<<<<< * * return self.analysis */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->_traverse_node(__pyx_v_self, 0, __pyx_v_self->output->root, 0, 0, 0, 0, 0); /* "gumbocy.pyx":608 * self._traverse_node(0, self.output.root, 0, 0, 0, 0, 0) * * return self.analysis # <<<<<<<<<<<<<< * * # */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_self->analysis); __pyx_r = __pyx_v_self->analysis; goto __pyx_L0; /* "gumbocy.pyx":576 * self.has_output = 1 * * def analyze(self, url=None): # <<<<<<<<<<<<<< * """ Traverse the parsed tree and return the results """ * */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_5); __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_7); __Pyx_XDECREF(__pyx_t_8); __Pyx_AddTraceback("gumbocy.HTMLParser.analyze", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_parsed); __Pyx_XDECREF(__pyx_v_netloc); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":614 * # * * def listnodes(self): # 
<<<<<<<<<<<<<< * """ Return the nodes as a flat list of tuples """ * */ /* Python wrapper */ static PyObject *__pyx_pw_7gumbocy_10HTMLParser_7listnodes(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ static char __pyx_doc_7gumbocy_10HTMLParser_6listnodes[] = " Return the nodes as a flat list of tuples "; static PyObject *__pyx_pw_7gumbocy_10HTMLParser_7listnodes(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("listnodes (wrapper)", 0); __pyx_r = __pyx_pf_7gumbocy_10HTMLParser_6listnodes(((struct __pyx_obj_7gumbocy_HTMLParser *)__pyx_v_self)); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_7gumbocy_10HTMLParser_6listnodes(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("listnodes", 0); /* "gumbocy.pyx":617 * """ Return the nodes as a flat list of tuples """ * * self.nodes = [] # <<<<<<<<<<<<<< * * self._traverse_node_simple(0, self.output.root) */ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 617, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_self->nodes); __Pyx_DECREF(__pyx_v_self->nodes); __pyx_v_self->nodes = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":619 * self.nodes = [] * * self._traverse_node_simple(0, self.output.root) # <<<<<<<<<<<<<< * * return self.nodes */ ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->_traverse_node_simple(__pyx_v_self, 0, __pyx_v_self->output->root); /* "gumbocy.pyx":621 * self._traverse_node_simple(0, self.output.root) * * return self.nodes # <<<<<<<<<<<<<< * * cdef bint _traverse_node_simple(self, int level, gumbocy.GumboNode* node): */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_self->nodes); __pyx_r = __pyx_v_self->nodes; goto __pyx_L0; /* "gumbocy.pyx":614 * # 
* * def listnodes(self): # <<<<<<<<<<<<<< * """ Return the nodes as a flat list of tuples """ * */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_AddTraceback("gumbocy.HTMLParser.listnodes", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "gumbocy.pyx":623 * return self.nodes * * cdef bint _traverse_node_simple(self, int level, gumbocy.GumboNode* node): # <<<<<<<<<<<<<< * """ Traverses the node tree. Return 1 to stop at this level """ * */ static int __pyx_f_7gumbocy_10HTMLParser__traverse_node_simple(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self, int __pyx_v_level, GumboNode *__pyx_v_node) { GumboStringPiece __pyx_v_gsp; int __pyx_v_tag_n; char const *__pyx_v_tag_name; PyObject *__pyx_v_py_tag_name = NULL; int __pyx_v_has_attrs; PyObject *__pyx_v_attrs = NULL; unsigned int __pyx_v_i; GumboAttribute *__pyx_v_attr; PyObject *__pyx_v_attr_name = NULL; PyObject *__pyx_v_multiple_value = NULL; PyObject *__pyx_v_v = NULL; GumboNode *__pyx_v_child; int __pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; PyObject *__pyx_t_2 = NULL; PyObject *__pyx_t_3 = NULL; PyObject *__pyx_t_4 = NULL; int __pyx_t_5; int __pyx_t_6; GumboStringPiece __pyx_t_7; char const *__pyx_t_8; unsigned int __pyx_t_9; unsigned int __pyx_t_10; char const *__pyx_t_11; PyObject *__pyx_t_12 = NULL; PyObject *__pyx_t_13 = NULL; PyObject *__pyx_t_14 = NULL; Py_ssize_t __pyx_t_15; PyObject *(*__pyx_t_16)(PyObject *); __Pyx_RefNannySetupContext("_traverse_node_simple", 0); /* "gumbocy.pyx":628 * cdef GumboStringPiece gsp * * if level > self.nesting_limit: # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_1 = ((__pyx_v_level > __pyx_v_self->nesting_limit) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":629 * * if level > self.nesting_limit: * return 0 # <<<<<<<<<<<<<< * * if node.type == gumbocy.GUMBO_NODE_TEXT: */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":628 * cdef GumboStringPiece gsp * * 
if level > self.nesting_limit: # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":631 * return 0 * * if node.type == gumbocy.GUMBO_NODE_TEXT: # <<<<<<<<<<<<<< * self.nodes.append((level, None, node.v.text.text)) * */ switch (__pyx_v_node->type) { case GUMBO_NODE_TEXT: /* "gumbocy.pyx":632 * * if node.type == gumbocy.GUMBO_NODE_TEXT: * self.nodes.append((level, None, node.v.text.text)) # <<<<<<<<<<<<<< * * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: */ if (unlikely(__pyx_v_self->nodes == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "append"); __PYX_ERR(0, 632, __pyx_L1_error) } __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_level); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 632, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_node->v.text.text); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 632, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 632, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_2); __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); PyTuple_SET_ITEM(__pyx_t_4, 1, Py_None); __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_3); __pyx_t_2 = 0; __pyx_t_3 = 0; __pyx_t_5 = __Pyx_PyList_Append(__pyx_v_self->nodes, __pyx_t_4); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 632, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; /* "gumbocy.pyx":631 * return 0 * * if node.type == gumbocy.GUMBO_NODE_TEXT: # <<<<<<<<<<<<<< * self.nodes.append((level, None, node.v.text.text)) * */ break; /* "gumbocy.pyx":634 * self.nodes.append((level, None, node.v.text.text)) * * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: # <<<<<<<<<<<<<< * * tag_n = node.v.element.tag */ case GUMBO_NODE_ELEMENT: /* "gumbocy.pyx":636 * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: * * tag_n = node.v.element.tag # <<<<<<<<<<<<<< * * if self.head_only and self.tags_ignore_head_only.count(tag_n): 
*/ __pyx_v_tag_n = ((int)__pyx_v_node->v.element.tag); /* "gumbocy.pyx":638 * tag_n = node.v.element.tag * * if self.head_only and self.tags_ignore_head_only.count(tag_n): # <<<<<<<<<<<<<< * return 1 * */ __pyx_t_6 = (__pyx_v_self->head_only != 0); if (__pyx_t_6) { } else { __pyx_t_1 = __pyx_t_6; goto __pyx_L5_bool_binop_done; } __pyx_t_6 = (__pyx_v_self->tags_ignore_head_only.count(__pyx_v_tag_n) != 0); __pyx_t_1 = __pyx_t_6; __pyx_L5_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":639 * * if self.head_only and self.tags_ignore_head_only.count(tag_n): * return 1 # <<<<<<<<<<<<<< * * if self.tags_ignore.count(tag_n): */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":638 * tag_n = node.v.element.tag * * if self.head_only and self.tags_ignore_head_only.count(tag_n): # <<<<<<<<<<<<<< * return 1 * */ } /* "gumbocy.pyx":641 * return 1 * * if self.tags_ignore.count(tag_n): # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_1 = (__pyx_v_self->tags_ignore.count(__pyx_v_tag_n) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":642 * * if self.tags_ignore.count(tag_n): * return 0 # <<<<<<<<<<<<<< * * tag_name = gumbocy.gumbo_normalized_tagname(node.v.element.tag) */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":641 * return 1 * * if self.tags_ignore.count(tag_n): # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":644 * return 0 * * tag_name = gumbocy.gumbo_normalized_tagname(node.v.element.tag) # <<<<<<<<<<<<<< * * # When we find an unknown tag, find its tag_name in the buffer */ __pyx_v_tag_name = gumbo_normalized_tagname(__pyx_v_node->v.element.tag); /* "gumbocy.pyx":647 * * # When we find an unknown tag, find its tag_name in the buffer * if tag_name == b"": # <<<<<<<<<<<<<< * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) */ __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_tag_name); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 647, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_1 = (__Pyx_PyBytes_Equals(__pyx_t_4, __pyx_kp_b__5, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) 
__PYX_ERR(0, 647, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_1) { /* "gumbocy.pyx":648 * # When we find an unknown tag, find its tag_name in the buffer * if tag_name == b"": * gsp = node.v.element.original_tag # <<<<<<<<<<<<<< * gumbo_tag_from_original_text(&gsp) * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! */ __pyx_t_7 = __pyx_v_node->v.element.original_tag; __pyx_v_gsp = __pyx_t_7; /* "gumbocy.pyx":649 * if tag_name == b"": * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) # <<<<<<<<<<<<<< * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! * tag_name = py_tag_name */ gumbo_tag_from_original_text((&__pyx_v_gsp)); /* "gumbocy.pyx":650 * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! # <<<<<<<<<<<<<< * tag_name = py_tag_name * */ __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_gsp.data); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 650, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 650, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 650, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = __Pyx_PyObject_GetSlice(__pyx_t_3, 0, __pyx_v_gsp.length, NULL, NULL, NULL, 1, 1, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 650, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_lower); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 650, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && 
likely(PyMethod_Check(__pyx_t_3))) { __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_2)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_3, function); } } if (__pyx_t_2) { __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 650, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } else { __pyx_t_4 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 650, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_py_tag_name = __pyx_t_4; __pyx_t_4 = 0; /* "gumbocy.pyx":651 * gumbo_tag_from_original_text(&gsp) * py_tag_name = str(gsp.data)[0:gsp.length].lower() # TODO try to do that only in C! * tag_name = py_tag_name # <<<<<<<<<<<<<< * * if self.has_attributes_whitelist: */ __pyx_t_8 = __Pyx_PyObject_AsString(__pyx_v_py_tag_name); if (unlikely((!__pyx_t_8) && PyErr_Occurred())) __PYX_ERR(0, 651, __pyx_L1_error) __pyx_v_tag_name = ((char const *)__pyx_t_8); /* "gumbocy.pyx":647 * * # When we find an unknown tag, find its tag_name in the buffer * if tag_name == b"": # <<<<<<<<<<<<<< * gsp = node.v.element.original_tag * gumbo_tag_from_original_text(&gsp) */ } /* "gumbocy.pyx":653 * tag_name = py_tag_name * * if self.has_attributes_whitelist: # <<<<<<<<<<<<<< * * # Build a dict with all the whitelisted attributes */ __pyx_t_1 = (__pyx_v_self->has_attributes_whitelist != 0); if (__pyx_t_1) { /* "gumbocy.pyx":656 * * # Build a dict with all the whitelisted attributes * has_attrs = False # <<<<<<<<<<<<<< * attrs = False * for i in range(node.v.element.attributes.length): */ __pyx_v_has_attrs = 0; /* "gumbocy.pyx":657 * # Build a dict with all the whitelisted attributes * has_attrs = False * attrs = False # <<<<<<<<<<<<<< * for i in range(node.v.element.attributes.length): * attr = node.v.element.attributes.data[i] */ __Pyx_INCREF(Py_False); __pyx_v_attrs = Py_False; /* 
"gumbocy.pyx":658 * has_attrs = False * attrs = False * for i in range(node.v.element.attributes.length): # <<<<<<<<<<<<<< * attr = node.v.element.attributes.data[i] * attr_name = str(attr.name) */ __pyx_t_9 = __pyx_v_node->v.element.attributes.length; for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { __pyx_v_i = __pyx_t_10; /* "gumbocy.pyx":659 * attrs = False * for i in range(node.v.element.attributes.length): * attr = node.v.element.attributes.data[i] # <<<<<<<<<<<<<< * attr_name = str(attr.name) * if re2_search(attr_name, deref(self.attributes_whitelist)): */ __pyx_v_attr = ((GumboAttribute *)(__pyx_v_node->v.element.attributes.data[__pyx_v_i])); /* "gumbocy.pyx":660 * for i in range(node.v.element.attributes.length): * attr = node.v.element.attributes.data[i] * attr_name = str(attr.name) # <<<<<<<<<<<<<< * if re2_search(attr_name, deref(self.attributes_whitelist)): * if attr_name == b"class": */ __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_attr->name); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)(&PyString_Type)), __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_XDECREF_SET(__pyx_v_attr_name, __pyx_t_4); __pyx_t_4 = 0; /* "gumbocy.pyx":661 * attr = node.v.element.attributes.data[i] * attr_name = str(attr.name) * if re2_search(attr_name, deref(self.attributes_whitelist)): # <<<<<<<<<<<<<< * if attr_name == b"class": * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_attr_name); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 661, __pyx_L1_error) __pyx_t_1 = 
(__pyx_f_7gumbocy_re2_search(__pyx_t_11, (*__pyx_v_self->attributes_whitelist)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":662 * attr_name = str(attr.name) * if re2_search(attr_name, deref(self.attributes_whitelist)): * if attr_name == b"class": # <<<<<<<<<<<<<< * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * if len(multiple_value): */ __pyx_t_1 = (__Pyx_PyBytes_Equals(__pyx_v_attr_name, __pyx_n_b_class, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 662, __pyx_L1_error) if (__pyx_t_1) { /* "gumbocy.pyx":663 * if re2_search(attr_name, deref(self.attributes_whitelist)): * if attr_name == b"class": * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) # <<<<<<<<<<<<<< * if len(multiple_value): * if self.has_classes_ignore: */ __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_RE_SPLIT_WHITESPACE); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_split); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_13 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_strip); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __pyx_t_13 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_14))) { __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_14); if (likely(__pyx_t_13)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_14); __Pyx_INCREF(__pyx_t_13); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_14, function); } } if (__pyx_t_13) { __pyx_t_12 = __Pyx_PyObject_CallOneArg(__pyx_t_14, __pyx_t_13); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } 
else { __pyx_t_12 = __Pyx_PyObject_CallNoArg(__pyx_t_14); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 663, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_t_12, __pyx_n_s_lower); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __pyx_t_12 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_14))) { __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_14); if (likely(__pyx_t_12)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_14); __Pyx_INCREF(__pyx_t_12); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_14, function); } } if (__pyx_t_12) { __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_14, __pyx_t_12); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; } else { __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_14); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 663, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_14 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { __pyx_t_14 = PyMethod_GET_SELF(__pyx_t_2); if (likely(__pyx_t_14)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); __Pyx_INCREF(__pyx_t_14); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_2, function); } } if (!__pyx_t_14) { __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_GOTREF(__pyx_t_4); } else { __pyx_t_12 = PyTuple_New(1+1); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_14); __pyx_t_14 = NULL; __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_12, 0+1, __pyx_t_3); __pyx_t_3 = 0; __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_12, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 663, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = __Pyx_PyFrozenSet_New(__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 663, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF_SET(__pyx_v_multiple_value, ((PyObject*)__pyx_t_2)); __pyx_t_2 = 0; /* "gumbocy.pyx":664 * if attr_name == b"class": * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * if len(multiple_value): # <<<<<<<<<<<<<< * if self.has_classes_ignore: * for v in multiple_value: */ __pyx_t_15 = PySet_GET_SIZE(__pyx_v_multiple_value); if (unlikely(__pyx_t_15 == -1)) __PYX_ERR(0, 664, __pyx_L1_error) __pyx_t_1 = (__pyx_t_15 != 0); if (__pyx_t_1) { /* "gumbocy.pyx":665 * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * if len(multiple_value): * if self.has_classes_ignore: # <<<<<<<<<<<<<< * for v in multiple_value: * if re2_search(v, deref(self.classes_ignore)): */ __pyx_t_1 = (__pyx_v_self->has_classes_ignore != 0); if (__pyx_t_1) { /* "gumbocy.pyx":666 * if len(multiple_value): * if self.has_classes_ignore: * for v in multiple_value: # <<<<<<<<<<<<<< * if re2_search(v, deref(self.classes_ignore)): * return 0 */ __pyx_t_2 = PyObject_GetIter(__pyx_v_multiple_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 666, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_16 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 666, __pyx_L1_error) for (;;) { { __pyx_t_4 = __pyx_t_16(__pyx_t_2); if (unlikely(!__pyx_t_4)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 666, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_4); } __Pyx_XDECREF_SET(__pyx_v_v, __pyx_t_4); __pyx_t_4 = 0; /* "gumbocy.pyx":667 * if self.has_classes_ignore: * for v in 
multiple_value: * if re2_search(v, deref(self.classes_ignore)): # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_v); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 667, __pyx_L1_error) __pyx_t_1 = (__pyx_f_7gumbocy_re2_search(__pyx_t_11, (*__pyx_v_self->classes_ignore)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":668 * for v in multiple_value: * if re2_search(v, deref(self.classes_ignore)): * return 0 # <<<<<<<<<<<<<< * * if not has_attrs: */ __pyx_r = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; goto __pyx_L0; /* "gumbocy.pyx":667 * if self.has_classes_ignore: * for v in multiple_value: * if re2_search(v, deref(self.classes_ignore)): # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":666 * if len(multiple_value): * if self.has_classes_ignore: * for v in multiple_value: # <<<<<<<<<<<<<< * if re2_search(v, deref(self.classes_ignore)): * return 0 */ } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "gumbocy.pyx":665 * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * if len(multiple_value): * if self.has_classes_ignore: # <<<<<<<<<<<<<< * for v in multiple_value: * if re2_search(v, deref(self.classes_ignore)): */ } /* "gumbocy.pyx":670 * return 0 * * if not has_attrs: # <<<<<<<<<<<<<< * attrs = {} * has_attrs = True */ __pyx_t_1 = ((!(__pyx_v_has_attrs != 0)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":671 * * if not has_attrs: * attrs = {} # <<<<<<<<<<<<<< * has_attrs = True * attrs[attr_name] = multiple_value */ __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 671, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF_SET(__pyx_v_attrs, __pyx_t_2); __pyx_t_2 = 0; /* "gumbocy.pyx":672 * if not has_attrs: * attrs = {} * has_attrs = True # <<<<<<<<<<<<<< * attrs[attr_name] = multiple_value * */ __pyx_v_has_attrs = 1; /* "gumbocy.pyx":670 * return 0 * * if not has_attrs: # <<<<<<<<<<<<<< * attrs = {} * has_attrs = True */ } /* "gumbocy.pyx":673 * attrs = {} * has_attrs = True * 
attrs[attr_name] = multiple_value # <<<<<<<<<<<<<< * * else: */ if (unlikely(PyObject_SetItem(__pyx_v_attrs, __pyx_v_attr_name, __pyx_v_multiple_value) < 0)) __PYX_ERR(0, 673, __pyx_L1_error) /* "gumbocy.pyx":664 * if attr_name == b"class": * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * if len(multiple_value): # <<<<<<<<<<<<<< * if self.has_classes_ignore: * for v in multiple_value: */ } /* "gumbocy.pyx":662 * attr_name = str(attr.name) * if re2_search(attr_name, deref(self.attributes_whitelist)): * if attr_name == b"class": # <<<<<<<<<<<<<< * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) * if len(multiple_value): */ goto __pyx_L13; } /* "gumbocy.pyx":677 * else: * * if not has_attrs: # <<<<<<<<<<<<<< * attrs = {} * has_attrs = True */ /*else*/ { __pyx_t_1 = ((!(__pyx_v_has_attrs != 0)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":678 * * if not has_attrs: * attrs = {} # <<<<<<<<<<<<<< * has_attrs = True * attrs[attr_name] = attr.value */ __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 678, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF_SET(__pyx_v_attrs, __pyx_t_2); __pyx_t_2 = 0; /* "gumbocy.pyx":679 * if not has_attrs: * attrs = {} * has_attrs = True # <<<<<<<<<<<<<< * attrs[attr_name] = attr.value * */ __pyx_v_has_attrs = 1; /* "gumbocy.pyx":677 * else: * * if not has_attrs: # <<<<<<<<<<<<<< * attrs = {} * has_attrs = True */ } /* "gumbocy.pyx":680 * attrs = {} * has_attrs = True * attrs[attr_name] = attr.value # <<<<<<<<<<<<<< * * if not has_attrs: */ __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_attr->value); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (unlikely(PyObject_SetItem(__pyx_v_attrs, __pyx_v_attr_name, __pyx_t_2) < 0)) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } __pyx_L13:; /* "gumbocy.pyx":661 * attr = node.v.element.attributes.data[i] * attr_name = str(attr.name) * if 
re2_search(attr_name, deref(self.attributes_whitelist)): # <<<<<<<<<<<<<< * if attr_name == b"class": * multiple_value = frozenset(_RE_SPLIT_WHITESPACE.split(attr.value.strip().lower())) */ } } /* "gumbocy.pyx":682 * attrs[attr_name] = attr.value * * if not has_attrs: # <<<<<<<<<<<<<< * self.nodes.append((level, tag_name)) * */ __pyx_t_1 = ((!(__pyx_v_has_attrs != 0)) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":683 * * if not has_attrs: * self.nodes.append((level, tag_name)) # <<<<<<<<<<<<<< * * else: */ if (unlikely(__pyx_v_self->nodes == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "append"); __PYX_ERR(0, 683, __pyx_L1_error) } __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_level); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 683, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_tag_name); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 683, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 683, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_4); __pyx_t_2 = 0; __pyx_t_4 = 0; __pyx_t_5 = __Pyx_PyList_Append(__pyx_v_self->nodes, __pyx_t_12); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 683, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; /* "gumbocy.pyx":682 * attrs[attr_name] = attr.value * * if not has_attrs: # <<<<<<<<<<<<<< * self.nodes.append((level, tag_name)) * */ goto __pyx_L21; } /* "gumbocy.pyx":687 * else: * * if self.has_ids_ignore: # <<<<<<<<<<<<<< * if attrs.get("id") and re2_search(attrs["id"].lower(), deref(self.ids_ignore)): * return 0 */ /*else*/ { __pyx_t_1 = (__pyx_v_self->has_ids_ignore != 0); if (__pyx_t_1) { /* "gumbocy.pyx":688 * * if self.has_ids_ignore: * if attrs.get("id") and re2_search(attrs["id"].lower(), deref(self.ids_ignore)): # <<<<<<<<<<<<<< * return 0 * */ __pyx_t_12 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_attrs, __pyx_n_s_get); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 688, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 688, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 688, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { } else { __pyx_t_1 = __pyx_t_6; goto __pyx_L24_bool_binop_done; } __pyx_t_12 = PyObject_GetItem(__pyx_v_attrs, __pyx_n_s_id); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 688, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_12, __pyx_n_s_lower); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 688, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __pyx_t_12 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_2))) { __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_2); if (likely(__pyx_t_12)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); __Pyx_INCREF(__pyx_t_12); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_2, function); } } if (__pyx_t_12) { __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_12); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 688, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; } else { __pyx_t_4 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 688, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_t_4); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 688, __pyx_L1_error) __pyx_t_6 = (__pyx_f_7gumbocy_re2_search(__pyx_t_11, (*__pyx_v_self->ids_ignore)) != 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_1 = __pyx_t_6; __pyx_L24_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":689 * if self.has_ids_ignore: * if attrs.get("id") and 
re2_search(attrs["id"].lower(), deref(self.ids_ignore)): * return 0 # <<<<<<<<<<<<<< * * self.nodes.append((level, tag_name, attrs)) */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":688 * * if self.has_ids_ignore: * if attrs.get("id") and re2_search(attrs["id"].lower(), deref(self.ids_ignore)): # <<<<<<<<<<<<<< * return 0 * */ } /* "gumbocy.pyx":687 * else: * * if self.has_ids_ignore: # <<<<<<<<<<<<<< * if attrs.get("id") and re2_search(attrs["id"].lower(), deref(self.ids_ignore)): * return 0 */ } /* "gumbocy.pyx":691 * return 0 * * self.nodes.append((level, tag_name, attrs)) # <<<<<<<<<<<<<< * * else: */ if (unlikely(__pyx_v_self->nodes == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "append"); __PYX_ERR(0, 691, __pyx_L1_error) } __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_level); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 691, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_tag_name); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 691, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_12 = PyTuple_New(3); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 691, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_2); __Pyx_INCREF(__pyx_v_attrs); __Pyx_GIVEREF(__pyx_v_attrs); PyTuple_SET_ITEM(__pyx_t_12, 2, __pyx_v_attrs); __pyx_t_4 = 0; __pyx_t_2 = 0; __pyx_t_5 = __Pyx_PyList_Append(__pyx_v_self->nodes, __pyx_t_12); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 691, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; } __pyx_L21:; /* "gumbocy.pyx":653 * tag_name = py_tag_name * * if self.has_attributes_whitelist: # <<<<<<<<<<<<<< * * # Build a dict with all the whitelisted attributes */ goto __pyx_L9; } /* "gumbocy.pyx":694 * * else: * self.nodes.append((level, tag_name)) # <<<<<<<<<<<<<< * * # Call _iternode() recursively for each of the children */ /*else*/ { if 
(unlikely(__pyx_v_self->nodes == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "append"); __PYX_ERR(0, 694, __pyx_L1_error) } __pyx_t_12 = __Pyx_PyInt_From_int(__pyx_v_level); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 694, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_tag_name); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 694, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 694, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_GIVEREF(__pyx_t_12); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_12); __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2); __pyx_t_12 = 0; __pyx_t_2 = 0; __pyx_t_5 = __Pyx_PyList_Append(__pyx_v_self->nodes, __pyx_t_4); if (unlikely(__pyx_t_5 == -1)) __PYX_ERR(0, 694, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } __pyx_L9:; /* "gumbocy.pyx":697 * * # Call _iternode() recursively for each of the children * for i in range(node.v.element.children.length): # <<<<<<<<<<<<<< * child = node.v.element.children.data[i] * if self._traverse_node_simple(level + 1, child) == 1: */ __pyx_t_9 = __pyx_v_node->v.element.children.length; for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { __pyx_v_i = __pyx_t_10; /* "gumbocy.pyx":698 * # Call _iternode() recursively for each of the children * for i in range(node.v.element.children.length): * child = node.v.element.children.data[i] # <<<<<<<<<<<<<< * if self._traverse_node_simple(level + 1, child) == 1: * break */ __pyx_v_child = ((GumboNode *)(__pyx_v_node->v.element.children.data[__pyx_v_i])); /* "gumbocy.pyx":699 * for i in range(node.v.element.children.length): * child = node.v.element.children.data[i] * if self._traverse_node_simple(level + 1, child) == 1: # <<<<<<<<<<<<<< * break * */ __pyx_t_1 = ((((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->_traverse_node_simple(__pyx_v_self, (__pyx_v_level + 1), 
__pyx_v_child) == 1) != 0); if (__pyx_t_1) { /* "gumbocy.pyx":700 * child = node.v.element.children.data[i] * if self._traverse_node_simple(level + 1, child) == 1: * break # <<<<<<<<<<<<<< * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD and self.head_only: */ goto __pyx_L27_break; /* "gumbocy.pyx":699 * for i in range(node.v.element.children.length): * child = node.v.element.children.data[i] * if self._traverse_node_simple(level + 1, child) == 1: # <<<<<<<<<<<<<< * break * */ } } __pyx_L27_break:; /* "gumbocy.pyx":702 * break * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD and self.head_only: # <<<<<<<<<<<<<< * return 1 * */ __pyx_t_6 = ((__pyx_v_node->v.element.tag == GUMBO_TAG_HEAD) != 0); if (__pyx_t_6) { } else { __pyx_t_1 = __pyx_t_6; goto __pyx_L30_bool_binop_done; } __pyx_t_6 = (__pyx_v_self->head_only != 0); __pyx_t_1 = __pyx_t_6; __pyx_L30_bool_binop_done:; if (__pyx_t_1) { /* "gumbocy.pyx":703 * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD and self.head_only: * return 1 # <<<<<<<<<<<<<< * * return 0 */ __pyx_r = 1; goto __pyx_L0; /* "gumbocy.pyx":702 * break * * if node.v.element.tag == gumbocy.GUMBO_TAG_HEAD and self.head_only: # <<<<<<<<<<<<<< * return 1 * */ } /* "gumbocy.pyx":634 * self.nodes.append((level, None, node.v.text.text)) * * elif node.type == gumbocy.GUMBO_NODE_ELEMENT: # <<<<<<<<<<<<<< * * tag_n = node.v.element.tag */ break; default: break; } /* "gumbocy.pyx":705 * return 1 * * return 0 # <<<<<<<<<<<<<< * * def __dealloc__(self): */ __pyx_r = 0; goto __pyx_L0; /* "gumbocy.pyx":623 * return self.nodes * * cdef bint _traverse_node_simple(self, int level, gumbocy.GumboNode* node): # <<<<<<<<<<<<<< * """ Traverses the node tree. 
Return 1 to stop at this level """ * */

  /* Closing part of the function that identifies itself below as
   * "gumbocy.HTMLParser._traverse_node_simple"; its opening lies before
   * this span. */

  /* function exit code */
  /* Error exit: drop any temporaries that are still set, call
   * __Pyx_WriteUnraisable with this function's name and the recorded source
   * position, then continue into the common exit with a result of 0. */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_12);
  __Pyx_XDECREF(__pyx_t_13);
  __Pyx_XDECREF(__pyx_t_14);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser._traverse_node_simple", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_r = 0;
  /* Common exit: release the object-typed locals and return __pyx_r. */
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_py_tag_name);
  __Pyx_XDECREF(__pyx_v_attrs);
  __Pyx_XDECREF(__pyx_v_attr_name);
  __Pyx_XDECREF(__pyx_v_multiple_value);
  __Pyx_XDECREF(__pyx_v_v);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "gumbocy.pyx":707
 *         return 0
 *
 *     def __dealloc__(self):             # <<<<<<<<<<<<<<
 *         """ Cleanup gumbo memory when the parser is deallocated by Python """
 *         self.free()
 */

/* Python wrapper */
/* Entry point for HTMLParser.__dealloc__ taking a generic PyObject*: it casts
 * the argument to the HTMLParser object struct and forwards to the
 * implementation function below. It is called from
 * __pyx_tp_dealloc_7gumbocy_HTMLParser. */
static void __pyx_pw_7gumbocy_10HTMLParser_9__dealloc__(PyObject *__pyx_v_self); /*proto*/
static void __pyx_pw_7gumbocy_10HTMLParser_9__dealloc__(PyObject *__pyx_v_self) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
  __pyx_pf_7gumbocy_10HTMLParser_8__dealloc__(((struct __pyx_obj_7gumbocy_HTMLParser *)__pyx_v_self));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
}

/* Implementation of HTMLParser.__dealloc__: invokes the `free` entry of the
 * object's vtable and discards the object it returns.
 *
 * The function returns void, so a NULL result from free() cannot be passed
 * on to the caller; that case jumps to the error label, which calls
 * __Pyx_WriteUnraisable instead. */
static void __pyx_pf_7gumbocy_10HTMLParser_8__dealloc__(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self) {
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  __Pyx_RefNannySetupContext("__dealloc__", 0);

  /* "gumbocy.pyx":709
   *     def __dealloc__(self):
   *         """ Cleanup gumbo memory when the parser is deallocated by Python """
   *         self.free()             # <<<<<<<<<<<<<<
   *
   *     cdef free(self):
   */
  __pyx_t_1 = ((struct __pyx_vtabstruct_7gumbocy_HTMLParser *)__pyx_v_self->__pyx_vtab)->free(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 709, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  /* The returned object is not used; release it immediately. */
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "gumbocy.pyx":707
   *         return 0
   *
   *     def __dealloc__(self):             # <<<<<<<<<<<<<<
   *         """ Cleanup gumbo memory when the parser is deallocated by Python """
   *         self.free()
   */

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_WriteUnraisable("gumbocy.HTMLParser.__dealloc__", __pyx_clineno, __pyx_lineno, __pyx_filename, 0, 0);
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
}

/* "gumbocy.pyx":711
 *         self.free()
 *
 *     cdef free(self):             # <<<<<<<<<<<<<<
 *         if self.has_output:
 *             gumbocy.gumbo_destroy_output(&gumbocy.kGumboDefaultOptions, self.output)
 */

/* HTMLParser.free(): if has_output is non-zero, passes self->output to
 * gumbo_destroy_output() together with &kGumboDefaultOptions and then resets
 * has_output to 0, so a later call skips the destroy step.
 *
 * Returns a new reference to Py_None; no path in this body returns NULL. */
static PyObject *__pyx_f_7gumbocy_10HTMLParser_free(struct __pyx_obj_7gumbocy_HTMLParser *__pyx_v_self) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  __Pyx_RefNannySetupContext("free", 0);

  /* "gumbocy.pyx":712
   *
   *     cdef free(self):
   *         if self.has_output:             # <<<<<<<<<<<<<<
   *             gumbocy.gumbo_destroy_output(&gumbocy.kGumboDefaultOptions, self.output)
   *             self.has_output = 0
   */
  __pyx_t_1 = (__pyx_v_self->has_output != 0);
  if (__pyx_t_1) {

    /* "gumbocy.pyx":713
     *     cdef free(self):
     *         if self.has_output:
     *             gumbocy.gumbo_destroy_output(&gumbocy.kGumboDefaultOptions, self.output)             # <<<<<<<<<<<<<<
     *             self.has_output = 0
     */
    gumbo_destroy_output((&kGumboDefaultOptions), __pyx_v_self->output);

    /* "gumbocy.pyx":714
     *         if self.has_output:
     *             gumbocy.gumbo_destroy_output(&gumbocy.kGumboDefaultOptions, self.output)
     *             self.has_output = 0             # <<<<<<<<<<<<<<
     */
    __pyx_v_self->has_output = 0;

    /* "gumbocy.pyx":712
     *
     *     cdef free(self):
     *         if self.has_output:             # <<<<<<<<<<<<<<
     *             gumbocy.gumbo_destroy_output(&gumbocy.kGumboDefaultOptions, self.output)
     *             self.has_output = 0
     */
  }

  /* "gumbocy.pyx":711
   *         self.free()
   *
   *     cdef free(self):             # <<<<<<<<<<<<<<
   *         if self.has_output:
   *             gumbocy.gumbo_destroy_output(&gumbocy.kGumboDefaultOptions, self.output)
   */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* Allocation function installed as tp_new in __pyx_type_7gumbocy_Attributes.
 * Only its opening is inside this span; the statement below continues on the
 * following line of the file. */
static PyObject *__pyx_tp_new_7gumbocy_Attributes(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
  struct __pyx_obj_7gumbocy_Attributes *p;
  PyObject *o;
  if (likely((t->tp_flags &
Py_TPFLAGS_IS_ABSTRACT) == 0)) {
    /* Remainder of __pyx_tp_new_7gumbocy_Attributes (its opening is on the
     * preceding line of the file). Types without Py_TPFLAGS_IS_ABSTRACT are
     * allocated through their own tp_alloc; otherwise the call is routed
     * through PyBaseObject_Type.tp_new. */
    o = (*t->tp_alloc)(t, 0);
  } else {
    o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
  }
  if (unlikely(!o)) return 0;
  p = ((struct __pyx_obj_7gumbocy_Attributes *)o);
  /* Both object fields start out holding their own reference to None. */
  p->values = ((PyObject*)Py_None); Py_INCREF(Py_None);
  p->classes = ((PyObject*)Py_None); Py_INCREF(Py_None);
  return o;
}

/* tp_dealloc for gumbocy.Attributes.
 * When built for PY_VERSION_HEX >= 0x030400a1 and the type has a tp_finalize
 * that has not yet run for this object, the finalizer is called first and
 * deallocation is abandoned if PyObject_CallFinalizerFromDealloc() returns
 * non-zero. Otherwise the object is removed from GC tracking, both object
 * fields are cleared, and the memory is released through tp_free. */
static void __pyx_tp_dealloc_7gumbocy_Attributes(PyObject *o) {
  struct __pyx_obj_7gumbocy_Attributes *p = (struct __pyx_obj_7gumbocy_Attributes *)o;
  #if PY_VERSION_HEX >= 0x030400a1
  if (unlikely(Py_TYPE(o)->tp_finalize) && !_PyGC_FINALIZED(o)) {
    if (PyObject_CallFinalizerFromDealloc(o)) return;
  }
  #endif
  PyObject_GC_UnTrack(o);
  Py_CLEAR(p->values);
  Py_CLEAR(p->classes);
  (*Py_TYPE(o)->tp_free)(o);
}

/* tp_traverse for gumbocy.Attributes: calls the visitor on `values` and
 * `classes` when they are non-NULL and returns the first non-zero visitor
 * result, or 0 if every visit returned 0. */
static int __pyx_tp_traverse_7gumbocy_Attributes(PyObject *o, visitproc v, void *a) {
  int e;
  struct __pyx_obj_7gumbocy_Attributes *p = (struct __pyx_obj_7gumbocy_Attributes *)o;
  if (p->values) {
    e = (*v)(p->values, a); if (e) return e;
  }
  if (p->classes) {
    e = (*v)(p->classes, a); if (e) return e;
  }
  return 0;
}

/* tp_clear for gumbocy.Attributes: each object field is replaced by a new
 * reference to None before the previous value is released, so the field is
 * never left pointing at a released object. Always returns 0. */
static int __pyx_tp_clear_7gumbocy_Attributes(PyObject *o) {
  PyObject* tmp;
  struct __pyx_obj_7gumbocy_Attributes *p = (struct __pyx_obj_7gumbocy_Attributes *)o;
  tmp = ((PyObject*)p->values);
  p->values = ((PyObject*)Py_None); Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  tmp = ((PyObject*)p->classes);
  p->classes = ((PyObject*)Py_None); Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  return 0;
}

/* Type object for "gumbocy.Attributes". The only non-zero slots are the
 * name, basic size, flags, and the dealloc/traverse/clear/new functions
 * defined above; no method, member or getset tables are attached here. */
static PyTypeObject __pyx_type_7gumbocy_Attributes = {
  PyVarObject_HEAD_INIT(0, 0)
  "gumbocy.Attributes", /*tp_name*/
  sizeof(struct __pyx_obj_7gumbocy_Attributes), /*tp_basicsize*/
  0, /*tp_itemsize*/
  __pyx_tp_dealloc_7gumbocy_Attributes, /*tp_dealloc*/
  0, /*tp_print*/
  0, /*tp_getattr*/
  0, /*tp_setattr*/
  #if PY_MAJOR_VERSION < 3
  0, /*tp_compare*/
  #endif
  #if PY_MAJOR_VERSION >= 3
  0, /*tp_as_async*/
  #endif
  0, /*tp_repr*/
  0, /*tp_as_number*/
  0, /*tp_as_sequence*/
  0, /*tp_as_mapping*/
  0, /*tp_hash*/
  0, /*tp_call*/
  0, /*tp_str*/
  0, /*tp_getattro*/
  0, /*tp_setattro*/
  0, /*tp_as_buffer*/
  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
  0, /*tp_doc*/
  __pyx_tp_traverse_7gumbocy_Attributes, /*tp_traverse*/
  __pyx_tp_clear_7gumbocy_Attributes, /*tp_clear*/
  0, /*tp_richcompare*/
  0, /*tp_weaklistoffset*/
  0, /*tp_iter*/
  0, /*tp_iternext*/
  0, /*tp_methods*/
  0, /*tp_members*/
  0, /*tp_getset*/
  0, /*tp_base*/
  0, /*tp_dict*/
  0, /*tp_descr_get*/
  0, /*tp_descr_set*/
  0, /*tp_dictoffset*/
  0, /*tp_init*/
  0, /*tp_alloc*/
  __pyx_tp_new_7gumbocy_Attributes, /*tp_new*/
  0, /*tp_free*/
  0, /*tp_is_gc*/
  0, /*tp_bases*/
  0, /*tp_mro*/
  0, /*tp_cache*/
  0, /*tp_subclasses*/
  0, /*tp_weaklist*/
  0, /*tp_del*/
  0, /*tp_version_tag*/
  #if PY_VERSION_HEX >= 0x030400a1
  0, /*tp_finalize*/
  #endif
};

/* Storage for the HTMLParser vtable. */
static struct __pyx_vtabstruct_7gumbocy_HTMLParser __pyx_vtable_7gumbocy_HTMLParser;

/* tp_new for gumbocy.HTMLParser.
 * Allocates the object (same abstract-type split as the Attributes tp_new
 * above), stores the vtable pointer, constructs the five unordered-set
 * members in place with placement new, points the five object fields at
 * None, and finally runs the __cinit__ wrapper with the constructor
 * arguments. If __cinit__ reports failure (< 0) the new object is released
 * and 0 is returned in its place.
 *
 * NOTE(review): `std::unordered_set ()` carries no template argument list in
 * this copy of the file; it looks as if an angle-bracketed part was lost
 * when the text was extracted. Confirm against a freshly generated file.
 *
 * The closing `o; }` of this function is on the following line of the
 * file. */
static PyObject *__pyx_tp_new_7gumbocy_HTMLParser(PyTypeObject *t, PyObject *a, PyObject *k) {
  struct __pyx_obj_7gumbocy_HTMLParser *p;
  PyObject *o;
  if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
    o = (*t->tp_alloc)(t, 0);
  } else {
    o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
  }
  if (unlikely(!o)) return 0;
  p = ((struct __pyx_obj_7gumbocy_HTMLParser *)o);
  p->__pyx_vtab = __pyx_vtabptr_7gumbocy_HTMLParser;
  new((void*)&(p->tags_ignore)) std::unordered_set ();
  new((void*)&(p->tags_ignore_head_only)) std::unordered_set ();
  new((void*)&(p->tags_boilerplate)) std::unordered_set ();
  new((void*)&(p->tags_boilerplate_bypass)) std::unordered_set ();
  new((void*)&(p->tags_separators)) std::unordered_set ();
  p->current_stack = ((PyObject*)Py_None); Py_INCREF(Py_None);
  p->analysis = ((PyObject*)Py_None); Py_INCREF(Py_None);
  p->current_word_group = Py_None; Py_INCREF(Py_None);
  p->current_hyperlink = Py_None; Py_INCREF(Py_None);
  p->nodes = ((PyObject*)Py_None); Py_INCREF(Py_None);
  if (unlikely(__pyx_pw_7gumbocy_10HTMLParser_1__cinit__(o, a, k) < 0)) {
    Py_DECREF(o); o = 0;
  }
  return
o;
}

/* tp_dealloc for gumbocy.HTMLParser.
 * Order of operations:
 *   1. On PY_VERSION_HEX >= 0x030400a1, run a pending tp_finalize and stop if
 *      PyObject_CallFinalizerFromDealloc() returns non-zero.
 *   2. Remove the object from GC tracking.
 *   3. Call the __dealloc__ wrapper with any pending exception saved and
 *      restored around it, and with the reference count raised by one for
 *      the duration of the call (presumably so that references taken and
 *      dropped inside __dealloc__ cannot start a second deallocation --
 *      confirm against the Cython documentation).
 *   4. Run the destructors of the five set members built in tp_new.
 *   5. Clear the five object fields and free the memory through tp_free. */
static void __pyx_tp_dealloc_7gumbocy_HTMLParser(PyObject *o) {
  struct __pyx_obj_7gumbocy_HTMLParser *p = (struct __pyx_obj_7gumbocy_HTMLParser *)o;
  #if PY_VERSION_HEX >= 0x030400a1
  if (unlikely(Py_TYPE(o)->tp_finalize) && !_PyGC_FINALIZED(o)) {
    if (PyObject_CallFinalizerFromDealloc(o)) return;
  }
  #endif
  PyObject_GC_UnTrack(o);
  {
    PyObject *etype, *eval, *etb;
    PyErr_Fetch(&etype, &eval, &etb);
    ++Py_REFCNT(o);
    __pyx_pw_7gumbocy_10HTMLParser_9__dealloc__(o);
    --Py_REFCNT(o);
    PyErr_Restore(etype, eval, etb);
  }
  __Pyx_call_destructor(p->tags_ignore);
  __Pyx_call_destructor(p->tags_ignore_head_only);
  __Pyx_call_destructor(p->tags_boilerplate);
  __Pyx_call_destructor(p->tags_boilerplate_bypass);
  __Pyx_call_destructor(p->tags_separators);
  Py_CLEAR(p->current_stack);
  Py_CLEAR(p->analysis);
  Py_CLEAR(p->current_word_group);
  Py_CLEAR(p->current_hyperlink);
  Py_CLEAR(p->nodes);
  (*Py_TYPE(o)->tp_free)(o);
}

/* tp_traverse for gumbocy.HTMLParser: visits each of the five object fields
 * that is non-NULL and returns the first non-zero visitor result, or 0. */
static int __pyx_tp_traverse_7gumbocy_HTMLParser(PyObject *o, visitproc v, void *a) {
  int e;
  struct __pyx_obj_7gumbocy_HTMLParser *p = (struct __pyx_obj_7gumbocy_HTMLParser *)o;
  if (p->current_stack) {
    e = (*v)(p->current_stack, a); if (e) return e;
  }
  if (p->analysis) {
    e = (*v)(p->analysis, a); if (e) return e;
  }
  if (p->current_word_group) {
    e = (*v)(p->current_word_group, a); if (e) return e;
  }
  if (p->current_hyperlink) {
    e = (*v)(p->current_hyperlink, a); if (e) return e;
  }
  if (p->nodes) {
    e = (*v)(p->nodes, a); if (e) return e;
  }
  return 0;
}

/* tp_clear for gumbocy.HTMLParser: every object field is first repointed at
 * a new reference to None and only then is the old value released. Always
 * returns 0. */
static int __pyx_tp_clear_7gumbocy_HTMLParser(PyObject *o) {
  PyObject* tmp;
  struct __pyx_obj_7gumbocy_HTMLParser *p = (struct __pyx_obj_7gumbocy_HTMLParser *)o;
  tmp = ((PyObject*)p->current_stack);
  p->current_stack = ((PyObject*)Py_None); Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  tmp = ((PyObject*)p->analysis);
  p->analysis = ((PyObject*)Py_None); Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  tmp = ((PyObject*)p->current_word_group);
  p->current_word_group = Py_None; Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  tmp = ((PyObject*)p->current_hyperlink);
  p->current_hyperlink = Py_None; Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  tmp = ((PyObject*)p->nodes);
  p->nodes = ((PyObject*)Py_None); Py_INCREF(Py_None);
  Py_XDECREF(tmp);
  return 0;
}

/* Python-visible methods of HTMLParser: parse takes exactly one argument
 * (METH_O), analyze takes positional and keyword arguments, listnodes takes
 * none. The all-zero entry terminates the table. */
static PyMethodDef __pyx_methods_7gumbocy_HTMLParser[] = {
  {"parse", (PyCFunction)__pyx_pw_7gumbocy_10HTMLParser_3parse, METH_O, __pyx_doc_7gumbocy_10HTMLParser_2parse},
  {"analyze", (PyCFunction)__pyx_pw_7gumbocy_10HTMLParser_5analyze, METH_VARARGS|METH_KEYWORDS, __pyx_doc_7gumbocy_10HTMLParser_4analyze},
  {"listnodes", (PyCFunction)__pyx_pw_7gumbocy_10HTMLParser_7listnodes, METH_NOARGS, __pyx_doc_7gumbocy_10HTMLParser_6listnodes},
  {0, 0, 0, 0}
};

/* Type object for "gumbocy.HTMLParser". Non-zero slots: name, basic size,
 * flags, the dealloc/traverse/clear/new functions above, and the method
 * table. */
static PyTypeObject __pyx_type_7gumbocy_HTMLParser = {
  PyVarObject_HEAD_INIT(0, 0)
  "gumbocy.HTMLParser", /*tp_name*/
  sizeof(struct __pyx_obj_7gumbocy_HTMLParser), /*tp_basicsize*/
  0, /*tp_itemsize*/
  __pyx_tp_dealloc_7gumbocy_HTMLParser, /*tp_dealloc*/
  0, /*tp_print*/
  0, /*tp_getattr*/
  0, /*tp_setattr*/
  #if PY_MAJOR_VERSION < 3
  0, /*tp_compare*/
  #endif
  #if PY_MAJOR_VERSION >= 3
  0, /*tp_as_async*/
  #endif
  0, /*tp_repr*/
  0, /*tp_as_number*/
  0, /*tp_as_sequence*/
  0, /*tp_as_mapping*/
  0, /*tp_hash*/
  0, /*tp_call*/
  0, /*tp_str*/
  0, /*tp_getattro*/
  0, /*tp_setattro*/
  0, /*tp_as_buffer*/
  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
  0, /*tp_doc*/
  __pyx_tp_traverse_7gumbocy_HTMLParser, /*tp_traverse*/
  __pyx_tp_clear_7gumbocy_HTMLParser, /*tp_clear*/
  0, /*tp_richcompare*/
  0, /*tp_weaklistoffset*/
  0, /*tp_iter*/
  0, /*tp_iternext*/
  __pyx_methods_7gumbocy_HTMLParser, /*tp_methods*/
  0, /*tp_members*/
  0, /*tp_getset*/
  0, /*tp_base*/
  0, /*tp_dict*/
  0, /*tp_descr_get*/
  0, /*tp_descr_set*/
  0, /*tp_dictoffset*/
  0, /*tp_init*/
  0, /*tp_alloc*/
  __pyx_tp_new_7gumbocy_HTMLParser, /*tp_new*/
  0, /*tp_free*/
  0, /*tp_is_gc*/
  0, /*tp_bases*/
  0, /*tp_mro*/
  0, /*tp_cache*/
  0, /*tp_subclasses*/
  0, /*tp_weaklist*/
  0, /*tp_del*/
  0, /*tp_version_tag*/
  #if PY_VERSION_HEX >= 0x030400a1
  0, /*tp_finalize*/
  #endif
};

/* Module-level method table: contains only the terminating entry. */
static PyMethodDef __pyx_methods[] = {
  {0, 0, 0, 0}
};

/* Module definition, compiled only for Python 3: module name "gumbocy",
 * m_doc 0, m_size -1, the empty method table above, and NULL for the
 * reload/traverse/clear/free hooks. */
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef __pyx_moduledef = {
  #if PY_VERSION_HEX < 0x03020000
    { PyObject_HEAD_INIT(NULL) NULL, 0, NULL },
  #else
    PyModuleDef_HEAD_INIT,
  #endif
    "gumbocy",
    0, /* m_doc */
    -1, /* m_size */
    __pyx_methods /* m_methods */,
    NULL, /* m_reload */
    NULL, /* m_traverse */
    NULL, /* m_clear */
    NULL /* m_free */
};
#endif

/* Table of string constants handed to __Pyx_InitStrings() by
 * __Pyx_InitGlobals(). Each row starts with the address of the object slot,
 * the C character array, and that array's sizeof; the meaning of the four
 * trailing integer columns is set by the __Pyx_StringTabEntry declaration,
 * which is outside this span. The table continues on the following lines of
 * the file; the last row below is completed there. */
static __Pyx_StringTabEntry __pyx_string_tab[] = {
  {&__pyx_kp_s_, __pyx_k_, sizeof(__pyx_k_), 0, 0, 1, 0},
  {&__pyx_n_s_RE_SPLIT_WHITESPACE, __pyx_k_RE_SPLIT_WHITESPACE, sizeof(__pyx_k_RE_SPLIT_WHITESPACE), 0, 0, 1, 1},
  {&__pyx_kp_s__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 0, 1, 0},
  {&__pyx_kp_s__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 0, 1, 0},
  {&__pyx_kp_s__4, __pyx_k__4, sizeof(__pyx_k__4), 0, 0, 1, 0},
  {&__pyx_kp_b__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 0, 0},
  {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0},
  {&__pyx_kp_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 0},
  {&__pyx_kp_s__8, __pyx_k__8, sizeof(__pyx_k__8), 0, 0, 1, 0},
  {&__pyx_n_b_alt, __pyx_k_alt, sizeof(__pyx_k_alt), 0, 0, 0, 1},
  {&__pyx_n_s_analyze_external_hyperlinks, __pyx_k_analyze_external_hyperlinks, sizeof(__pyx_k_analyze_external_hyperlinks), 0, 0, 1, 1},
  {&__pyx_n_s_analyze_internal_hyperlinks, __pyx_k_analyze_internal_hyperlinks, sizeof(__pyx_k_analyze_internal_hyperlinks), 0, 0, 1, 1},
  {&__pyx_n_s_analyze_word_groups, __pyx_k_analyze_word_groups, sizeof(__pyx_k_analyze_word_groups), 0, 0, 1, 1},
  {&__pyx_n_s_append, __pyx_k_append, sizeof(__pyx_k_append), 0, 0, 1, 1},
  {&__pyx_kp_b_aria_hidden, __pyx_k_aria_hidden, sizeof(__pyx_k_aria_hidden), 0, 0, 0, 0},
  {&__pyx_n_s_article, __pyx_k_article, sizeof(__pyx_k_article), 0, 0, 1, 1},
  {&__pyx_n_s_attributes_whitelist, __pyx_k_attributes_whitelist, sizeof(__pyx_k_attributes_whitelist), 0, 0, 1, 1},
  {&__pyx_n_s_base_url, __pyx_k_base_url, sizeof(__pyx_k_base_url), 0, 0, 1, 1},
  {&__pyx_n_b_class,
__pyx_k_class, sizeof(__pyx_k_class), 0, 0, 0, 1}, {&__pyx_n_s_class, __pyx_k_class, sizeof(__pyx_k_class), 0, 0, 1, 1}, {&__pyx_n_s_classes_boilerplate, __pyx_k_classes_boilerplate, sizeof(__pyx_k_classes_boilerplate), 0, 0, 1, 1}, {&__pyx_n_s_classes_hidden, __pyx_k_classes_hidden, sizeof(__pyx_k_classes_hidden), 0, 0, 1, 1}, {&__pyx_n_s_classes_ignore, __pyx_k_classes_ignore, sizeof(__pyx_k_classes_ignore), 0, 0, 1, 1}, {&__pyx_n_s_compile, __pyx_k_compile, sizeof(__pyx_k_compile), 0, 0, 1, 1}, {&__pyx_n_b_content, __pyx_k_content, sizeof(__pyx_k_content), 0, 0, 0, 1}, {&__pyx_n_s_content, __pyx_k_content, sizeof(__pyx_k_content), 0, 0, 1, 1}, {&__pyx_n_s_escape, __pyx_k_escape, sizeof(__pyx_k_escape), 0, 0, 1, 1}, {&__pyx_n_s_external_hyperlinks, __pyx_k_external_hyperlinks, sizeof(__pyx_k_external_hyperlinks), 0, 0, 1, 1}, {&__pyx_n_s_get, __pyx_k_get, sizeof(__pyx_k_get), 0, 0, 1, 1}, {&__pyx_n_s_head_links, __pyx_k_head_links, sizeof(__pyx_k_head_links), 0, 0, 1, 1}, {&__pyx_n_s_head_metas, __pyx_k_head_metas, sizeof(__pyx_k_head_metas), 0, 0, 1, 1}, {&__pyx_n_s_head_only, __pyx_k_head_only, sizeof(__pyx_k_head_only), 0, 0, 1, 1}, {&__pyx_n_b_hidden, __pyx_k_hidden, sizeof(__pyx_k_hidden), 0, 0, 0, 1}, {&__pyx_n_b_href, __pyx_k_href, sizeof(__pyx_k_href), 0, 0, 0, 1}, {&__pyx_n_s_href, __pyx_k_href, sizeof(__pyx_k_href), 0, 0, 1, 1}, {&__pyx_kp_s_http_s_s, __pyx_k_http_s_s, sizeof(__pyx_k_http_s_s), 0, 0, 1, 0}, {&__pyx_n_b_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 0, 1}, {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1}, {&__pyx_n_s_ids_boilerplate, __pyx_k_ids_boilerplate, sizeof(__pyx_k_ids_boilerplate), 0, 0, 1, 1}, {&__pyx_n_s_ids_hidden, __pyx_k_ids_hidden, sizeof(__pyx_k_ids_hidden), 0, 0, 1, 1}, {&__pyx_n_s_ids_ignore, __pyx_k_ids_ignore, sizeof(__pyx_k_ids_ignore), 0, 0, 1, 1}, {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, {&__pyx_n_s_internal_hyperlinks, __pyx_k_internal_hyperlinks, 
sizeof(__pyx_k_internal_hyperlinks), 0, 0, 1, 1}, {&__pyx_n_s_join, __pyx_k_join, sizeof(__pyx_k_join), 0, 0, 1, 1}, {&__pyx_n_s_lower, __pyx_k_lower, sizeof(__pyx_k_lower), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_metas_whitelist, __pyx_k_metas_whitelist, sizeof(__pyx_k_metas_whitelist), 0, 0, 1, 1}, {&__pyx_n_b_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 0, 1}, {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, {&__pyx_n_s_nesting_limit, __pyx_k_nesting_limit, sizeof(__pyx_k_nesting_limit), 0, 0, 1, 1}, {&__pyx_n_s_netloc, __pyx_k_netloc, sizeof(__pyx_k_netloc), 0, 0, 1, 1}, {&__pyx_n_s_options, __pyx_k_options, sizeof(__pyx_k_options), 0, 0, 1, 1}, {&__pyx_n_s_pop, __pyx_k_pop, sizeof(__pyx_k_pop), 0, 0, 1, 1}, {&__pyx_n_b_property, __pyx_k_property, sizeof(__pyx_k_property), 0, 0, 0, 1}, {&__pyx_n_s_property, __pyx_k_property, sizeof(__pyx_k_property), 0, 0, 1, 1}, {&__pyx_n_s_pyx_vtable, __pyx_k_pyx_vtable, sizeof(__pyx_k_pyx_vtable), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, {&__pyx_n_s_re, __pyx_k_re, sizeof(__pyx_k_re), 0, 0, 1, 1}, {&__pyx_n_b_rel, __pyx_k_rel, sizeof(__pyx_k_rel), 0, 0, 0, 1}, {&__pyx_n_s_rel, __pyx_k_rel, sizeof(__pyx_k_rel), 0, 0, 1, 1}, {&__pyx_n_b_role, __pyx_k_role, sizeof(__pyx_k_role), 0, 0, 0, 1}, {&__pyx_n_s_role, __pyx_k_role, sizeof(__pyx_k_role), 0, 0, 1, 1}, {&__pyx_n_s_roles_boilerplate, __pyx_k_roles_boilerplate, sizeof(__pyx_k_roles_boilerplate), 0, 0, 1, 1}, {&__pyx_kp_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 0}, {&__pyx_n_s_scheme, __pyx_k_scheme, sizeof(__pyx_k_scheme), 0, 0, 1, 1}, {&__pyx_n_s_setdefault, __pyx_k_setdefault, sizeof(__pyx_k_setdefault), 0, 0, 1, 1}, {&__pyx_n_s_split, __pyx_k_split, sizeof(__pyx_k_split), 0, 0, 1, 1}, {&__pyx_n_b_src, __pyx_k_src, sizeof(__pyx_k_src), 0, 0, 0, 1}, {&__pyx_n_s_startswith, __pyx_k_startswith, sizeof(__pyx_k_startswith), 0, 0, 1, 1}, {&__pyx_n_s_strip, 
__pyx_k_strip, sizeof(__pyx_k_strip), 0, 0, 1, 1},
  /* Final rows of __pyx_string_tab (the table opens earlier in the file);
   * the all-zero row ends the table. */
  {&__pyx_n_b_style, __pyx_k_style, sizeof(__pyx_k_style), 0, 0, 0, 1},
  {&__pyx_n_s_tags_boilerplate, __pyx_k_tags_boilerplate, sizeof(__pyx_k_tags_boilerplate), 0, 0, 1, 1},
  {&__pyx_n_s_tags_boilerplate_bypass, __pyx_k_tags_boilerplate_bypass, sizeof(__pyx_k_tags_boilerplate_bypass), 0, 0, 1, 1},
  {&__pyx_n_s_tags_ignore, __pyx_k_tags_ignore, sizeof(__pyx_k_tags_ignore), 0, 0, 1, 1},
  {&__pyx_n_s_tags_separators, __pyx_k_tags_separators, sizeof(__pyx_k_tags_separators), 0, 0, 1, 1},
  {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
  {&__pyx_n_s_title, __pyx_k_title, sizeof(__pyx_k_title), 0, 0, 1, 1},
  {&__pyx_n_b_true, __pyx_k_true, sizeof(__pyx_k_true), 0, 0, 0, 1},
  {&__pyx_n_s_url, __pyx_k_url, sizeof(__pyx_k_url), 0, 0, 1, 1},
  {&__pyx_n_s_urlparse, __pyx_k_urlparse, sizeof(__pyx_k_urlparse), 0, 0, 1, 1},
  {&__pyx_n_s_word_groups, __pyx_k_word_groups, sizeof(__pyx_k_word_groups), 0, 0, 1, 1},
  {0, 0, 0, 0, 0, 0, 0}
};

/* Looks up the builtin named by __pyx_n_s_range once and caches it in
 * __pyx_builtin_range.
 * Returns 0 on success, -1 if the lookup returned NULL. */
static int __Pyx_InitCachedBuiltins(void) {
  __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 287, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
}

/* Builds the constant one-element tuples used by the module
 * (__pyx_tuple__7, __pyx_tuple__9, __pyx_tuple__10); the echoed .pyx lines
 * above each one show the call site it belongs to.
 * Returns 0 on success, -1 as soon as one PyTuple_Pack() call fails. */
static int __Pyx_InitCachedConstants(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);

  /* "gumbocy.pyx":403
   *         if self.has_url:
   *
   *             if href.startswith("//"):             # <<<<<<<<<<<<<<
   *                 href = self.scheme + ":" + href
   *
   */
  __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_s__6); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 403, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__7);
  __Pyx_GIVEREF(__pyx_tuple__7);

  /* "gumbocy.pyx":688
   *
   *             if self.has_ids_ignore:
   *                 if attrs.get("id") and re2_search(attrs["id"].lower(), deref(self.ids_ignore)):             # <<<<<<<<<<<<<<
   *                     return 0
   *
   */
  __pyx_tuple__9 = PyTuple_Pack(1, __pyx_n_s_id); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 688, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__9);
  __Pyx_GIVEREF(__pyx_tuple__9);

  /* "gumbocy.pyx":24
   * cdef re2cy.RE2 *_RE2_IGNORED_HREF = new re2cy.RE2(r"^(?:javascript|mailto|ftp|about)\:")
   *
   * _RE_SPLIT_WHITESPACE = re.compile(r"\s+")             # <<<<<<<<<<<<<<
   *
   * ctypedef enum AttributeNames:
   */
  __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_s_s); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(0, 24, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__10);
  __Pyx_GIVEREF(__pyx_tuple__10);
  __Pyx_RefNannyFinishContext();
  return 0;
  __pyx_L1_error:;
  __Pyx_RefNannyFinishContext();
  return -1;
}

/* Initialises module-wide globals: records &PyList_Type as the type of the
 * cached `pop` method descriptor, runs __Pyx_InitStrings() over
 * __pyx_string_tab, and creates the integer constants 1 and 999.
 * Returns 0 on success, -1 on the first failure. */
static int __Pyx_InitGlobals(void) {
  __pyx_umethod_PyList_Type_pop.type = (PyObject*)&PyList_Type;
  if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error);
  __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_999 = PyInt_FromLong(999); if (unlikely(!__pyx_int_999)) __PYX_ERR(0, 1, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
}

/* Module init entry point: named initgumbocy when PY_MAJOR_VERSION < 3 and
 * PyInit_gumbocy otherwise. Only the opening of its body is inside this
 * span; the trailing `#ifdef` is completed on the following line of the
 * file. */
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC initgumbocy(void); /*proto*/
PyMODINIT_FUNC initgumbocy(void)
#else
PyMODINIT_FUNC PyInit_gumbocy(void); /*proto*/
PyMODINIT_FUNC PyInit_gumbocy(void)
#endif
{
  PyObject *__pyx_t_1 = NULL;
  std::vector<__pyx_t_5re2cy_ArgPtr> *__pyx_t_2;
  re2::RE2 *__pyx_t_3;
  PyObject *__pyx_t_4 = NULL;
  __Pyx_RefNannyDeclarations
  #if CYTHON_REFNANNY
  __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
  if (!__Pyx_RefNanny) {
    PyErr_Clear();
    __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
    if (!__Pyx_RefNanny)
      Py_FatalError("failed to import 'refnanny' module");
  }
  #endif
  __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_gumbocy(void)", 0);
  if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error)
  #ifdef
__Pyx_CyFunction_USED if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_FusedFunction_USED if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_Coroutine_USED if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_Generator_USED if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_StopAsyncIteration_USED if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif /*--- Library function declarations ---*/ /*--- Threads initialization code ---*/ #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS #ifdef WITH_THREAD /* Python build with threading support? */ PyEval_InitThreads(); #endif #endif /*--- Module creation code ---*/ #if PY_MAJOR_VERSION < 3 __pyx_m = Py_InitModule4("gumbocy", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); #else __pyx_m = PyModule_Create(&__pyx_moduledef); #endif if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) Py_INCREF(__pyx_d); __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) #if CYTHON_COMPILING_IN_PYPY Py_INCREF(__pyx_b); #endif if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); /*--- Initialize various global constants etc. 
---*/ if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif if (__pyx_module_is_main_gumbocy) { if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) } #if PY_MAJOR_VERSION >= 3 { PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) if (!PyDict_GetItemString(modules, "gumbocy")) { if (unlikely(PyDict_SetItemString(modules, "gumbocy", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) } } #endif /*--- Builtin init code ---*/ if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) /*--- Constants init code ---*/ if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) /*--- Global init code ---*/ /*--- Variable export code ---*/ /*--- Function export code ---*/ /*--- Type init code ---*/ if (PyType_Ready(&__pyx_type_7gumbocy_Attributes) < 0) __PYX_ERR(0, 55, __pyx_L1_error) __pyx_type_7gumbocy_Attributes.tp_print = 0; if (PyObject_SetAttrString(__pyx_m, "Attributes", (PyObject *)&__pyx_type_7gumbocy_Attributes) < 0) __PYX_ERR(0, 55, __pyx_L1_error) __pyx_ptype_7gumbocy_Attributes = &__pyx_type_7gumbocy_Attributes; __pyx_vtabptr_7gumbocy_HTMLParser = &__pyx_vtable_7gumbocy_HTMLParser; __pyx_vtable_7gumbocy_HTMLParser.guess_node_hidden = (int (*)(struct __pyx_obj_7gumbocy_HTMLParser *, GumboNode *, struct __pyx_obj_7gumbocy_Attributes *))__pyx_f_7gumbocy_10HTMLParser_guess_node_hidden; __pyx_vtable_7gumbocy_HTMLParser.guess_node_boilerplate = (int (*)(struct __pyx_obj_7gumbocy_HTMLParser *, GumboNode *, struct __pyx_obj_7gumbocy_Attributes *))__pyx_f_7gumbocy_10HTMLParser_guess_node_boilerplate; __pyx_vtable_7gumbocy_HTMLParser.get_attributes = (struct __pyx_obj_7gumbocy_Attributes *(*)(struct __pyx_obj_7gumbocy_HTMLParser *, GumboNode 
*))__pyx_f_7gumbocy_10HTMLParser_get_attributes; __pyx_vtable_7gumbocy_HTMLParser.close_word_group = (void (*)(struct __pyx_obj_7gumbocy_HTMLParser *))__pyx_f_7gumbocy_10HTMLParser_close_word_group; __pyx_vtable_7gumbocy_HTMLParser.add_text = (void (*)(struct __pyx_obj_7gumbocy_HTMLParser *, PyObject *))__pyx_f_7gumbocy_10HTMLParser_add_text; __pyx_vtable_7gumbocy_HTMLParser.add_hyperlink_text = (void (*)(struct __pyx_obj_7gumbocy_HTMLParser *, PyObject *))__pyx_f_7gumbocy_10HTMLParser_add_hyperlink_text; __pyx_vtable_7gumbocy_HTMLParser.open_hyperlink = (void (*)(struct __pyx_obj_7gumbocy_HTMLParser *, struct __pyx_obj_7gumbocy_Attributes *))__pyx_f_7gumbocy_10HTMLParser_open_hyperlink; __pyx_vtable_7gumbocy_HTMLParser.close_hyperlink = (void (*)(struct __pyx_obj_7gumbocy_HTMLParser *))__pyx_f_7gumbocy_10HTMLParser_close_hyperlink; __pyx_vtable_7gumbocy_HTMLParser._traverse_node = (int (*)(struct __pyx_obj_7gumbocy_HTMLParser *, int, GumboNode *, int, int, int, int, int))__pyx_f_7gumbocy_10HTMLParser__traverse_node; __pyx_vtable_7gumbocy_HTMLParser._traverse_node_simple = (int (*)(struct __pyx_obj_7gumbocy_HTMLParser *, int, GumboNode *))__pyx_f_7gumbocy_10HTMLParser__traverse_node_simple; __pyx_vtable_7gumbocy_HTMLParser.free = (PyObject *(*)(struct __pyx_obj_7gumbocy_HTMLParser *))__pyx_f_7gumbocy_10HTMLParser_free; if (PyType_Ready(&__pyx_type_7gumbocy_HTMLParser) < 0) __PYX_ERR(0, 66, __pyx_L1_error) __pyx_type_7gumbocy_HTMLParser.tp_print = 0; if (__Pyx_SetVtable(__pyx_type_7gumbocy_HTMLParser.tp_dict, __pyx_vtabptr_7gumbocy_HTMLParser) < 0) __PYX_ERR(0, 66, __pyx_L1_error) if (PyObject_SetAttrString(__pyx_m, "HTMLParser", (PyObject *)&__pyx_type_7gumbocy_HTMLParser) < 0) __PYX_ERR(0, 66, __pyx_L1_error) __pyx_ptype_7gumbocy_HTMLParser = &__pyx_type_7gumbocy_HTMLParser; /*--- Type import code ---*/ /*--- Variable import code ---*/ /*--- Function import code ---*/ /*--- Execution code ---*/ #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) if 
(__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif /* "gumbocy.pyx":1 * import re # <<<<<<<<<<<<<< * import urlparse * cimport gumbocy */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_re, 0, -1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_re, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":2 * import re * import urlparse # <<<<<<<<<<<<<< * cimport gumbocy * cimport re2cy */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_urlparse, 0, -1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_urlparse, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":14 * int printf(const char* format, ...); * * cdef vector[re2cy.ArgPtr] *argp = new vector[re2cy.ArgPtr]() # <<<<<<<<<<<<<< * cdef re2cy.ArgPtr *empty_args = &(deref(argp)[0]) * */ try { __pyx_t_2 = new std::vector<__pyx_t_5re2cy_ArgPtr> (); } catch(...) { __Pyx_CppExn2PyErr(); __PYX_ERR(0, 14, __pyx_L1_error) } __pyx_v_7gumbocy_argp = __pyx_t_2; /* "gumbocy.pyx":15 * * cdef vector[re2cy.ArgPtr] *argp = new vector[re2cy.ArgPtr]() * cdef re2cy.ArgPtr *empty_args = &(deref(argp)[0]) # <<<<<<<<<<<<<< * * cdef bint re2_search(const char* s, re2cy.RE2 &pattern): */ __pyx_v_7gumbocy_empty_args = (&((*__pyx_v_7gumbocy_argp)[0])); /* "gumbocy.pyx":20 * return re2cy.RE2.PartialMatchN(s, pattern, empty_args, 0) * * cdef re2cy.RE2 *_RE2_SEARCH_STYLE_HIDDEN = new re2cy.RE2(r"(display\s*\:\s*none)|(visibility\s*\:\s*hidden)") # <<<<<<<<<<<<<< * cdef re2cy.RE2 *_RE2_ABSOLUTE_HREF = new re2cy.RE2(r"^(?:[A-Za-z0-9\+\.\-]+\:)?\/\/") * cdef re2cy.RE2 *_RE2_IGNORED_HREF = new re2cy.RE2(r"^(?:javascript|mailto|ftp|about)\:") */ try { __pyx_t_3 = new re2::RE2(((char const *)"(display\\s*\\:\\s*none)|(visibility\\s*\\:\\s*hidden)")); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 20, __pyx_L1_error) } __pyx_v_7gumbocy__RE2_SEARCH_STYLE_HIDDEN = __pyx_t_3; /* "gumbocy.pyx":21 * * cdef re2cy.RE2 *_RE2_SEARCH_STYLE_HIDDEN = new re2cy.RE2(r"(display\s*\:\s*none)|(visibility\s*\:\s*hidden)") * cdef re2cy.RE2 *_RE2_ABSOLUTE_HREF = new re2cy.RE2(r"^(?:[A-Za-z0-9\+\.\-]+\:)?\/\/") # <<<<<<<<<<<<<< * cdef re2cy.RE2 *_RE2_IGNORED_HREF = new re2cy.RE2(r"^(?:javascript|mailto|ftp|about)\:") * */ try { __pyx_t_3 = new re2::RE2(((char const *)"^(?:[A-Za-z0-9\\+\\.\\-]+\\:)?\\/\\/")); } catch(...) { __Pyx_CppExn2PyErr(); __PYX_ERR(0, 21, __pyx_L1_error) } __pyx_v_7gumbocy__RE2_ABSOLUTE_HREF = __pyx_t_3; /* "gumbocy.pyx":22 * cdef re2cy.RE2 *_RE2_SEARCH_STYLE_HIDDEN = new re2cy.RE2(r"(display\s*\:\s*none)|(visibility\s*\:\s*hidden)") * cdef re2cy.RE2 *_RE2_ABSOLUTE_HREF = new re2cy.RE2(r"^(?:[A-Za-z0-9\+\.\-]+\:)?\/\/") * cdef re2cy.RE2 *_RE2_IGNORED_HREF = new re2cy.RE2(r"^(?:javascript|mailto|ftp|about)\:") # <<<<<<<<<<<<<< * * _RE_SPLIT_WHITESPACE = re.compile(r"\s+") */ try { __pyx_t_3 = new re2::RE2(((char const *)"^(?:javascript|mailto|ftp|about)\\:")); } catch(...) 
{ __Pyx_CppExn2PyErr(); __PYX_ERR(0, 22, __pyx_L1_error) } __pyx_v_7gumbocy__RE2_IGNORED_HREF = __pyx_t_3; /* "gumbocy.pyx":24 * cdef re2cy.RE2 *_RE2_IGNORED_HREF = new re2cy.RE2(r"^(?:javascript|mailto|ftp|about)\:") * * _RE_SPLIT_WHITESPACE = re.compile(r"\s+") # <<<<<<<<<<<<<< * * ctypedef enum AttributeNames: */ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 24, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 24, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 24, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (PyDict_SetItem(__pyx_d, __pyx_n_s_RE_SPLIT_WHITESPACE, __pyx_t_1) < 0) __PYX_ERR(0, 24, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gumbocy.pyx":1 * import re # <<<<<<<<<<<<<< * import urlparse * cimport gumbocy */ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /*--- Wrapped vars code ---*/ goto __pyx_L0; __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_4); if (__pyx_m) { if (__pyx_d) { __Pyx_AddTraceback("init gumbocy", __pyx_clineno, __pyx_lineno, __pyx_filename); } Py_DECREF(__pyx_m); __pyx_m = 0; } else if (!PyErr_Occurred()) { PyErr_SetString(PyExc_ImportError, "init gumbocy"); } __pyx_L0:; __Pyx_RefNannyFinishContext(); #if PY_MAJOR_VERSION < 3 return; #else return __pyx_m; #endif } /* --- Runtime support code --- */ /* Refnanny */ #if CYTHON_REFNANNY static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { PyObject *m = NULL, *p = NULL; void *r = NULL; m = PyImport_ImportModule((char 
*)modname); if (!m) goto end; p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); if (!p) goto end; r = PyLong_AsVoidPtr(p); end: Py_XDECREF(p); Py_XDECREF(m); return (__Pyx_RefNannyAPIStruct *)r; } #endif /* GetBuiltinName */ static PyObject *__Pyx_GetBuiltinName(PyObject *name) { PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); if (unlikely(!result)) { PyErr_Format(PyExc_NameError, #if PY_MAJOR_VERSION >= 3 "name '%U' is not defined", name); #else "name '%.200s' is not defined", PyString_AS_STRING(name)); #endif } return result; } /* RaiseDoubleKeywords */ static void __Pyx_RaiseDoubleKeywordsError( const char* func_name, PyObject* kw_name) { PyErr_Format(PyExc_TypeError, #if PY_MAJOR_VERSION >= 3 "%s() got multiple values for keyword argument '%U'", func_name, kw_name); #else "%s() got multiple values for keyword argument '%s'", func_name, PyString_AsString(kw_name)); #endif } /* ParseKeywords */ static int __Pyx_ParseOptionalKeywords( PyObject *kwds, PyObject **argnames[], PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, const char* function_name) { PyObject *key = 0, *value = 0; Py_ssize_t pos = 0; PyObject*** name; PyObject*** first_kw_arg = argnames + num_pos_args; while (PyDict_Next(kwds, &pos, &key, &value)) { name = first_kw_arg; while (*name && (**name != key)) name++; if (*name) { values[name-argnames] = value; continue; } name = first_kw_arg; #if PY_MAJOR_VERSION < 3 if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { while (*name) { if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) && _PyString_Eq(**name, key)) { values[name-argnames] = value; break; } name++; } if (*name) continue; else { PyObject*** argname = argnames; while (argname != first_kw_arg) { if ((**argname == key) || ( (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) && _PyString_Eq(**argname, key))) { goto arg_passed_twice; } argname++; } } } else #endif if 
(likely(PyUnicode_Check(key))) { while (*name) { int cmp = (**name == key) ? 0 : #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : #endif PyUnicode_Compare(**name, key); if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; if (cmp == 0) { values[name-argnames] = value; break; } name++; } if (*name) continue; else { PyObject*** argname = argnames; while (argname != first_kw_arg) { int cmp = (**argname == key) ? 0 : #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 : #endif PyUnicode_Compare(**argname, key); if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; if (cmp == 0) goto arg_passed_twice; argname++; } } } else goto invalid_keyword_type; if (kwds2) { if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; } else { goto invalid_keyword; } } return 0; arg_passed_twice: __Pyx_RaiseDoubleKeywordsError(function_name, key); goto bad; invalid_keyword_type: PyErr_Format(PyExc_TypeError, "%.200s() keywords must be strings", function_name); goto bad; invalid_keyword: PyErr_Format(PyExc_TypeError, #if PY_MAJOR_VERSION < 3 "%.200s() got an unexpected keyword argument '%.200s'", function_name, PyString_AsString(key)); #else "%s() got an unexpected keyword argument '%U'", function_name, key); #endif bad: return -1; } /* RaiseArgTupleInvalid */ static void __Pyx_RaiseArgtupleInvalid( const char* func_name, int exact, Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found) { Py_ssize_t num_expected; const char *more_or_less; if (num_found < num_min) { num_expected = num_min; more_or_less = "at least"; } else { num_expected = num_max; more_or_less = "at most"; } if (exact) { more_or_less = "exactly"; } PyErr_Format(PyExc_TypeError, "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", func_name, more_or_less, num_expected, (num_expected == 1) ? 
"" : "s", num_found); } /* ArgTypeTest */ static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { PyErr_Format(PyExc_TypeError, "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", name, type->tp_name, Py_TYPE(obj)->tp_name); } static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, const char *name, int exact) { if (unlikely(!type)) { PyErr_SetString(PyExc_SystemError, "Missing type object"); return 0; } if (none_allowed && obj == Py_None) return 1; else if (exact) { if (likely(Py_TYPE(obj) == type)) return 1; #if PY_MAJOR_VERSION == 2 else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; #endif } else { if (likely(PyObject_TypeCheck(obj, type))) return 1; } __Pyx_RaiseArgumentTypeInvalid(name, obj, type); return 0; } /* dict_getitem_default */ static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) { PyObject* value; #if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY value = PyDict_GetItemWithError(d, key); if (unlikely(!value)) { if (unlikely(PyErr_Occurred())) return NULL; value = default_value; } Py_INCREF(value); #else if (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key)) { value = PyDict_GetItem(d, key); if (unlikely(!value)) { value = default_value; } Py_INCREF(value); } else { if (default_value == Py_None) default_value = NULL; value = PyObject_CallMethodObjArgs( d, __pyx_n_s_get, key, default_value, NULL); } #endif return value; } /* StringJoin */ #if !CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) { return PyObject_CallMethodObjArgs(sep, __pyx_n_s_join, values, NULL); } #endif /* PyErrFetchRestore */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { PyObject *tmp_type, 
*tmp_value, *tmp_tb; tmp_type = tstate->curexc_type; tmp_value = tstate->curexc_value; tmp_tb = tstate->curexc_traceback; tstate->curexc_type = type; tstate->curexc_value = value; tstate->curexc_traceback = tb; Py_XDECREF(tmp_type); Py_XDECREF(tmp_value); Py_XDECREF(tmp_tb); } static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { *type = tstate->curexc_type; *value = tstate->curexc_value; *tb = tstate->curexc_traceback; tstate->curexc_type = 0; tstate->curexc_value = 0; tstate->curexc_traceback = 0; } #endif /* WriteUnraisableException */ static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno, CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename, int full_traceback, CYTHON_UNUSED int nogil) { PyObject *old_exc, *old_val, *old_tb; PyObject *ctx; __Pyx_PyThreadState_declare #ifdef WITH_THREAD PyGILState_STATE state; if (nogil) state = PyGILState_Ensure(); #ifdef _MSC_VER else state = (PyGILState_STATE)-1; #endif #endif __Pyx_PyThreadState_assign __Pyx_ErrFetch(&old_exc, &old_val, &old_tb); if (full_traceback) { Py_XINCREF(old_exc); Py_XINCREF(old_val); Py_XINCREF(old_tb); __Pyx_ErrRestore(old_exc, old_val, old_tb); PyErr_PrintEx(1); } #if PY_MAJOR_VERSION < 3 ctx = PyString_FromString(name); #else ctx = PyUnicode_FromString(name); #endif __Pyx_ErrRestore(old_exc, old_val, old_tb); if (!ctx) { PyErr_WriteUnraisable(Py_None); } else { PyErr_WriteUnraisable(ctx); Py_DECREF(ctx); } #ifdef WITH_THREAD if (nogil) PyGILState_Release(state); #endif } /* PyObjectCall */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { PyObject *result; ternaryfunc call = func->ob_type->tp_call; if (unlikely(!call)) return PyObject_Call(func, arg, kw); if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) return NULL; result = (*call)(func, arg, kw); Py_LeaveRecursiveCall(); if 
(unlikely(!result) && unlikely(!PyErr_Occurred())) { PyErr_SetString( PyExc_SystemError, "NULL result without error in PyObject_Call"); } return result; } #endif /* BytesEquals */ static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { #if CYTHON_COMPILING_IN_PYPY return PyObject_RichCompareBool(s1, s2, equals); #else if (s1 == s2) { return (equals == Py_EQ); } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { const char *ps1, *ps2; Py_ssize_t length = PyBytes_GET_SIZE(s1); if (length != PyBytes_GET_SIZE(s2)) return (equals == Py_NE); ps1 = PyBytes_AS_STRING(s1); ps2 = PyBytes_AS_STRING(s2); if (ps1[0] != ps2[0]) { return (equals == Py_NE); } else if (length == 1) { return (equals == Py_EQ); } else { int result = memcmp(ps1, ps2, (size_t)length); return (equals == Py_EQ) ? (result == 0) : (result != 0); } } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { return (equals == Py_NE); } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { return (equals == Py_NE); } else { int result; PyObject* py_result = PyObject_RichCompare(s1, s2, equals); if (!py_result) return -1; result = __Pyx_PyObject_IsTrue(py_result); Py_DECREF(py_result); return result; } #endif } /* GetModuleGlobalName */ static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { PyObject *result; #if CYTHON_COMPILING_IN_CPYTHON result = PyDict_GetItem(__pyx_d, name); if (likely(result)) { Py_INCREF(result); } else { #else result = PyObject_GetItem(__pyx_d, name); if (!result) { PyErr_Clear(); #endif result = __Pyx_GetBuiltinName(name); } return result; } /* PyObjectCallMethO */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { PyObject *self, *result; PyCFunction cfunc; cfunc = PyCFunction_GET_FUNCTION(func); self = PyCFunction_GET_SELF(func); if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) return NULL; result = cfunc(self, arg); 
Py_LeaveRecursiveCall(); if (unlikely(!result) && unlikely(!PyErr_Occurred())) { PyErr_SetString( PyExc_SystemError, "NULL result without error in PyObject_Call"); } return result; } #endif /* PyObjectCallOneArg */ #if CYTHON_COMPILING_IN_CPYTHON static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { PyObject *result; PyObject *args = PyTuple_New(1); if (unlikely(!args)) return NULL; Py_INCREF(arg); PyTuple_SET_ITEM(args, 0, arg); result = __Pyx_PyObject_Call(func, args, NULL); Py_DECREF(args); return result; } static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { #ifdef __Pyx_CyFunction_USED if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { #else if (likely(PyCFunction_Check(func))) { #endif if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { return __Pyx_PyObject_CallMethO(func, arg); } } return __Pyx__PyObject_CallOneArg(func, arg); } #else static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { PyObject *result; PyObject *args = PyTuple_Pack(1, arg); if (unlikely(!args)) return NULL; result = __Pyx_PyObject_Call(func, args, NULL); Py_DECREF(args); return result; } #endif /* PyObjectCallNoArg */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { #ifdef __Pyx_CyFunction_USED if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { #else if (likely(PyCFunction_Check(func))) { #endif if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) { return __Pyx_PyObject_CallMethO(func, NULL); } } return __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL); } #endif /* PyObjectCallMethod1 */ static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) { PyObject *method, *result = NULL; method = __Pyx_PyObject_GetAttrStr(obj, method_name); if (unlikely(!method)) goto bad; #if CYTHON_COMPILING_IN_CPYTHON if 
(likely(PyMethod_Check(method))) { PyObject *self = PyMethod_GET_SELF(method); if (likely(self)) { PyObject *args; PyObject *function = PyMethod_GET_FUNCTION(method); args = PyTuple_New(2); if (unlikely(!args)) goto bad; Py_INCREF(self); PyTuple_SET_ITEM(args, 0, self); Py_INCREF(arg); PyTuple_SET_ITEM(args, 1, arg); Py_INCREF(function); Py_DECREF(method); method = NULL; result = __Pyx_PyObject_Call(function, args, NULL); Py_DECREF(args); Py_DECREF(function); return result; } } #endif result = __Pyx_PyObject_CallOneArg(method, arg); bad: Py_XDECREF(method); return result; } /* append */ static CYTHON_INLINE int __Pyx_PyObject_Append(PyObject* L, PyObject* x) { if (likely(PyList_CheckExact(L))) { if (unlikely(__Pyx_PyList_Append(L, x) < 0)) return -1; } else { PyObject* retval = __Pyx_PyObject_CallMethod1(L, __pyx_n_s_append, x); if (unlikely(!retval)) return -1; Py_DECREF(retval); } return 0; } /* GetItemInt */ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { PyObject *r; if (!j) return NULL; r = PyObject_GetItem(o, j); Py_DECREF(j); return r; } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (wraparound & unlikely(i < 0)) i += PyList_GET_SIZE(o); if ((!boundscheck) || likely((0 <= i) & (i < PyList_GET_SIZE(o)))) { PyObject *r = PyList_GET_ITEM(o, i); Py_INCREF(r); return r; } return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); #else return PySequence_GetItem(o, i); #endif } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (wraparound & unlikely(i < 0)) i += PyTuple_GET_SIZE(o); if ((!boundscheck) || likely((0 <= i) & (i < PyTuple_GET_SIZE(o)))) { PyObject *r = PyTuple_GET_ITEM(o, i); Py_INCREF(r); return r; } return __Pyx_GetItemInt_Generic(o, 
PyInt_FromSsize_t(i)); #else return PySequence_GetItem(o, i); #endif } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (is_list || PyList_CheckExact(o)) { Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); if ((!boundscheck) || (likely((n >= 0) & (n < PyList_GET_SIZE(o))))) { PyObject *r = PyList_GET_ITEM(o, n); Py_INCREF(r); return r; } } else if (PyTuple_CheckExact(o)) { Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); if ((!boundscheck) || likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) { PyObject *r = PyTuple_GET_ITEM(o, n); Py_INCREF(r); return r; } } else { PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; if (likely(m && m->sq_item)) { if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { Py_ssize_t l = m->sq_length(o); if (likely(l >= 0)) { i += l; } else { if (!PyErr_ExceptionMatches(PyExc_OverflowError)) return NULL; PyErr_Clear(); } } return m->sq_item(o, i); } } #else if (is_list || PySequence_Check(o)) { return PySequence_GetItem(o, i); } #endif return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); } /* SetItemInt */ static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { int r; if (!j) return -1; r = PyObject_SetItem(o, j, v); Py_DECREF(j); return r; } static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (is_list || PyList_CheckExact(o)) { Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); if ((!boundscheck) || likely((n >= 0) & (n < PyList_GET_SIZE(o)))) { PyObject* old = PyList_GET_ITEM(o, n); Py_INCREF(v); PyList_SET_ITEM(o, n, v); Py_DECREF(old); return 1; } } else { PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; if (likely(m && m->sq_ass_item)) { if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { Py_ssize_t l = m->sq_length(o); if (likely(l >= 0)) { i += l; } else { if (!PyErr_ExceptionMatches(PyExc_OverflowError)) return -1; PyErr_Clear(); } } return m->sq_ass_item(o, i, v); } } #else #if CYTHON_COMPILING_IN_PYPY if (is_list || (PySequence_Check(o) && !PyDict_Check(o))) { #else if (is_list || PySequence_Check(o)) { #endif return PySequence_SetItem(o, i, v); } #endif return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); } /* SliceObject */ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) { #if CYTHON_COMPILING_IN_CPYTHON PyMappingMethods* mp; #if PY_MAJOR_VERSION < 3 PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; if (likely(ms && ms->sq_slice)) { if (!has_cstart) { if (_py_start && (*_py_start != Py_None)) { cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; } else cstart = 0; } if (!has_cstop) { if (_py_stop && (*_py_stop != Py_None)) { cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; } else cstop = PY_SSIZE_T_MAX; } if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { Py_ssize_t l = ms->sq_length(obj); if (likely(l >= 0)) { if (cstop < 0) { cstop += l; if (cstop < 0) cstop = 0; } if (cstart < 0) { cstart += l; if (cstart < 0) cstart = 0; } } else { if (!PyErr_ExceptionMatches(PyExc_OverflowError)) goto bad; PyErr_Clear(); } } return ms->sq_slice(obj, cstart, cstop); 
} #endif mp = Py_TYPE(obj)->tp_as_mapping; if (likely(mp && mp->mp_subscript)) #endif { PyObject* result; PyObject *py_slice, *py_start, *py_stop; if (_py_slice) { py_slice = *_py_slice; } else { PyObject* owned_start = NULL; PyObject* owned_stop = NULL; if (_py_start) { py_start = *_py_start; } else { if (has_cstart) { owned_start = py_start = PyInt_FromSsize_t(cstart); if (unlikely(!py_start)) goto bad; } else py_start = Py_None; } if (_py_stop) { py_stop = *_py_stop; } else { if (has_cstop) { owned_stop = py_stop = PyInt_FromSsize_t(cstop); if (unlikely(!py_stop)) { Py_XDECREF(owned_start); goto bad; } } else py_stop = Py_None; } py_slice = PySlice_New(py_start, py_stop, Py_None); Py_XDECREF(owned_start); Py_XDECREF(owned_stop); if (unlikely(!py_slice)) goto bad; } #if CYTHON_COMPILING_IN_CPYTHON result = mp->mp_subscript(obj, py_slice); #else result = PyObject_GetItem(obj, py_slice); #endif if (!_py_slice) { Py_DECREF(py_slice); } return result; } PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", Py_TYPE(obj)->tp_name); bad: return NULL; } /* PyObjectCallMethod2 */ static PyObject* __Pyx_PyObject_CallMethod2(PyObject* obj, PyObject* method_name, PyObject* arg1, PyObject* arg2) { PyObject *args, *method, *result = NULL; method = __Pyx_PyObject_GetAttrStr(obj, method_name); #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyMethod_Check(method)) && likely(PyMethod_GET_SELF(method))) { PyObject *self, *function; self = PyMethod_GET_SELF(method); function = PyMethod_GET_FUNCTION(method); args = PyTuple_New(3); if (unlikely(!args)) goto bad; Py_INCREF(self); PyTuple_SET_ITEM(args, 0, self); Py_INCREF(arg1); PyTuple_SET_ITEM(args, 1, arg1); Py_INCREF(arg2); PyTuple_SET_ITEM(args, 2, arg2); Py_INCREF(function); Py_DECREF(method); method = function; } else #endif { args = PyTuple_New(2); if (unlikely(!args)) goto bad; Py_INCREF(arg1); PyTuple_SET_ITEM(args, 0, arg1); Py_INCREF(arg2); PyTuple_SET_ITEM(args, 1, arg2); } result = __Pyx_PyObject_Call(method, args, 
NULL); Py_DECREF(args); Py_DECREF(method); return result; bad: Py_XDECREF(method); return result; } /* dict_setdefault */ static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value, CYTHON_UNUSED int is_safe_type) { PyObject* value; #if PY_VERSION_HEX >= 0x030400A0 if (1) { value = PyDict_SetDefault(d, key, default_value); if (unlikely(!value)) return NULL; Py_INCREF(value); #else if (is_safe_type == 1 || (is_safe_type == -1 && #if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY (PyUnicode_CheckExact(key) || PyString_CheckExact(key) || PyLong_CheckExact(key)))) { value = PyDict_GetItemWithError(d, key); if (unlikely(!value)) { if (unlikely(PyErr_Occurred())) return NULL; if (unlikely(PyDict_SetItem(d, key, default_value) == -1)) return NULL; value = default_value; } Py_INCREF(value); #else (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key) || PyLong_CheckExact(key)))) { value = PyDict_GetItem(d, key); if (unlikely(!value)) { if (unlikely(PyDict_SetItem(d, key, default_value) == -1)) return NULL; value = default_value; } Py_INCREF(value); #endif #endif } else { value = __Pyx_PyObject_CallMethod2(d, __pyx_n_s_setdefault, key, default_value); } return value; } /* PyObjectCallMethod0 */ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { PyObject *method, *result = NULL; method = __Pyx_PyObject_GetAttrStr(obj, method_name); if (unlikely(!method)) goto bad; #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyMethod_Check(method))) { PyObject *self = PyMethod_GET_SELF(method); if (likely(self)) { PyObject *function = PyMethod_GET_FUNCTION(method); result = __Pyx_PyObject_CallOneArg(function, self); Py_DECREF(method); return result; } } #endif result = __Pyx_PyObject_CallNoArg(method); Py_DECREF(method); bad: return result; } /* UnpackUnboundCMethod */ static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) { PyObject *method; method = 
__Pyx_PyObject_GetAttrStr(target->type, *target->method_name); if (unlikely(!method)) return -1; target->method = method; #if CYTHON_COMPILING_IN_CPYTHON #if PY_MAJOR_VERSION >= 3 if (likely(PyObject_TypeCheck(method, &PyMethodDescr_Type))) #endif { PyMethodDescrObject *descr = (PyMethodDescrObject*) method; target->func = descr->d_method->ml_meth; target->flag = descr->d_method->ml_flags & (METH_VARARGS | METH_KEYWORDS | METH_O | METH_NOARGS); } #endif return 0; } /* CallUnboundCMethod0 */ static PyObject* __Pyx__CallUnboundCMethod0(__Pyx_CachedCFunction* cfunc, PyObject* self) { PyObject *args, *result = NULL; if (unlikely(!cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; #if CYTHON_COMPILING_IN_CPYTHON args = PyTuple_New(1); if (unlikely(!args)) goto bad; Py_INCREF(self); PyTuple_SET_ITEM(args, 0, self); #else args = PyTuple_Pack(1, self); if (unlikely(!args)) goto bad; #endif result = __Pyx_PyObject_Call(cfunc->method, args, NULL); Py_DECREF(args); bad: return result; } /* pop */ static CYTHON_INLINE PyObject* __Pyx__PyObject_Pop(PyObject* L) { #if CYTHON_COMPILING_IN_CPYTHON if (Py_TYPE(L) == &PySet_Type) { return PySet_Pop(L); } #endif return __Pyx_PyObject_CallMethod0(L, __pyx_n_s_pop); } #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L) { if (likely(PyList_GET_SIZE(L) > (((PyListObject*)L)->allocated >> 1))) { Py_SIZE(L) -= 1; return PyList_GET_ITEM(L, PyList_GET_SIZE(L)); } return __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyList_Type_pop, L); } #endif /* SetVTable */ static int __Pyx_SetVtable(PyObject *dict, void *vtable) { #if PY_VERSION_HEX >= 0x02070000 PyObject *ob = PyCapsule_New(vtable, 0, 0); #else PyObject *ob = PyCObject_FromVoidPtr(vtable, 0); #endif if (!ob) goto bad; if (PyDict_SetItem(dict, __pyx_n_s_pyx_vtable, ob) < 0) goto bad; Py_DECREF(ob); return 0; bad: Py_XDECREF(ob); return -1; } /* Import */ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, 
int level) { PyObject *empty_list = 0; PyObject *module = 0; PyObject *global_dict = 0; PyObject *empty_dict = 0; PyObject *list; #if PY_VERSION_HEX < 0x03030000 PyObject *py_import; py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); if (!py_import) goto bad; #endif if (from_list) list = from_list; else { empty_list = PyList_New(0); if (!empty_list) goto bad; list = empty_list; } global_dict = PyModule_GetDict(__pyx_m); if (!global_dict) goto bad; empty_dict = PyDict_New(); if (!empty_dict) goto bad; { #if PY_MAJOR_VERSION >= 3 if (level == -1) { if (strchr(__Pyx_MODULE_NAME, '.')) { #if PY_VERSION_HEX < 0x03030000 PyObject *py_level = PyInt_FromLong(1); if (!py_level) goto bad; module = PyObject_CallFunctionObjArgs(py_import, name, global_dict, empty_dict, list, py_level, NULL); Py_DECREF(py_level); #else module = PyImport_ImportModuleLevelObject( name, global_dict, empty_dict, list, 1); #endif if (!module) { if (!PyErr_ExceptionMatches(PyExc_ImportError)) goto bad; PyErr_Clear(); } } level = 0; } #endif if (!module) { #if PY_VERSION_HEX < 0x03030000 PyObject *py_level = PyInt_FromLong(level); if (!py_level) goto bad; module = PyObject_CallFunctionObjArgs(py_import, name, global_dict, empty_dict, list, py_level, NULL); Py_DECREF(py_level); #else module = PyImport_ImportModuleLevelObject( name, global_dict, empty_dict, list, level); #endif } } bad: #if PY_VERSION_HEX < 0x03030000 Py_XDECREF(py_import); #endif Py_XDECREF(empty_list); Py_XDECREF(empty_dict); return module; } /* CodeObjectCache */ static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { int start = 0, mid = 0, end = count - 1; if (end >= 0 && code_line > entries[end].code_line) { return count; } while (start < end) { mid = start + (end - start) / 2; if (code_line < entries[mid].code_line) { end = mid; } else if (code_line > entries[mid].code_line) { start = mid + 1; } else { return mid; } } if (code_line <= entries[mid].code_line) { return 
mid; } else { return mid + 1; } } static PyCodeObject *__pyx_find_code_object(int code_line) { PyCodeObject* code_object; int pos; if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { return NULL; } pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { return NULL; } code_object = __pyx_code_cache.entries[pos].code_object; Py_INCREF(code_object); return code_object; } static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { int pos, i; __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; if (unlikely(!code_line)) { return; } if (unlikely(!entries)) { entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); if (likely(entries)) { __pyx_code_cache.entries = entries; __pyx_code_cache.max_count = 64; __pyx_code_cache.count = 1; entries[0].code_line = code_line; entries[0].code_object = code_object; Py_INCREF(code_object); } return; } pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { PyCodeObject* tmp = entries[pos].code_object; entries[pos].code_object = code_object; Py_DECREF(tmp); return; } if (__pyx_code_cache.count == __pyx_code_cache.max_count) { int new_max = __pyx_code_cache.max_count + 64; entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); if (unlikely(!entries)) { return; } __pyx_code_cache.entries = entries; __pyx_code_cache.max_count = new_max; } for (i=__pyx_code_cache.count; i>pos; i--) { entries[i] = entries[i-1]; } entries[pos].code_line = code_line; entries[pos].code_object = code_object; __pyx_code_cache.count++; Py_INCREF(code_object); } /* AddTraceback */ #include "compile.h" #include 
"frameobject.h" #include "traceback.h" static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( const char *funcname, int c_line, int py_line, const char *filename) { PyCodeObject *py_code = 0; PyObject *py_srcfile = 0; PyObject *py_funcname = 0; #if PY_MAJOR_VERSION < 3 py_srcfile = PyString_FromString(filename); #else py_srcfile = PyUnicode_FromString(filename); #endif if (!py_srcfile) goto bad; if (c_line) { #if PY_MAJOR_VERSION < 3 py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); #else py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); #endif } else { #if PY_MAJOR_VERSION < 3 py_funcname = PyString_FromString(funcname); #else py_funcname = PyUnicode_FromString(funcname); #endif } if (!py_funcname) goto bad; py_code = __Pyx_PyCode_New( 0, 0, 0, 0, 0, __pyx_empty_bytes, /*PyObject *code,*/ __pyx_empty_tuple, /*PyObject *consts,*/ __pyx_empty_tuple, /*PyObject *names,*/ __pyx_empty_tuple, /*PyObject *varnames,*/ __pyx_empty_tuple, /*PyObject *freevars,*/ __pyx_empty_tuple, /*PyObject *cellvars,*/ py_srcfile, /*PyObject *filename,*/ py_funcname, /*PyObject *name,*/ py_line, __pyx_empty_bytes /*PyObject *lnotab*/ ); Py_DECREF(py_srcfile); Py_DECREF(py_funcname); return py_code; bad: Py_XDECREF(py_srcfile); Py_XDECREF(py_funcname); return NULL; } static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename) { PyCodeObject *py_code = 0; PyFrameObject *py_frame = 0; py_code = __pyx_find_code_object(c_line ? c_line : py_line); if (!py_code) { py_code = __Pyx_CreateCodeObjectForTraceback( funcname, c_line, py_line, filename); if (!py_code) goto bad; __pyx_insert_code_object(c_line ? 
c_line : py_line, py_code); } py_frame = PyFrame_New( PyThreadState_GET(), /*PyThreadState *tstate,*/ py_code, /*PyCodeObject *code,*/ __pyx_d, /*PyObject *globals,*/ 0 /*PyObject *locals*/ ); if (!py_frame) goto bad; py_frame->f_lineno = py_line; PyTraceBack_Here(py_frame); bad: Py_XDECREF(py_code); Py_XDECREF(py_frame); } /* CIntFromPyVerify: range-checked "return" helpers for the C-integer converters. __PYX__VERIFY_RETURN_INT stores func_value in a func_type temporary and returns it cast to target_type. When target_type is narrower than func_type and the value changes under the round-trip cast, it jumps to raise_neg_overflow (is_unsigned set and value below zero) or to raise_overflow; in that same mismatch branch, exc == 1 (the _EXC variant) first returns (target_type) -1 when the value is -1 and PyErr_Occurred() is true. The expansion needs an `is_unsigned` variable and both labels in the enclosing function. */ #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) #define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) #define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ {\ func_type value = func_value;\ if (sizeof(target_type) < sizeof(func_type)) {\ if (unlikely(value != (func_type) (target_type) value)) {\ func_type zero = 0;\ if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ return (target_type) -1;\ if (is_unsigned && unlikely(value < zero))\ goto raise_neg_overflow;\ else\ goto raise_overflow;\ }\ }\ return (target_type) value;\ } /* CIntToPy: wrap a C __pyx_t_7gumbocy_AttributeNames value in a Python integer. is_unsigned is derived by comparing (type) -1 with 0; the sizeof comparisons then pick PyInt_FromLong, PyLong_FromUnsignedLong, PyLong_FromUnsignedLongLong or PyLong_FromLongLong, and _PyLong_FromByteArray on the raw bytes of the value is the fallback when none of them fits. */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From___pyx_t_7gumbocy_AttributeNames(__pyx_t_7gumbocy_AttributeNames value) { const __pyx_t_7gumbocy_AttributeNames neg_one = (__pyx_t_7gumbocy_AttributeNames) -1, const_zero = (__pyx_t_7gumbocy_AttributeNames) 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(__pyx_t_7gumbocy_AttributeNames) < sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(__pyx_t_7gumbocy_AttributeNames) <= sizeof(unsigned long)) { return PyLong_FromUnsignedLong((unsigned long) value); } else if (sizeof(__pyx_t_7gumbocy_AttributeNames) <= sizeof(unsigned PY_LONG_LONG)) { return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); } } else { if (sizeof(__pyx_t_7gumbocy_AttributeNames) <= sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(__pyx_t_7gumbocy_AttributeNames) <= sizeof(PY_LONG_LONG)) {
return PyLong_FromLongLong((PY_LONG_LONG) value); } } { int one = 1; int little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&value; return _PyLong_FromByteArray(bytes, sizeof(__pyx_t_7gumbocy_AttributeNames), little, !is_unsigned); } } /* CIntToPy: wrap a C unsigned int in a Python integer; same sizeof-driven selection of the constructor as the other CIntToPy converters, with _PyLong_FromByteArray as the fallback. */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(unsigned int) < sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(unsigned int) <= sizeof(unsigned long)) { return PyLong_FromUnsignedLong((unsigned long) value); } else if (sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG)) { return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); } } else { if (sizeof(unsigned int) <= sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(unsigned int) <= sizeof(PY_LONG_LONG)) { return PyLong_FromLongLong((PY_LONG_LONG) value); } } { int one = 1; int little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&value; return _PyLong_FromByteArray(bytes, sizeof(unsigned int), little, !is_unsigned); } } /* CIntToPy: wrap a C long in a Python integer; the `little` flag passed to _PyLong_FromByteArray is computed at run time from the first byte of an int holding 1. */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { const long neg_one = (long) -1, const_zero = (long) 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(long) < sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(long) <= sizeof(unsigned long)) { return PyLong_FromUnsignedLong((unsigned long) value); } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); } } else { if (sizeof(long) <= sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { return PyLong_FromLongLong((PY_LONG_LONG) value); } } { int one = 1; int little = (int)*(unsigned char *)&one; unsigned char
*bytes = (unsigned char *)&value; return _PyLong_FromByteArray(bytes, sizeof(long), little, !is_unsigned); } } /* CIntToPy: wrap a C int in a Python integer; same structure as the other CIntToPy converters. */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { const int neg_one = (int) -1, const_zero = (int) 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(int) < sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(int) <= sizeof(unsigned long)) { return PyLong_FromUnsignedLong((unsigned long) value); } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); } } else { if (sizeof(int) <= sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { return PyLong_FromLongLong((PY_LONG_LONG) value); } } { int one = 1; int little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&value; return _PyLong_FromByteArray(bytes, sizeof(int), little, !is_unsigned); } } /* CIntFromPy: convert Python object x to a C int. PyInt (Python 2 only) and PyLong inputs are converted directly; any other object is first coerced with __Pyx_PyNumber_IntOrLong and converted recursively. The failure paths of this function return (int) -1, with OverflowError set at the raise_overflow / raise_neg_overflow labels. NOTE(review): -1 is also a legitimate result, so callers presumably check PyErr_Occurred() - confirm. */ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { const int neg_one = (int) -1, const_zero = (int) 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(int) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { goto raise_neg_overflow; } return (int) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { #if CYTHON_USE_PYLONG_INTERNALS const digit* digits = ((PyLongObject*)x)->ob_digit; switch (Py_SIZE(x)) { case 0: return (int) 0; case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) case 2: if (8 * sizeof(int) > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { return (int) (((((int)digits[1]) << PyLong_SHIFT) |
(int)digits[0])); } } break; case 3: if (8 * sizeof(int) > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); } } break; case 4: if (8 * sizeof(int) > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); } } break; } #endif /* Unsigned target and no digit fast path returned: a negative value goes to raise_neg_overflow before the generic PyLong_AsUnsignedLong / PyLong_AsUnsignedLongLong conversion. */ #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { goto raise_neg_overflow; } #else { int result = PyObject_RichCompareBool(x, Py_False, Py_LT); if (unlikely(result < 0)) return (int) -1; if (unlikely(result == 1)) goto raise_neg_overflow; } #endif if (sizeof(int) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { /* Signed target: the switch dispatches on Py_SIZE(x); the negative cases negate the magnitude assembled from ob_digit. */ #if CYTHON_USE_PYLONG_INTERNALS const digit* digits = ((PyLongObject*)x)->ob_digit; switch (Py_SIZE(x)) { case 0: return (int) 0; case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0])) case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) case -2: if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) |
(unsigned long)digits[0]))) } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); } } break; case 2: if (8 * sizeof(int) > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); } } break; case -3: if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); } } break; case 3: if (8 * sizeof(int) > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); } } break; case -4: if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); } } break; case 4: if (8 *
sizeof(int) > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); } } break; } #endif if (sizeof(int) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) } } /* Last resort when no branch above returned: coerce x with __Pyx_PyNumber_IntOrLong and copy its bytes into `val` with _PyLong_AsByteArray; (int) -1 is returned if that fails. */ { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else int val; PyObject *v = __Pyx_PyNumber_IntOrLong(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (int) -1; } } else { int val; PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); if (!tmp) return (int) -1; val = __Pyx_PyInt_As_int(tmp); Py_DECREF(tmp); return val; } raise_overflow: PyErr_SetString(PyExc_OverflowError, "value too large to convert to int"); return (int) -1; raise_neg_overflow: PyErr_SetString(PyExc_OverflowError, "can't convert negative value to int"); return (int) -1; } /* CIntFromPy: convert Python object x to a C unsigned int. Same structure as the other CIntFromPy converters: direct handling of PyInt (Python 2 only) and PyLong, coercion through __Pyx_PyNumber_IntOrLong for other objects, and (unsigned int) -1 returned on the failure paths, with OverflowError set at the raise_overflow / raise_neg_overflow labels. */ static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *x) { const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; const int is_unsigned = neg_one > const_zero; #if
PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(unsigned int) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(unsigned int, long, PyInt_AS_LONG(x)) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { goto raise_neg_overflow; } return (unsigned int) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { /* Unsigned target: with CYTHON_USE_PYLONG_INTERNALS, values of up to four digits are assembled directly from ob_digit when the sizeof checks allow it. */ #if CYTHON_USE_PYLONG_INTERNALS const digit* digits = ((PyLongObject*)x)->ob_digit; switch (Py_SIZE(x)) { case 0: return (unsigned int) 0; case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, digits[0]) case 2: if (8 * sizeof(unsigned int) > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) >= 2 * PyLong_SHIFT) { return (unsigned int) (((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); } } break; case 3: if (8 * sizeof(unsigned int) > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) >= 3 * PyLong_SHIFT) { return (unsigned int) (((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); } } break; case 4: if (8 * sizeof(unsigned int) > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) >= 4 * PyLong_SHIFT) { return (unsigned int) (((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned
int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); } } break; } #endif /* No digit fast path returned: a negative value goes to raise_neg_overflow before the generic PyLong_AsUnsignedLong / PyLong_AsUnsignedLongLong conversion. */ #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { goto raise_neg_overflow; } #else { int result = PyObject_RichCompareBool(x, Py_False, Py_LT); if (unlikely(result < 0)) return (unsigned int) -1; if (unlikely(result == 1)) goto raise_neg_overflow; } #endif if (sizeof(unsigned int) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG)) { __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { #if CYTHON_USE_PYLONG_INTERNALS const digit* digits = ((PyLongObject*)x)->ob_digit; switch (Py_SIZE(x)) { case 0: return (unsigned int) 0; case -1: __PYX_VERIFY_RETURN_INT(unsigned int, sdigit, (sdigit) (-(sdigit)digits[0])) case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, +digits[0]) case -2: if (8 * sizeof(unsigned int) - 1 > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT) { return (unsigned int) (((unsigned int)-1)*(((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); } } break; case 2: if (8 * sizeof(unsigned int) > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT) { return (unsigned int) ((((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); } } break; case -3: if (8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((unsigned
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT) { return (unsigned int) (((unsigned int)-1)*(((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); } } break; case 3: if (8 * sizeof(unsigned int) > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT) { return (unsigned int) ((((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); } } break; case -4: if (8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT) { return (unsigned int) (((unsigned int)-1)*(((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); } } break; case 4: if (8 * sizeof(unsigned int) > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT) { return (unsigned int) ((((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned
int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); } } break; } #endif if (sizeof(unsigned int) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT_EXC(unsigned int, long, PyLong_AsLong(x)) } else if (sizeof(unsigned int) <= sizeof(PY_LONG_LONG)) { __PYX_VERIFY_RETURN_INT_EXC(unsigned int, PY_LONG_LONG, PyLong_AsLongLong(x)) } } { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else unsigned int val; PyObject *v = __Pyx_PyNumber_IntOrLong(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (unsigned int) -1; } } else { unsigned int val; PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); if (!tmp) return (unsigned int) -1; val = __Pyx_PyInt_As_unsigned_int(tmp); Py_DECREF(tmp); return val; } raise_overflow: PyErr_SetString(PyExc_OverflowError, "value too large to convert to unsigned int"); return (unsigned int) -1; raise_neg_overflow: PyErr_SetString(PyExc_OverflowError, "can't convert negative value to unsigned int"); return (unsigned int) -1; } /* CIntFromPy: convert Python object x to a C long. Same structure as the other CIntFromPy converters: direct handling of PyInt (Python 2 only) and PyLong, coercion through __Pyx_PyNumber_IntOrLong for other objects, and (long) -1 returned on the failure paths, with OverflowError set at the raise_overflow / raise_neg_overflow labels. */ static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { const long neg_one = (long) -1, const_zero = (long) 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(long) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { goto raise_neg_overflow; } return (long) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { #if CYTHON_USE_PYLONG_INTERNALS const
digit* digits = ((PyLongObject*)x)->ob_digit; /* Dispatch on Py_SIZE(x): values of up to four digits are assembled from digits[] with PyLong_SHIFT-wide shifts when the sizeof checks show the target is wide enough; otherwise the case breaks out to the generic conversion below. */ switch (Py_SIZE(x)) { case 0: return (long) 0; case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) case 2: if (8 * sizeof(long) > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); } } break; case 3: if (8 * sizeof(long) > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); } } break; case 4: if (8 * sizeof(long) > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); } } break; } #endif #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { goto raise_neg_overflow; } #else { int result = PyObject_RichCompareBool(x, Py_False, Py_LT); if (unlikely(result < 0)) return (long) -1; if (unlikely(result == 1)) goto raise_neg_overflow; } #endif if (sizeof(long) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG,
PyLong_AsUnsignedLongLong(x)) } } else { #if CYTHON_USE_PYLONG_INTERNALS const digit* digits = ((PyLongObject*)x)->ob_digit; switch (Py_SIZE(x)) { case 0: return (long) 0; case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0])) case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) case -2: if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); } } break; case 2: if (8 * sizeof(long) > 1 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); } } break; case -3: if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); } } break; case 3: if (8 * sizeof(long) > 2 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); } } break; case -4: if (8 * sizeof(long) -
1 > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); } } break; case 4: if (8 * sizeof(long) > 3 * PyLong_SHIFT) { if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); } } break; } #endif if (sizeof(long) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) } } /* Last resort when no branch above returned: coerce x with __Pyx_PyNumber_IntOrLong and copy its bytes into `val` with _PyLong_AsByteArray; (long) -1 is returned if that fails. */ { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else long val; PyObject *v = __Pyx_PyNumber_IntOrLong(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (long) -1; } } else { long val; PyObject *tmp =
__Pyx_PyNumber_IntOrLong(x); if (!tmp) return (long) -1; val = __Pyx_PyInt_As_long(tmp); Py_DECREF(tmp); return val; } raise_overflow: PyErr_SetString(PyExc_OverflowError, "value too large to convert to long"); return (long) -1; raise_neg_overflow: PyErr_SetString(PyExc_OverflowError, "can't convert negative value to long"); return (long) -1; } /* CheckBinaryVersion */ static int __Pyx_check_binary_version(void) { char ctversion[4], rtversion[4]; PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { char message[200]; PyOS_snprintf(message, sizeof(message), "compiletime version %s of module '%.100s' " "does not match runtime version %s", ctversion, __Pyx_MODULE_NAME, rtversion); return PyErr_WarnEx(NULL, message, 1); } return 0; } /* InitStrings */ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { while (t->p) { #if PY_MAJOR_VERSION < 3 if (t->is_unicode) { *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); } else if (t->intern) { *t->p = PyString_InternFromString(t->s); } else { *t->p = PyString_FromStringAndSize(t->s, t->n - 1); } #else if (t->is_unicode | t->is_str) { if (t->intern) { *t->p = PyUnicode_InternFromString(t->s); } else if (t->encoding) { *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); } else { *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); } } else { *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); } #endif if (!*t->p) return -1; ++t; } return 0; } static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); } static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { Py_ssize_t ignore; return __Pyx_PyObject_AsStringAndSize(o, &ignore); } static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { #if CYTHON_COMPILING_IN_CPYTHON && 
(__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if ( #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII __Pyx_sys_getdefaultencoding_not_ascii && #endif PyUnicode_Check(o)) { #if PY_VERSION_HEX < 0x03030000 char* defenc_c; PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); if (!defenc) return NULL; defenc_c = PyBytes_AS_STRING(defenc); #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII { char* end = defenc_c + PyBytes_GET_SIZE(defenc); char* c; for (c = defenc_c; c < end; c++) { if ((unsigned char) (*c) >= 128) { PyUnicode_AsASCIIString(o); return NULL; } } } #endif *length = PyBytes_GET_SIZE(defenc); return defenc_c; #else if (__Pyx_PyUnicode_READY(o) == -1) return NULL; #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII if (PyUnicode_IS_ASCII(o)) { *length = PyUnicode_GET_LENGTH(o); return PyUnicode_AsUTF8(o); } else { PyUnicode_AsASCIIString(o); return NULL; } #else return PyUnicode_AsUTF8AndSize(o, length); #endif #endif } else #endif #if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) if (PyByteArray_Check(o)) { *length = PyByteArray_GET_SIZE(o); return PyByteArray_AS_STRING(o); } else #endif { char* result; int r = PyBytes_AsStringAndSize(o, &result, length); if (unlikely(r < 0)) { return NULL; } else { return result; } } } static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { int is_true = x == Py_True; if (is_true | (x == Py_False) | (x == Py_None)) return is_true; else return PyObject_IsTrue(x); } static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { PyNumberMethods *m; const char *name = NULL; PyObject *res = NULL; #if PY_MAJOR_VERSION < 3 if (PyInt_Check(x) || PyLong_Check(x)) #else if (PyLong_Check(x)) #endif return __Pyx_NewRef(x); m = Py_TYPE(x)->tp_as_number; #if PY_MAJOR_VERSION < 3 if (m && m->nb_int) { name = "int"; res = PyNumber_Int(x); } else if (m && m->nb_long) { name = "long"; res = PyNumber_Long(x); } #else if (m && 
m->nb_int) { name = "int"; res = PyNumber_Long(x); } #endif if (res) { #if PY_MAJOR_VERSION < 3 if (!PyInt_Check(res) && !PyLong_Check(res)) { #else if (!PyLong_Check(res)) { #endif PyErr_Format(PyExc_TypeError, "__%.4s__ returned non-%.4s (type %.200s)", name, name, Py_TYPE(res)->tp_name); Py_DECREF(res); return NULL; } } else if (!PyErr_Occurred()) { PyErr_SetString(PyExc_TypeError, "an integer is required"); } return res; } static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { Py_ssize_t ival; PyObject *x; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_CheckExact(b))) { if (sizeof(Py_ssize_t) >= sizeof(long)) return PyInt_AS_LONG(b); else return PyInt_AsSsize_t(x); } #endif if (likely(PyLong_CheckExact(b))) { #if CYTHON_USE_PYLONG_INTERNALS const digit* digits = ((PyLongObject*)b)->ob_digit; const Py_ssize_t size = Py_SIZE(b); if (likely(__Pyx_sst_abs(size) <= 1)) { ival = likely(size) ? digits[0] : 0; if (size == -1) ival = -ival; return ival; } else { switch (size) { case 2: if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); } break; case -2: if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); } break; case 3: if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); } break; case -3: if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); } break; case 4: if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); } break; case -4: if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { return -(Py_ssize_t) 
(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); } break; } } #endif return PyLong_AsSsize_t(b); } x = PyNumber_Index(b); if (!x) return -1; ival = PyInt_AsSsize_t(x); Py_DECREF(x); return ival; } static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { return PyInt_FromSize_t(ival); } #endif /* Py_PYTHON_H */