From 0c65ac1dc98bdeacfd970251eb73ccf33a29b90b Mon Sep 17 00:00:00 2001 From: Antonio Huete Jimenez Date: Sat, 12 Nov 2022 16:38:24 +0100 Subject: [PATCH] vendor/expat: upgrade from 2.1.0 to 2.5.0 Summary of notable changes: - Detect overflow from len=INT_MAX call to XML_Parse - Fix a dangling pointer issue related to realloc - Fix copying of partial characters for UTF-8 input - Avoid doing arithmetic with NULL pointers in XML_GetBuffer - Fix reading uninitialized variable during parsing - CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer - Fix potential null pointer dereference - Following CVEs were handled (not a complete list) CVE-2016-0718, CVE-2016-4472, CVE-2016-5300, CVE-2012-0876 CVE-2012-6702, CVE-2017-9233, CVE-2016-9063, CVE-2018-20843 CVE-2019-15903,CVE-2013-0340/CWE-776, CVE-2021-45960 CVE-2021-46143, CVE-2022-22822 to CVE-2022-22827 CVE-2022-23852, CVE-2022-23990, CVE-2022-43680 For detailed list of all changes, bugfixes and improvements, see Changes. --- contrib/expat/COPYING | 5 +- contrib/expat/Changes | 1087 +++- contrib/expat/expat_config.h | 140 + contrib/expat/expat_config.h.in | 102 - contrib/expat/lib/Makefile.MPW | 206 - contrib/expat/lib/amigaconfig.h | 32 - contrib/expat/lib/ascii.h | 35 +- contrib/expat/lib/asciitab.h | 96 +- contrib/expat/lib/expat.dsp | 185 - contrib/expat/lib/expat.h | 395 +- contrib/expat/lib/expat_external.h | 120 +- contrib/expat/lib/expat_static.dsp | 162 - contrib/expat/lib/expatw.dsp | 185 - contrib/expat/lib/expatw_static.dsp | 162 - contrib/expat/lib/iasciitab.h | 96 +- contrib/expat/lib/internal.h | 120 +- contrib/expat/lib/latin1tab.h | 96 +- contrib/expat/lib/libexpat.def | 73 - contrib/expat/lib/libexpatw.def | 73 - contrib/expat/lib/macconfig.h | 53 - contrib/expat/lib/nametab.h | 274 +- contrib/expat/lib/siphash.h | 393 ++ contrib/expat/lib/utf8tab.h | 97 +- contrib/expat/lib/winconfig.h | 49 +- contrib/expat/lib/xmlparse.c | 7861 +++++++++++++++++---------- contrib/expat/lib/xmlrole.c | 827 ++- contrib/expat/lib/xmlrole.h | 48 +- contrib/expat/lib/xmltok.c | 1522 +++--- contrib/expat/lib/xmltok.h | 257 +- contrib/expat/lib/xmltok_impl.c | 1300 ++--- contrib/expat/lib/xmltok_impl.h | 104 +- contrib/expat/lib/xmltok_ns.c | 123 +- 32 files changed, 9504 insertions(+), 6774 deletions(-) mode change 100755 => 100644 contrib/expat/COPYING mode change 100755 => 100644 contrib/expat/Changes create mode 100644 contrib/expat/expat_config.h delete mode 100644 contrib/expat/expat_config.h.in delete mode 100644 contrib/expat/lib/Makefile.MPW delete mode 100644 contrib/expat/lib/amigaconfig.h delete mode 100644 contrib/expat/lib/expat.dsp delete mode 100644 contrib/expat/lib/expat_static.dsp delete mode 100644 contrib/expat/lib/expatw.dsp delete mode 100644 contrib/expat/lib/expatw_static.dsp delete mode 100644 contrib/expat/lib/libexpat.def delete mode 100644 contrib/expat/lib/libexpatw.def delete mode 100644 contrib/expat/lib/macconfig.h create mode 100644 contrib/expat/lib/siphash.h diff --git a/contrib/expat/COPYING b/contrib/expat/COPYING old mode 100755 new mode 100644 index dcb4506429..ce9e593929 --- a/contrib/expat/COPYING +++ b/contrib/expat/COPYING @@ -1,6 +1,5 @@ -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - and Clark Cooper -Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers. +Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper +Copyright (c) 2001-2022 Expat maintainers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/contrib/expat/Changes b/contrib/expat/Changes old mode 100755 new mode 100644 index 08897b9f9e..e671710569 --- a/contrib/expat/Changes +++ b/contrib/expat/Changes @@ -1,29 +1,1102 @@ +NOTE: We are looking for help with a few things: + https://github.com/libexpat/libexpat/labels/help%20wanted + If you can help, please get in touch. Thanks! + +Release 2.5.0 Tue October 25 2022 + Security fixes: + #616 #649 #650 CVE-2022-43680 -- Fix heap use-after-free after overeager + destruction of a shared DTD in function + XML_ExternalEntityParserCreate in out-of-memory situations. + Expected impact is denial of service or potentially + arbitrary code execution. + + Bug fixes: + #612 #645 Fix curruption from undefined entities + #613 #654 Fix case when parsing was suspended while processing nested + entities + #616 #652 #653 Stop leaking opening tag bindings after a closing tag + mismatch error where a parser is reset through + XML_ParserReset and then reused to parse + #656 CMake: Fix generation of pkg-config file + #658 MinGW|CMake: Fix static library name + + Other changes: + #663 Protect header expat_config.h from multiple inclusion + #666 examples: Make use of XML_GetBuffer and be more + consistent across examples + #648 Address compiler warnings + #667 #668 Version info bumped from 9:9:8 to 9:10:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Jann Horn + Mark Brand + Osyotr + Rhodri James + and + Google Project Zero + +Release 2.4.9 Tue September 20 2022 + Security fixes: + #629 #640 CVE-2022-40674 -- Heap use-after-free vulnerability in + function doContent. Expected impact is denial of service + or potentially arbitrary code execution. + + Bug fixes: + #634 MinGW: Fix mis-compilation for -D__USE_MINGW_ANSI_STDIO=0 + #614 docs: Fix documentation on effect of switch XML_DTD on + symbol visibility in doc/reference.html + + Other changes: + #638 MinGW: Make fix-xmltest-log.sh drop more Wine bug output + #596 #625 Autotools: Sync CMake templates with CMake 3.22 + #608 CMake: Migrate from use of CMAKE_*_POSTFIX to + dedicated variables EXPAT_*_POSTFIX to stop affecting + other projects + #597 #599 Windows|CMake: Add missing -DXML_STATIC to test runners + and fuzzers + #512 #621 Windows|CMake: Render .def file from a template to fix + linking with -DEXPAT_DTD=OFF and/or -DEXPAT_ATTR_INFO=ON + #611 #621 MinGW|CMake: Apply MSVC .def file when linking + #622 #624 MinGW|CMake: Sync library name with GNU Autotools, + i.e. produce libexpat-1.dll rather than libexpat.dll + by default. Filename libexpat.dll.a is unaffected. + #632 MinGW|CMake: Set missing variable CMAKE_RC_COMPILER in + toolchain file "cmake/mingw-toolchain.cmake" to avoid + error "windres: Command not found" on e.g. Ubuntu 20.04 + #597 #627 CMake: Unify inconsistent use of set() and option() in + context of public build time options to take need for + set(.. FORCE) in projects using Expat by means of + add_subdirectory(..) off Expat's users' shoulders + #626 #641 Stop exporting API symbols when building a static library + #644 Resolve use of deprecated "fgrep" by "grep -F" + #620 CMake: Make documentation on variables a bit more consistent + #636 CMake: Drop leading whitespace from a #cmakedefine line in + file expat_config.h.cmake + #594 xmlwf: Fix harmless variable mix-up in function nsattcmp + #592 #593 #610 Address Cppcheck warnings + #643 Address Clang 15 compiler warnings + #642 #644 Version info bumped from 9:8:8 to 9:9:8; + see https://verbump.de/ for what these numbers do + + Infrastructure: + #597 #598 CI: Windows: Start covering MSVC 2022 + #619 CI: macOS: Migrate off deprecated macOS 10.15 + #632 CI: Linux: Make migration off deprecated Ubuntu 18.04 work + #643 CI: Upgrade Clang from 14 to 15 + #637 apply-clang-format.sh: Add support for BSD find + #633 coverage.sh: Exclude MinGW headers + #635 coverage.sh: Fix name collision for -funsigned-char + + Special thanks to: + David Faure + Felix Wilhelm + Frank Bergmann + Rhodri James + Rosen Penev + Thijs Schreijer + Vincent Torri + and + Google Project Zero + +Release 2.4.8 Mon March 28 2022 + Other changes: + #587 pkg-config: Move "-lm" to section "Libs.private" + #587 CMake|MSVC: Fix pkg-config section "Libs" + #55 #582 CMake|macOS: Start using linker arguments + "-compatibility_version " and + "-current_version " in a way compatible with + GNU Libtool + #590 #591 Version info bumped from 9:7:8 to 9:8:8; + see https://verbump.de/ for what these numbers do + + Infrastructure: + #589 CI: Upgrade Clang from 13 to 14 + + Special thanks to: + evpobr + Kai Pastor + Sam James + +Release 2.4.7 Fri March 4 2022 + Bug fixes: + #572 #577 Relax fix to CVE-2022-25236 (introduced with release 2.4.5) + with regard to all valid URI characters (RFC 3986), + i.e. the following set (excluding whitespace): + ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz + 0123456789 % -._~ :/?#[]@ !$&'()*+,;= + + Other changes: + #555 #570 #581 CMake|Windows: Store Expat version in the DLL + #577 Document consequences of namespace separator choices not just + in doc/reference.html but also in header + #577 Document Expat's lack of validation of namespace URIs against + RFC 3986, and that the XML 1.0r4 specification doesn't + require Expat to validate namespace URIs, and that Expat + may do more in that regard in future releases. + If you find need for strict RFC 3986 URI validation on + application level today, https://uriparser.github.io/ may + be of interest. + #579 Fix documentation of XML_EndDoctypeDeclHandler in + #575 Document that a call to XML_FreeContentModel can be done at + a later time from outside the element declaration handler + #574 Make hardcoded namespace URIs easier to find in code + #573 Update documentation on use of XML_POOR_ENTOPY on Solaris + #569 #571 tests: Resolve use of macros NAN and INFINITY for GNU G++ + 4.8.2 on Solaris. + #578 #580 Version info bumped from 9:6:8 to 9:7:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Jeffrey Walton + Johnny Jazeix + Thijs Schreijer + +Release 2.4.6 Sun February 20 2022 + Bug fixes: + #566 Fix a regression introduced by the fix for CVE-2022-25313 + in release 2.4.5 that affects applications that (1) + call function XML_SetElementDeclHandler and (2) are + parsing XML that contains nested element declarations + (e.g. ""). + + Other changes: + #567 #568 Version info bumped from 9:5:8 to 9:6:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Matt Sergeant + Samanta Navarro + Sergei Trofimovich + and + NixOS + Perl XML::Parser + +Release 2.4.5 Fri February 18 2022 + Security fixes: + #562 CVE-2022-25235 -- Passing malformed 2- and 3-byte UTF-8 + sequences (e.g. from start tag names) to the XML + processing application on top of Expat can cause + arbitrary damage (e.g. code execution) depending + on how invalid UTF-8 is handled inside the XML + processor; validation was not their job but Expat's. + Exploits with code execution are known to exist. + #561 CVE-2022-25236 -- Passing (one or more) namespace separator + characters in "xmlns[:prefix]" attribute values + made Expat send malformed tag names to the XML + processor on top of Expat which can cause + arbitrary damage (e.g. code execution) depending + on such unexpectable cases are handled inside the XML + processor; validation was not their job but Expat's. + Exploits with code execution are known to exist. + #558 CVE-2022-25313 -- Fix stack exhaustion in doctype parsing + that could be triggered by e.g. a 2 megabytes + file with a large number of opening braces. + Expected impact is denial of service or potentially + arbitrary code execution. + #560 CVE-2022-25314 -- Fix integer overflow in function copyString; + only affects the encoding name parameter at parser creation + time which is often hardcoded (rather than user input), + takes a value in the gigabytes to trigger, and a 64-bit + machine. Expected impact is denial of service. + #559 CVE-2022-25315 -- Fix integer overflow in function storeRawNames; + needs input in the gigabytes and a 64-bit machine. + Expected impact is denial of service or potentially + arbitrary code execution. + + Other changes: + #557 #564 Version info bumped from 9:4:8 to 9:5:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Ivan Fratric + Samanta Navarro + and + Google Project Zero + JetBrains + +Release 2.4.4 Sun January 30 2022 + Security fixes: + #550 CVE-2022-23852 -- Fix signed integer overflow + (undefined behavior) in function XML_GetBuffer + (that is also called by function XML_Parse internally) + for when XML_CONTEXT_BYTES is defined to >0 (which is both + common and default). + Impact is denial of service or more. + #551 CVE-2022-23990 -- Fix unsigned integer overflow in function + doProlog triggered by large content in element type + declarations when there is an element declaration handler + present (from a prior call to XML_SetElementDeclHandler). + Impact is denial of service or more. + + Bug fixes: + #544 #545 xmlwf: Fix a memory leak on output file opening error + + Other changes: + #546 Autotools: Fix broken CMake support under Cygwin + #554 Windows: Add missing files to the installer to fix + compilation with CMake from installed sources + #552 #554 Version info bumped from 9:3:8 to 9:4:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Carlo Bramini + hwt0415 + Roland Illig + Samanta Navarro + and + Clang LeakSan and the Clang team + +Release 2.4.3 Sun January 16 2022 + Security fixes: + #531 #534 CVE-2021-45960 -- Fix issues with left shifts by >=29 places + resulting in + a) realloc acting as free + b) realloc allocating too few bytes + c) undefined behavior + depending on architecture and precise value + for XML documents with >=2^27+1 prefixed attributes + on a single XML tag a la + "" + where XML_ParserCreateNS is used to create the parser + (which needs argument "-n" when running xmlwf). + Impact is denial of service, or more. + #532 #538 CVE-2021-46143 (ZDI-CAN-16157) -- Fix integer overflow + on variable m_groupSize in function doProlog leading + to realloc acting as free. + Impact is denial of service or more. + #539 CVE-2022-22822 to CVE-2022-22827 -- Prevent integer overflows + near memory allocation at multiple places. Mitre assigned + a dedicated CVE for each involved internal C function: + - CVE-2022-22822 for function addBinding + - CVE-2022-22823 for function build_model + - CVE-2022-22824 for function defineAttribute + - CVE-2022-22825 for function lookup + - CVE-2022-22826 for function nextScaffoldPart + - CVE-2022-22827 for function storeAtts + Impact is denial of service or more. + + Other changes: + #535 CMake: Make call to file(GENERATE [..]) work for CMake <3.19 + #541 Autotools|CMake: MinGW: Make run.sh(.in) work for Cygwin + and MSYS2 by not going through Wine on these platforms + #527 #528 Address compiler warnings + #533 #543 Version info bumped from 9:2:8 to 9:3:8; + see https://verbump.de/ for what these numbers do + + Infrastructure: + #536 CI: Check for realistic minimum CMake version + #529 #539 CI: Cover compilation with -m32 + #529 CI: Store coverage reports as artifacts for download + #528 CI: Upgrade Clang from 11 to 13 + + Special thanks to: + An anonymous whitehat + Christopher Degawa + J. Peter Mugaas + Tyson Smith + and + GCC Farm Project + Trend Micro Zero Day Initiative + +Release 2.4.2 Sun December 19 2021 + Other changes: + #509 #510 Link againgst libm for function "isnan" + #513 #514 Include expat_config.h as early as possible + #498 Autotools: Include files with release archives: + - buildconf.sh + - fuzz/*.c + #507 #519 Autotools: Sync CMake templates with CMake 3.20 + #495 #524 CMake: MinGW: Fix pkg-config section "Libs" for + - non-release build types (e.g. -DCMAKE_BUILD_TYPE=Debug) + - multi-config CMake generators (e.g. Ninja Multi-Config) + #502 #503 docs: Document that function XML_GetBuffer may return NULL + when asking for a buffer of 0 (zero) bytes size + #522 #523 docs: Fix return value docs for both + XML_SetBillionLaughsAttackProtection* functions + #525 #526 Version info bumped from 9:1:8 to 9:2:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Dong-hee Na + Joergen Ibsen + Kai Pastor + +Release 2.4.1 Sun May 23 2021 + Bug fixes: + #488 #490 Autotools: Fix installed header expat_config.h for multilib + systems; regression introduced in 2.4.0 by pull request #486 + + Other changes: + #491 #492 Version info bumped from 9:0:8 to 9:1:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Gentoo's QA check "multilib_check_headers" + +Release 2.4.0 Sun May 23 2021 + Security fixes: + #34 #466 #484 CVE-2013-0340/CWE-776 -- Protect against billion laughs attacks + (denial-of-service; flavors targeting CPU time or RAM or both, + leveraging general entities or parameter entities or both) + by tracking and limiting the input amplification factor + ( := ( + ) / ). + By conservative default, amplification up to a factor of 100.0 + is tolerated and rejection only starts after 8 MiB of output bytes + (= + ) have been processed. + The fix adds the following to the API: + - A new error code XML_ERROR_AMPLIFICATION_LIMIT_BREACH to + signals this specific condition. + - Two new API functions .. + - XML_SetBillionLaughsAttackProtectionMaximumAmplification and + - XML_SetBillionLaughsAttackProtectionActivationThreshold + .. to further tighten billion laughs protection parameters + when desired. Please see file "doc/reference.html" for details. + If you ever need to increase the defaults for non-attack XML + payload, please file a bug report with libexpat. + - Two new XML_FEATURE_* constants .. + - that can be queried using the XML_GetFeatureList function, and + - that are shown in "xmlwf -v" output. + - Two new environment variable switches .. + - EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) and + - EXPAT_ENTITY_DEBUG=(0|1) + .. for runtime debugging of accounting and entity processing. + Specific behavior of these values may change in the future. + - Two new command line arguments "-a FACTOR" and "-b BYTES" + for xmlwf to further tighten billion laughs protection + parameters when desired. + If you ever need to increase the defaults for non-attack XML + payload, please file a bug report with libexpat. + + Bug fixes: + #332 #470 For (non-default) compilation with -DEXPAT_MIN_SIZE=ON (CMake) + or CPPFLAGS=-DXML_MIN_SIZE (GNU Autotools): Fix segfault + for UTF-16 payloads containing CDATA sections. + #485 #486 Autotools: Fix generated CMake files for non-64bit and + non-Linux platforms (e.g. macOS and MinGW in particular) + that were introduced with release 2.3.0 + + Other changes: + #468 #469 xmlwf: Improve help output and the xmlwf man page + #463 xmlwf: Improve maintainability through some refactoring + #477 xmlwf: Fix man page DocBook validity + #456 Autotools: Sync CMake templates with CMake 3.18 + #458 #459 CMake: Support absolute paths for both CMAKE_INSTALL_LIBDIR + and CMAKE_INSTALL_INCLUDEDIR + #471 #481 CMake: Add support for standard variable BUILD_SHARED_LIBS + #457 Unexpose symbol _INTERNAL_trim_to_complete_utf8_characters + #467 Resolve macro HAVE_EXPAT_CONFIG_H + #472 Delete unused legacy helper file "conftools/PrintPath" + #473 #483 Improve attribution + #464 #465 #477 doc/reference.html: Fix XHTML validity + #475 #478 doc/reference.html: Replace the 90s look by OK.css + #479 Version info bumped from 8:0:7 to 9:0:8 + due to addition of new symbols and error codes; + see https://verbump.de/ for what these numbers do + + Infrastructure: + #456 CI: Enable periodic runs + #457 CI: Start covering the list of exported symbols + #474 CI: Isolate coverage task + #476 #482 CI: Adapt to breaking changes in image "ubuntu-18.04" + #477 CI: Cover well-formedness and DocBook/XHTML validity + of doc/reference.html and doc/xmlwf.xml + + Special thanks to: + Dimitry Andric + Eero Helenius + Nick Wellnhofer + Rhodri James + Tomas Korbar + Yury Gribov + and + Clang LeakSan + JetBrains + OSS-Fuzz + +Release 2.3.0 Thu March 25 2021 + Bug fixes: + #438 When calling XML_ParseBuffer without a prior successful call to + XML_GetBuffer as a user, no longer trigger undefined behavior + (by adding an integer to a NULL pointer) but rather return + XML_STATUS_ERROR and set the error code to (new) code + XML_ERROR_NO_BUFFER. Found by UBSan (UndefinedBehaviorSanitizer) + of Clang 11 (but not Clang 9). + #444 xmlwf: Exit status 2 was used for both: + - malformed input files (documented) and + - invalid command-line arguments (undocumented). + The case of invalid command-line arguments now + has its own exit status 4, resolving the ambiguity. + + Other changes: + #439 xmlwf: Add argument -k to allow continuing after + non-fatal errors + #439 xmlwf: Add section about exit status to the -h help output + #422 #426 #447 Windows: Drop support for Visual Studio <=14.0/2015 + #434 Windows: CMake: Detect unsupported Visual Studio at + configure time (rather than at compile time) + #382 #428 testrunner: Make verbose mode (argument "-v") report + about passed tests, and make default mode report about + failures, as well. + #442 CMake: Call "enable_language(CXX)" prior to tinkering + with CMAKE_CXX_* variables + #448 Document use of libexpat from a CMake-based project + #451 Autotools: Install CMake files as generated by CMake 3.19.6 + so that users with "find_package(expat [..] CONFIG [..])" + are served on distributions that are *not* using the CMake + build system inside for libexpat packaging + #436 #437 Autotools: Drop obsolescent macro AC_HEADER_STDC + #450 #452 Autotools: Resolve use of obsolete macro AC_CONFIG_HEADER + #441 Address compiler warnings + #443 Version info bumped from 7:12:6 to 8:0:7 + due to addition of error code XML_ERROR_NO_BUFFER + (see https://verbump.de/ for what these numbers do) + + Infrastructure: + #435 #446 Replace Travis CI by GitHub Actions + + Special thanks to: + Alexander Richardson + Oleksandr Popovych + Thomas Beutlich + Tim Bray + and + Clang LeakSan, Clang 11 UBSan and the Clang team + +Release 2.2.10 Sat October 3 2020 + Bug fixes: + #390 #395 #398 Fix undefined behavior during parsing caused by + pointer arithmetic with NULL pointers + #404 #405 Fix reading uninitialized variable during parsing + #406 xmlwf: Add missing check for malloc NULL return + + Other changes: + #396 Windows: Drop support for Visual Studio <=8.0/2005 + #409 Windows: Add missing file "Changes" to the installer + to fix compilation with CMake from installed sources + #403 xmlwf: Document exit codes in xmlwf manpage and + exit with code 3 (rather than code 1) for output errors + when used with "-d DIRECTORY" + #356 #359 MinGW: Provide declaration of rand_s for mingwrt <5.3.0 + #383 #392 Autotools: Use -Werror while configure tests the compiler + for supported compile flags to avoid false positives + #383 #393 #394 Autotools: Improve handling of user (C|CPP|CXX|LD)FLAGS, + e.g. ensure that they have the last word over flags added + while running ./configure + #360 CMake: Create libexpatw.{dll,so} and expatw.pc (with emphasis + on suffix "w") with -DEXPAT_CHAR_TYPE=(ushort|wchar_t) + #360 CMake: Detect and deny unsupported build combinations + involving -DEXPAT_CHAR_TYPE=(ushort|wchar_t) + #360 CMake: Install pre-compiled shipped xmlwf.1 manpage in case + of -DEXPAT_BUILD_DOCS=OFF + #375 #380 #419 CMake: Fix use of Expat by means of add_subdirectory + #407 #408 CMake: Keep expat target name constant at "expat" + (i.e. refrain from using the target name to control + build artifact filenames) + #385 CMake: Fix compilation with -DEXPAT_SHARED_LIBS=OFF for + Windows + CMake: Expose man page compilation as target "xmlwf-manpage" + #413 #414 CMake: Introduce option EXPAT_BUILD_PKGCONFIG + to control generation of pkg-config file "expat.pc" + #424 CMake: Add minimalistic support for building binary packages + with CMake target "package"; based on CPack + #366 CMake: Add option -DEXPAT_OSSFUZZ_BUILD=(ON|OFF) with + default OFF to build fuzzer code against OSS-Fuzz and + related environment variable LIB_FUZZING_ENGINE + #354 Fix testsuite for -DEXPAT_DTD=OFF and -DEXPAT_NS=OFF, each + #354 #355 .. + #356 #412 Address compiler warnings + #368 #369 Address pngcheck warnings with doc/*.png images + #425 Version info bumped from 7:11:6 to 7:12:6 + + Special thanks to: + asavah + Ben Wagner + Bhargava Shastry + Frank Landgraf + Jeffrey Walton + Joe Orton + Kleber Tarcísio + Ma Lin + Maciej Sroczyński + Mohammed Khajapasha + Vadim Zeitlin + and + Cppcheck 2.0 and the Cppcheck team + +Release 2.2.9 Wed September 25 2019 + Other changes: + examples: Drop executable bits from elements.c + #349 Windows: Change the name of the Windows DLLs from expat*.dll + to libexpat*.dll once more (regression from 2.2.8, first + fixed in 1.95.3, issue #61 on SourceForge today, + was issue #432456 back then); needs a fix due + case-insensitive file systems on Windows and the fact that + Perl's XML::Parser::Expat compiles into Expat.dll. + #347 Windows: Only define _CRT_RAND_S if not defined + Version info bumped from 7:10:6 to 7:11:6 + + Special thanks to: + Ben Wagner + +Release 2.2.8 Fri September 13 2019 + Security fixes: + #317 #318 CVE-2019-15903 -- Fix heap overflow triggered by + XML_GetCurrentLineNumber (or XML_GetCurrentColumnNumber), + and deny internal entities closing the doctype; + fixed in commit c20b758c332d9a13afbbb276d30db1d183a85d43 + + Bug fixes: + #240 Fix cases where XML_StopParser did not have any effect + when called from inside of an end element handler + #341 xmlwf: Fix exit code for operation without "-d DIRECTORY"; + previously, only "-d DIRECTORY" would give you a proper + exit code: + # xmlwf -d . <<<'' 2>/dev/null ; echo $? + 2 + # xmlwf <<<'' 2>/dev/null ; echo $? + 0 + Now both cases return exit code 2. + + Other changes: + #299 #302 Windows: Replace LoadLibrary hack to access + unofficial API function SystemFunction036 (RtlGenRandom) + by using official API function rand_s (needs WinXP+) + #325 Windows: Drop support for Visual Studio <=7.1/2003 + and document supported compilers in README.md + #286 Windows: Remove COM code from xmlwf; in case it turns + out needed later, there will be a dedicated repository + below https://github.com/libexpat/ for that code + #322 Windows: Remove explicit MSVC solution and project files. + You can generate Visual Studio solution files through + CMake, e.g.: cmake -G"Visual Studio 15 2017" . + #338 xmlwf: Make "xmlwf -h" help output more friendly + #339 examples: Improve elements.c + #244 #264 Autotools: Add argument --enable-xml-attr-info + #239 #301 Autotools: Add arguments + --with-getrandom + --without-getrandom + --with-sys-getrandom + --without-sys-getrandom + #312 #343 Autotools: Fix linking issues with "./configure LD=clang" + Autotools: Fix "make run-xmltest" for out-of-source builds + #329 #336 CMake: Pull all options from Expat <=2.2.7 into namespace + prefix EXPAT_ with the exception of DOCBOOK_TO_MAN: + - BUILD_doc -> EXPAT_BUILD_DOCS (plural) + - BUILD_examples -> EXPAT_BUILD_EXAMPLES + - BUILD_shared -> EXPAT_SHARED_LIBS + - BUILD_tests -> EXPAT_BUILD_TESTS + - BUILD_tools -> EXPAT_BUILD_TOOLS + - DOCBOOK_TO_MAN -> DOCBOOK_TO_MAN (unchanged) + - INSTALL -> EXPAT_ENABLE_INSTALL + - MSVC_USE_STATIC_CRT -> EXPAT_MSVC_STATIC_CRT + - USE_libbsd -> EXPAT_WITH_LIBBSD + - WARNINGS_AS_ERRORS -> EXPAT_WARNINGS_AS_ERRORS + - XML_CONTEXT_BYTES -> EXPAT_CONTEXT_BYTES + - XML_DEV_URANDOM -> EXPAT_DEV_URANDOM + - XML_DTD -> EXPAT_DTD + - XML_NS -> EXPAT_NS + - XML_UNICODE -> EXPAT_CHAR_TYPE=ushort (!) + - XML_UNICODE_WCHAR_T -> EXPAT_CHAR_TYPE=wchar_t (!) + #244 #264 CMake: Add argument -DEXPAT_ATTR_INFO=(ON|OFF), + default OFF + #326 CMake: Add argument -DEXPAT_LARGE_SIZE=(ON|OFF), + default OFF + #328 CMake: Add argument -DEXPAT_MIN_SIZE=(ON|OFF), + default OFF + #239 #277 CMake: Add arguments + -DEXPAT_WITH_GETRANDOM=(ON|OFF|AUTO), default AUTO + -DEXPAT_WITH_SYS_GETRANDOM=(ON|OFF|AUTO), default AUTO + #326 CMake: Install expat_config.h to include directory + #326 CMake: Generate and install configuration files for + future find_package(expat [..] CONFIG [..]) + CMake: Now produces a summary of applied configuration + CMake: Require C++ compiler only when tests are enabled + #330 CMake: Fix compilation for 16bit character types, + i.e. ex -DXML_UNICODE=ON (and ex -DXML_UNICODE_WCHAR_T=ON) + #265 CMake: Fix linking with MinGW + #330 CMake: Add full support for MinGW; to enable, use + -DCMAKE_TOOLCHAIN_FILE=[expat]/cmake/mingw-toolchain.cmake + #330 CMake: Port "make run-xmltest" from GNU Autotools to CMake + #316 CMake: Windows: Make binary postfix match MSVC + Old: expat[d].lib + New: expat[w][d][MD|MT].lib + CMake: Migrate files from Windows to Unix line endings + #308 CMake: Integrate OSS-Fuzz fuzzers, option + -DEXPAT_BUILD_FUZZERS=(ON|OFF), default OFF + #14 Drop an OpenVMS support leftover + #235 #268 .. + #270 #310 .. + #313 #331 #333 Address compiler warnings + #282 #283 .. + #284 #285 Address cppcheck warnings + #294 #295 Address Clang Static Analyzer warnings + #24 #293 Mass-apply clang-format 9 (and ensure conformance during CI) + Version info bumped from 7:9:6 to 7:10:6 + + Special thanks to: + David Loffredo + Joonun Jang + Kishore Kunche + Marco Maggi + Mitch Phillips + Mohammed Khajapasha + Rolf Ade + xantares + Zhongyuan Zhou + +Release 2.2.7 Wed June 19 2019 + Security fixes: + #186 #262 CVE-2018-20843 -- Fix extraction of namespace prefixes from + XML names; XML names with multiple colons could end up in + the wrong namespace, and take a high amount of RAM and CPU + resources while processing, opening the door to + use for denial-of-service attacks + + Other changes: + #195 #197 Autotools/CMake: Utilize -fvisibility=hidden to stop + exporting non-API symbols + #227 Autotools: Add --without-examples and --without-tests + #228 Autotools: Modernize configure.ac + #245 #246 Autotools: Fix check for -fvisibility=hidden for Clang + #247 #248 Autotools: Fix compilation for lack of docbook2x-man + #236 #258 Autotools: Produce .tar.{gz,lz,xz} release archives + #212 CMake: Make libdir of pkgconfig expat.pc support multilib + #158 #263 CMake: Build man page in PROJECT_BINARY_DIR not _SOURCE_DIR + #219 Remove fallback to bcopy, assume that memmove(3) exists + #257 Use portable "/usr/bin/env bash" shebang (e.g. for OpenBSD) + #243 Windows: Fix syntax of .def module definition files + Version info bumped from 7:8:6 to 7:9:6 + + Special thanks to: + Benjamin Peterson + Caolán McNamara + Hanno Böck + KangLin + Kishore Kunche + Marco Maggi + Rhodri James + Sebastian Dröge + userwithuid + Yury Gribov + +Release 2.2.6 Sun August 12 2018 + Bug fixes: + #170 #206 Avoid doing arithmetic with NULL pointers in XML_GetBuffer + #204 #205 Fix 2.2.5 regression with suspend-resume while parsing + a document like '' + + Other changes: + #165 #168 Autotools: Fix docbook-related configure syntax error + #166 Autotools: Avoid grep option `-q` for Solaris + #167 Autotools: Support + ./configure DOCBOOK_TO_MAN="xmlto man --skip-validation" + #159 #167 Autotools: Support DOCBOOK_TO_MAN command which produces + xmlwf.1 rather than XMLWF.1; also covers case insensitive + file systems + #181 Autotools: Drop -rpath option passed to libtool + #188 Autotools: Detect and deny SGML docbook2man as ours is XML + #188 Autotools/CMake: Support command db2x_docbook2man as well + #174 CMake: Introduce option WARNINGS_AS_ERRORS, defaults to OFF + #184 #185 CMake: Introduce option MSVC_USE_STATIC_CRT, defaults to OFF + #207 #208 CMake: Introduce option XML_UNICODE and XML_UNICODE_WCHAR_T, + both defaulting to OFF + #175 CMake: Prefer check_symbol_exists over check_function_exists + #176 CMake: Create the same pkg-config file as with GNU Autotools + #178 #179 CMake: Use GNUInstallDirs module to set proper defaults for + install directories + #208 CMake: Utilize expat_config.h.cmake for XML_DEV_URANDOM + #180 Windows: Fix compilation of test suite for Visual Studio 2008 + #131 #173 #202 Address compiler warnings + #187 #190 #200 Fix miscellaneous typos + Version info bumped from 7:7:6 to 7:8:6 + + Special thanks to: + Anton Maklakov + Benjamin Peterson + Brad King + Franek Korta + Frank Rast + Joe Orton + luzpaz + Pedro Vicente + Rainer Jung + Rhodri James + Rolf Ade + Rolf Eike Beer + Thomas Beutlich + Tomasz Kłoczko + +Release 2.2.5 Tue October 31 2017 + Bug fixes: + #8 If the parser runs out of memory, make sure its internal + state reflects the memory it actually has, not the memory + it wanted to have. + #11 The default handler wasn't being called when it should for + a SYSTEM or PUBLIC doctype if an entity declaration handler + was registered. + #137 #138 Fix a case of mistakenly reported parsing success where + XML_StopParser was called from an element handler + #162 Function XML_ErrorString was returning NULL rather than + a message for code XML_ERROR_INVALID_ARGUMENT + introduced with release 2.2.1 + + Other changes: + #106 xmlwf: Add argument -N adding notation declarations + #75 #106 Test suite: Resolve expected failure cases where xmlwf + output was incomplete + #127 Windows: Fix test suite compilation + #126 #127 Windows: Fix compilation for Visual Studio 2012 + Windows: Upgrade shipped project files to Visual Studio 2017 + #33 #132 tests: Mass-fix compilation for XML_UNICODE_WCHAR_T + #129 examples: Fix compilation for XML_UNICODE_WCHAR_T + #130 benchmark: Fix compilation for XML_UNICODE_WCHAR_T + #144 xmlwf: Fix compilation for XML_UNICODE_WCHAR_T; still needs + Windows or MinGW for 2-byte wchar_t + #9 Address two Clang Static Analyzer false positives + #59 Resolve troublesome macros hiding parser struct membership + and dereferencing that pointer + #6 Resolve superfluous internal malloc/realloc switch + #153 #155 Improve docbook2x-man detection + #160 Undefine NDEBUG in the test suite (rather than rejecting it) + #161 Address compiler warnings + Version info bumped from 7:6:6 to 7:7:6 + + Special thanks to: + Benbuck Nason + Hans Wennborg + José Gutiérrez de la Concha + Pedro Monreal Gonzalez + Rhodri James + Rolf Ade + Stephen Groat + and + Core Infrastructure Initiative + +Release 2.2.4 Sat August 19 2017 + Bug fixes: + #115 Fix copying of partial characters for UTF-8 input + + Other changes: + #109 Fix "make check" for non-x86 architectures that default + to unsigned type char (-128..127 rather than 0..255) + #109 coverage.sh: Cover -funsigned-char + Autotools: Introduce --without-xmlwf argument + #65 Autotools: Replace handwritten Makefile with GNU Automake + #43 CMake: Auto-detect high quality entropy extractors, add new + option USE_libbsd=ON to use arc4random_buf of libbsd + #74 CMake: Add -fno-strict-aliasing only where supported + #114 CMake: Always honor manually set BUILD_* options + #114 CMake: Compile man page if docbook2x-man is available, only + #117 Include file tests/xmltest.log.expected in source tarball + (required for "make run-xmltest") + #117 Include (existing) Visual Studio 2013 files in source tarball + Improve test suite error output + #111 Fix some typos in documentation + Version info bumped from 7:5:6 to 7:6:6 + + Special thanks to: + Jakub Wilk + Joe Orton + Lin Tian + Rolf Eike Beer + +Release 2.2.3 Wed August 2 2017 + Security fixes: + #82 CVE-2017-11742 -- Windows: Fix DLL hijacking vulnerability + using Steve Holme's LoadLibrary wrapper for/of cURL + + Bug fixes: + #85 Fix a dangling pointer issue related to realloc + + Other changes: + Increase code coverage + #91 Linux: Allow getrandom to fail if nonblocking pool has not + yet been initialized and read /dev/urandom then, instead. + This is in line with what recent Python does. + #81 Pre-10.7/Lion macOS: Support entropy from arc4random + #86 Check that a UTF-16 encoding in an XML declaration has the + right endianness + #4 #5 #7 Recover correctly when some reallocations fail + Repair "./configure && make" for systems without any + provider of high quality entropy + and try reading /dev/urandom on those + Ensure that user-defined character encodings have converter + functions when they are needed + Fix mis-leading description of argument -c in xmlwf.1 + Rely on macro HAVE_ARC4RANDOM_BUF (rather than __CloudABI__) + for CloudABI + #100 Fix use of SIPHASH_MAIN in siphash.h + #23 Test suite: Fix memory leaks + Version info bumped from 7:4:6 to 7:5:6 + + Special thanks to: + Chanho Park + Joe Orton + Pascal Cuoq + Rhodri James + Simon McVittie + Vadim Zeitlin + Viktor Szakats + and + Core Infrastructure Initiative + +Release 2.2.2 Wed July 12 2017 + Security fixes: + #43 Protect against compilation without any source of high + quality entropy enabled, e.g. with CMake build system; + commit ff0207e6076e9828e536b8d9cd45c9c92069b895 + #60 Windows with _UNICODE: + Unintended use of LoadLibraryW with a non-wide string + resulted in failure to load advapi32.dll and degradation + in quality of used entropy when compiled with _UNICODE for + Windows; you can launch existing binaries with + EXPAT_ENTROPY_DEBUG=1 in the environment to inspect the + quality of entropy used during runtime; commits + * 95b95032f907ef1cd17ee7a9a1768010a825d61d + * 73a5a2e9c081f49f2d775cf7ced864158b68dc80 + [MOX-006] Fix non-NULL parser parameter validation in XML_Parse; + resulted in NULL dereference, previously; + commit ac256dafdffc9622ab0dc2c62fcecb0dfcfa71fe + + Bug fixes: + #69 Fix improper use of unsigned long long integer literals + + Other changes: + #73 Start requiring a C99 compiler + #49 Fix "==" Bashism in configure script + #50 Fix too eager getrandom detection for Debian GNU/kFreeBSD + #52 and macOS + #51 Address lack of stdint.h in Visual Studio 2003 to 2008 + #58 Address compile warnings + #68 Fix "./buildconf.sh && ./configure" for some versions + of Dash for /bin/sh + #72 CMake: Ease use of Expat in context of a parent project + with multiple CMakeLists.txt files + #72 CMake: Resolve mistaken executable permissions + #76 Address compile warning with -DNDEBUG (not recommended!) + #77 Address compile warning about macro redefinition + + Special thanks to: + Alexander Bluhm + Ben Boeckel + Cătălin Răceanu + Kerin Millar + László Böszörményi + S. P. Zeidler + Segev Finer + Václav Slavík + Victor Stinner + Viktor Szakats + and + Radically Open Security + +Release 2.2.1 Sat June 17 2017 + Security fixes: + CVE-2017-9233 -- External entity infinite loop DoS + Details: https://libexpat.github.io/doc/cve-2017-9233/ + Commit c4bf96bb51dd2a1b0e185374362ee136fe2c9d7f + [MOX-002] CVE-2016-9063 -- Detect integer overflow; commit + d4f735b88d9932bd5039df2335eefdd0723dbe20 + (Fixed version of existing downstream patches!) + (SF.net) #539 Fix regression from fix to CVE-2016-0718 cutting off + longer tag names; commits + * 896b6c1fd3b842f377d1b62135dccf0a579cf65d + * af507cef2c93cb8d40062a0abe43a4f4e9158fb2 + #16 * 0dbbf43fdb20f593ddf4fa1ff67288000dd4a7fd + #25 More integer overflow detection (function poolGrow); commits + * 810b74e4703dcfdd8f404e3cb177d44684775143 + * 44178553f3539ce69d34abee77a05e879a7982ac + [MOX-002] Detect overflow from len=INT_MAX call to XML_Parse; commits + * 4be2cb5afcc018d996f34bbbce6374b7befad47f + * 7e5b71b748491b6e459e5c9a1d090820f94544d8 + [MOX-005] #30 Use high quality entropy for hash initialization: + * arc4random_buf on BSD, systems with libbsd + (when configured with --with-libbsd), CloudABI + * RtlGenRandom on Windows XP / Server 2003 and later + * getrandom on Linux 3.17+ + In a way, that's still part of CVE-2016-5300. + https://github.com/libexpat/libexpat/pull/30/commits + [MOX-005] For the low quality entropy extraction fallback code, + the parser instance address can no longer leak, commit + 04ad658bd3079dd15cb60fc67087900f0ff4b083 + [MOX-003] Prevent use of uninitialised variable; commit + [MOX-004] a4dc944f37b664a3ca7199c624a98ee37babdb4b + Add missing parameter validation to public API functions + and dedicated error code XML_ERROR_INVALID_ARGUMENT: + [MOX-006] * NULL checks; commits + * d37f74b2b7149a3a95a680c4c4cd2a451a51d60a (merge/many) + * 9ed727064b675b7180c98cb3d4f75efba6966681 + * 6a747c837c50114dfa413994e07c0ba477be4534 + * Negative length (XML_Parse); commit + [MOX-002] 70db8d2538a10f4c022655d6895e4c3e78692e7f + [MOX-001] #35 Change hash algorithm to William Ahern's version of SipHash + to go further with fixing CVE-2012-0876. + https://github.com/libexpat/libexpat/pull/39/commits + + Bug fixes: + #32 Fix sharing of hash salt across parsers; + relevant where XML_ExternalEntityParserCreate is called + prior to XML_Parse, in particular (e.g. FBReader) + #28 xmlwf: Auto-disable use of memory-mapping (and parsing + as a single chunk) for files larger than ~1 GB (2^30 bytes) + rather than failing with error "out of memory" + #3 Fix double free after malloc failure in DTD code; commit + 7ae9c3d3af433cd4defe95234eae7dc8ed15637f + #17 Fix memory leak on parser error for unbound XML attribute + prefix with new namespaces defined in the same tag; + found by Google's OSS-Fuzz; commits + * 16f87daae5a16132e479e4f71862128c7a915c73 + * b47dbc9745932c160893d433220e462bd605f8cd + xmlwf on Windows: Add missing calls to CloseHandle + + New features: + #30 Introduced environment switch EXPAT_ENTROPY_DEBUG=1 + for runtime debugging of entropy extraction + + Other changes: + Increase code coverage + #33 Reject use of XML_UNICODE_WCHAR_T with sizeof(wchar_t) != 2; + XML_UNICODE_WCHAR_T was never meant to be used outside + of Windows; 4-byte wchar_t is common on Linux + (SF.net) #538 Start using -fno-strict-aliasing + (SF.net) #540 Support compilation against cloudlibc of CloudABI + Allow MinGW cross-compilation + (SF.net) #534 CMake: Introduce option "BUILD_doc" (enabled by default) + to bypass compilation of the xmlwf.1 man page + (SF.net) pr2 CMake: Introduce option "INSTALL" (enabled by default) + to bypass installation of expat files + CMake: Fix ninja support + Autotools: Add parameters --enable-xml-context [COUNT] + and --disable-xml-context; default of context of 1024 + bytes enabled unchanged + #14 Drop AmigaOS 4.x code and includes + #14 Drop ancient build systems: + * Borland C++ Builder + * OpenVMS + * Open Watcom + * Visual Studio 6.0 + * Pre-X Mac OS (MPW Makefile) + If you happen to rely on some of these, please get in + touch for joining with maintenance. + #10 Move from WIN32 to _WIN32 + #13 Fix "make run-xmltest" order instability + Address compile warnings + Bump version info from 7:2:6 to 7:3:6 + Add AUTHORS file + + Infrastructure: + #1 Migrate from SourceForge to GitHub (except downloads): + https://github.com/libexpat/ + #1 Re-create http://libexpat.org/ project website + Start utilizing Travis CI + + Special thanks to: + Andy Wang + Don Lewis + Ed Schouten + Karl Waclawek + Pascal Cuoq + Rhodri James + Sergei Nikulov + Tobias Taschner + Viktor Szakats + and + Core Infrastructure Initiative + Mozilla Foundation (MOSS Track 3: Secure Open Source) + Radically Open Security + +Release 2.2.0 Tue June 21 2016 + Security fixes: + #537 CVE-2016-0718 -- Fix crash on malformed input + CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 / + CVE-2015-2716 introduced with Expat 2.1.1 + #499 CVE-2016-5300 -- Use more entropy for hash initialization + than the original fix to CVE-2012-0876 + #519 CVE-2012-6702 -- Resolve troublesome internal call to srand + that was introduced with Expat 2.1.0 + when addressing CVE-2012-0876 (issue #496) + + Bug fixes: + Fix uninitialized reads of size 1 + (e.g. in little2_updatePosition) + Fix detection of UTF-8 character boundaries + + Other changes: + #532 Fix compilation for Visual Studio 2010 (keyword "C99") + Autotools: Resolve use of "$<" to better support bmake + Autotools: Add QA script "qa.sh" (and make target "qa") + Autotools: Respect CXXFLAGS if given + Autotools: Fix "make run-xmltest" + Autotools: Have "make run-xmltest" check for expected output + p90 CMake: Fix static build (BUILD_shared=OFF) on Windows + #536 CMake: Add soversion, support -DNO_SONAME=yes to bypass + #323 CMake: Add suffix "d" to differentiate debug from release + CMake: Define WIN32 with CMake on Windows + Annotate memory allocators for GCC + Address all currently known compile warnings + Make sure that API symbols remain visible despite + -fvisibility=hidden + Remove executable flag from source files + Resolve COMPILED_FROM_DSP in favor of WIN32 + + Special thanks to: + Björn Lindahl + Christian Heimes + Cristian Rodríguez + Daniel Krügler + Gustavo Grieco + Karl Waclawek + László Böszörményi + Marco Grassi + Pascal Cuoq + Sergei Nikulov + Thomas Beutlich + Warren Young + Yann Droneaud + +Release 2.1.1 Sat March 12 2016 + Security fixes: + #582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer + + Bug fixes: + #502: Fix potential null pointer dereference + #520: Symbol XML_SetHashSalt was not exported + Output of "xmlwf -h" was incomplete + + Other changes: + #503: Document behavior of calling XML_SetHashSalt with salt 0 + Minor improvements to man page xmlwf(1) + Improvements to the experimental CMake build system + libtool now invoked with --verbose + Release 2.1.0 Sat March 24 2012 + - Security fixes: + #2958794: CVE-2012-1148 - Memory leak in poolGrow. + #2895533: CVE-2012-1147 - Resource leak in readfilemap.c. + #3496608: CVE-2012-0876 - Hash DOS attack. + #2894085: CVE-2009-3560 - Buffer over-read and crash in big2_toUtf8(). + #1990430: CVE-2009-3720 - Parser crash with special UTF-8 sequences. - Bug Fixes: #1742315: Harmful XML_ParserCreateNS suggestion. - #2895533: CVE-2012-1147 - Resource leak in readfilemap.c. #1785430: Expat build fails on linux-amd64 with gcc version>=4.1 -O3. #1983953, 2517952, 2517962, 2649838: Build modifications using autoreconf instead of buildconf.sh. #2815947, #2884086: OBJEXT and EXEEXT support while building. - #1990430: CVE-2009-3720 - Parser crash with special UTF-8 sequences. #2517938: xmlwf should return non-zero exit status if not well-formed. #2517946: Wrong statement about XMLDecl in xmlwf.1 and xmlwf.sgml. #2855609: Dangling positionPtr after error. - #2894085: CVE-2009-3560 - Buffer over-read and crash in big2_toUtf8(). - #2958794: CVE-2012-1148 - Memory leak in poolGrow. #2990652: CMake support. #3010819: UNEXPECTED_STATE with a trailing "%" in entity value. - #3206497: Unitialized memory returned from XML_Parse. + #3206497: Uninitialized memory returned from XML_Parse. #3287849: make check fails on mingw-w64. - #3496608: CVE-2012-0876 - Hash DOS attack. - Patches: #1749198: pkg-config support. #3010222: Fix for bug #3010819. #3312568: CMake support. #3446384: Report byte offsets for attr names and values. - New Features / API changes: - Added new API member XML_SetHashSalt() that allows setting an intial + Added new API member XML_SetHashSalt() that allows setting an initial value (salt) for hash calculations. This is part of the fix for bug #3496608 to randomize hash parameters. When compiled with XML_ATTR_INFO defined, adds new API member diff --git a/contrib/expat/expat_config.h b/contrib/expat/expat_config.h new file mode 100644 index 0000000000..c63eb3ca08 --- /dev/null +++ b/contrib/expat/expat_config.h @@ -0,0 +1,140 @@ +/* expat_config.h. Generated from expat_config.h.in by configure. */ +/* expat_config.h.in. Generated from configure.ac by autoheader. */ + +#ifndef EXPAT_CONFIG_H +#define EXPAT_CONFIG_H 1 + +/* Define if building universal (internal helper macro) */ +/* #undef AC_APPLE_UNIVERSAL_BUILD */ + +/* 1234 = LILENDIAN, 4321 = BIGENDIAN */ +#define BYTEORDER 1234 + +/* Define to 1 if you have the `arc4random' function. */ +/* #undef HAVE_ARC4RANDOM */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +#define HAVE_ARC4RANDOM_BUF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if you have the `getrandom' function. */ +#define HAVE_GETRANDOM 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `bsd' library (-lbsd). */ +/* #undef HAVE_LIBBSD */ + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have `syscall' and `SYS_getrandom'. */ +#define HAVE_SYSCALL_GETRANDOM 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "expat" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "expat-bugs@libexpat.org" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "expat" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "expat 2.5.0" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "expat" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.5.0" + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "2.5.0" + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +/* # undef WORDS_BIGENDIAN */ +# endif +#endif + +/* Define to allow retrieving the byte offsets for attribute names and values. + */ +/* #undef XML_ATTR_INFO */ + +/* Define to specify how much context to retain around the current parse + point. */ +#define XML_CONTEXT_BYTES 1024 + +/* Define to include code reading entropy from `/dev/urandom'. */ +#define XML_DEV_URANDOM 1 + +/* Define to make parameter entity parsing functionality available. */ +#define XML_DTD 1 + +/* Define to make XML Namespaces functionality available. */ +#define XML_NS 1 + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `long int' if does not define. */ +/* #undef off_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +#endif // ndef EXPAT_CONFIG_H diff --git a/contrib/expat/expat_config.h.in b/contrib/expat/expat_config.h.in deleted file mode 100644 index 8c6e51409e..0000000000 --- a/contrib/expat/expat_config.h.in +++ /dev/null @@ -1,102 +0,0 @@ -/* expat_config.h.in. Generated from configure.in by autoheader. */ - -/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ -#undef BYTEORDER - -/* Define to 1 if you have the `bcopy' function. */ -#undef HAVE_BCOPY - -/* Define to 1 if you have the header file. */ -#undef HAVE_DLFCN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_FCNTL_H - -/* Define to 1 if you have the `getpagesize' function. */ -#undef HAVE_GETPAGESIZE - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the `memmove' function. */ -#undef HAVE_MEMMOVE - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have a working `mmap' system call. */ -#undef HAVE_MMAP - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PARAM_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#undef LT_OBJDIR - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* whether byteorder is bigendian */ -#undef WORDS_BIGENDIAN - -/* Define to specify how much context to retain around the current parse - point. */ -#undef XML_CONTEXT_BYTES - -/* Define to make parameter entity parsing functionality available. */ -#undef XML_DTD - -/* Define to make XML Namespaces functionality available. */ -#undef XML_NS - -/* Define to __FUNCTION__ or "" if `__func__' does not conform to ANSI C. */ -#undef __func__ - -/* Define to empty if `const' does not conform to ANSI C. */ -#undef const - -/* Define to `long int' if does not define. */ -#undef off_t - -/* Define to `unsigned int' if does not define. */ -#undef size_t diff --git a/contrib/expat/lib/Makefile.MPW b/contrib/expat/lib/Makefile.MPW deleted file mode 100644 index 046af00514..0000000000 --- a/contrib/expat/lib/Makefile.MPW +++ /dev/null @@ -1,206 +0,0 @@ -# File: Makefile.MPW -# Targets: All, Dynamic, Static (and Clean, Clean-All) -# Created: Tuesday, July 02, 2002 -# -# MPW Makefile for building expat under the "classic" (i.e. pre-X) Mac OS -# Copyright © 2002 Daryle Walker -# Portions Copyright © 2002 Thomas Wegner -# See the COPYING file for distribution information -# -# Description: -# This Makefile lets you build static, dynamic (i.e. shared) and stub -# versions of the expat library as well as the elements.c and outline.c -# examples (built as tools for MPW). This is for PPC only; it should be -# no problem to build a 68K version of the expat library, though. -# -# Usage: -# Buildprogram All -# or Buildprogram Dynamic -# or Buildprogram Static -# -# Note: You first have to rename this file to "Makefile", or the Buildprogram -# commando will not recognize it. -# - -MAKEFILE = Makefile -¥MondoBuild¥ = {MAKEFILE} # Make blank to avoid rebuilds when makefile is modified - -ObjDir = : -SrcDir = : -HdrDir = : - -ToolDir = ::examples: - -Includes = -i {HdrDir} - -Sym-PPC = -sym off - -Defines = -d MACOS_CLASSIC - -PPCCOptions = {Includes} {Sym-PPC} -w 35 {Defines} - -FragName = libexpat - - -### Source Files ### - -SrcFiles = ¶ - "{SrcDir}xmlparse.c" ¶ - "{SrcDir}xmlrole.c" ¶ - "{SrcDir}xmltok.c" - -ToolSrcFiles = ¶ - "{ToolDir}elements.c" ¶ - "{ToolDir}outline.c" - - -### Object Files ### - -ObjFiles-PPC = ¶ - "{ObjDir}xmlparse.c.o" ¶ - "{ObjDir}xmlrole.c.o" ¶ - "{ObjDir}xmltok.c.o" - -ElementToolObjFile = "{ObjDir}elements.c.o" - -OutlineToolObjFile = "{ObjDir}outline.c.o" - - -### Libraries ### - -StLibFiles-PPC = ¶ - "{PPCLibraries}StdCRuntime.o" ¶ - "{PPCLibraries}PPCCRuntime.o" ¶ - "{PPCLibraries}PPCToolLibs.o" - -ShLibFiles-PPC = ¶ - "{SharedLibraries}InterfaceLib" ¶ - "{SharedLibraries}StdCLib" ¶ - "{SharedLibraries}MathLib" - -LibFiles-PPC = ¶ - {StLibFiles-PPC} ¶ - {ShLibFiles-PPC} - - -### Special Files ### - -ExportFile = "{ObjDir}{FragName}.exp" - -StLibFile = "{ObjDir}{FragName}.MrC.o" - -ShLibFile = "{ObjDir}{FragName}" - -StubFile = "{ObjDir}{FragName}.stub" - -ElementsTool = "{ToolDir}elements" - -OutlineTool = "{ToolDir}outline" - - -### Default Rules ### - -.c.o Ä .c {¥MondoBuild¥} - {PPCC} {depDir}{default}.c -o {targDir}{default}.c.o {PPCCOptions} - - -### Build Rules ### - -All Ä Dynamic {ElementsTool} {OutlineTool} - -Static Ä {StLibFile} - -Dynamic Ä Static {ShLibFile} {StubFile} - -{StLibFile} ÄÄ {ObjFiles-PPC} {StLibFiles-PPC} {¥MondoBuild¥} - PPCLink ¶ - -o {Targ} ¶ - {ObjFiles-PPC} ¶ - {StLibFiles-PPC} ¶ - {Sym-PPC} ¶ - -mf -d ¶ - -t 'XCOF' ¶ - -c 'MPS ' ¶ - -xm l - -{ShLibFile} ÄÄ {StLibFile} {ShLibFiles-PPC} {ExportFile} {¥MondoBuild¥} - PPCLink ¶ - -o {Targ} ¶ - {StLibFile} ¶ - {ShLibFiles-PPC} ¶ - {Sym-PPC} ¶ - -@export {ExportFile} ¶ - -fragname {FragName} ¶ - -mf -d ¶ - -t 'shlb' ¶ - -c '????' ¶ - -xm s - -{StubFile} ÄÄ {ShLibFile} {¥MondoBuild¥} - shlb2stub -o {Targ} {ShLibFile} - -{ElementsTool} ÄÄ {ElementToolObjFile} {StubFile} {LibFiles-PPC} {¥MondoBuild¥} - PPCLink ¶ - -o {Targ} ¶ - {ElementToolObjFile} ¶ - {StLibFile} ¶ - {LibFiles-PPC} ¶ - {Sym-PPC} ¶ - -mf -d ¶ - -t 'MPST' ¶ - -c 'MPS ' - -{OutlineTool} ÄÄ {OutlineToolObjFile} {StubFile} {LibFiles-PPC} {¥MondoBuild¥} - PPCLink ¶ - -o {Targ} ¶ - {OutlineToolObjFile} ¶ - {StLibFile} ¶ - {LibFiles-PPC} ¶ - {Sym-PPC} ¶ - -mf -d ¶ - -t 'MPST' ¶ - -c 'MPS ' - - -### Special Rules ### - -{ExportFile} ÄÄ "{HdrDir}expat.h" {¥MondoBuild¥} - StreamEdit -d ¶ - -e "/¥('XMLPARSEAPI('Å') ')Ç0,1È'XML_'([A-Za-z0-9_]+)¨1'('/ Print 'XML_' ¨1" ¶ - "{HdrDir}expat.h" > {Targ} - - -### Required Dependencies ### - -"{ObjDir}xmlparse.c.o" Ä "{SrcDir}xmlparse.c" -"{ObjDir}xmlrole.c.o" Ä "{SrcDir}xmlrole.c" -"{ObjDir}xmltok.c.o" Ä "{SrcDir}xmltok.c" - -"{ObjDir}elements.c.o" Ä "{ToolDir}elements.c" -"{ObjDir}outline.c.o" Ä "{ToolDir}outline.c" - - -### Optional Dependencies ### -### Build this target to clean out generated intermediate files. ### - -Clean Ä - Delete {ObjFiles-PPC} {ExportFile} {ElementToolObjFile} {OutlineToolObjFile} - -### Build this target to clean out all generated files. ### - -Clean-All Ä Clean - Delete {StLibFile} {ShLibFile} {StubFile} {ElementsTool} {OutlineTool} - -### Build this target to generate "include file" dependencies. ### - -Dependencies Ä $OutOfDate - MakeDepend ¶ - -append {MAKEFILE} ¶ - -ignore "{CIncludes}" ¶ - -objdir "{ObjDir}" ¶ - -objext .o ¶ - {Defines} ¶ - {Includes} ¶ - {SrcFiles} - - diff --git a/contrib/expat/lib/amigaconfig.h b/contrib/expat/lib/amigaconfig.h deleted file mode 100644 index 86c6115040..0000000000 --- a/contrib/expat/lib/amigaconfig.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef AMIGACONFIG_H -#define AMIGACONFIG_H - -/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ -#define BYTEORDER 4321 - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. */ -#undef HAVE_CHECK_H - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* whether byteorder is bigendian */ -#define WORDS_BIGENDIAN - -/* Define to specify how much context to retain around the current parse - point. */ -#define XML_CONTEXT_BYTES 1024 - -/* Define to make parameter entity parsing functionality available. */ -#define XML_DTD - -/* Define to make XML Namespaces functionality available. */ -#define XML_NS - -#endif /* AMIGACONFIG_H */ diff --git a/contrib/expat/lib/ascii.h b/contrib/expat/lib/ascii.h index d10530b09b..1f594d2e54 100644 --- a/contrib/expat/lib/ascii.h +++ b/contrib/expat/lib/ascii.h @@ -1,5 +1,36 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1999-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2007 Karl Waclawek + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define ASCII_A 0x41 diff --git a/contrib/expat/lib/asciitab.h b/contrib/expat/lib/asciitab.h index 79a15c28ca..af766fb247 100644 --- a/contrib/expat/lib/asciitab.h +++ b/contrib/expat/lib/asciitab.h @@ -1,36 +1,66 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, -/* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, -/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, -/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, -/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, -/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, -/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, -/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, -/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, -/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, -/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, + /* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/contrib/expat/lib/expat.dsp b/contrib/expat/lib/expat.dsp deleted file mode 100644 index 6440d37f5b..0000000000 --- a/contrib/expat/lib/expat.dsp +++ /dev/null @@ -1,185 +0,0 @@ -# Microsoft Developer Studio Project File - Name="expat" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 6.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 - -CFG=expat - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "expat.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "expat.mak" CFG="expat - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "expat - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE "expat - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE - -# Begin Project -# PROP AllowPerConfigDependencies 0 -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -MTL=midl.exe -RSC=rc.exe - -!IF "$(CFG)" == "expat - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "..\win32\bin\Release" -# PROP Intermediate_Dir "..\win32\tmp\Release" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "EXPAT_EXPORTS" /Yu"stdafx.h" /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMPILED_FROM_DSP" /FD /c -# SUBTRACT CPP /YX /Yc /Yu -# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 -# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 -# ADD BASE RSC /l 0x409 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 /nologo /dll /machine:I386 -# ADD LINK32 /nologo /dll /pdb:none /machine:I386 /out:"..\win32\bin\Release\libexpat.dll" - -!ELSEIF "$(CFG)" == "expat - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "..\win32\bin\Debug" -# PROP Intermediate_Dir "..\win32\tmp\Debug" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "EXPAT_EXPORTS" /Yu"stdafx.h" /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /GX /ZI /Od /D "_DEBUG" /D "COMPILED_FROM_DSP" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /FR /FD /GZ /c -# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 -# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 -# ADD BASE RSC /l 0x409 /d "_DEBUG" -# ADD RSC /l 0x409 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 /nologo /dll /debug /machine:I386 /pdbtype:sept -# ADD LINK32 /nologo /dll /pdb:none /debug /machine:I386 /out:"..\win32\bin\Debug\libexpat.dll" - -!ENDIF - -# Begin Target - -# Name "expat - Win32 Release" -# Name "expat - Win32 Debug" -# Begin Group "Source Files" - -# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" -# Begin Source File - -SOURCE=.\libexpat.def -# End Source File -# Begin Source File - -SOURCE=.\xmlparse.c - -!IF "$(CFG)" == "expat - Win32 Release" - -!ELSEIF "$(CFG)" == "expat - Win32 Debug" - -# ADD CPP /GX- /Od - -!ENDIF - -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_ns.c -# End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\ascii.h -# End Source File -# Begin Source File - -SOURCE=.\asciitab.h -# End Source File -# Begin Source File - -SOURCE=.\expat.h -# End Source File -# Begin Source File - -SOURCE=.\expat_external.h -# End Source File -# Begin Source File - -SOURCE=.\iasciitab.h -# End Source File -# Begin Source File - -SOURCE=.\internal.h -# End Source File -# Begin Source File - -SOURCE=.\latin1tab.h -# End Source File -# Begin Source File - -SOURCE=.\nametab.h -# End Source File -# Begin Source File - -SOURCE=.\utf8tab.h -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.h -# End Source File -# End Group -# Begin Group "Resource Files" - -# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" -# End Group -# End Target -# End Project diff --git a/contrib/expat/lib/expat.h b/contrib/expat/lib/expat.h index 9a21680be4..1c83563cbf 100644 --- a/contrib/expat/lib/expat.h +++ b/contrib/expat/lib/expat.h @@ -1,19 +1,46 @@ -/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2005 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2017 Rhodri James + Copyright (c) 2022 Thijs Schreijer + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef Expat_INCLUDED #define Expat_INCLUDED 1 -#ifdef __VMS -/* 0 1 2 3 0 1 2 3 - 1234567890123456789012345678901 1234567890123456789012345678901 */ -#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler -#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler -#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler -#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg -#endif - #include #include "expat_external.h" @@ -24,10 +51,9 @@ extern "C" { struct XML_ParserStruct; typedef struct XML_ParserStruct *XML_Parser; -/* Should this be defined using stdbool.h when C99 is available? */ typedef unsigned char XML_Bool; -#define XML_TRUE ((XML_Bool) 1) -#define XML_FALSE ((XML_Bool) 0) +#define XML_TRUE ((XML_Bool)1) +#define XML_FALSE ((XML_Bool)0) /* The XML_Status enum gives the possible return values for several API functions. The preprocessor #defines are included so this @@ -95,7 +121,13 @@ enum XML_Error { /* Added in 2.0. */ XML_ERROR_RESERVED_PREFIX_XML, XML_ERROR_RESERVED_PREFIX_XMLNS, - XML_ERROR_RESERVED_NAMESPACE_URI + XML_ERROR_RESERVED_NAMESPACE_URI, + /* Added in 2.2.1. */ + XML_ERROR_INVALID_ARGUMENT, + /* Added in 2.3.0. */ + XML_ERROR_NO_BUFFER, + /* Added in 2.4.0. */ + XML_ERROR_AMPLIFICATION_LIMIT_BREACH }; enum XML_Content_Type { @@ -135,25 +167,25 @@ enum XML_Content_Quant { typedef struct XML_cp XML_Content; struct XML_cp { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - XML_Char * name; - unsigned int numchildren; - XML_Content * children; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + XML_Char *name; + unsigned int numchildren; + XML_Content *children; }; - /* This is called for an element declaration. See above for - description of the model argument. It's the caller's responsibility - to free model when finished with it. + description of the model argument. It's the user code's responsibility + to free model when finished with it. See XML_FreeContentModel. + There is no need to free the model from the handler, it can be kept + around and freed at a later stage. */ -typedef void (XMLCALL *XML_ElementDeclHandler) (void *userData, - const XML_Char *name, - XML_Content *model); +typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, + const XML_Char *name, + XML_Content *model); XMLPARSEAPI(void) -XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl); +XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); /* The Attlist declaration handler is called for *each* attribute. So a single Attlist declaration with multiple attributes declared will @@ -163,17 +195,12 @@ XML_SetElementDeclHandler(XML_Parser parser, value will be NULL in the case of "#REQUIRED". If "isrequired" is true and default is non-NULL, then this is a "#FIXED" default. */ -typedef void (XMLCALL *XML_AttlistDeclHandler) ( - void *userData, - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired); +typedef void(XMLCALL *XML_AttlistDeclHandler)( + void *userData, const XML_Char *elname, const XML_Char *attname, + const XML_Char *att_type, const XML_Char *dflt, int isrequired); XMLPARSEAPI(void) -XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl); +XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); /* The XML declaration handler is called for *both* XML declarations and text declarations. The way to distinguish is that the version @@ -183,15 +210,13 @@ XML_SetAttlistDeclHandler(XML_Parser parser, was no standalone parameter in the declaration, that it was given as no, or that it was given as yes. */ -typedef void (XMLCALL *XML_XmlDeclHandler) (void *userData, - const XML_Char *version, - const XML_Char *encoding, - int standalone); +typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData, + const XML_Char *version, + const XML_Char *encoding, + int standalone); XMLPARSEAPI(void) -XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler xmldecl); - +XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); typedef struct { void *(*malloc_fcn)(size_t size); @@ -215,11 +240,21 @@ XML_ParserCreate(const XML_Char *encoding); and the local part will be concatenated without any separator. It is a programming error to use the separator '\0' with namespace triplets (see XML_SetReturnNSTriplet). + If a namespace separator is chosen that can be part of a URI or + part of an XML name, splitting an expanded name back into its + 1, 2 or 3 original parts on application level in the element handler + may end up vulnerable, so these are advised against; sane choices for + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). + + Note that Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to + the XML 1.0 namespaces specification) but it may start doing that + in future releases. Before that, an application using Expat must + be ready to receive namespace URIs containing non-URI characters. */ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); - /* Constructs a new parser using the memory management suite referred to by memsuite. If memsuite is NULL, then use the standard library memory suite. If namespaceSeparator is non-NULL it creates a parser with @@ -235,7 +270,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Char *namespaceSeparator); /* Prepare a parser object to be re-used. This is particularly - valuable when memory allocation overhead is disproportionatly high, + valuable when memory allocation overhead is disproportionately high, such as when a large number of small documnents need to be parsed. All handlers are cleared from the parser, except for the unknownEncodingHandler. The parser's external state is re-initialized @@ -249,31 +284,27 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encoding); /* atts is array of name/value pairs, terminated by 0; names and values are 0 terminated. */ -typedef void (XMLCALL *XML_StartElementHandler) (void *userData, - const XML_Char *name, - const XML_Char **atts); - -typedef void (XMLCALL *XML_EndElementHandler) (void *userData, - const XML_Char *name); +typedef void(XMLCALL *XML_StartElementHandler)(void *userData, + const XML_Char *name, + const XML_Char **atts); +typedef void(XMLCALL *XML_EndElementHandler)(void *userData, + const XML_Char *name); /* s is not 0 terminated. */ -typedef void (XMLCALL *XML_CharacterDataHandler) (void *userData, - const XML_Char *s, - int len); +typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData, + const XML_Char *s, int len); /* target and data are 0 terminated */ -typedef void (XMLCALL *XML_ProcessingInstructionHandler) ( - void *userData, - const XML_Char *target, - const XML_Char *data); +typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData, + const XML_Char *target, + const XML_Char *data); /* data is 0 terminated */ -typedef void (XMLCALL *XML_CommentHandler) (void *userData, - const XML_Char *data); +typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); -typedef void (XMLCALL *XML_StartCdataSectionHandler) (void *userData); -typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData); +typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData); +typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData); /* This is called for any characters in the XML document for which there is no applicable handler. This includes both characters that @@ -288,25 +319,23 @@ typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData); default handler: for example, a comment might be split between multiple calls. */ -typedef void (XMLCALL *XML_DefaultHandler) (void *userData, - const XML_Char *s, - int len); +typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, + int len); /* This is called for the start of the DOCTYPE declaration, before any DTD or internal subset is parsed. */ -typedef void (XMLCALL *XML_StartDoctypeDeclHandler) ( - void *userData, - const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset); - -/* This is called for the start of the DOCTYPE declaration when the +typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset); + +/* This is called for the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. */ -typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); +typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); /* This is called for entity declarations. The is_parameter_entity argument will be non-zero if the entity is a parameter entity, zero @@ -314,7 +343,7 @@ typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); For internal entities (), value will be non-NULL and systemId, publicID, and notationName will be NULL. - The value string is NOT nul-terminated; the length is provided in + The value string is NOT null-terminated; the length is provided in the value_length argument. Since it is legal to have zero-length values, do not use this argument to test for internal entities. @@ -326,23 +355,17 @@ typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); Note that is_parameter_entity can't be changed to XML_Bool, since that would break binary compatibility. */ -typedef void (XMLCALL *XML_EntityDeclHandler) ( - void *userData, - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); +typedef void(XMLCALL *XML_EntityDeclHandler)( + void *userData, const XML_Char *entityName, int is_parameter_entity, + const XML_Char *value, int value_length, const XML_Char *base, + const XML_Char *systemId, const XML_Char *publicId, + const XML_Char *notationName); XMLPARSEAPI(void) -XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler); +XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE - This handler has been superceded by the EntityDeclHandler above. + This handler has been superseded by the EntityDeclHandler above. It is provided here for backward compatibility. This is called for a declaration of an unparsed (NDATA) entity. @@ -350,24 +373,20 @@ XML_SetEntityDeclHandler(XML_Parser parser, entityName, systemId and notationName arguments will never be NULL. The other arguments may be. */ -typedef void (XMLCALL *XML_UnparsedEntityDeclHandler) ( - void *userData, - const XML_Char *entityName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); +typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)( + void *userData, const XML_Char *entityName, const XML_Char *base, + const XML_Char *systemId, const XML_Char *publicId, + const XML_Char *notationName); /* This is called for a declaration of notation. The base argument is whatever was set by XML_SetBase. The notationName will never be NULL. The other arguments can be. */ -typedef void (XMLCALL *XML_NotationDeclHandler) ( - void *userData, - const XML_Char *notationName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); +typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); /* When namespace processing is enabled, these are called once for each namespace declaration. The call to the start and end element @@ -375,14 +394,12 @@ typedef void (XMLCALL *XML_NotationDeclHandler) ( declaration handlers. For an xmlns attribute, prefix will be NULL. For an xmlns="" attribute, uri will be NULL. */ -typedef void (XMLCALL *XML_StartNamespaceDeclHandler) ( - void *userData, - const XML_Char *prefix, - const XML_Char *uri); +typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, + const XML_Char *prefix, + const XML_Char *uri); -typedef void (XMLCALL *XML_EndNamespaceDeclHandler) ( - void *userData, - const XML_Char *prefix); +typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, + const XML_Char *prefix); /* This is called if the document is not standalone, that is, it has an external subset or a reference to a parameter entity, but does not @@ -393,7 +410,7 @@ typedef void (XMLCALL *XML_EndNamespaceDeclHandler) ( conditions above this handler will only be called if the referenced entity was actually read. */ -typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData); +typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData); /* This is called for a reference to an external parsed general entity. The referenced entity is not automatically parsed. The @@ -429,12 +446,11 @@ typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData); Note that unlike other handlers the first argument is the parser, not userData. */ -typedef int (XMLCALL *XML_ExternalEntityRefHandler) ( - XML_Parser parser, - const XML_Char *context, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); +typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); /* This is called in two situations: 1) An entity reference is encountered for which no declaration @@ -446,10 +462,9 @@ typedef int (XMLCALL *XML_ExternalEntityRefHandler) ( the event would be out of sync with the reporting of the declarations or attribute values */ -typedef void (XMLCALL *XML_SkippedEntityHandler) ( - void *userData, - const XML_Char *entityName, - int is_parameter_entity); +typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData, + const XML_Char *entityName, + int is_parameter_entity); /* This structure is filled in by the XML_UnknownEncodingHandler to provide information to the parser about encodings that are unknown @@ -506,8 +521,8 @@ typedef void (XMLCALL *XML_SkippedEntityHandler) ( typedef struct { int map[256]; void *data; - int (XMLCALL *convert)(void *data, const char *s); - void (XMLCALL *release)(void *data); + int(XMLCALL *convert)(void *data, const char *s); + void(XMLCALL *release)(void *data); } XML_Encoding; /* This is called for an encoding that is unknown to the parser. @@ -523,25 +538,21 @@ typedef struct { Otherwise it must return XML_STATUS_ERROR. If info does not describe a suitable encoding, then the parser will - return an XML_UNKNOWN_ENCODING error. + return an XML_ERROR_UNKNOWN_ENCODING error. */ -typedef int (XMLCALL *XML_UnknownEncodingHandler) ( - void *encodingHandlerData, - const XML_Char *name, - XML_Encoding *info); +typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info); XMLPARSEAPI(void) -XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, +XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); XMLPARSEAPI(void) -XML_SetStartElementHandler(XML_Parser parser, - XML_StartElementHandler handler); +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); XMLPARSEAPI(void) -XML_SetEndElementHandler(XML_Parser parser, - XML_EndElementHandler handler); +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); XMLPARSEAPI(void) XML_SetCharacterDataHandler(XML_Parser parser, @@ -551,8 +562,7 @@ XMLPARSEAPI(void) XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); XMLPARSEAPI(void) -XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler); +XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); XMLPARSEAPI(void) XML_SetCdataSectionHandler(XML_Parser parser, @@ -572,20 +582,17 @@ XML_SetEndCdataSectionHandler(XML_Parser parser, default handler, or to the skipped entity handler, if one is set. */ XMLPARSEAPI(void) -XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler); +XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); /* This sets the default handler but does not inhibit expansion of internal entities. The entity reference will not be passed to the default handler. */ XMLPARSEAPI(void) -XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler); +XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); XMLPARSEAPI(void) -XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, +XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) @@ -593,16 +600,14 @@ XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start); XMLPARSEAPI(void) -XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end); +XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); XMLPARSEAPI(void) -XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler); +XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); XMLPARSEAPI(void) XML_SetNamespaceDeclHandler(XML_Parser parser, @@ -630,8 +635,7 @@ XML_SetExternalEntityRefHandler(XML_Parser parser, instead of the parser object. */ XMLPARSEAPI(void) -XML_SetExternalEntityRefHandlerArg(XML_Parser parser, - void *arg); +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg); XMLPARSEAPI(void) XML_SetSkippedEntityHandler(XML_Parser parser, @@ -706,11 +710,11 @@ XML_UseParserAsHandlerArg(XML_Parser parser); be called, despite an external subset being parsed. Note: If XML_DTD is not defined when Expat is compiled, returns XML_ERROR_FEATURE_REQUIRES_XML_DTD. + Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. */ XMLPARSEAPI(enum XML_Error) XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); - /* Sets the base to be used for resolving relative URIs in system identifiers in declarations. Resolving relative identifiers is left to the application: this value will be passed through as the @@ -728,16 +732,17 @@ XML_GetBase(XML_Parser parser); /* Returns the number of the attribute/value pairs passed in last call to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 2; thus - this correspondds to an index into the atts array passed to the - XML_StartElementHandler. + this corresponds to an index into the atts array passed to the + XML_StartElementHandler. Returns -1 if parser == NULL. */ XMLPARSEAPI(int) XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to - XML_StartElementHandler, or -1 if there is no ID attribute. Each - attribute/value pair counts as 2; thus this correspondds to an - index into the atts array passed to the XML_StartElementHandler. + XML_StartElementHandler, or -1 if there is no ID attribute or + parser == NULL. Each attribute/value pair counts as 2; thus this + corresponds to an index into the atts array passed to the + XML_StartElementHandler. */ XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); @@ -749,10 +754,10 @@ XML_GetIdAttributeIndex(XML_Parser parser); info->valueEnd - info->valueStart = 4 bytes. */ typedef struct { - XML_Index nameStart; /* Offset to beginning of the attribute name. */ - XML_Index nameEnd; /* Offset after the attribute name's last byte. */ - XML_Index valueStart; /* Offset to beginning of the attribute value. */ - XML_Index valueEnd; /* Offset after the attribute value's last byte. */ + XML_Index nameStart; /* Offset to beginning of the attribute name. */ + XML_Index nameEnd; /* Offset after the attribute name's last byte. */ + XML_Index valueStart; /* Offset to beginning of the attribute value. */ + XML_Index valueEnd; /* Offset after the attribute value's last byte. */ } XML_AttrInfo; /* Returns an array of XML_AttrInfo structures for the attribute/value pairs @@ -788,20 +793,20 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal); (resumable = 0) an already suspended parser. Some call-backs may still follow because they would otherwise get lost. Examples: - endElementHandler() for empty elements when stopped in - startElementHandler(), - - endNameSpaceDeclHandler() when stopped in endElementHandler(), + startElementHandler(), + - endNameSpaceDeclHandler() when stopped in endElementHandler(), and possibly others. Can be called from most handlers, including DTD related call-backs, except when parsing an external parameter entity and resumable != 0. Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. - Possible error codes: + Possible error codes: - XML_ERROR_SUSPENDED: when suspending an already suspended parser. - XML_ERROR_FINISHED: when the parser has already finished. - XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. - When resumable != 0 (true) then parsing is suspended, that is, - XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. + When resumable != 0 (true) then parsing is suspended, that is, + XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. @@ -812,7 +817,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal); the externalEntityRefHandler() to call XML_StopParser() on the parent parser (recursively), if one wants to stop parsing altogether. - When suspended, parsing can be resumed by calling XML_ResumeParser(). + When suspended, parsing can be resumed by calling XML_ResumeParser(). */ XMLPARSEAPI(enum XML_Status) XML_StopParser(XML_Parser parser, XML_Bool resumable); @@ -820,7 +825,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable); /* Resumes parsing after it has been suspended with XML_StopParser(). Must not be called from within a handler call-back. Returns same status codes as XML_Parse() or XML_ParseBuffer(). - Additional error code XML_ERROR_NOT_SUSPENDED possible. + Additional error code XML_ERROR_NOT_SUSPENDED possible. *Note*: This must be called on the most deeply nested child parser instance @@ -832,12 +837,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable); XMLPARSEAPI(enum XML_Status) XML_ResumeParser(XML_Parser parser); -enum XML_Parsing { - XML_INITIALIZED, - XML_PARSING, - XML_FINISHED, - XML_SUSPENDED -}; +enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED }; typedef struct { enum XML_Parsing parsing; @@ -869,8 +869,7 @@ XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status); Otherwise returns a new XML_Parser object. */ XMLPARSEAPI(XML_Parser) -XML_ExternalEntityParserCreate(XML_Parser parser, - const XML_Char *context, +XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, const XML_Char *encoding); enum XML_ParamEntityParsing { @@ -901,6 +900,7 @@ enum XML_ParamEntityParsing { entities is requested; otherwise it will return non-zero. Note: If XML_SetParamEntityParsing is called after XML_Parse or XML_ParseBuffer, then it has no effect and will always return 0. + Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, @@ -910,10 +910,10 @@ XML_SetParamEntityParsing(XML_Parser parser, Helps in preventing DoS attacks based on predicting hash function behavior. This must be called before parsing is started. Returns 1 if successful, 0 when called after parsing has started. + Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) -XML_SetHashSalt(XML_Parser parser, - unsigned long hash_salt); +XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. @@ -930,12 +930,16 @@ XML_GetErrorCode(XML_Parser parser); be within the relevant markup. When called outside of the callback functions, the position indicated will be just past the last parse event (regardless of whether there was an associated callback). - + They may also be called after returning from a call to XML_Parse or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then the location is the location of the character at which the error was detected; otherwise the location is the location of the last parse event, as described above. + + Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber + return 0 to indicate an error. + Note: XML_GetCurrentByteIndex returns -1 to indicate an error. */ XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); @@ -958,14 +962,12 @@ XML_GetCurrentByteCount(XML_Parser parser); the handler that makes the call. */ XMLPARSEAPI(const char *) -XML_GetInputContext(XML_Parser parser, - int *offset, - int *size); +XML_GetInputContext(XML_Parser parser, int *offset, int *size); /* For backwards compatibility with previous versions. */ -#define XML_GetErrorLineNumber XML_GetCurrentLineNumber +#define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber -#define XML_GetErrorByteIndex XML_GetCurrentByteIndex +#define XML_GetErrorByteIndex XML_GetCurrentByteIndex /* Frees the content model passed to the element declaration handler */ XMLPARSEAPI(void) @@ -973,9 +975,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model); /* Exposing the memory handling functions used in Expat */ XMLPARSEAPI(void *) +XML_ATTR_MALLOC +XML_ATTR_ALLOC_SIZE(2) XML_MemMalloc(XML_Parser parser, size_t size); XMLPARSEAPI(void *) +XML_ATTR_ALLOC_SIZE(3) XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XMLPARSEAPI(void) @@ -1017,27 +1022,39 @@ enum XML_FeatureEnum { XML_FEATURE_SIZEOF_XML_LCHAR, XML_FEATURE_NS, XML_FEATURE_LARGE_SIZE, - XML_FEATURE_ATTR_INFO + XML_FEATURE_ATTR_INFO, + /* Added in Expat 2.4.0. */ + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT /* Additional features must be added to the end of this enum. */ }; typedef struct { - enum XML_FeatureEnum feature; - const XML_LChar *name; - long int value; + enum XML_FeatureEnum feature; + const XML_LChar *name; + long int value; } XML_Feature; XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); +#ifdef XML_DTD +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor); + +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +#endif -/* Expat follows the GNU/Linux convention of odd number minor version for - beta/development releases and even number minor version for stable - releases. Micro is bumped with each release, and set to 0 with each - change to major or minor version. +/* Expat follows the semantic versioning convention. + See http://semver.org. */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 1 +#define XML_MINOR_VERSION 5 #define XML_MICRO_VERSION 0 #ifdef __cplusplus diff --git a/contrib/expat/lib/expat_external.h b/contrib/expat/lib/expat_external.h index 2c03284ea2..8829f77091 100644 --- a/contrib/expat/lib/expat_external.h +++ b/contrib/expat/lib/expat_external.h @@ -1,5 +1,40 @@ -/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2004 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016-2019 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Yury Gribov + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef Expat_External_INCLUDED @@ -7,10 +42,6 @@ /* External API definitions */ -#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__) -#define XML_USE_MSC_EXTENSIONS 1 -#endif - /* Expat tries very hard to make the API boundary very specifically defined. There are two macros defined to control this boundary; each of these can be defined before including this header to @@ -34,11 +65,11 @@ system headers may assume the cdecl convention. */ #ifndef XMLCALL -#if defined(_MSC_VER) -#define XMLCALL __cdecl -#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER) -#define XMLCALL __attribute__((cdecl)) -#else +# if defined(_MSC_VER) +# define XMLCALL __cdecl +# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) +# define XMLCALL __attribute__((cdecl)) +# else /* For any platform which uses this definition and supports more than one calling convention, we need to extend this definition to declare the convention used on that platform, if it's possible to @@ -49,28 +80,47 @@ pre-processor and how to specify the same calling convention as the platform's malloc() implementation. */ -#define XMLCALL -#endif -#endif /* not defined XMLCALL */ - +# define XMLCALL +# endif +#endif /* not defined XMLCALL */ -#if !defined(XML_STATIC) && !defined(XMLIMPORT) -#ifndef XML_BUILDING_EXPAT +#if ! defined(XML_STATIC) && ! defined(XMLIMPORT) +# ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -#ifdef XML_USE_MSC_EXTENSIONS -#define XMLIMPORT __declspec(dllimport) -#endif +# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) && ! defined(__CYGWIN__) +# define XMLIMPORT __declspec(dllimport) +# endif + +# endif +#endif /* not defined XML_STATIC */ +#ifndef XML_ENABLE_VISIBILITY +# define XML_ENABLE_VISIBILITY 0 #endif -#endif /* not defined XML_STATIC */ +#if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY +# define XMLIMPORT __attribute__((visibility("default"))) +#endif /* If we didn't define it above, define it away: */ #ifndef XMLIMPORT -#define XMLIMPORT +# define XMLIMPORT #endif +#if defined(__GNUC__) \ + && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +# define XML_ATTR_MALLOC __attribute__((__malloc__)) +#else +# define XML_ATTR_MALLOC +#endif + +#if defined(__GNUC__) \ + && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +#else +# define XML_ATTR_ALLOC_SIZE(x) +#endif #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL @@ -79,30 +129,30 @@ extern "C" { #endif #ifdef XML_UNICODE_WCHAR_T -#define XML_UNICODE +# ifndef XML_UNICODE +# define XML_UNICODE +# endif +# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) +# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" +# endif #endif -#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ -#ifdef XML_UNICODE_WCHAR_T +#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ +# ifdef XML_UNICODE_WCHAR_T typedef wchar_t XML_Char; typedef wchar_t XML_LChar; -#else +# else typedef unsigned short XML_Char; typedef char XML_LChar; -#endif /* XML_UNICODE_WCHAR_T */ -#else /* Information is UTF-8 encoded. */ +# endif /* XML_UNICODE_WCHAR_T */ +#else /* Information is UTF-8 encoded. */ typedef char XML_Char; typedef char XML_LChar; -#endif /* XML_UNICODE */ +#endif /* XML_UNICODE */ -#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ -#if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400 -typedef __int64 XML_Index; -typedef unsigned __int64 XML_Size; -#else +#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ typedef long long XML_Index; typedef unsigned long long XML_Size; -#endif #else typedef long XML_Index; typedef unsigned long XML_Size; diff --git a/contrib/expat/lib/expat_static.dsp b/contrib/expat/lib/expat_static.dsp deleted file mode 100644 index ca39bc073d..0000000000 --- a/contrib/expat/lib/expat_static.dsp +++ /dev/null @@ -1,162 +0,0 @@ -# Microsoft Developer Studio Project File - Name="expat_static" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 6.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Static Library" 0x0104 - -CFG=expat_static - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "expat_static.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "expat_static.mak" CFG="expat_static - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "expat_static - Win32 Release" (based on "Win32 (x86) Static Library") -!MESSAGE "expat_static - Win32 Debug" (based on "Win32 (x86) Static Library") -!MESSAGE - -# Begin Project -# PROP AllowPerConfigDependencies 0 -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -RSC=rc.exe - -!IF "$(CFG)" == "expat_static - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "expat_static___Win32_Release" -# PROP BASE Intermediate_Dir "expat_static___Win32_Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "..\win32\bin\Release" -# PROP Intermediate_Dir "..\win32\tmp\Release_static" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "_WINDOWS" /D "NDEBUG" /D "_MBCS" /D "_LIB" /D "COMPILED_FROM_DSP" /FD /c -# SUBTRACT CPP /YX -# ADD BASE RSC /l 0x1009 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LIB32=link.exe -lib -# ADD BASE LIB32 /nologo -# ADD LIB32 /nologo /out:"..\win32\bin\Release\libexpatMT.lib" - -!ELSEIF "$(CFG)" == "expat_static - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "expat_static___Win32_Debug" -# PROP BASE Intermediate_Dir "expat_static___Win32_Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "..\win32\bin\Debug" -# PROP Intermediate_Dir "..\win32\tmp\Debug_static" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "COMPILED_FROM_DSP" /D "_MBCS" /D "_LIB" /FR /FD /GZ /c -# SUBTRACT CPP /YX -# ADD BASE RSC /l 0x1009 /d "_DEBUG" -# ADD RSC /l 0x409 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LIB32=link.exe -lib -# ADD BASE LIB32 /nologo -# ADD LIB32 /nologo /out:"..\win32\bin\Debug\libexpatMT.lib" - -!ENDIF - -# Begin Target - -# Name "expat_static - Win32 Release" -# Name "expat_static - Win32 Debug" -# Begin Group "Source Files" - -# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" -# Begin Source File - -SOURCE=.\xmlparse.c -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_ns.c -# End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\ascii.h -# End Source File -# Begin Source File - -SOURCE=.\asciitab.h -# End Source File -# Begin Source File - -SOURCE=.\expat.h -# End Source File -# Begin Source File - -SOURCE=.\expat_external.h -# End Source File -# Begin Source File - -SOURCE=.\iasciitab.h -# End Source File -# Begin Source File - -SOURCE=.\internal.h -# End Source File -# Begin Source File - -SOURCE=.\latin1tab.h -# End Source File -# Begin Source File - -SOURCE=.\nametab.h -# End Source File -# Begin Source File - -SOURCE=.\utf8tab.h -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.h -# End Source File -# End Group -# End Target -# End Project diff --git a/contrib/expat/lib/expatw.dsp b/contrib/expat/lib/expatw.dsp deleted file mode 100644 index 77497b58db..0000000000 --- a/contrib/expat/lib/expatw.dsp +++ /dev/null @@ -1,185 +0,0 @@ -# Microsoft Developer Studio Project File - Name="expatw" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 6.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 - -CFG=expatw - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "expatw.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "expatw.mak" CFG="expatw - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "expatw - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE "expatw - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE - -# Begin Project -# PROP AllowPerConfigDependencies 0 -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -MTL=midl.exe -RSC=rc.exe - -!IF "$(CFG)" == "expatw - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "..\win32\bin\Release" -# PROP Intermediate_Dir "..\win32\tmp\Release-w" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "EXPAT_EXPORTS" /Yu"stdafx.h" /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "COMPILED_FROM_DSP" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "XML_UNICODE_WCHAR_T" /FD /c -# SUBTRACT CPP /YX /Yc /Yu -# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 -# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 -# ADD BASE RSC /l 0x409 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 /nologo /dll /machine:I386 -# ADD LINK32 /nologo /dll /pdb:none /machine:I386 /out:"..\win32\bin\Release\libexpatw.dll" - -!ELSEIF "$(CFG)" == "expatw - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "..\win32\bin\Debug" -# PROP Intermediate_Dir "..\win32\tmp\Debug-w" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "EXPAT_EXPORTS" /Yu"stdafx.h" /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /GX /ZI /Od /D "_DEBUG" /D "COMPILED_FROM_DSP" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "XML_UNICODE_WCHAR_T" /FR /FD /GZ /c -# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 -# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 -# ADD BASE RSC /l 0x409 /d "_DEBUG" -# ADD RSC /l 0x409 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 /nologo /dll /debug /machine:I386 /pdbtype:sept -# ADD LINK32 /nologo /dll /pdb:none /debug /machine:I386 /out:"..\win32\bin\Debug\libexpatw.dll" - -!ENDIF - -# Begin Target - -# Name "expatw - Win32 Release" -# Name "expatw - Win32 Debug" -# Begin Group "Source Files" - -# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" -# Begin Source File - -SOURCE=.\libexpatw.def -# End Source File -# Begin Source File - -SOURCE=.\xmlparse.c - -!IF "$(CFG)" == "expatw - Win32 Release" - -!ELSEIF "$(CFG)" == "expatw - Win32 Debug" - -# ADD CPP /GX- /Od - -!ENDIF - -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_ns.c -# End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\ascii.h -# End Source File -# Begin Source File - -SOURCE=.\asciitab.h -# End Source File -# Begin Source File - -SOURCE=.\expat.h -# End Source File -# Begin Source File - -SOURCE=.\expat_external.h -# End Source File -# Begin Source File - -SOURCE=.\iasciitab.h -# End Source File -# Begin Source File - -SOURCE=.\internal.h -# End Source File -# Begin Source File - -SOURCE=.\latin1tab.h -# End Source File -# Begin Source File - -SOURCE=.\nametab.h -# End Source File -# Begin Source File - -SOURCE=.\utf8tab.h -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.h -# End Source File -# End Group -# Begin Group "Resource Files" - -# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" -# End Group -# End Target -# End Project diff --git a/contrib/expat/lib/expatw_static.dsp b/contrib/expat/lib/expatw_static.dsp deleted file mode 100644 index f13dd72978..0000000000 --- a/contrib/expat/lib/expatw_static.dsp +++ /dev/null @@ -1,162 +0,0 @@ -# Microsoft Developer Studio Project File - Name="expatw_static" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 6.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Static Library" 0x0104 - -CFG=expatw_static - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "expatw_static.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "expatw_static.mak" CFG="expatw_static - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "expatw_static - Win32 Release" (based on "Win32 (x86) Static Library") -!MESSAGE "expatw_static - Win32 Debug" (based on "Win32 (x86) Static Library") -!MESSAGE - -# Begin Project -# PROP AllowPerConfigDependencies 0 -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -RSC=rc.exe - -!IF "$(CFG)" == "expatw_static - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "expatw_static___Win32_Release" -# PROP BASE Intermediate_Dir "expatw_static___Win32_Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "..\win32\bin\Release" -# PROP Intermediate_Dir "..\win32\tmp\Release-w_static" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "_WINDOWS" /D "NDEBUG" /D "_MBCS" /D "_LIB" /D "COMPILED_FROM_DSP" /D "XML_UNICODE_WCHAR_T" /FD /c -# SUBTRACT CPP /YX -# ADD BASE RSC /l 0x1009 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LIB32=link.exe -lib -# ADD BASE LIB32 /nologo -# ADD LIB32 /nologo /out:"..\win32\bin\Release\libexpatwMT.lib" - -!ELSEIF "$(CFG)" == "expatw_static - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "expatw_static___Win32_Debug" -# PROP BASE Intermediate_Dir "expatw_static___Win32_Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "..\win32\bin\Debug" -# PROP Intermediate_Dir "..\win32\tmp\Debug-w_static" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_LIB" /D "COMPILED_FROM_DSP" /D "XML_UNICODE_WCHAR_T" /FR /FD /GZ /c -# SUBTRACT CPP /YX -# ADD BASE RSC /l 0x1009 /d "_DEBUG" -# ADD RSC /l 0x409 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LIB32=link.exe -lib -# ADD BASE LIB32 /nologo -# ADD LIB32 /nologo /out:"..\win32\bin\Debug\libexpatwMT.lib" - -!ENDIF - -# Begin Target - -# Name "expatw_static - Win32 Release" -# Name "expatw_static - Win32 Debug" -# Begin Group "Source Files" - -# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" -# Begin Source File - -SOURCE=.\xmlparse.c -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.c -# End Source File -# Begin Source File - -SOURCE=.\xmltok_ns.c -# End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\ascii.h -# End Source File -# Begin Source File - -SOURCE=.\asciitab.h -# End Source File -# Begin Source File - -SOURCE=.\expat.h -# End Source File -# Begin Source File - -SOURCE=.\expat_external.h -# End Source File -# Begin Source File - -SOURCE=.\iasciitab.h -# End Source File -# Begin Source File - -SOURCE=.\internal.h -# End Source File -# Begin Source File - -SOURCE=.\latin1tab.h -# End Source File -# Begin Source File - -SOURCE=.\nametab.h -# End Source File -# Begin Source File - -SOURCE=.\utf8tab.h -# End Source File -# Begin Source File - -SOURCE=.\xmlrole.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok.h -# End Source File -# Begin Source File - -SOURCE=.\xmltok_impl.h -# End Source File -# End Group -# End Target -# End Project diff --git a/contrib/expat/lib/iasciitab.h b/contrib/expat/lib/iasciitab.h index 24a1d5ccc9..5d8646f2a3 100644 --- a/contrib/expat/lib/iasciitab.h +++ b/contrib/expat/lib/iasciitab.h @@ -1,37 +1,67 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, -/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, -/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, -/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, -/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, -/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, -/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, -/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, -/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, -/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, -/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, + /* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/contrib/expat/lib/internal.h b/contrib/expat/lib/internal.h index dd5454831d..e09f533b23 100644 --- a/contrib/expat/lib/internal.h +++ b/contrib/expat/lib/internal.h @@ -18,9 +18,42 @@ Note: Use of these macros is based on judgement, not hard rules, and therefore subject to change. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2003 Greg Stein + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2018 Yury Gribov + Copyright (c) 2019 David Loffredo + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__) +#if defined(__GNUC__) && defined(__i386__) && ! defined(__MINGW32__) /* We'll use this version by default only where we know it helps. regparm() generates warnings on Solaris boxes. See SF bug #692878. @@ -30,8 +63,8 @@ #define FASTCALL __attribute__((stdcall, regparm(3))) and let's try this: */ -#define FASTCALL __attribute__((regparm(3))) -#define PTRFASTCALL __attribute__((regparm(3))) +# define FASTCALL __attribute__((regparm(3))) +# define PTRFASTCALL __attribute__((regparm(3))) #endif /* Using __fastcall seems to have an unexpected negative effect under @@ -45,29 +78,88 @@ /* Make sure all of these are defined if they aren't already. */ #ifndef FASTCALL -#define FASTCALL +# define FASTCALL #endif #ifndef PTRCALL -#define PTRCALL +# define PTRCALL #endif #ifndef PTRFASTCALL -#define PTRFASTCALL +# define PTRFASTCALL #endif #ifndef XML_MIN_SIZE -#if !defined(__cplusplus) && !defined(inline) -#ifdef __GNUC__ -#define inline __inline -#endif /* __GNUC__ */ -#endif +# if ! defined(__cplusplus) && ! defined(inline) +# ifdef __GNUC__ +# define inline __inline +# endif /* __GNUC__ */ +# endif #endif /* XML_MIN_SIZE */ #ifdef __cplusplus -#define inline inline +# define inline inline #else -#ifndef inline -#define inline +# ifndef inline +# define inline +# endif #endif + +#include // ULONG_MAX + +#if defined(_WIN32) \ + && (! defined(__USE_MINGW_ANSI_STDIO) \ + || (1 - __USE_MINGW_ANSI_STDIO - 1 == 0)) +# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" +# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#else +# define EXPAT_FMT_ULL(midpart) "%" midpart "llu" +# if ! defined(ULONG_MAX) +# error Compiler did not define ULONG_MAX for us +# elif ULONG_MAX == 18446744073709551615u // 2^64-1 +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#endif + +#ifndef UNUSED_P +# define UNUSED_P(p) (void)p +#endif + +/* NOTE BEGIN If you ever patch these defaults to greater values + for non-attack XML payload in your environment, + please file a bug report with libexpat. Thank you! +*/ +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \ + 100.0f +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ + 8388608 // 8 MiB, 2^23 +/* NOTE END */ + +#include "expat.h" // so we can use type XML_Parser below + +#ifdef __cplusplus +extern "C" { +#endif + +void _INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef); + +#if defined(XML_DTD) +unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); +unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); +const char *unsignedCharToPrintable(unsigned char c); +#endif + +#ifdef __cplusplus +} #endif diff --git a/contrib/expat/lib/latin1tab.h b/contrib/expat/lib/latin1tab.h index 53c25d76b2..b681d278af 100644 --- a/contrib/expat/lib/latin1tab.h +++ b/contrib/expat/lib/latin1tab.h @@ -1,36 +1,66 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, -/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, -/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, -/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, + /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, + /* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, + /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, diff --git a/contrib/expat/lib/libexpat.def b/contrib/expat/lib/libexpat.def deleted file mode 100644 index 3920bbcf90..0000000000 --- a/contrib/expat/lib/libexpat.def +++ /dev/null @@ -1,73 +0,0 @@ -; DEF file for MS VC++ - -LIBRARY -EXPORTS - XML_DefaultCurrent @1 - XML_ErrorString @2 - XML_ExpatVersion @3 - XML_ExpatVersionInfo @4 - XML_ExternalEntityParserCreate @5 - XML_GetBase @6 - XML_GetBuffer @7 - XML_GetCurrentByteCount @8 - XML_GetCurrentByteIndex @9 - XML_GetCurrentColumnNumber @10 - XML_GetCurrentLineNumber @11 - XML_GetErrorCode @12 - XML_GetIdAttributeIndex @13 - XML_GetInputContext @14 - XML_GetSpecifiedAttributeCount @15 - XML_Parse @16 - XML_ParseBuffer @17 - XML_ParserCreate @18 - XML_ParserCreateNS @19 - XML_ParserCreate_MM @20 - XML_ParserFree @21 - XML_SetAttlistDeclHandler @22 - XML_SetBase @23 - XML_SetCdataSectionHandler @24 - XML_SetCharacterDataHandler @25 - XML_SetCommentHandler @26 - XML_SetDefaultHandler @27 - XML_SetDefaultHandlerExpand @28 - XML_SetDoctypeDeclHandler @29 - XML_SetElementDeclHandler @30 - XML_SetElementHandler @31 - XML_SetEncoding @32 - XML_SetEndCdataSectionHandler @33 - XML_SetEndDoctypeDeclHandler @34 - XML_SetEndElementHandler @35 - XML_SetEndNamespaceDeclHandler @36 - XML_SetEntityDeclHandler @37 - XML_SetExternalEntityRefHandler @38 - XML_SetExternalEntityRefHandlerArg @39 - XML_SetNamespaceDeclHandler @40 - XML_SetNotStandaloneHandler @41 - XML_SetNotationDeclHandler @42 - XML_SetParamEntityParsing @43 - XML_SetProcessingInstructionHandler @44 - XML_SetReturnNSTriplet @45 - XML_SetStartCdataSectionHandler @46 - XML_SetStartDoctypeDeclHandler @47 - XML_SetStartElementHandler @48 - XML_SetStartNamespaceDeclHandler @49 - XML_SetUnknownEncodingHandler @50 - XML_SetUnparsedEntityDeclHandler @51 - XML_SetUserData @52 - XML_SetXmlDeclHandler @53 - XML_UseParserAsHandlerArg @54 -; added with version 1.95.3 - XML_ParserReset @55 - XML_SetSkippedEntityHandler @56 -; added with version 1.95.5 - XML_GetFeatureList @57 - XML_UseForeignDTD @58 -; added with version 1.95.6 - XML_FreeContentModel @59 - XML_MemMalloc @60 - XML_MemRealloc @61 - XML_MemFree @62 -; added with version 1.95.8 - XML_StopParser @63 - XML_ResumeParser @64 - XML_GetParsingStatus @65 diff --git a/contrib/expat/lib/libexpatw.def b/contrib/expat/lib/libexpatw.def deleted file mode 100644 index 3920bbcf90..0000000000 --- a/contrib/expat/lib/libexpatw.def +++ /dev/null @@ -1,73 +0,0 @@ -; DEF file for MS VC++ - -LIBRARY -EXPORTS - XML_DefaultCurrent @1 - XML_ErrorString @2 - XML_ExpatVersion @3 - XML_ExpatVersionInfo @4 - XML_ExternalEntityParserCreate @5 - XML_GetBase @6 - XML_GetBuffer @7 - XML_GetCurrentByteCount @8 - XML_GetCurrentByteIndex @9 - XML_GetCurrentColumnNumber @10 - XML_GetCurrentLineNumber @11 - XML_GetErrorCode @12 - XML_GetIdAttributeIndex @13 - XML_GetInputContext @14 - XML_GetSpecifiedAttributeCount @15 - XML_Parse @16 - XML_ParseBuffer @17 - XML_ParserCreate @18 - XML_ParserCreateNS @19 - XML_ParserCreate_MM @20 - XML_ParserFree @21 - XML_SetAttlistDeclHandler @22 - XML_SetBase @23 - XML_SetCdataSectionHandler @24 - XML_SetCharacterDataHandler @25 - XML_SetCommentHandler @26 - XML_SetDefaultHandler @27 - XML_SetDefaultHandlerExpand @28 - XML_SetDoctypeDeclHandler @29 - XML_SetElementDeclHandler @30 - XML_SetElementHandler @31 - XML_SetEncoding @32 - XML_SetEndCdataSectionHandler @33 - XML_SetEndDoctypeDeclHandler @34 - XML_SetEndElementHandler @35 - XML_SetEndNamespaceDeclHandler @36 - XML_SetEntityDeclHandler @37 - XML_SetExternalEntityRefHandler @38 - XML_SetExternalEntityRefHandlerArg @39 - XML_SetNamespaceDeclHandler @40 - XML_SetNotStandaloneHandler @41 - XML_SetNotationDeclHandler @42 - XML_SetParamEntityParsing @43 - XML_SetProcessingInstructionHandler @44 - XML_SetReturnNSTriplet @45 - XML_SetStartCdataSectionHandler @46 - XML_SetStartDoctypeDeclHandler @47 - XML_SetStartElementHandler @48 - XML_SetStartNamespaceDeclHandler @49 - XML_SetUnknownEncodingHandler @50 - XML_SetUnparsedEntityDeclHandler @51 - XML_SetUserData @52 - XML_SetXmlDeclHandler @53 - XML_UseParserAsHandlerArg @54 -; added with version 1.95.3 - XML_ParserReset @55 - XML_SetSkippedEntityHandler @56 -; added with version 1.95.5 - XML_GetFeatureList @57 - XML_UseForeignDTD @58 -; added with version 1.95.6 - XML_FreeContentModel @59 - XML_MemMalloc @60 - XML_MemRealloc @61 - XML_MemFree @62 -; added with version 1.95.8 - XML_StopParser @63 - XML_ResumeParser @64 - XML_GetParsingStatus @65 diff --git a/contrib/expat/lib/macconfig.h b/contrib/expat/lib/macconfig.h deleted file mode 100644 index 2725caaf54..0000000000 --- a/contrib/expat/lib/macconfig.h +++ /dev/null @@ -1,53 +0,0 @@ -/*================================================================ -** Copyright 2000, Clark Cooper -** All rights reserved. -** -** This is free software. You are permitted to copy, distribute, or modify -** it under the terms of the MIT/X license (contained in the COPYING file -** with this distribution.) -** -*/ - -#ifndef MACCONFIG_H -#define MACCONFIG_H - - -/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ -#define BYTEORDER 4321 - -/* Define to 1 if you have the `bcopy' function. */ -#undef HAVE_BCOPY - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE - -/* Define to 1 if you have a working `mmap' system call. */ -#undef HAVE_MMAP - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* whether byteorder is bigendian */ -#define WORDS_BIGENDIAN - -/* Define to specify how much context to retain around the current parse - point. */ -#undef XML_CONTEXT_BYTES - -/* Define to make parameter entity parsing functionality available. */ -#define XML_DTD - -/* Define to make XML Namespaces functionality available. */ -#define XML_NS - -/* Define to empty if `const' does not conform to ANSI C. */ -#undef const - -/* Define to `long' if does not define. */ -#define off_t long - -/* Define to `unsigned' if does not define. */ -#undef size_t - - -#endif /* ifndef MACCONFIG_H */ diff --git a/contrib/expat/lib/nametab.h b/contrib/expat/lib/nametab.h index b05e62c77a..63485446b9 100644 --- a/contrib/expat/lib/nametab.h +++ b/contrib/expat/lib/nametab.h @@ -1,150 +1,136 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + static const unsigned namingBitmap[] = { -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE, -0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, -0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF, -0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, -0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, -0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, -0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, -0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, -0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, -0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000, -0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, -0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, -0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003, -0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, -0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, -0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003, -0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, -0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, -0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003, -0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000, -0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, -0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, -0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB, -0x40000000, 0xF580C900, 0x00000007, 0x02010800, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, -0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, -0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, -0x00000000, 0x00004C40, 0x00000000, 0x00000000, -0x00000007, 0x00000000, 0x00000000, 0x00000000, -0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, -0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF, -0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000, -0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, -0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, -0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, -0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, -0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, -0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, -0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, -0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, -0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, -0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, -0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF, -0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, -0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, -0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, -0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, -0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, -0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80, -0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, -0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, -0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, -0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000, -0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, -0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, -0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, -0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x04000000, + 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, + 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFE00F, 0xFC31FFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, + 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF0003, 0xFFFFFFFF, + 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, + 0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, 0x00000000, 0x07FFFFFE, + 0x000007FE, 0xFFFE0000, 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, + 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, 0xFFF99FE0, 0x03C5FDFF, + 0xB0000000, 0x00030003, 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, + 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, 0xFFF99FE0, 0x23CDFDFF, + 0xB0000000, 0x00000003, 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, + 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, 0xFFFDDFE0, 0x03EFFDFF, + 0x40000000, 0x00000003, 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x000D7FFF, + 0x0000003F, 0x00000000, 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, 0x0007DAED, 0x50000000, + 0x82315001, 0x002C62AB, 0x40000000, 0xF580C900, 0x00000007, 0x02010800, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0FFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x03FFFFFF, 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, + 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, 0x00000000, 0x00004C40, + 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, + 0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, 0x001FFFFF, 0xFFFFFFFE, + 0xFFFFFFFF, 0x07FFFFFF, 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F, + 0x00000000, 0x00000000, 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, + 0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, 0x00FFFFFF, 0x00000000, + 0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, 0xFFFFD7C0, 0xFFFFFFFB, + 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, + 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, + 0x027FFFFF, 0xFFFFFFFE, 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, + 0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, 0xFFFFFFFF, 0x7CFFFFFF, + 0xFFEF7FFF, 0x03FF3DFF, 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, + 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, 0xFFF987E4, 0xD36DFDFF, + 0x5E003987, 0x001FFFC0, 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, + 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, 0xD63DC7EC, 0xC3BFC718, + 0x00803DC7, 0x0000FF80, 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, + 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3FFFDFF, + 0x00803DCF, 0x0000FFC3, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, 0xFEF02596, 0x3BFF6CAE, + 0x03FF3F5F, 0x00000000, 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, + 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, + 0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, 0x661FFFFF, 0xFFFFFFFE, + 0xFFFFFFFF, 0x77FFFFFF, }; static const unsigned char nmstrtPages[] = { -0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, -0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, -0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, -0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x00, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, }; static const unsigned char namePages[] = { -0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, -0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, -0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, -0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, 0x00, 0x1F, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x26, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, }; diff --git a/contrib/expat/lib/siphash.h b/contrib/expat/lib/siphash.h new file mode 100644 index 0000000000..303283ad2d --- /dev/null +++ b/contrib/expat/lib/siphash.h @@ -0,0 +1,393 @@ +/* ========================================================================== + * siphash.h - SipHash-2-4 in a single header file + * -------------------------------------------------------------------------- + * Derived by William Ahern from the reference implementation[1] published[2] + * by Jean-Philippe Aumasson and Daniel J. Berstein. + * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below. + * Licensed under the CC0 Public Domain Dedication license. + * + * 1. https://www.131002.net/siphash/siphash24.c + * 2. https://www.131002.net/siphash/ + * -------------------------------------------------------------------------- + * HISTORY: + * + * 2020-10-03 (Sebastian Pipping) + * - Drop support for Visual Studio 9.0/2008 and earlier + * + * 2019-08-03 (Sebastian Pipping) + * - Mark part of sip24_valid as to be excluded from clang-format + * - Re-format code using clang-format 9 + * + * 2018-07-08 (Anton Maklakov) + * - Add "fall through" markers for GCC's -Wimplicit-fallthrough + * + * 2017-11-03 (Sebastian Pipping) + * - Hide sip_tobin and sip_binof unless SIPHASH_TOBIN macro is defined + * + * 2017-07-25 (Vadim Zeitlin) + * - Fix use of SIPHASH_MAIN macro + * + * 2017-07-05 (Sebastian Pipping) + * - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++ + * - Add const qualifiers at two places + * - Ensure <=80 characters line length (assuming tab width 4) + * + * 2017-06-23 (Victor Stinner) + * - Address Win64 compile warnings + * + * 2017-06-18 (Sebastian Pipping) + * - Clarify license note in the header + * - Address C89 issues: + * - Stop using inline keyword (and let compiler decide) + * - Replace _Bool by int + * - Turn macro siphash24 into a function + * - Address invalid conversion (void pointer) by explicit cast + * - Address lack of stdint.h for Visual Studio 2003 to 2008 + * - Always expose sip24_valid (for self-tests) + * + * 2012-11-04 - Born. (William Ahern) + * -------------------------------------------------------------------------- + * USAGE: + * + * SipHash-2-4 takes as input two 64-bit words as the key, some number of + * message bytes, and outputs a 64-bit word as the message digest. This + * implementation employs two data structures: a struct sipkey for + * representing the key, and a struct siphash for representing the hash + * state. + * + * For converting a 16-byte unsigned char array to a key, use either the + * macro sip_keyof or the routine sip_tokey. The former instantiates a + * compound literal key, while the latter requires a key object as a + * parameter. + * + * unsigned char secret[16]; + * arc4random_buf(secret, sizeof secret); + * struct sipkey *key = sip_keyof(secret); + * + * For hashing a message, use either the convenience macro siphash24 or the + * routines sip24_init, sip24_update, and sip24_final. + * + * struct siphash state; + * void *msg; + * size_t len; + * uint64_t hash; + * + * sip24_init(&state, key); + * sip24_update(&state, msg, len); + * hash = sip24_final(&state); + * + * or + * + * hash = siphash24(msg, len, key); + * + * To convert the 64-bit hash value to a canonical 8-byte little-endian + * binary representation, use either the macro sip_binof or the routine + * sip_tobin. The former instantiates and returns a compound literal array, + * while the latter requires an array object as a parameter. + * -------------------------------------------------------------------------- + * NOTES: + * + * o Neither sip_keyof, sip_binof, nor siphash24 will work with compilers + * lacking compound literal support. Instead, you must use the lower-level + * interfaces which take as parameters the temporary state objects. + * + * o Uppercase macros may evaluate parameters more than once. Lowercase + * macros should not exhibit any such side effects. + * ========================================================================== + */ +#ifndef SIPHASH_H +#define SIPHASH_H + +#include /* size_t */ +#include /* uint64_t uint32_t uint8_t */ + +/* + * Workaround to not require a C++11 compiler for using ULL suffix + * if this code is included and compiled as C++; related GCC warning is: + * warning: use of C++11 long long integer constant [-Wlong-long] + */ +#define _SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low)) + +#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define SIP_U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v) >> 0); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define SIP_U64TO8_LE(p, v) \ + SIP_U32TO8_LE((p) + 0, (uint32_t)((v) >> 0)); \ + SIP_U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define SIP_U8TO64_LE(p) \ + (((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8) \ + | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) \ + | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \ + | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPHASH_INITIALIZER \ + { 0, 0, 0, 0, {0}, 0, 0 } + +struct siphash { + uint64_t v0, v1, v2, v3; + + unsigned char buf[8], *p; + uint64_t c; +}; /* struct siphash */ + +#define SIP_KEYLEN 16 + +struct sipkey { + uint64_t k[2]; +}; /* struct sipkey */ + +#define sip_keyof(k) sip_tokey(&(struct sipkey){{0}}, (k)) + +static struct sipkey * +sip_tokey(struct sipkey *key, const void *src) { + key->k[0] = SIP_U8TO64_LE((const unsigned char *)src); + key->k[1] = SIP_U8TO64_LE((const unsigned char *)src + 8); + return key; +} /* sip_tokey() */ + +#ifdef SIPHASH_TOBIN + +# define sip_binof(v) sip_tobin((unsigned char[8]){0}, (v)) + +static void * +sip_tobin(void *dst, uint64_t u64) { + SIP_U64TO8_LE((unsigned char *)dst, u64); + return dst; +} /* sip_tobin() */ + +#endif /* SIPHASH_TOBIN */ + +static void +sip_round(struct siphash *H, const int rounds) { + int i; + + for (i = 0; i < rounds; i++) { + H->v0 += H->v1; + H->v1 = SIP_ROTL(H->v1, 13); + H->v1 ^= H->v0; + H->v0 = SIP_ROTL(H->v0, 32); + + H->v2 += H->v3; + H->v3 = SIP_ROTL(H->v3, 16); + H->v3 ^= H->v2; + + H->v0 += H->v3; + H->v3 = SIP_ROTL(H->v3, 21); + H->v3 ^= H->v0; + + H->v2 += H->v1; + H->v1 = SIP_ROTL(H->v1, 17); + H->v1 ^= H->v2; + H->v2 = SIP_ROTL(H->v2, 32); + } +} /* sip_round() */ + +static struct siphash * +sip24_init(struct siphash *H, const struct sipkey *key) { + H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; + H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; + H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; + H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; + + H->p = H->buf; + H->c = 0; + + return H; +} /* sip24_init() */ + +#define sip_endof(a) (&(a)[sizeof(a) / sizeof *(a)]) + +static struct siphash * +sip24_update(struct siphash *H, const void *src, size_t len) { + const unsigned char *p = (const unsigned char *)src, *pe = p + len; + uint64_t m; + + do { + while (p < pe && H->p < sip_endof(H->buf)) + *H->p++ = *p++; + + if (H->p < sip_endof(H->buf)) + break; + + m = SIP_U8TO64_LE(H->buf); + H->v3 ^= m; + sip_round(H, 2); + H->v0 ^= m; + + H->p = H->buf; + H->c += 8; + } while (p < pe); + + return H; +} /* sip24_update() */ + +static uint64_t +sip24_final(struct siphash *H) { + const char left = (char)(H->p - H->buf); + uint64_t b = (H->c + left) << 56; + + switch (left) { + case 7: + b |= (uint64_t)H->buf[6] << 48; + /* fall through */ + case 6: + b |= (uint64_t)H->buf[5] << 40; + /* fall through */ + case 5: + b |= (uint64_t)H->buf[4] << 32; + /* fall through */ + case 4: + b |= (uint64_t)H->buf[3] << 24; + /* fall through */ + case 3: + b |= (uint64_t)H->buf[2] << 16; + /* fall through */ + case 2: + b |= (uint64_t)H->buf[1] << 8; + /* fall through */ + case 1: + b |= (uint64_t)H->buf[0] << 0; + /* fall through */ + case 0: + break; + } + + H->v3 ^= b; + sip_round(H, 2); + H->v0 ^= b; + H->v2 ^= 0xff; + sip_round(H, 4); + + return H->v0 ^ H->v1 ^ H->v2 ^ H->v3; +} /* sip24_final() */ + +static uint64_t +siphash24(const void *src, size_t len, const struct sipkey *key) { + struct siphash state = SIPHASH_INITIALIZER; + return sip24_final(sip24_update(sip24_init(&state, key), src, len)); +} /* siphash24() */ + +/* + * SipHash-2-4 output with + * k = 00 01 02 ... + * and + * in = (empty string) + * in = 00 (1 byte) + * in = 00 01 (2 bytes) + * in = 00 01 02 (3 bytes) + * ... + * in = 00 01 02 ... 3e (63 bytes) + */ +static int +sip24_valid(void) { + /* clang-format off */ + static const unsigned char vectors[64][8] = { + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, } + }; + /* clang-format on */ + + unsigned char in[64]; + struct sipkey k; + size_t i; + + sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011" + "\012\013\014\015\016\017"); + + for (i = 0; i < sizeof in; ++i) { + in[i] = (unsigned char)i; + + if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i])) + return 0; + } + + return 1; +} /* sip24_valid() */ + +#ifdef SIPHASH_MAIN + +# include + +int +main(void) { + const int ok = sip24_valid(); + + if (ok) + puts("OK"); + else + puts("FAIL"); + + return ! ok; +} /* main() */ + +#endif /* SIPHASH_MAIN */ + +#endif /* SIPHASH_H */ diff --git a/contrib/expat/lib/utf8tab.h b/contrib/expat/lib/utf8tab.h index 7bb3e77603..88efcf91cc 100644 --- a/contrib/expat/lib/utf8tab.h +++ b/contrib/expat/lib/utf8tab.h @@ -1,37 +1,66 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. -*/ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ /* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, -/* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, + /* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, + /* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, diff --git a/contrib/expat/lib/winconfig.h b/contrib/expat/lib/winconfig.h index c1b791d62d..2ecd61b5b9 100644 --- a/contrib/expat/lib/winconfig.h +++ b/contrib/expat/lib/winconfig.h @@ -1,10 +1,35 @@ -/*================================================================ -** Copyright 2000, Clark Cooper -** All rights reserved. -** -** This is free software. You are permitted to copy, distribute, or modify -** it under the terms of the MIT/X license (contained in the COPYING file -** with this distribution.) +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2005 Karl Waclawek + Copyright (c) 2017-2021 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef WINCONFIG_H @@ -17,14 +42,4 @@ #include #include -#define XML_NS 1 -#define XML_DTD 1 -#define XML_CONTEXT_BYTES 1024 - -/* we will assume all Windows platforms are little endian */ -#define BYTEORDER 1234 - -/* Windows has memmove() available. */ -#define HAVE_MEMMOVE - #endif /* ndef WINCONFIG_H */ diff --git a/contrib/expat/lib/xmlparse.c b/contrib/expat/lib/xmlparse.c index f35aa36ba8..b6c2eca975 100644 --- a/contrib/expat/lib/xmlparse.c +++ b/contrib/expat/lib/xmlparse.c @@ -1,88 +1,200 @@ -/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+) + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2006 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016 Eric Rahm + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016 Gaurav + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2016 Gustavo Grieco + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Ed Schouten + Copyright (c) 2017-2022 Rhodri James + Copyright (c) 2017 Václav Slavík + Copyright (c) 2017 Viktor Szakats + Copyright (c) 2017 Chanho Park + Copyright (c) 2017 Rolf Eike Beer + Copyright (c) 2017 Hans Wennborg + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Marco Maggi + Copyright (c) 2018 Mariusz Zaborski + Copyright (c) 2019 David Loffredo + Copyright (c) 2019-2020 Ben Wagner + Copyright (c) 2019 Vadim Zeitlin + Copyright (c) 2021 Dong-hee Na + Copyright (c) 2022 Samanta Navarro + Copyright (c) 2022 Jeffrey Walton + Copyright (c) 2022 Jann Horn + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include /* memset(), memcpy() */ -#include -#include /* UINT_MAX */ -#include /* time() */ - #define XML_BUILDING_EXPAT 1 -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" -#elif defined(HAVE_EXPAT_CONFIG_H) #include -#endif /* ndef COMPILED_FROM_DSP */ + +#if ! defined(_GNU_SOURCE) +# define _GNU_SOURCE 1 /* syscall prototype */ +#endif + +#ifdef _WIN32 +/* force stdlib to define rand_s() */ +# if ! defined(_CRT_RAND_S) +# define _CRT_RAND_S +# endif +#endif + +#include +#include /* memset(), memcpy() */ +#include +#include /* UINT_MAX */ +#include /* fprintf */ +#include /* getenv, rand_s */ +#include /* uintptr_t */ +#include /* isnan */ + +#ifdef _WIN32 +# define getpid GetCurrentProcessId +#else +# include /* gettimeofday() */ +# include /* getpid() */ +# include /* getpid() */ +# include /* O_RDONLY */ +# include +#endif + +#ifdef _WIN32 +# include "winconfig.h" +#endif #include "ascii.h" #include "expat.h" +#include "siphash.h" + +#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) +# if defined(HAVE_GETRANDOM) +# include /* getrandom */ +# else +# include /* syscall */ +# include /* SYS_getrandom */ +# endif +# if ! defined(GRND_NONBLOCK) +# define GRND_NONBLOCK 0x0001 +# endif /* defined(GRND_NONBLOCK) */ +#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +#if defined(HAVE_LIBBSD) \ + && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) +# include +#endif + +#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif + +#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ + && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ + && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ + && ! defined(XML_POOR_ENTROPY) +# error You do not have support for any sources of high quality entropy \ + enabled. For end user security, that is probably not what you want. \ + \ + Your options include: \ + * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ + * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ + * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ + * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ + * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ + * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ + * Windows >=Vista (rand_s): _WIN32. \ + \ + If insist on not using any of these, bypass this error by defining \ + XML_POOR_ENTROPY; you have been warned. \ + \ + If you have reasons to patch this detection code away or need changes \ + to the build system, please open a bug. Thank you! +#endif #ifdef XML_UNICODE -#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX -#define XmlConvert XmlUtf16Convert -#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding -#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS -#define XmlEncode XmlUtf16Encode -/* Using pointer subtraction to convert to integer type. */ -#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1)) +# define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX +# define XmlConvert XmlUtf16Convert +# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding +# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS +# define XmlEncode XmlUtf16Encode +# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) typedef unsigned short ICHAR; #else -#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX -#define XmlConvert XmlUtf8Convert -#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding -#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS -#define XmlEncode XmlUtf8Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf8) +# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX +# define XmlConvert XmlUtf8Convert +# define XmlGetInternalEncoding XmlGetUtf8InternalEncoding +# define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS +# define XmlEncode XmlUtf8Encode +# define MUST_CONVERT(enc, s) (! (enc)->isUtf8) typedef char ICHAR; #endif - #ifndef XML_NS -#define XmlInitEncodingNS XmlInitEncoding -#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding -#undef XmlGetInternalEncodingNS -#define XmlGetInternalEncodingNS XmlGetInternalEncoding -#define XmlParseXmlDeclNS XmlParseXmlDecl +# define XmlInitEncodingNS XmlInitEncoding +# define XmlInitUnknownEncodingNS XmlInitUnknownEncoding +# undef XmlGetInternalEncodingNS +# define XmlGetInternalEncodingNS XmlGetInternalEncoding +# define XmlParseXmlDeclNS XmlParseXmlDecl #endif #ifdef XML_UNICODE -#ifdef XML_UNICODE_WCHAR_T -#define XML_T(x) (const wchar_t)x -#define XML_L(x) L ## x -#else -#define XML_T(x) (const unsigned short)x -#define XML_L(x) x -#endif +# ifdef XML_UNICODE_WCHAR_T +# define XML_T(x) (const wchar_t) x +# define XML_L(x) L##x +# else +# define XML_T(x) (const unsigned short)x +# define XML_L(x) x +# endif #else -#define XML_T(x) x -#define XML_L(x) x +# define XML_T(x) x +# define XML_L(x) x #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ -#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) +#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) -/* Handle the case where memmove() doesn't exist. */ -#ifndef HAVE_MEMMOVE -#ifdef HAVE_BCOPY -#define memmove(d,s,l) bcopy((s),(d),(l)) -#else -#error memmove does not exist on this platform, nor is a substitute available -#endif /* HAVE_BCOPY */ -#endif /* HAVE_MEMMOVE */ +/* Do safe (NULL-aware) pointer arithmetic */ +#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) #include "internal.h" #include "xmltok.h" @@ -102,17 +214,9 @@ typedef struct { const XML_Memory_Handling_Suite *mem; } HASH_TABLE; -/* Basic character hash algorithm, taken from Python's string hash: - h = h * 1000003 ^ character, the constant being a prime number. +static size_t keylen(KEY s); -*/ -#ifdef XML_UNICODE -#define CHAR_HASH(h, c) \ - (((h) * 0xF4243) ^ (unsigned short)(c)) -#else -#define CHAR_HASH(h, c) \ - (((h) * 0xF4243) ^ (unsigned char)(c)) -#endif +static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); /* For probing (after a collision) we need a step size relative prime to the hash table size, which is a power of 2. We use double-hashing, @@ -122,9 +226,9 @@ typedef struct { We limit the maximum step size to table->size / 4 (mask >> 2) and make it odd, since odd numbers are always relative prime to a power of 2. */ -#define SECOND_HASH(hash, mask, power) \ - ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) -#define PROBE_STEP(hash, mask, power) \ +#define SECOND_HASH(hash, mask, power) \ + ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) +#define PROBE_STEP(hash, mask, power) \ ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) typedef struct { @@ -132,7 +236,7 @@ typedef struct { NAMED **end; } HASH_TABLE_ITER; -#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ +#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 #define INIT_ATTS_VERSION 0xFFFFFFFF @@ -179,20 +283,20 @@ typedef struct { TAG objects in a free list. */ typedef struct tag { - struct tag *parent; /* parent of this element */ - const char *rawName; /* tagName in the original encoding */ + struct tag *parent; /* parent of this element */ + const char *rawName; /* tagName in the original encoding */ int rawNameLength; - TAG_NAME name; /* tagName in the API encoding */ - char *buf; /* buffer for name components */ - char *bufEnd; /* end of the buffer */ + TAG_NAME name; /* tagName in the API encoding */ + char *buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; typedef struct { const XML_Char *name; const XML_Char *textPtr; - int textLen; /* length in XML_Chars */ - int processed; /* # of processed bytes - when suspended */ + int textLen; /* length in XML_Chars */ + int processed; /* # of processed bytes - when suspended */ const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; @@ -203,13 +307,13 @@ typedef struct { } ENTITY; typedef struct { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - const XML_Char * name; - int firstchild; - int lastchild; - int childcnt; - int nextsib; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char *name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; } CONTENT_SCAFFOLD; #define INIT_SCAFFOLD_ELEMENTS 32 @@ -297,10 +401,33 @@ typedef struct open_internal_entity { XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; -typedef enum XML_Error PTRCALL Processor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr); +enum XML_Account { + XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ + XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity + expansion */ + XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ +}; + +#ifdef XML_DTD +typedef unsigned long long XmlBigCount; +typedef struct accounting { + XmlBigCount countBytesDirect; + XmlBigCount countBytesIndirect; + int debugLevel; + float maximumAmplificationFactor; // >=1.0 + unsigned long long activationThresholdBytes; +} ACCOUNTING; + +typedef struct entity_stats { + unsigned int countEverOpened; + unsigned int currentDepth; + unsigned int maximumDepthSeen; + int debugLevel; +} ENTITY_STATS; +#endif /* XML_DTD */ + +typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, + const char *end, const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; @@ -321,128 +448,147 @@ static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; static Processor internalEntityProcessor; -static enum XML_Error -handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); -static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next); -static enum XML_Error -initializeEncoding(XML_Parser parser); -static enum XML_Error -doProlog(XML_Parser parser, const ENCODING *enc, const char *s, - const char *end, int tok, const char *next, const char **nextPtr, - XML_Bool haveMore); -static enum XML_Error -processInternalEntity(XML_Parser parser, ENTITY *entity, - XML_Bool betweenDecl); -static enum XML_Error -doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, - const char *start, const char *end, const char **endPtr, - XML_Bool haveMore); -static enum XML_Error -doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, - const char *end, const char **nextPtr, XML_Bool haveMore); +static enum XML_Error handleUnknownEncoding(XML_Parser parser, + const XML_Char *encodingName); +static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, + const char *s, const char *next); +static enum XML_Error initializeEncoding(XML_Parser parser); +static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, + const char *s, const char *end, int tok, + const char *next, const char **nextPtr, + XML_Bool haveMore, XML_Bool allowClosingDoctype, + enum XML_Account account); +static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl); +static enum XML_Error doContent(XML_Parser parser, int startTagLevel, + const ENCODING *enc, const char *start, + const char *end, const char **endPtr, + XML_Bool haveMore, enum XML_Account account); +static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore, + enum XML_Account account); #ifdef XML_DTD -static enum XML_Error -doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, - const char *end, const char **nextPtr, XML_Bool haveMore); +static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ -static enum XML_Error -storeAtts(XML_Parser parser, const ENCODING *, const char *s, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr); -static enum XML_Error -addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, - const XML_Char *uri, BINDING **bindingsPtr); -static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, - XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser); -static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, - const char *, const char *, STRING_POOL *); -static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, - const char *, const char *, STRING_POOL *); -static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); -static int -setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); -static enum XML_Error -storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); -static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end); -static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); -static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, - const char *end); - -static const XML_Char * getContext(XML_Parser parser); -static XML_Bool -setContext(XML_Parser parser, const XML_Char *context); +static void freeBindings(XML_Parser parser, BINDING *bindings); +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, + const char *s, TAG_NAME *tagNamePtr, + BINDING **bindingsPtr, + enum XML_Account account); +static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, + const ATTRIBUTE_ID *attId, const XML_Char *uri, + BINDING **bindingsPtr); +static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, + XML_Bool isId, const XML_Char *dfltValue, + XML_Parser parser); +static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); +static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end, + enum XML_Account account); +static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static int reportComment(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static void reportDefault(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); + +static const XML_Char *getContext(XML_Parser parser); +static XML_Bool setContext(XML_Parser parser, const XML_Char *context); static void FASTCALL normalizePublicId(XML_Char *s); -static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms); -/* do not call if parentParser != NULL */ +static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); +/* do not call if m_parentParser != NULL */ static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); -static void -dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); -static int -dtdCopy(XML_Parser oldParser, - DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); -static int -copyEntityTable(XML_Parser oldParser, - HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); -static NAMED * -lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); -static void FASTCALL -hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); +static void dtdDestroy(DTD *p, XML_Bool isDocEntity, + const XML_Memory_Handling_Suite *ms); +static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + const XML_Memory_Handling_Suite *ms); +static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, + const HASH_TABLE *); +static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, + size_t createSize); +static void FASTCALL hashTableInit(HASH_TABLE *, + const XML_Memory_Handling_Suite *ms); static void FASTCALL hashTableClear(HASH_TABLE *); static void FASTCALL hashTableDestroy(HASH_TABLE *); -static void FASTCALL -hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *); +static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); +static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); -static void FASTCALL -poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolInit(STRING_POOL *, + const XML_Memory_Handling_Suite *ms); static void FASTCALL poolClear(STRING_POOL *); static void FASTCALL poolDestroy(STRING_POOL *); -static XML_Char * -poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); -static XML_Char * -poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); +static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); -static const XML_Char * FASTCALL -poolCopyString(STRING_POOL *pool, const XML_Char *s); -static const XML_Char * -poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); -static const XML_Char * FASTCALL -poolAppendString(STRING_POOL *pool, const XML_Char *s); +static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, + const XML_Char *s); +static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, + int n); +static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, + const XML_Char *s); static int FASTCALL nextScaffoldPart(XML_Parser parser); -static XML_Content * build_model(XML_Parser parser); -static ELEMENT_TYPE * -getElementType(XML_Parser parser, const ENCODING *enc, - const char *ptr, const char *end); +static XML_Content *build_model(XML_Parser parser); +static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end); -static unsigned long generate_hash_secret_salt(void); +static XML_Char *copyString(const XML_Char *s, + const XML_Memory_Handling_Suite *memsuite); + +static unsigned long generate_hash_secret_salt(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser); -static XML_Parser -parserCreate(const XML_Char *encodingName, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, - DTD *dtd); +static XML_Parser parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, DTD *dtd); -static void -parserInit(XML_Parser parser, const XML_Char *encodingName); +static void parserInit(XML_Parser parser, const XML_Char *encodingName); + +#ifdef XML_DTD +static float accountingGetCurrentAmplification(XML_Parser rootParser); +static void accountingReportStats(XML_Parser originParser, const char *epilog); +static void accountingOnAbort(XML_Parser originParser); +static void accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, + const char *before, const char *after, + ptrdiff_t bytesMore, int source_line, + enum XML_Account account); +static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, + const char *before, const char *after, + int source_line, + enum XML_Account account); + +static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, + const char *action, int sourceLine); +static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, + int sourceLine); +static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, + int sourceLine); + +static XML_Parser getRootParserOf(XML_Parser parser, + unsigned int *outLevelDiff); +#endif /* XML_DTD */ + +static unsigned long getDebugLevel(const char *variableName, + unsigned long defaultDebugLevel); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -451,13 +597,13 @@ parserInit(XML_Parser parser, const XML_Char *encodingName); #define poolLastChar(pool) (((pool)->ptr)[-1]) #define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolFinish(pool) ((pool)->start = (pool)->ptr) -#define poolAppendChar(pool, c) \ - (((pool)->ptr == (pool)->end && !poolGrow(pool)) \ - ? 0 \ - : ((*((pool)->ptr)++ = c), 1)) +#define poolAppendChar(pool, c) \ + (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) struct XML_ParserStruct { - /* The first member must be userData so that the XML_GetUserData + /* The first member must be m_userData so that the XML_GetUserData macro works. */ void *m_userData; void *m_handlerArg; @@ -467,7 +613,7 @@ struct XML_ParserStruct { const char *m_bufferPtr; /* past last character to be parsed */ char *m_bufferEnd; - /* allocated end of buffer */ + /* allocated end of m_buffer */ const char *m_bufferLim; XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; @@ -505,7 +651,7 @@ struct XML_ParserStruct { void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; - void (XMLCALL *m_unknownEncodingRelease)(void *); + void(XMLCALL *m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; @@ -557,190 +703,288 @@ struct XML_ParserStruct { enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; +#ifdef XML_DTD + ACCOUNTING m_accounting; + ENTITY_STATS m_entity_stats; +#endif }; -#define MALLOC(s) (parser->m_mem.malloc_fcn((s))) -#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s))) -#define FREE(p) (parser->m_mem.free_fcn((p))) - -#define userData (parser->m_userData) -#define handlerArg (parser->m_handlerArg) -#define startElementHandler (parser->m_startElementHandler) -#define endElementHandler (parser->m_endElementHandler) -#define characterDataHandler (parser->m_characterDataHandler) -#define processingInstructionHandler \ - (parser->m_processingInstructionHandler) -#define commentHandler (parser->m_commentHandler) -#define startCdataSectionHandler \ - (parser->m_startCdataSectionHandler) -#define endCdataSectionHandler (parser->m_endCdataSectionHandler) -#define defaultHandler (parser->m_defaultHandler) -#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler) -#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler) -#define unparsedEntityDeclHandler \ - (parser->m_unparsedEntityDeclHandler) -#define notationDeclHandler (parser->m_notationDeclHandler) -#define startNamespaceDeclHandler \ - (parser->m_startNamespaceDeclHandler) -#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler) -#define notStandaloneHandler (parser->m_notStandaloneHandler) -#define externalEntityRefHandler \ - (parser->m_externalEntityRefHandler) -#define externalEntityRefHandlerArg \ - (parser->m_externalEntityRefHandlerArg) -#define internalEntityRefHandler \ - (parser->m_internalEntityRefHandler) -#define skippedEntityHandler (parser->m_skippedEntityHandler) -#define unknownEncodingHandler (parser->m_unknownEncodingHandler) -#define elementDeclHandler (parser->m_elementDeclHandler) -#define attlistDeclHandler (parser->m_attlistDeclHandler) -#define entityDeclHandler (parser->m_entityDeclHandler) -#define xmlDeclHandler (parser->m_xmlDeclHandler) -#define encoding (parser->m_encoding) -#define initEncoding (parser->m_initEncoding) -#define internalEncoding (parser->m_internalEncoding) -#define unknownEncodingMem (parser->m_unknownEncodingMem) -#define unknownEncodingData (parser->m_unknownEncodingData) -#define unknownEncodingHandlerData \ - (parser->m_unknownEncodingHandlerData) -#define unknownEncodingRelease (parser->m_unknownEncodingRelease) -#define protocolEncodingName (parser->m_protocolEncodingName) -#define ns (parser->m_ns) -#define ns_triplets (parser->m_ns_triplets) -#define prologState (parser->m_prologState) -#define processor (parser->m_processor) -#define errorCode (parser->m_errorCode) -#define eventPtr (parser->m_eventPtr) -#define eventEndPtr (parser->m_eventEndPtr) -#define positionPtr (parser->m_positionPtr) -#define position (parser->m_position) -#define openInternalEntities (parser->m_openInternalEntities) -#define freeInternalEntities (parser->m_freeInternalEntities) -#define defaultExpandInternalEntities \ - (parser->m_defaultExpandInternalEntities) -#define tagLevel (parser->m_tagLevel) -#define buffer (parser->m_buffer) -#define bufferPtr (parser->m_bufferPtr) -#define bufferEnd (parser->m_bufferEnd) -#define parseEndByteIndex (parser->m_parseEndByteIndex) -#define parseEndPtr (parser->m_parseEndPtr) -#define bufferLim (parser->m_bufferLim) -#define dataBuf (parser->m_dataBuf) -#define dataBufEnd (parser->m_dataBufEnd) -#define _dtd (parser->m_dtd) -#define curBase (parser->m_curBase) -#define declEntity (parser->m_declEntity) -#define doctypeName (parser->m_doctypeName) -#define doctypeSysid (parser->m_doctypeSysid) -#define doctypePubid (parser->m_doctypePubid) -#define declAttributeType (parser->m_declAttributeType) -#define declNotationName (parser->m_declNotationName) -#define declNotationPublicId (parser->m_declNotationPublicId) -#define declElementType (parser->m_declElementType) -#define declAttributeId (parser->m_declAttributeId) -#define declAttributeIsCdata (parser->m_declAttributeIsCdata) -#define declAttributeIsId (parser->m_declAttributeIsId) -#define freeTagList (parser->m_freeTagList) -#define freeBindingList (parser->m_freeBindingList) -#define inheritedBindings (parser->m_inheritedBindings) -#define tagStack (parser->m_tagStack) -#define atts (parser->m_atts) -#define attsSize (parser->m_attsSize) -#define nSpecifiedAtts (parser->m_nSpecifiedAtts) -#define idAttIndex (parser->m_idAttIndex) -#define nsAtts (parser->m_nsAtts) -#define nsAttsVersion (parser->m_nsAttsVersion) -#define nsAttsPower (parser->m_nsAttsPower) -#define attInfo (parser->m_attInfo) -#define tempPool (parser->m_tempPool) -#define temp2Pool (parser->m_temp2Pool) -#define groupConnector (parser->m_groupConnector) -#define groupSize (parser->m_groupSize) -#define namespaceSeparator (parser->m_namespaceSeparator) -#define parentParser (parser->m_parentParser) -#define ps_parsing (parser->m_parsingStatus.parsing) -#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer) -#ifdef XML_DTD -#define isParamEntity (parser->m_isParamEntity) -#define useForeignDTD (parser->m_useForeignDTD) -#define paramEntityParsing (parser->m_paramEntityParsing) -#endif /* XML_DTD */ -#define hash_secret_salt (parser->m_hash_secret_salt) +#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +#define FREE(parser, p) (parser->m_mem.free_fcn((p))) XML_Parser XMLCALL -XML_ParserCreate(const XML_Char *encodingName) -{ +XML_ParserCreate(const XML_Char *encodingName) { return XML_ParserCreate_MM(encodingName, NULL, NULL); } XML_Parser XMLCALL -XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) -{ - XML_Char tmp[2]; - *tmp = nsSep; +XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { + XML_Char tmp[2] = {nsSep, 0}; return XML_ParserCreate_MM(encodingName, NULL, tmp); } -static const XML_Char implicitContext[] = { - ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, - ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, - ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, - ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, - ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, - ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0' -}; +// "xml=http://www.w3.org/XML/1998/namespace" +static const XML_Char implicitContext[] + = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, + ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, + ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, + ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, + ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, + ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, + ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, + '\0'}; -static unsigned long -generate_hash_secret_salt(void) -{ - unsigned int seed = time(NULL) % UINT_MAX; - srand(seed); - return rand(); -} - -static XML_Bool /* only valid for root parser */ -startParsing(XML_Parser parser) -{ - /* hash functions must be initialized before setContext() is called */ - if (hash_secret_salt == 0) - hash_secret_salt = generate_hash_secret_salt(); - if (ns) { - /* implicit context only set for root parser, since child - parsers (i.e. external entity parsers) will inherit it - */ - return setContext(parser, implicitContext); +/* To avoid warnings about unused functions: */ +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + +# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + +/* Obtain entropy on Linux 3.17+ */ +static int +writeRandomBytes_getrandom_nonblock(void *target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + const unsigned int getrandomFlags = GRND_NONBLOCK; + + do { + void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const int bytesWrittenMore = +# if defined(HAVE_GETRANDOM) + getrandom(currentTarget, bytesToWrite, getrandomFlags); +# else + syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); +# endif + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; } - return XML_TRUE; + } while (! success && (errno == EINTR)); + + return success; +} + +# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + +/* Extract entropy from /dev/urandom */ +static int +writeRandomBytes_dev_urandom(void *target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + + const int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + return 0; + } + + do { + void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + close(fd); + return success; +} + +# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + +#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) + +static void +writeRandomBytes_arc4random(void *target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + const uint32_t random32 = arc4random(); + size_t i = 0; + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } +} + +#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ + +#ifdef _WIN32 + +/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), + as it didn't declare it in its header prior to version 5.3.0 of its + runtime package (mingwrt, containing stdlib.h). The upstream fix + was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ +# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ + && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) +__declspec(dllimport) int rand_s(unsigned int *); +# endif + +/* Obtain entropy on Windows using the rand_s() function which + * generates cryptographically secure random numbers. Internally it + * uses RtlGenRandom API which is present in Windows XP and later. + */ +static int +writeRandomBytes_rand_s(void *target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + unsigned int random32 = 0; + size_t i = 0; + + if (rand_s(&random32)) + return 0; /* failure */ + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } + return 1; /* success */ +} + +#endif /* _WIN32 */ + +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + +static unsigned long +gather_time_entropy(void) { +# ifdef _WIN32 + FILETIME ft; + GetSystemTimeAsFileTime(&ft); /* never fails */ + return ft.dwHighDateTime ^ ft.dwLowDateTime; +# else + struct timeval tv; + int gettimeofday_res; + + gettimeofday_res = gettimeofday(&tv, NULL); + +# if defined(NDEBUG) + (void)gettimeofday_res; +# else + assert(gettimeofday_res == 0); +# endif /* defined(NDEBUG) */ + + /* Microseconds time is <20 bits entropy */ + return tv.tv_usec; +# endif +} + +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + +static unsigned long +ENTROPY_DEBUG(const char *label, unsigned long entropy) { + if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { + fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, + (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); + } + return entropy; +} + +static unsigned long +generate_hash_secret_salt(XML_Parser parser) { + unsigned long entropy; + (void)parser; + + /* "Failproof" high quality providers: */ +#if defined(HAVE_ARC4RANDOM_BUF) + arc4random_buf(&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random_buf", entropy); +#elif defined(HAVE_ARC4RANDOM) + writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random", entropy); +#else + /* Try high quality providers first .. */ +# ifdef _WIN32 + if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("rand_s", entropy); + } +# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("getrandom", entropy); + } +# endif +# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("/dev/urandom", entropy); + } +# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + /* .. and self-made low quality for backup: */ + + /* Process ID is 0 bits entropy if attacker has local access */ + entropy = gather_time_entropy() ^ getpid(); + + /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ + if (sizeof(unsigned long) == 4) { + return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); + } else { + return ENTROPY_DEBUG("fallback(8)", + entropy * (unsigned long)2305843009213693951ULL); + } +#endif +} + +static unsigned long +get_hash_secret_salt(XML_Parser parser) { + if (parser->m_parentParser != NULL) + return get_hash_secret_salt(parser->m_parentParser); + return parser->m_hash_secret_salt; +} + +static XML_Bool /* only valid for root parser */ +startParsing(XML_Parser parser) { + /* hash functions must be initialized before setContext() is called */ + if (parser->m_hash_secret_salt == 0) + parser->m_hash_secret_salt = generate_hash_secret_salt(parser); + if (parser->m_ns) { + /* implicit context only set for root parser, since child + parsers (i.e. external entity parsers) will inherit it + */ + return setContext(parser, implicitContext); + } + return XML_TRUE; } XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep) -{ + const XML_Char *nameSep) { return parserCreate(encodingName, memsuite, nameSep, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, - DTD *dtd) -{ + const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, + DTD *dtd) { XML_Parser parser; if (memsuite) { XML_Memory_Handling_Suite *mtemp; - parser = (XML_Parser) - memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); + parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; mtemp->realloc_fcn = memsuite->realloc_fcn; mtemp->free_fcn = memsuite->free_fcn; } - } - else { + } else { XML_Memory_Handling_Suite *mtemp; parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); if (parser != NULL) { @@ -751,287 +995,356 @@ parserCreate(const XML_Char *encodingName, } } - if (!parser) + if (! parser) return parser; - buffer = NULL; - bufferLim = NULL; + parser->m_buffer = NULL; + parser->m_bufferLim = NULL; - attsSize = INIT_ATTS_SIZE; - atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE)); - if (atts == NULL) { - FREE(parser); + parser->m_attsSize = INIT_ATTS_SIZE; + parser->m_atts + = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); + if (parser->m_atts == NULL) { + FREE(parser, parser); return NULL; } #ifdef XML_ATTR_INFO - attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo)); - if (attInfo == NULL) { - FREE(atts); - FREE(parser); + parser->m_attInfo = (XML_AttrInfo *)MALLOC( + parser, parser->m_attsSize * sizeof(XML_AttrInfo)); + if (parser->m_attInfo == NULL) { + FREE(parser, parser->m_atts); + FREE(parser, parser); return NULL; } #endif - dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); - if (dataBuf == NULL) { - FREE(atts); + parser->m_dataBuf + = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + if (parser->m_dataBuf == NULL) { + FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO - FREE(attInfo); + FREE(parser, parser->m_attInfo); #endif - FREE(parser); + FREE(parser, parser); return NULL; } - dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; + parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; if (dtd) - _dtd = dtd; + parser->m_dtd = dtd; else { - _dtd = dtdCreate(&parser->m_mem); - if (_dtd == NULL) { - FREE(dataBuf); - FREE(atts); + parser->m_dtd = dtdCreate(&parser->m_mem); + if (parser->m_dtd == NULL) { + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO - FREE(attInfo); + FREE(parser, parser->m_attInfo); #endif - FREE(parser); + FREE(parser, parser); return NULL; } } - freeBindingList = NULL; - freeTagList = NULL; - freeInternalEntities = NULL; + parser->m_freeBindingList = NULL; + parser->m_freeTagList = NULL; + parser->m_freeInternalEntities = NULL; + + parser->m_groupSize = 0; + parser->m_groupConnector = NULL; - groupSize = 0; - groupConnector = NULL; + parser->m_unknownEncodingHandler = NULL; + parser->m_unknownEncodingHandlerData = NULL; - unknownEncodingHandler = NULL; - unknownEncodingHandlerData = NULL; + parser->m_namespaceSeparator = ASCII_EXCL; + parser->m_ns = XML_FALSE; + parser->m_ns_triplets = XML_FALSE; - namespaceSeparator = ASCII_EXCL; - ns = XML_FALSE; - ns_triplets = XML_FALSE; + parser->m_nsAtts = NULL; + parser->m_nsAttsVersion = 0; + parser->m_nsAttsPower = 0; - nsAtts = NULL; - nsAttsVersion = 0; - nsAttsPower = 0; + parser->m_protocolEncodingName = NULL; - poolInit(&tempPool, &(parser->m_mem)); - poolInit(&temp2Pool, &(parser->m_mem)); + poolInit(&parser->m_tempPool, &(parser->m_mem)); + poolInit(&parser->m_temp2Pool, &(parser->m_mem)); parserInit(parser, encodingName); - if (encodingName && !protocolEncodingName) { + if (encodingName && ! parser->m_protocolEncodingName) { + if (dtd) { + // We need to stop the upcoming call to XML_ParserFree from happily + // destroying parser->m_dtd because the DTD is shared with the parent + // parser and the only guard that keeps XML_ParserFree from destroying + // parser->m_dtd is parser->m_isParamEntity but it will be set to + // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all). + parser->m_dtd = NULL; + } XML_ParserFree(parser); return NULL; } if (nameSep) { - ns = XML_TRUE; - internalEncoding = XmlGetInternalEncodingNS(); - namespaceSeparator = *nameSep; - } - else { - internalEncoding = XmlGetInternalEncoding(); + parser->m_ns = XML_TRUE; + parser->m_internalEncoding = XmlGetInternalEncodingNS(); + parser->m_namespaceSeparator = *nameSep; + } else { + parser->m_internalEncoding = XmlGetInternalEncoding(); } return parser; } static void -parserInit(XML_Parser parser, const XML_Char *encodingName) -{ - processor = prologInitProcessor; - XmlPrologStateInit(&prologState); - protocolEncodingName = (encodingName != NULL - ? poolCopyString(&tempPool, encodingName) - : NULL); - curBase = NULL; - XmlInitEncoding(&initEncoding, &encoding, 0); - userData = NULL; - handlerArg = NULL; - startElementHandler = NULL; - endElementHandler = NULL; - characterDataHandler = NULL; - processingInstructionHandler = NULL; - commentHandler = NULL; - startCdataSectionHandler = NULL; - endCdataSectionHandler = NULL; - defaultHandler = NULL; - startDoctypeDeclHandler = NULL; - endDoctypeDeclHandler = NULL; - unparsedEntityDeclHandler = NULL; - notationDeclHandler = NULL; - startNamespaceDeclHandler = NULL; - endNamespaceDeclHandler = NULL; - notStandaloneHandler = NULL; - externalEntityRefHandler = NULL; - externalEntityRefHandlerArg = parser; - skippedEntityHandler = NULL; - elementDeclHandler = NULL; - attlistDeclHandler = NULL; - entityDeclHandler = NULL; - xmlDeclHandler = NULL; - bufferPtr = buffer; - bufferEnd = buffer; - parseEndByteIndex = 0; - parseEndPtr = NULL; - declElementType = NULL; - declAttributeId = NULL; - declEntity = NULL; - doctypeName = NULL; - doctypeSysid = NULL; - doctypePubid = NULL; - declAttributeType = NULL; - declNotationName = NULL; - declNotationPublicId = NULL; - declAttributeIsCdata = XML_FALSE; - declAttributeIsId = XML_FALSE; - memset(&position, 0, sizeof(POSITION)); - errorCode = XML_ERROR_NONE; - eventPtr = NULL; - eventEndPtr = NULL; - positionPtr = NULL; - openInternalEntities = NULL; - defaultExpandInternalEntities = XML_TRUE; - tagLevel = 0; - tagStack = NULL; - inheritedBindings = NULL; - nSpecifiedAtts = 0; - unknownEncodingMem = NULL; - unknownEncodingRelease = NULL; - unknownEncodingData = NULL; - parentParser = NULL; - ps_parsing = XML_INITIALIZED; +parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_processor = prologInitProcessor; + XmlPrologStateInit(&parser->m_prologState); + if (encodingName != NULL) { + parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + } + parser->m_curBase = NULL; + XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); + parser->m_userData = NULL; + parser->m_handlerArg = NULL; + parser->m_startElementHandler = NULL; + parser->m_endElementHandler = NULL; + parser->m_characterDataHandler = NULL; + parser->m_processingInstructionHandler = NULL; + parser->m_commentHandler = NULL; + parser->m_startCdataSectionHandler = NULL; + parser->m_endCdataSectionHandler = NULL; + parser->m_defaultHandler = NULL; + parser->m_startDoctypeDeclHandler = NULL; + parser->m_endDoctypeDeclHandler = NULL; + parser->m_unparsedEntityDeclHandler = NULL; + parser->m_notationDeclHandler = NULL; + parser->m_startNamespaceDeclHandler = NULL; + parser->m_endNamespaceDeclHandler = NULL; + parser->m_notStandaloneHandler = NULL; + parser->m_externalEntityRefHandler = NULL; + parser->m_externalEntityRefHandlerArg = parser; + parser->m_skippedEntityHandler = NULL; + parser->m_elementDeclHandler = NULL; + parser->m_attlistDeclHandler = NULL; + parser->m_entityDeclHandler = NULL; + parser->m_xmlDeclHandler = NULL; + parser->m_bufferPtr = parser->m_buffer; + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; + parser->m_doctypeName = NULL; + parser->m_doctypeSysid = NULL; + parser->m_doctypePubid = NULL; + parser->m_declAttributeType = NULL; + parser->m_declNotationName = NULL; + parser->m_declNotationPublicId = NULL; + parser->m_declAttributeIsCdata = XML_FALSE; + parser->m_declAttributeIsId = XML_FALSE; + memset(&parser->m_position, 0, sizeof(POSITION)); + parser->m_errorCode = XML_ERROR_NONE; + parser->m_eventPtr = NULL; + parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; + parser->m_openInternalEntities = NULL; + parser->m_defaultExpandInternalEntities = XML_TRUE; + parser->m_tagLevel = 0; + parser->m_tagStack = NULL; + parser->m_inheritedBindings = NULL; + parser->m_nSpecifiedAtts = 0; + parser->m_unknownEncodingMem = NULL; + parser->m_unknownEncodingRelease = NULL; + parser->m_unknownEncodingData = NULL; + parser->m_parentParser = NULL; + parser->m_parsingStatus.parsing = XML_INITIALIZED; +#ifdef XML_DTD + parser->m_isParamEntity = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif + parser->m_hash_secret_salt = 0; + #ifdef XML_DTD - isParamEntity = XML_FALSE; - useForeignDTD = XML_FALSE; - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); + parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); + parser->m_accounting.maximumAmplificationFactor + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_accounting.activationThresholdBytes + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; + + memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); + parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); #endif - hash_secret_salt = 0; } -/* moves list of bindings to freeBindingList */ +/* moves list of bindings to m_freeBindingList */ static void FASTCALL -moveToFreeBindingList(XML_Parser parser, BINDING *bindings) -{ +moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { while (bindings) { BINDING *b = bindings; bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; } } XML_Bool XMLCALL -XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) -{ +XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { TAG *tStk; OPEN_INTERNAL_ENTITY *openEntityList; - if (parentParser) + + if (parser == NULL) + return XML_FALSE; + + if (parser->m_parentParser) return XML_FALSE; - /* move tagStack to freeTagList */ - tStk = tagStack; + /* move m_tagStack to m_freeTagList */ + tStk = parser->m_tagStack; while (tStk) { TAG *tag = tStk; tStk = tStk->parent; - tag->parent = freeTagList; + tag->parent = parser->m_freeTagList; moveToFreeBindingList(parser, tag->bindings); tag->bindings = NULL; - freeTagList = tag; + parser->m_freeTagList = tag; } - /* move openInternalEntities to freeInternalEntities */ - openEntityList = openInternalEntities; + /* move m_openInternalEntities to m_freeInternalEntities */ + openEntityList = parser->m_openInternalEntities; while (openEntityList) { OPEN_INTERNAL_ENTITY *openEntity = openEntityList; openEntityList = openEntity->next; - openEntity->next = freeInternalEntities; - freeInternalEntities = openEntity; - } - moveToFreeBindingList(parser, inheritedBindings); - FREE(unknownEncodingMem); - if (unknownEncodingRelease) - unknownEncodingRelease(unknownEncodingData); - poolClear(&tempPool); - poolClear(&temp2Pool); + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + } + moveToFreeBindingList(parser, parser->m_inheritedBindings); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) + parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); + poolClear(&parser->m_tempPool); + poolClear(&parser->m_temp2Pool); + FREE(parser, (void *)parser->m_protocolEncodingName); + parser->m_protocolEncodingName = NULL; parserInit(parser, encodingName); - dtdReset(_dtd, &parser->m_mem); + dtdReset(parser->m_dtd, &parser->m_mem); return XML_TRUE; } enum XML_Status XMLCALL -XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) -{ +XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser == NULL) + return XML_STATUS_ERROR; /* Block after XML_Parse()/XML_ParseBuffer() has been called. XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. */ - if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return XML_STATUS_ERROR; + + /* Get rid of any previous encoding name */ + FREE(parser, (void *)parser->m_protocolEncodingName); + if (encodingName == NULL) - protocolEncodingName = NULL; + /* No new encoding name */ + parser->m_protocolEncodingName = NULL; else { - protocolEncodingName = poolCopyString(&tempPool, encodingName); - if (!protocolEncodingName) + /* Copy the new encoding name into allocated memory */ + parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + if (! parser->m_protocolEncodingName) return XML_STATUS_ERROR; } return XML_STATUS_OK; } XML_Parser XMLCALL -XML_ExternalEntityParserCreate(XML_Parser oldParser, - const XML_Char *context, - const XML_Char *encodingName) -{ +XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + const XML_Char *encodingName) { XML_Parser parser = oldParser; DTD *newDtd = NULL; - DTD *oldDtd = _dtd; - XML_StartElementHandler oldStartElementHandler = startElementHandler; - XML_EndElementHandler oldEndElementHandler = endElementHandler; - XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; - XML_ProcessingInstructionHandler oldProcessingInstructionHandler - = processingInstructionHandler; - XML_CommentHandler oldCommentHandler = commentHandler; - XML_StartCdataSectionHandler oldStartCdataSectionHandler - = startCdataSectionHandler; - XML_EndCdataSectionHandler oldEndCdataSectionHandler - = endCdataSectionHandler; - XML_DefaultHandler oldDefaultHandler = defaultHandler; - XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler - = unparsedEntityDeclHandler; - XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; - XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler - = startNamespaceDeclHandler; - XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler - = endNamespaceDeclHandler; - XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; - XML_ExternalEntityRefHandler oldExternalEntityRefHandler - = externalEntityRefHandler; - XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler; - XML_UnknownEncodingHandler oldUnknownEncodingHandler - = unknownEncodingHandler; - XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; - XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; - XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; - XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; - ELEMENT_TYPE * oldDeclElementType = declElementType; - - void *oldUserData = userData; - void *oldHandlerArg = handlerArg; - XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities; - XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; + DTD *oldDtd; + XML_StartElementHandler oldStartElementHandler; + XML_EndElementHandler oldEndElementHandler; + XML_CharacterDataHandler oldCharacterDataHandler; + XML_ProcessingInstructionHandler oldProcessingInstructionHandler; + XML_CommentHandler oldCommentHandler; + XML_StartCdataSectionHandler oldStartCdataSectionHandler; + XML_EndCdataSectionHandler oldEndCdataSectionHandler; + XML_DefaultHandler oldDefaultHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; + XML_NotationDeclHandler oldNotationDeclHandler; + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; + XML_NotStandaloneHandler oldNotStandaloneHandler; + XML_ExternalEntityRefHandler oldExternalEntityRefHandler; + XML_SkippedEntityHandler oldSkippedEntityHandler; + XML_UnknownEncodingHandler oldUnknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler; + ELEMENT_TYPE *oldDeclElementType; + + void *oldUserData; + void *oldHandlerArg; + XML_Bool oldDefaultExpandInternalEntities; + XML_Parser oldExternalEntityRefHandlerArg; #ifdef XML_DTD - enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing; - int oldInEntityValue = prologState.inEntityValue; + enum XML_ParamEntityParsing oldParamEntityParsing; + int oldInEntityValue; #endif - XML_Bool oldns_triplets = ns_triplets; + XML_Bool oldns_triplets; /* Note that the new parser shares the same hash secret as the old parser, so that dtdCopy and copyEntityTable can lookup values from hash tables associated with either parser without us having to worry which hash secrets each table has. */ - unsigned long oldhash_secret_salt = hash_secret_salt; + unsigned long oldhash_secret_salt; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) + return NULL; + + /* Stash the original parser contents on the stack */ + oldDtd = parser->m_dtd; + oldStartElementHandler = parser->m_startElementHandler; + oldEndElementHandler = parser->m_endElementHandler; + oldCharacterDataHandler = parser->m_characterDataHandler; + oldProcessingInstructionHandler = parser->m_processingInstructionHandler; + oldCommentHandler = parser->m_commentHandler; + oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; + oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; + oldDefaultHandler = parser->m_defaultHandler; + oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; + oldNotationDeclHandler = parser->m_notationDeclHandler; + oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; + oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; + oldNotStandaloneHandler = parser->m_notStandaloneHandler; + oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; + oldSkippedEntityHandler = parser->m_skippedEntityHandler; + oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; + oldElementDeclHandler = parser->m_elementDeclHandler; + oldAttlistDeclHandler = parser->m_attlistDeclHandler; + oldEntityDeclHandler = parser->m_entityDeclHandler; + oldXmlDeclHandler = parser->m_xmlDeclHandler; + oldDeclElementType = parser->m_declElementType; + + oldUserData = parser->m_userData; + oldHandlerArg = parser->m_handlerArg; + oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; + oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; +#ifdef XML_DTD + oldParamEntityParsing = parser->m_paramEntityParsing; + oldInEntityValue = parser->m_prologState.inEntityValue; +#endif + oldns_triplets = parser->m_ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + oldhash_secret_salt = parser->m_hash_secret_salt; #ifdef XML_DTD - if (!context) + if (! context) newDtd = oldDtd; #endif /* XML_DTD */ @@ -1040,445 +1353,457 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, here. This makes this function more painful to follow than it would be otherwise. */ - if (ns) { - XML_Char tmp[2]; - *tmp = namespaceSeparator; + if (parser->m_ns) { + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); - } - else { + } else { parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); } - if (!parser) + if (! parser) return NULL; - startElementHandler = oldStartElementHandler; - endElementHandler = oldEndElementHandler; - characterDataHandler = oldCharacterDataHandler; - processingInstructionHandler = oldProcessingInstructionHandler; - commentHandler = oldCommentHandler; - startCdataSectionHandler = oldStartCdataSectionHandler; - endCdataSectionHandler = oldEndCdataSectionHandler; - defaultHandler = oldDefaultHandler; - unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; - notationDeclHandler = oldNotationDeclHandler; - startNamespaceDeclHandler = oldStartNamespaceDeclHandler; - endNamespaceDeclHandler = oldEndNamespaceDeclHandler; - notStandaloneHandler = oldNotStandaloneHandler; - externalEntityRefHandler = oldExternalEntityRefHandler; - skippedEntityHandler = oldSkippedEntityHandler; - unknownEncodingHandler = oldUnknownEncodingHandler; - elementDeclHandler = oldElementDeclHandler; - attlistDeclHandler = oldAttlistDeclHandler; - entityDeclHandler = oldEntityDeclHandler; - xmlDeclHandler = oldXmlDeclHandler; - declElementType = oldDeclElementType; - userData = oldUserData; + parser->m_startElementHandler = oldStartElementHandler; + parser->m_endElementHandler = oldEndElementHandler; + parser->m_characterDataHandler = oldCharacterDataHandler; + parser->m_processingInstructionHandler = oldProcessingInstructionHandler; + parser->m_commentHandler = oldCommentHandler; + parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; + parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; + parser->m_defaultHandler = oldDefaultHandler; + parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; + parser->m_notationDeclHandler = oldNotationDeclHandler; + parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; + parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; + parser->m_notStandaloneHandler = oldNotStandaloneHandler; + parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; + parser->m_skippedEntityHandler = oldSkippedEntityHandler; + parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; + parser->m_elementDeclHandler = oldElementDeclHandler; + parser->m_attlistDeclHandler = oldAttlistDeclHandler; + parser->m_entityDeclHandler = oldEntityDeclHandler; + parser->m_xmlDeclHandler = oldXmlDeclHandler; + parser->m_declElementType = oldDeclElementType; + parser->m_userData = oldUserData; if (oldUserData == oldHandlerArg) - handlerArg = userData; + parser->m_handlerArg = parser->m_userData; else - handlerArg = parser; + parser->m_handlerArg = parser; if (oldExternalEntityRefHandlerArg != oldParser) - externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; - defaultExpandInternalEntities = oldDefaultExpandInternalEntities; - ns_triplets = oldns_triplets; - hash_secret_salt = oldhash_secret_salt; - parentParser = oldParser; + parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; + parser->m_ns_triplets = oldns_triplets; + parser->m_hash_secret_salt = oldhash_secret_salt; + parser->m_parentParser = oldParser; #ifdef XML_DTD - paramEntityParsing = oldParamEntityParsing; - prologState.inEntityValue = oldInEntityValue; + parser->m_paramEntityParsing = oldParamEntityParsing; + parser->m_prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem) - || !setContext(parser, context)) { + if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) + || ! setContext(parser, context)) { XML_ParserFree(parser); return NULL; } - processor = externalEntityInitProcessor; + parser->m_processor = externalEntityInitProcessor; #ifdef XML_DTD - } - else { - /* The DTD instance referenced by _dtd is shared between the document's - root parser and external PE parsers, therefore one does not need to - call setContext. In addition, one also *must* not call setContext, - because this would overwrite existing prefix->binding pointers in - _dtd with ones that get destroyed with the external PE parser. - This would leave those prefixes with dangling pointers. + } else { + /* The DTD instance referenced by parser->m_dtd is shared between the + document's root parser and external PE parsers, therefore one does not + need to call setContext. In addition, one also *must* not call + setContext, because this would overwrite existing prefix->binding + pointers in parser->m_dtd with ones that get destroyed with the external + PE parser. This would leave those prefixes with dangling pointers. */ - isParamEntity = XML_TRUE; - XmlPrologStateInitExternalEntity(&prologState); - processor = externalParEntInitProcessor; + parser->m_isParamEntity = XML_TRUE; + XmlPrologStateInitExternalEntity(&parser->m_prologState); + parser->m_processor = externalParEntInitProcessor; } #endif /* XML_DTD */ return parser; } static void FASTCALL -destroyBindings(BINDING *bindings, XML_Parser parser) -{ +destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; - if (!b) + if (! b) break; bindings = b->nextTagBinding; - FREE(b->uri); - FREE(b); + FREE(parser, b->uri); + FREE(parser, b); } } void XMLCALL -XML_ParserFree(XML_Parser parser) -{ +XML_ParserFree(XML_Parser parser) { TAG *tagList; OPEN_INTERNAL_ENTITY *entityList; if (parser == NULL) return; - /* free tagStack and freeTagList */ - tagList = tagStack; + /* free m_tagStack and m_freeTagList */ + tagList = parser->m_tagStack; for (;;) { TAG *p; if (tagList == NULL) { - if (freeTagList == NULL) + if (parser->m_freeTagList == NULL) break; - tagList = freeTagList; - freeTagList = NULL; + tagList = parser->m_freeTagList; + parser->m_freeTagList = NULL; } p = tagList; tagList = tagList->parent; - FREE(p->buf); + FREE(parser, p->buf); destroyBindings(p->bindings, parser); - FREE(p); + FREE(parser, p); } - /* free openInternalEntities and freeInternalEntities */ - entityList = openInternalEntities; + /* free m_openInternalEntities and m_freeInternalEntities */ + entityList = parser->m_openInternalEntities; for (;;) { OPEN_INTERNAL_ENTITY *openEntity; if (entityList == NULL) { - if (freeInternalEntities == NULL) + if (parser->m_freeInternalEntities == NULL) break; - entityList = freeInternalEntities; - freeInternalEntities = NULL; + entityList = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = NULL; } openEntity = entityList; entityList = entityList->next; - FREE(openEntity); + FREE(parser, openEntity); } - destroyBindings(freeBindingList, parser); - destroyBindings(inheritedBindings, parser); - poolDestroy(&tempPool); - poolDestroy(&temp2Pool); + destroyBindings(parser->m_freeBindingList, parser); + destroyBindings(parser->m_inheritedBindings, parser); + poolDestroy(&parser->m_tempPool); + poolDestroy(&parser->m_temp2Pool); + FREE(parser, (void *)parser->m_protocolEncodingName); #ifdef XML_DTD /* external parameter entity parsers share the DTD structure parser->m_dtd with the root parser, so we must not destroy it */ - if (!isParamEntity && _dtd) + if (! parser->m_isParamEntity && parser->m_dtd) #else - if (_dtd) + if (parser->m_dtd) #endif /* XML_DTD */ - dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem); - FREE((void *)atts); + dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, + &parser->m_mem); + FREE(parser, (void *)parser->m_atts); #ifdef XML_ATTR_INFO - FREE((void *)attInfo); + FREE(parser, (void *)parser->m_attInfo); #endif - FREE(groupConnector); - FREE(buffer); - FREE(dataBuf); - FREE(nsAtts); - FREE(unknownEncodingMem); - if (unknownEncodingRelease) - unknownEncodingRelease(unknownEncodingData); - FREE(parser); + FREE(parser, parser->m_groupConnector); + FREE(parser, parser->m_buffer); + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_nsAtts); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) + parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); + FREE(parser, parser); } void XMLCALL -XML_UseParserAsHandlerArg(XML_Parser parser) -{ - handlerArg = parser; +XML_UseParserAsHandlerArg(XML_Parser parser) { + if (parser != NULL) + parser->m_handlerArg = parser; } enum XML_Error XMLCALL -XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) -{ +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; - useForeignDTD = useDTD; + parser->m_useForeignDTD = useDTD; return XML_ERROR_NONE; #else + UNUSED_P(useDTD); return XML_ERROR_FEATURE_REQUIRES_XML_DTD; #endif } void XMLCALL -XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) -{ +XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { + if (parser == NULL) + return; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return; - ns_triplets = do_nst ? XML_TRUE : XML_FALSE; + parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } void XMLCALL -XML_SetUserData(XML_Parser parser, void *p) -{ - if (handlerArg == userData) - handlerArg = userData = p; +XML_SetUserData(XML_Parser parser, void *p) { + if (parser == NULL) + return; + if (parser->m_handlerArg == parser->m_userData) + parser->m_handlerArg = parser->m_userData = p; else - userData = p; + parser->m_userData = p; } enum XML_Status XMLCALL -XML_SetBase(XML_Parser parser, const XML_Char *p) -{ +XML_SetBase(XML_Parser parser, const XML_Char *p) { + if (parser == NULL) + return XML_STATUS_ERROR; if (p) { - p = poolCopyString(&_dtd->pool, p); - if (!p) + p = poolCopyString(&parser->m_dtd->pool, p); + if (! p) return XML_STATUS_ERROR; - curBase = p; - } - else - curBase = NULL; + parser->m_curBase = p; + } else + parser->m_curBase = NULL; return XML_STATUS_OK; } -const XML_Char * XMLCALL -XML_GetBase(XML_Parser parser) -{ - return curBase; +const XML_Char *XMLCALL +XML_GetBase(XML_Parser parser) { + if (parser == NULL) + return NULL; + return parser->m_curBase; } int XMLCALL -XML_GetSpecifiedAttributeCount(XML_Parser parser) -{ - return nSpecifiedAtts; +XML_GetSpecifiedAttributeCount(XML_Parser parser) { + if (parser == NULL) + return -1; + return parser->m_nSpecifiedAtts; } int XMLCALL -XML_GetIdAttributeIndex(XML_Parser parser) -{ - return idAttIndex; +XML_GetIdAttributeIndex(XML_Parser parser) { + if (parser == NULL) + return -1; + return parser->m_idAttIndex; } #ifdef XML_ATTR_INFO -const XML_AttrInfo * XMLCALL -XML_GetAttributeInfo(XML_Parser parser) -{ - return attInfo; +const XML_AttrInfo *XMLCALL +XML_GetAttributeInfo(XML_Parser parser) { + if (parser == NULL) + return NULL; + return parser->m_attInfo; } #endif void XMLCALL -XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end) -{ - startElementHandler = start; - endElementHandler = end; +XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, + XML_EndElementHandler end) { + if (parser == NULL) + return; + parser->m_startElementHandler = start; + parser->m_endElementHandler = end; } void XMLCALL -XML_SetStartElementHandler(XML_Parser parser, - XML_StartElementHandler start) { - startElementHandler = start; +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { + if (parser != NULL) + parser->m_startElementHandler = start; } void XMLCALL -XML_SetEndElementHandler(XML_Parser parser, - XML_EndElementHandler end) { - endElementHandler = end; +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { + if (parser != NULL) + parser->m_endElementHandler = end; } void XMLCALL XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler) -{ - characterDataHandler = handler; + XML_CharacterDataHandler handler) { + if (parser != NULL) + parser->m_characterDataHandler = handler; } void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler) -{ - processingInstructionHandler = handler; + XML_ProcessingInstructionHandler handler) { + if (parser != NULL) + parser->m_processingInstructionHandler = handler; } void XMLCALL -XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler) -{ - commentHandler = handler; +XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { + if (parser != NULL) + parser->m_commentHandler = handler; } void XMLCALL XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end) -{ - startCdataSectionHandler = start; - endCdataSectionHandler = end; + XML_EndCdataSectionHandler end) { + if (parser == NULL) + return; + parser->m_startCdataSectionHandler = start; + parser->m_endCdataSectionHandler = end; } void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start) { - startCdataSectionHandler = start; + if (parser != NULL) + parser->m_startCdataSectionHandler = start; } void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end) { - endCdataSectionHandler = end; + if (parser != NULL) + parser->m_endCdataSectionHandler = end; } void XMLCALL -XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler) -{ - defaultHandler = handler; - defaultExpandInternalEntities = XML_FALSE; +XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; + parser->m_defaultHandler = handler; + parser->m_defaultExpandInternalEntities = XML_FALSE; } void XMLCALL -XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler) -{ - defaultHandler = handler; - defaultExpandInternalEntities = XML_TRUE; +XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; + parser->m_defaultHandler = handler; + parser->m_defaultExpandInternalEntities = XML_TRUE; } void XMLCALL -XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, - XML_EndDoctypeDeclHandler end) -{ - startDoctypeDeclHandler = start; - endDoctypeDeclHandler = end; +XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end) { + if (parser == NULL) + return; + parser->m_startDoctypeDeclHandler = start; + parser->m_endDoctypeDeclHandler = end; } void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start) { - startDoctypeDeclHandler = start; + if (parser != NULL) + parser->m_startDoctypeDeclHandler = start; } void XMLCALL -XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end) { - endDoctypeDeclHandler = end; +XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { + if (parser != NULL) + parser->m_endDoctypeDeclHandler = end; } void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler) -{ - unparsedEntityDeclHandler = handler; + XML_UnparsedEntityDeclHandler handler) { + if (parser != NULL) + parser->m_unparsedEntityDeclHandler = handler; } void XMLCALL -XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler) -{ - notationDeclHandler = handler; +XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { + if (parser != NULL) + parser->m_notationDeclHandler = handler; } void XMLCALL XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end) -{ - startNamespaceDeclHandler = start; - endNamespaceDeclHandler = end; + XML_EndNamespaceDeclHandler end) { + if (parser == NULL) + return; + parser->m_startNamespaceDeclHandler = start; + parser->m_endNamespaceDeclHandler = end; } void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start) { - startNamespaceDeclHandler = start; + if (parser != NULL) + parser->m_startNamespaceDeclHandler = start; } void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end) { - endNamespaceDeclHandler = end; + if (parser != NULL) + parser->m_endNamespaceDeclHandler = end; } void XMLCALL XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler) -{ - notStandaloneHandler = handler; + XML_NotStandaloneHandler handler) { + if (parser != NULL) + parser->m_notStandaloneHandler = handler; } void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler) -{ - externalEntityRefHandler = handler; + XML_ExternalEntityRefHandler handler) { + if (parser != NULL) + parser->m_externalEntityRefHandler = handler; } void XMLCALL -XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) -{ +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { + if (parser == NULL) + return; if (arg) - externalEntityRefHandlerArg = (XML_Parser)arg; + parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; else - externalEntityRefHandlerArg = parser; + parser->m_externalEntityRefHandlerArg = parser; } void XMLCALL XML_SetSkippedEntityHandler(XML_Parser parser, - XML_SkippedEntityHandler handler) -{ - skippedEntityHandler = handler; + XML_SkippedEntityHandler handler) { + if (parser != NULL) + parser->m_skippedEntityHandler = handler; } void XMLCALL XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *data) -{ - unknownEncodingHandler = handler; - unknownEncodingHandlerData = data; + XML_UnknownEncodingHandler handler, void *data) { + if (parser == NULL) + return; + parser->m_unknownEncodingHandler = handler; + parser->m_unknownEncodingHandlerData = data; } void XMLCALL -XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl) -{ - elementDeclHandler = eldecl; +XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { + if (parser != NULL) + parser->m_elementDeclHandler = eldecl; } void XMLCALL -XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl) -{ - attlistDeclHandler = attdecl; +XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { + if (parser != NULL) + parser->m_attlistDeclHandler = attdecl; } void XMLCALL -XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler) -{ - entityDeclHandler = handler; +XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { + if (parser != NULL) + parser->m_entityDeclHandler = handler; } void XMLCALL -XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler handler) { - xmlDeclHandler = handler; +XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { + if (parser != NULL) + parser->m_xmlDeclHandler = handler; } int XMLCALL XML_SetParamEntityParsing(XML_Parser parser, - enum XML_ParamEntityParsing peParsing) -{ + enum XML_ParamEntityParsing peParsing) { + if (parser == NULL) + return 0; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return 0; #ifdef XML_DTD - paramEntityParsing = peParsing; + parser->m_paramEntityParsing = peParsing; return 1; #else return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; @@ -1486,91 +1811,122 @@ XML_SetParamEntityParsing(XML_Parser parser, } int XMLCALL -XML_SetHashSalt(XML_Parser parser, - unsigned long hash_salt) -{ +XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { + if (parser == NULL) + return 0; + if (parser->m_parentParser) + return XML_SetHashSalt(parser->m_parentParser, hash_salt); /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) return 0; - hash_secret_salt = hash_salt; + parser->m_hash_secret_salt = hash_salt; return 1; } enum XML_Status XMLCALL -XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) -{ - switch (ps_parsing) { +XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { + if (parser != NULL) + parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; + return XML_STATUS_ERROR; + } + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; case XML_INITIALIZED: - if (parentParser == NULL && !startParsing(parser)) { - errorCode = XML_ERROR_NO_MEMORY; + if (parser->m_parentParser == NULL && ! startParsing(parser)) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; return XML_STATUS_ERROR; } + /* fall through */ default: - ps_parsing = XML_PARSING; + parser->m_parsingStatus.parsing = XML_PARSING; } if (len == 0) { - ps_finalBuffer = (XML_Bool)isFinal; - if (!isFinal) + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + if (! isFinal) return XML_STATUS_OK; - positionPtr = bufferPtr; - parseEndPtr = bufferEnd; + parser->m_positionPtr = parser->m_bufferPtr; + parser->m_parseEndPtr = parser->m_bufferEnd; /* If data are left over from last buffer, and we now know that these data are the final chunk of input, then we have to check them again to detect errors based on that fact. */ - errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + parser->m_errorCode + = parser->m_processor(parser, parser->m_bufferPtr, + parser->m_parseEndPtr, &parser->m_bufferPtr); - if (errorCode == XML_ERROR_NONE) { - switch (ps_parsing) { + if (parser->m_errorCode == XML_ERROR_NONE) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; + /* It is hard to be certain, but it seems that this case + * cannot occur. This code is cleaning up a previous parse + * with no new data (since len == 0). Changing the parsing + * state requires getting to execute a handler function, and + * there doesn't seem to be an opportunity for that while in + * this circumstance. + * + * Given the uncertainty, we retain the code but exclude it + * from coverage tests. + * + * LCOV_EXCL_START + */ + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; return XML_STATUS_SUSPENDED; + /* LCOV_EXCL_STOP */ case XML_INITIALIZED: case XML_PARSING: - ps_parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; /* fall through */ default: return XML_STATUS_OK; } } - eventEndPtr = eventPtr; - processor = errorProcessor; + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } #ifndef XML_CONTEXT_BYTES - else if (bufferPtr == bufferEnd) { + else if (parser->m_bufferPtr == parser->m_bufferEnd) { const char *end; int nLeftOver; - enum XML_Error result; - parseEndByteIndex += len; - positionPtr = s; - ps_finalBuffer = (XML_Bool)isFinal; + enum XML_Status result; + /* Detect overflow (a+b > MAX <==> b > MAX-a) */ + if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } + parser->m_parseEndByteIndex += len; + parser->m_positionPtr = s; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - errorCode = processor(parser, s, parseEndPtr = s + len, &end); + parser->m_errorCode + = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; - } - else { - switch (ps_parsing) { + } else { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: if (isFinal) { - ps_parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; return XML_STATUS_OK; } /* fall through */ @@ -1579,35 +1935,38 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) } } - XmlUpdatePosition(encoding, positionPtr, end, &position); + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, + &parser->m_position); nLeftOver = s + len - end; if (nLeftOver) { - if (buffer == NULL || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - char *temp; - temp = (buffer == NULL - ? (char *)MALLOC(len * 2) - : (char *)REALLOC(buffer, len * 2)); + if (parser->m_buffer == NULL + || nLeftOver > parser->m_bufferLim - parser->m_buffer) { + /* avoid _signed_ integer overflow */ + char *temp = NULL; + const int bytesToAllocate = (int)((unsigned)len * 2U); + if (bytesToAllocate > 0) { + temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); + } if (temp == NULL) { - errorCode = XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = NULL; - processor = errorProcessor; + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } - buffer = temp; - bufferLim = buffer + len * 2; + parser->m_buffer = temp; + parser->m_bufferLim = parser->m_buffer + bytesToAllocate; } - memcpy(buffer, end, nLeftOver); + memcpy(parser->m_buffer, end, nLeftOver); } - bufferPtr = buffer; - bufferEnd = buffer + nLeftOver; - positionPtr = bufferPtr; - parseEndPtr = bufferEnd; - eventPtr = bufferPtr; - eventEndPtr = bufferPtr; + parser->m_bufferPtr = parser->m_buffer; + parser->m_bufferEnd = parser->m_buffer + nLeftOver; + parser->m_positionPtr = parser->m_bufferPtr; + parser->m_parseEndPtr = parser->m_bufferEnd; + parser->m_eventPtr = parser->m_bufferPtr; + parser->m_eventEndPtr = parser->m_bufferPtr; return result; } -#endif /* not defined XML_CONTEXT_BYTES */ +#endif /* not defined XML_CONTEXT_BYTES */ else { void *buff = XML_GetBuffer(parser, len); if (buff == NULL) @@ -1620,364 +1979,489 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) } enum XML_Status XMLCALL -XML_ParseBuffer(XML_Parser parser, int len, int isFinal) -{ +XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start; enum XML_Status result = XML_STATUS_OK; - switch (ps_parsing) { + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; case XML_INITIALIZED: - if (parentParser == NULL && !startParsing(parser)) { - errorCode = XML_ERROR_NO_MEMORY; + /* Has someone called XML_GetBuffer successfully before? */ + if (! parser->m_bufferPtr) { + parser->m_errorCode = XML_ERROR_NO_BUFFER; + return XML_STATUS_ERROR; + } + + if (parser->m_parentParser == NULL && ! startParsing(parser)) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; return XML_STATUS_ERROR; } + /* fall through */ default: - ps_parsing = XML_PARSING; + parser->m_parsingStatus.parsing = XML_PARSING; } - start = bufferPtr; - positionPtr = start; - bufferEnd += len; - parseEndPtr = bufferEnd; - parseEndByteIndex += len; - ps_finalBuffer = (XML_Bool)isFinal; + start = parser->m_bufferPtr; + parser->m_positionPtr = start; + parser->m_bufferEnd += len; + parser->m_parseEndPtr = parser->m_bufferEnd; + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - errorCode = processor(parser, start, parseEndPtr, &bufferPtr); + parser->m_errorCode = parser->m_processor( + parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; - } - else { - switch (ps_parsing) { + } else { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: if (isFinal) { - ps_parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; return result; } - default: ; /* should not happen */ + default:; /* should not happen */ } } - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; return result; } -void * XMLCALL -XML_GetBuffer(XML_Parser parser, int len) -{ - switch (ps_parsing) { +void *XMLCALL +XML_GetBuffer(XML_Parser parser, int len) { + if (parser == NULL) + return NULL; + if (len < 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return NULL; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return NULL; - default: ; + default:; } - if (len > bufferLim - bufferEnd) { - /* FIXME avoid integer overflow */ - int neededSize = len + (int)(bufferEnd - bufferPtr); + if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { #ifdef XML_CONTEXT_BYTES - int keep = (int)(bufferPtr - buffer); - + int keep; +#endif /* defined XML_CONTEXT_BYTES */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int)((unsigned)len + + (unsigned)EXPAT_SAFE_PTR_DIFF( + parser->m_bufferEnd, parser->m_bufferPtr)); + if (neededSize < 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +#ifdef XML_CONTEXT_BYTES + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; + /* Detect and prevent integer overflow */ + if (keep > INT_MAX - neededSize) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } neededSize += keep; -#endif /* defined XML_CONTEXT_BYTES */ - if (neededSize <= bufferLim - buffer) { +#endif /* defined XML_CONTEXT_BYTES */ + if (neededSize + <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { #ifdef XML_CONTEXT_BYTES - if (keep < bufferPtr - buffer) { - int offset = (int)(bufferPtr - buffer) - keep; - memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); - bufferEnd -= offset; - bufferPtr -= offset; + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { + int offset + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) + - keep; + /* The buffer pointers cannot be NULL here; we have at least some bytes + * in the buffer */ + memmove(parser->m_buffer, &parser->m_buffer[offset], + parser->m_bufferEnd - parser->m_bufferPtr + keep); + parser->m_bufferEnd -= offset; + parser->m_bufferPtr -= offset; } #else - memmove(buffer, bufferPtr, bufferEnd - bufferPtr); - bufferEnd = buffer + (bufferEnd - bufferPtr); - bufferPtr = buffer; -#endif /* not defined XML_CONTEXT_BYTES */ - } - else { + if (parser->m_buffer && parser->m_bufferPtr) { + memmove(parser->m_buffer, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + parser->m_bufferPtr = parser->m_buffer; + } +#endif /* not defined XML_CONTEXT_BYTES */ + } else { char *newBuf; - int bufferSize = (int)(bufferLim - bufferPtr); + int bufferSize + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; - } while (bufferSize < neededSize); - newBuf = (char *)MALLOC(bufferSize); - if (newBuf == 0) { - errorCode = XML_ERROR_NO_MEMORY; + /* Do not invoke signed arithmetic overflow: */ + bufferSize = (int)(2U * (unsigned)bufferSize); + } while (bufferSize < neededSize && bufferSize > 0); + if (bufferSize <= 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } - bufferLim = newBuf + bufferSize; -#ifdef XML_CONTEXT_BYTES - if (bufferPtr) { - int keep = (int)(bufferPtr - buffer); - if (keep > XML_CONTEXT_BYTES) - keep = XML_CONTEXT_BYTES; - memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); - FREE(buffer); - buffer = newBuf; - bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; - bufferPtr = buffer + keep; + newBuf = (char *)MALLOC(parser, bufferSize); + if (newBuf == 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; } - else { - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; + parser->m_bufferLim = newBuf + bufferSize; +#ifdef XML_CONTEXT_BYTES + if (parser->m_bufferPtr) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep); + FREE(parser, parser->m_buffer); + parser->m_buffer = newBuf; + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep; + parser->m_bufferPtr = parser->m_buffer + keep; + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_bufferEnd = newBuf; + parser->m_bufferPtr = parser->m_buffer = newBuf; } #else - if (bufferPtr) { - memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); - FREE(buffer); + if (parser->m_bufferPtr) { + memcpy(newBuf, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + FREE(parser, parser->m_buffer); + parser->m_bufferEnd + = newBuf + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_bufferEnd = newBuf; } - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; -#endif /* not defined XML_CONTEXT_BYTES */ + parser->m_bufferPtr = parser->m_buffer = newBuf; +#endif /* not defined XML_CONTEXT_BYTES */ } - eventPtr = eventEndPtr = NULL; - positionPtr = NULL; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; } - return bufferEnd; + return parser->m_bufferEnd; } enum XML_Status XMLCALL -XML_StopParser(XML_Parser parser, XML_Bool resumable) -{ - switch (ps_parsing) { +XML_StopParser(XML_Parser parser, XML_Bool resumable) { + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: if (resumable) { - errorCode = XML_ERROR_SUSPENDED; + parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; } - ps_parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_FINISHED; break; case XML_FINISHED: - errorCode = XML_ERROR_FINISHED; + parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; default: if (resumable) { #ifdef XML_DTD - if (isParamEntity) { - errorCode = XML_ERROR_SUSPEND_PE; + if (parser->m_isParamEntity) { + parser->m_errorCode = XML_ERROR_SUSPEND_PE; return XML_STATUS_ERROR; } #endif - ps_parsing = XML_SUSPENDED; - } - else - ps_parsing = XML_FINISHED; + parser->m_parsingStatus.parsing = XML_SUSPENDED; + } else + parser->m_parsingStatus.parsing = XML_FINISHED; } return XML_STATUS_OK; } enum XML_Status XMLCALL -XML_ResumeParser(XML_Parser parser) -{ +XML_ResumeParser(XML_Parser parser) { enum XML_Status result = XML_STATUS_OK; - if (ps_parsing != XML_SUSPENDED) { - errorCode = XML_ERROR_NOT_SUSPENDED; + if (parser == NULL) + return XML_STATUS_ERROR; + if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { + parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; return XML_STATUS_ERROR; } - ps_parsing = XML_PARSING; + parser->m_parsingStatus.parsing = XML_PARSING; - errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + parser->m_errorCode = parser->m_processor( + parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; return XML_STATUS_ERROR; - } - else { - switch (ps_parsing) { + } else { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: - if (ps_finalBuffer) { - ps_parsing = XML_FINISHED; + if (parser->m_parsingStatus.finalBuffer) { + parser->m_parsingStatus.parsing = XML_FINISHED; return result; } - default: ; + default:; } } - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; return result; } void XMLCALL -XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) -{ +XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { + if (parser == NULL) + return; assert(status != NULL); *status = parser->m_parsingStatus; } enum XML_Error XMLCALL -XML_GetErrorCode(XML_Parser parser) -{ - return errorCode; +XML_GetErrorCode(XML_Parser parser) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; + return parser->m_errorCode; } XML_Index XMLCALL -XML_GetCurrentByteIndex(XML_Parser parser) -{ - if (eventPtr) - return parseEndByteIndex - (parseEndPtr - eventPtr); +XML_GetCurrentByteIndex(XML_Parser parser) { + if (parser == NULL) + return -1; + if (parser->m_eventPtr) + return (XML_Index)(parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - parser->m_eventPtr)); return -1; } int XMLCALL -XML_GetCurrentByteCount(XML_Parser parser) -{ - if (eventEndPtr && eventPtr) - return (int)(eventEndPtr - eventPtr); +XML_GetCurrentByteCount(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventEndPtr && parser->m_eventPtr) + return (int)(parser->m_eventEndPtr - parser->m_eventPtr); return 0; } -const char * XMLCALL -XML_GetInputContext(XML_Parser parser, int *offset, int *size) -{ +const char *XMLCALL +XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES - if (eventPtr && buffer) { - *offset = (int)(eventPtr - buffer); - *size = (int)(bufferEnd - buffer); - return buffer; + if (parser == NULL) + return NULL; + if (parser->m_eventPtr && parser->m_buffer) { + if (offset != NULL) + *offset = (int)(parser->m_eventPtr - parser->m_buffer); + if (size != NULL) + *size = (int)(parser->m_bufferEnd - parser->m_buffer); + return parser->m_buffer; } +#else + (void)parser; + (void)offset; + (void)size; #endif /* defined XML_CONTEXT_BYTES */ - return (char *) 0; + return (const char *)0; } XML_Size XMLCALL -XML_GetCurrentLineNumber(XML_Parser parser) -{ - if (eventPtr && eventPtr >= positionPtr) { - XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); - positionPtr = eventPtr; +XML_GetCurrentLineNumber(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_eventPtr, &parser->m_position); + parser->m_positionPtr = parser->m_eventPtr; } - return position.lineNumber + 1; + return parser->m_position.lineNumber + 1; } XML_Size XMLCALL -XML_GetCurrentColumnNumber(XML_Parser parser) -{ - if (eventPtr && eventPtr >= positionPtr) { - XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); - positionPtr = eventPtr; +XML_GetCurrentColumnNumber(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_eventPtr, &parser->m_position); + parser->m_positionPtr = parser->m_eventPtr; } - return position.columnNumber; + return parser->m_position.columnNumber; } void XMLCALL -XML_FreeContentModel(XML_Parser parser, XML_Content *model) -{ - FREE(model); +XML_FreeContentModel(XML_Parser parser, XML_Content *model) { + if (parser != NULL) + FREE(parser, model); } -void * XMLCALL -XML_MemMalloc(XML_Parser parser, size_t size) -{ - return MALLOC(size); +void *XMLCALL +XML_MemMalloc(XML_Parser parser, size_t size) { + if (parser == NULL) + return NULL; + return MALLOC(parser, size); } -void * XMLCALL -XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) -{ - return REALLOC(ptr, size); +void *XMLCALL +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { + if (parser == NULL) + return NULL; + return REALLOC(parser, ptr, size); } void XMLCALL -XML_MemFree(XML_Parser parser, void *ptr) -{ - FREE(ptr); +XML_MemFree(XML_Parser parser, void *ptr) { + if (parser != NULL) + FREE(parser, ptr); } void XMLCALL -XML_DefaultCurrent(XML_Parser parser) -{ - if (defaultHandler) { - if (openInternalEntities) - reportDefault(parser, - internalEncoding, - openInternalEntities->internalEventPtr, - openInternalEntities->internalEventEndPtr); +XML_DefaultCurrent(XML_Parser parser) { + if (parser == NULL) + return; + if (parser->m_defaultHandler) { + if (parser->m_openInternalEntities) + reportDefault(parser, parser->m_internalEncoding, + parser->m_openInternalEntities->internalEventPtr, + parser->m_openInternalEntities->internalEventEndPtr); else - reportDefault(parser, encoding, eventPtr, eventEndPtr); - } -} - -const XML_LChar * XMLCALL -XML_ErrorString(enum XML_Error code) -{ - static const XML_LChar* const message[] = { - 0, - XML_L("out of memory"), - XML_L("syntax error"), - XML_L("no element found"), - XML_L("not well-formed (invalid token)"), - XML_L("unclosed token"), - XML_L("partial character"), - XML_L("mismatched tag"), - XML_L("duplicate attribute"), - XML_L("junk after document element"), - XML_L("illegal parameter entity reference"), - XML_L("undefined entity"), - XML_L("recursive entity reference"), - XML_L("asynchronous entity"), - XML_L("reference to invalid character number"), - XML_L("reference to binary entity"), - XML_L("reference to external entity in attribute"), - XML_L("XML or text declaration not at start of entity"), - XML_L("unknown encoding"), - XML_L("encoding specified in XML declaration is incorrect"), - XML_L("unclosed CDATA section"), - XML_L("error in processing external entity reference"), - XML_L("document is not standalone"), - XML_L("unexpected parser state - please send a bug report"), - XML_L("entity declared in parameter entity"), - XML_L("requested feature requires XML_DTD support in Expat"), - XML_L("cannot change setting once parsing has begun"), - XML_L("unbound prefix"), - XML_L("must not undeclare prefix"), - XML_L("incomplete markup in parameter entity"), - XML_L("XML declaration not well-formed"), - XML_L("text declaration not well-formed"), - XML_L("illegal character(s) in public id"), - XML_L("parser suspended"), - XML_L("parser not suspended"), - XML_L("parsing aborted"), - XML_L("parsing finished"), - XML_L("cannot suspend in external parameter entity"), - XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"), - XML_L("reserved prefix (xmlns) must not be declared or undeclared"), - XML_L("prefix must not be bound to one of the reserved namespace names") - }; - if (code > 0 && code < sizeof(message)/sizeof(message[0])) - return message[code]; + reportDefault(parser, parser->m_encoding, parser->m_eventPtr, + parser->m_eventEndPtr); + } +} + +const XML_LChar *XMLCALL +XML_ErrorString(enum XML_Error code) { + switch (code) { + case XML_ERROR_NONE: + return NULL; + case XML_ERROR_NO_MEMORY: + return XML_L("out of memory"); + case XML_ERROR_SYNTAX: + return XML_L("syntax error"); + case XML_ERROR_NO_ELEMENTS: + return XML_L("no element found"); + case XML_ERROR_INVALID_TOKEN: + return XML_L("not well-formed (invalid token)"); + case XML_ERROR_UNCLOSED_TOKEN: + return XML_L("unclosed token"); + case XML_ERROR_PARTIAL_CHAR: + return XML_L("partial character"); + case XML_ERROR_TAG_MISMATCH: + return XML_L("mismatched tag"); + case XML_ERROR_DUPLICATE_ATTRIBUTE: + return XML_L("duplicate attribute"); + case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: + return XML_L("junk after document element"); + case XML_ERROR_PARAM_ENTITY_REF: + return XML_L("illegal parameter entity reference"); + case XML_ERROR_UNDEFINED_ENTITY: + return XML_L("undefined entity"); + case XML_ERROR_RECURSIVE_ENTITY_REF: + return XML_L("recursive entity reference"); + case XML_ERROR_ASYNC_ENTITY: + return XML_L("asynchronous entity"); + case XML_ERROR_BAD_CHAR_REF: + return XML_L("reference to invalid character number"); + case XML_ERROR_BINARY_ENTITY_REF: + return XML_L("reference to binary entity"); + case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: + return XML_L("reference to external entity in attribute"); + case XML_ERROR_MISPLACED_XML_PI: + return XML_L("XML or text declaration not at start of entity"); + case XML_ERROR_UNKNOWN_ENCODING: + return XML_L("unknown encoding"); + case XML_ERROR_INCORRECT_ENCODING: + return XML_L("encoding specified in XML declaration is incorrect"); + case XML_ERROR_UNCLOSED_CDATA_SECTION: + return XML_L("unclosed CDATA section"); + case XML_ERROR_EXTERNAL_ENTITY_HANDLING: + return XML_L("error in processing external entity reference"); + case XML_ERROR_NOT_STANDALONE: + return XML_L("document is not standalone"); + case XML_ERROR_UNEXPECTED_STATE: + return XML_L("unexpected parser state - please send a bug report"); + case XML_ERROR_ENTITY_DECLARED_IN_PE: + return XML_L("entity declared in parameter entity"); + case XML_ERROR_FEATURE_REQUIRES_XML_DTD: + return XML_L("requested feature requires XML_DTD support in Expat"); + case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: + return XML_L("cannot change setting once parsing has begun"); + /* Added in 1.95.7. */ + case XML_ERROR_UNBOUND_PREFIX: + return XML_L("unbound prefix"); + /* Added in 1.95.8. */ + case XML_ERROR_UNDECLARING_PREFIX: + return XML_L("must not undeclare prefix"); + case XML_ERROR_INCOMPLETE_PE: + return XML_L("incomplete markup in parameter entity"); + case XML_ERROR_XML_DECL: + return XML_L("XML declaration not well-formed"); + case XML_ERROR_TEXT_DECL: + return XML_L("text declaration not well-formed"); + case XML_ERROR_PUBLICID: + return XML_L("illegal character(s) in public id"); + case XML_ERROR_SUSPENDED: + return XML_L("parser suspended"); + case XML_ERROR_NOT_SUSPENDED: + return XML_L("parser not suspended"); + case XML_ERROR_ABORTED: + return XML_L("parsing aborted"); + case XML_ERROR_FINISHED: + return XML_L("parsing finished"); + case XML_ERROR_SUSPEND_PE: + return XML_L("cannot suspend in external parameter entity"); + /* Added in 2.0.0. */ + case XML_ERROR_RESERVED_PREFIX_XML: + return XML_L( + "reserved prefix (xml) must not be undeclared or bound to another namespace name"); + case XML_ERROR_RESERVED_PREFIX_XMLNS: + return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); + case XML_ERROR_RESERVED_NAMESPACE_URI: + return XML_L( + "prefix must not be bound to one of the reserved namespace names"); + /* Added in 2.2.5. */ + case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ + return XML_L("invalid argument"); + /* Added in 2.3.0. */ + case XML_ERROR_NO_BUFFER: + return XML_L( + "a successful prior call to function XML_GetBuffer is required"); + /* Added in 2.4.0. */ + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + return XML_L( + "limit on input amplification factor (from DTD and entities) breached"); + } return NULL; } -const XML_LChar * XMLCALL +const XML_LChar *XMLCALL XML_ExpatVersion(void) { - /* V1 is used to string-ize the version number. However, it would string-ize the actual version macro *names* unless we get them substituted before being passed to V1. CPP is defined to expand @@ -1986,8 +2470,8 @@ XML_ExpatVersion(void) { with the correct numerals. */ /* ### I'm assuming cpp is portable in this respect... */ -#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c) -#define V2(a,b,c) XML_L("expat_")V1(a,b,c) +#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) +#define V2(a, b, c) XML_L("expat_") V1(a, b, c) return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); @@ -1996,8 +2480,7 @@ XML_ExpatVersion(void) { } XML_Expat_Version XMLCALL -XML_ExpatVersionInfo(void) -{ +XML_ExpatVersionInfo(void) { XML_Expat_Version version; version.major = XML_MAJOR_VERSION; @@ -2007,59 +2490,91 @@ XML_ExpatVersionInfo(void) return version; } -const XML_Feature * XMLCALL -XML_GetFeatureList(void) -{ +const XML_Feature *XMLCALL +XML_GetFeatureList(void) { static const XML_Feature features[] = { - {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), - sizeof(XML_Char)}, - {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), - sizeof(XML_LChar)}, + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, #ifdef XML_UNICODE - {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, #endif #ifdef XML_UNICODE_WCHAR_T - {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, #endif #ifdef XML_DTD - {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif #ifdef XML_CONTEXT_BYTES - {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), - XML_CONTEXT_BYTES}, + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, #endif #ifdef XML_MIN_SIZE - {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, #endif #ifdef XML_NS - {XML_FEATURE_NS, XML_L("XML_NS"), 0}, + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, #endif #ifdef XML_LARGE_SIZE - {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, #endif #ifdef XML_ATTR_INFO - {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif +#ifdef XML_DTD + /* Added in Expat 2.4.0. */ + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_BLAP_MAX_AMP"), + (long int) + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_BLAP_ACT_THRES"), + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, #endif - {XML_FEATURE_END, NULL, 0} - }; + {XML_FEATURE_END, NULL, 0}}; return features; } +#ifdef XML_DTD +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} +#endif /* XML_DTD */ + /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was processed, and not yet closed, we need to store tag->rawName in a more permanent location, since the parse buffer is about to be discarded. */ static XML_Bool -storeRawNames(XML_Parser parser) -{ - TAG *tag = tagStack; +storeRawNames(XML_Parser parser) { + TAG *tag = parser->m_tagStack; while (tag) { int bufSize; int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; - /* Stop if already stored. Since tagStack is a stack, we can stop + /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything below it in the stack is already been accounted for in a previous call to this function. @@ -2069,9 +2584,13 @@ storeRawNames(XML_Parser parser) /* For re-use purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). */ - bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + /* Detect and prevent integer overflow. */ + if (rawNameLen > (size_t)INT_MAX - nameLen) + return XML_FALSE; + bufSize = nameLen + (int)rawNameLen; if (bufSize > tag->bufEnd - tag->buf) { - char *temp = (char *)REALLOC(tag->buf, bufSize); + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_FALSE; /* if tag->name.str points to tag->buf (only when namespace @@ -2083,8 +2602,8 @@ storeRawNames(XML_Parser parser) then update it as well, since it will always point into tag->buf */ if (tag->name.localPart) - tag->name.localPart = (XML_Char *)temp + (tag->name.localPart - - (XML_Char *)tag->buf); + tag->name.localPart + = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); tag->buf = temp; tag->bufEnd = temp + bufSize; rawNameBuf = temp + nameLen; @@ -2097,163 +2616,166 @@ storeRawNames(XML_Parser parser) } static enum XML_Error PTRCALL -contentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doContent(parser, 0, encoding, start, end, - endPtr, (XML_Bool)!ps_finalBuffer); +contentProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doContent( + parser, 0, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { - if (!storeRawNames(parser)) + if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; } static enum XML_Error PTRCALL -externalEntityInitProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +externalEntityInitProcessor(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; - processor = externalEntityInitProcessor2; + parser->m_processor = externalEntityInitProcessor2; return externalEntityInitProcessor2(parser, start, end, endPtr); } static enum XML_Error PTRCALL -externalEntityInitProcessor2(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +externalEntityInitProcessor2(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { const char *next = start; /* XmlContentTok doesn't always set the last arg */ - int tok = XmlContentTok(encoding, start, end, &next); + int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif /* XML_DTD */ + /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to doContent (by detecting XML_TOK_NONE) without processing any xml text declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. */ - if (next == end && !ps_finalBuffer) { + if (next == end && ! parser->m_parsingStatus.finalBuffer) { *endPtr = next; return XML_ERROR_NONE; } start = next; break; case XML_TOK_PARTIAL: - if (!ps_finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; + parser->m_eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (!ps_finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; + parser->m_eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } - processor = externalEntityInitProcessor3; + parser->m_processor = externalEntityInitProcessor3; return externalEntityInitProcessor3(parser, start, end, endPtr); } static enum XML_Error PTRCALL -externalEntityInitProcessor3(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +externalEntityInitProcessor3(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { int tok; const char *next = start; /* XmlContentTok doesn't always set the last arg */ - eventPtr = start; - tok = XmlContentTok(encoding, start, end, &next); - eventEndPtr = next; + parser->m_eventPtr = start; + tok = XmlContentTok(parser->m_encoding, start, end, &next); + /* Note: These bytes are accounted later in: + - processXmlDecl + - externalEntityContentProcessor + */ + parser->m_eventEndPtr = next; switch (tok) { - case XML_TOK_XML_DECL: - { - enum XML_Error result; - result = processXmlDecl(parser, 1, start, next); - if (result != XML_ERROR_NONE) - return result; - switch (ps_parsing) { - case XML_SUSPENDED: - *endPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: - return XML_ERROR_ABORTED; - default: - start = next; - } + case XML_TOK_XML_DECL: { + enum XML_Error result; + result = processXmlDecl(parser, 1, start, next); + if (result != XML_ERROR_NONE) + return result; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *endPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + start = next; } - break; + } break; case XML_TOK_PARTIAL: - if (!ps_finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (!ps_finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; } - processor = externalEntityContentProcessor; - tagLevel = 1; + parser->m_processor = externalEntityContentProcessor; + parser->m_tagLevel = 1; return externalEntityContentProcessor(parser, start, end, endPtr); } static enum XML_Error PTRCALL -externalEntityContentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doContent(parser, 1, encoding, start, end, - endPtr, (XML_Bool)!ps_finalBuffer); +externalEntityContentProcessor(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { + enum XML_Error result + = doContent(parser, 1, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { - if (!storeRawNames(parser)) + if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; } static enum XML_Error -doContent(XML_Parser parser, - int startTagLevel, - const ENCODING *enc, - const char *s, - const char *end, - const char **nextPtr, - XML_Bool haveMore) -{ +doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + const char *s, const char *end, const char **nextPtr, + XML_Bool haveMore, enum XML_Account account) { /* save one level of indirection */ - DTD * const dtd = _dtd; + DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); +#ifdef XML_DTD + const char *accountAfter + = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) + ? (haveMore ? s /* i.e. 0 bytes */ : end) + : next; + if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, + account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: @@ -2262,18 +2784,17 @@ doContent(XML_Parser parser, return XML_ERROR_NONE; } *eventEndPP = end; - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for XML_SUSPENDED, XML_FINISHED? */ if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; - if (tagLevel != startTagLevel) + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; *nextPtr = end; return XML_ERROR_NONE; @@ -2283,7 +2804,7 @@ doContent(XML_Parser parser, return XML_ERROR_NONE; } if (startTagLevel > 0) { - if (tagLevel != startTagLevel) + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; *nextPtr = s; return XML_ERROR_NONE; @@ -2304,321 +2825,325 @@ doContent(XML_Parser parser, return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_ENTITY_REF: - { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (characterDataHandler) - characterDataHandler(handlerArg, &ch, 1); - else if (defaultHandler) + case XML_TOK_ENTITY_REF: { + const XML_Char *name; + ENTITY *entity; + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ + if (parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity or default handler. + */ + if (! dtd->hasParamEntityRefs || dtd->standalone) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } else if (! entity) { + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->notation) + return XML_ERROR_BINARY_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + if (! parser->m_defaultExpandInternalEntities) { + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, + 0); + else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } - name = poolStoreString(&dtd->pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + result = processInternalEntity(parser, entity, XML_FALSE); + if (result != XML_ERROR_NONE) + return result; + } else if (parser->m_externalEntityRefHandler) { + const XML_Char *context; + entity->open = XML_TRUE; + context = getContext(parser); + entity->open = XML_FALSE; + if (! context) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); - poolDiscard(&dtd->pool); - /* First, determine if a check for an existing declaration is needed; - if yes, check that the entity exists, and that it is internal, - otherwise call the skipped entity or default handler. - */ - if (!dtd->hasParamEntityRefs || dtd->standalone) { - if (!entity) - return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; - } - else if (!entity) { - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, name, 0); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - if (entity->open) - return XML_ERROR_RECURSIVE_ENTITY_REF; - if (entity->notation) - return XML_ERROR_BINARY_ENTITY_REF; - if (entity->textPtr) { - enum XML_Error result; - if (!defaultExpandInternalEntities) { - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, entity->name, 0); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - result = processInternalEntity(parser, entity, XML_FALSE); - if (result != XML_ERROR_NONE) - return result; - } - else if (externalEntityRefHandler) { - const XML_Char *context; - entity->open = XML_TRUE; - context = getContext(parser); - entity->open = XML_FALSE; - if (!context) - return XML_ERROR_NO_MEMORY; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - context, - entity->base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - poolDiscard(&tempPool); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, context, entity->base, + entity->systemId, entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + poolDiscard(&parser->m_tempPool); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } case XML_TOK_START_TAG_NO_ATTS: /* fall through */ - case XML_TOK_START_TAG_WITH_ATTS: - { - TAG *tag; - enum XML_Error result; - XML_Char *toPtr; - if (freeTagList) { - tag = freeTagList; - freeTagList = freeTagList->parent; + case XML_TOK_START_TAG_WITH_ATTS: { + TAG *tag; + enum XML_Error result; + XML_Char *toPtr; + if (parser->m_freeTagList) { + tag = parser->m_freeTagList; + parser->m_freeTagList = parser->m_freeTagList->parent; + } else { + tag = (TAG *)MALLOC(parser, sizeof(TAG)); + if (! tag) + return XML_ERROR_NO_MEMORY; + tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); + if (! tag->buf) { + FREE(parser, tag); + return XML_ERROR_NO_MEMORY; } - else { - tag = (TAG *)MALLOC(sizeof(TAG)); - if (!tag) - return XML_ERROR_NO_MEMORY; - tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE); - if (!tag->buf) { - FREE(tag); - return XML_ERROR_NO_MEMORY; + tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + } + tag->bindings = NULL; + tag->parent = parser->m_tagStack; + parser->m_tagStack = tag; + tag->name.localPart = NULL; + tag->name.prefix = NULL; + tag->rawName = s + enc->minBytesPerChar; + tag->rawNameLength = XmlNameLength(enc, tag->rawName); + ++parser->m_tagLevel; + { + const char *rawNameEnd = tag->rawName + tag->rawNameLength; + const char *fromPtr = tag->rawName; + toPtr = (XML_Char *)tag->buf; + for (;;) { + int bufSize; + int convLen; + const enum XML_Convert_Result convert_res + = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, + (ICHAR *)tag->bufEnd - 1); + convLen = (int)(toPtr - (XML_Char *)tag->buf); + if ((fromPtr >= rawNameEnd) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { + tag->name.strLen = convLen; + break; } - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; - } - tag->bindings = NULL; - tag->parent = tagStack; - tagStack = tag; - tag->name.localPart = NULL; - tag->name.prefix = NULL; - tag->rawName = s + enc->minBytesPerChar; - tag->rawNameLength = XmlNameLength(enc, tag->rawName); - ++tagLevel; - { - const char *rawNameEnd = tag->rawName + tag->rawNameLength; - const char *fromPtr = tag->rawName; - toPtr = (XML_Char *)tag->buf; - for (;;) { - int bufSize; - int convLen; - XmlConvert(enc, - &fromPtr, rawNameEnd, - (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = (int)(toPtr - (XML_Char *)tag->buf); - if (fromPtr == rawNameEnd) { - tag->name.strLen = convLen; - break; - } - bufSize = (int)(tag->bufEnd - tag->buf) << 1; - { - char *temp = (char *)REALLOC(tag->buf, bufSize); - if (temp == NULL) - return XML_ERROR_NO_MEMORY; - tag->buf = temp; - tag->bufEnd = temp + bufSize; - toPtr = (XML_Char *)temp + convLen; - } + bufSize = (int)(tag->bufEnd - tag->buf) << 1; + { + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + tag->buf = temp; + tag->bufEnd = temp + bufSize; + toPtr = (XML_Char *)temp + convLen; } } - tag->name.str = (XML_Char *)tag->buf; - *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); - if (result) - return result; - if (startElementHandler) - startElementHandler(handlerArg, tag->name.str, - (const XML_Char **)atts); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - poolClear(&tempPool); - break; } + tag->name.str = (XML_Char *)tag->buf; + *toPtr = XML_T('\0'); + result + = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); + if (result) + return result; + if (parser->m_startElementHandler) + parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, + (const XML_Char **)parser->m_atts); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&parser->m_tempPool); + break; + } case XML_TOK_EMPTY_ELEMENT_NO_ATTS: /* fall through */ - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - { - const char *rawName = s + enc->minBytesPerChar; - enum XML_Error result; - BINDING *bindings = NULL; - XML_Bool noElmHandlers = XML_TRUE; - TAG_NAME name; - name.str = poolStoreString(&tempPool, enc, rawName, - rawName + XmlNameLength(enc, rawName)); - if (!name.str) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); - if (result) - return result; - poolFinish(&tempPool); - if (startElementHandler) { - startElementHandler(handlerArg, name.str, (const XML_Char **)atts); - noElmHandlers = XML_FALSE; - } - if (endElementHandler) { - if (startElementHandler) - *eventPP = *eventEndPP; - endElementHandler(handlerArg, name.str); - noElmHandlers = XML_FALSE; - } - if (noElmHandlers && defaultHandler) - reportDefault(parser, enc, s, next); - poolClear(&tempPool); - while (bindings) { - BINDING *b = bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { + const char *rawName = s + enc->minBytesPerChar; + enum XML_Error result; + BINDING *bindings = NULL; + XML_Bool noElmHandlers = XML_TRUE; + TAG_NAME name; + name.str = poolStoreString(&parser->m_tempPool, enc, rawName, + rawName + XmlNameLength(enc, rawName)); + if (! name.str) + return XML_ERROR_NO_MEMORY; + poolFinish(&parser->m_tempPool); + result = storeAtts(parser, enc, s, &name, &bindings, + XML_ACCOUNT_NONE /* token spans whole start tag */); + if (result != XML_ERROR_NONE) { + freeBindings(parser, bindings); + return result; + } + poolFinish(&parser->m_tempPool); + if (parser->m_startElementHandler) { + parser->m_startElementHandler(parser->m_handlerArg, name.str, + (const XML_Char **)parser->m_atts); + noElmHandlers = XML_FALSE; + } + if (parser->m_endElementHandler) { + if (parser->m_startElementHandler) + *eventPP = *eventEndPP; + parser->m_endElementHandler(parser->m_handlerArg, name.str); + noElmHandlers = XML_FALSE; + } + if (noElmHandlers && parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&parser->m_tempPool); + freeBindings(parser, bindings); + } + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { + if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); break; case XML_TOK_END_TAG: - if (tagLevel == startTagLevel) + if (parser->m_tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { int len; const char *rawName; - TAG *tag = tagStack; - tagStack = tag->parent; - tag->parent = freeTagList; - freeTagList = tag; - rawName = s + enc->minBytesPerChar*2; + TAG *tag = parser->m_tagStack; + rawName = s + enc->minBytesPerChar * 2; len = XmlNameLength(enc, rawName); if (len != tag->rawNameLength || memcmp(tag->rawName, rawName, len) != 0) { *eventPP = rawName; return XML_ERROR_TAG_MISMATCH; } - --tagLevel; - if (endElementHandler) { + parser->m_tagStack = tag->parent; + tag->parent = parser->m_freeTagList; + parser->m_freeTagList = tag; + --parser->m_tagLevel; + if (parser->m_endElementHandler) { const XML_Char *localPart; const XML_Char *prefix; XML_Char *uri; localPart = tag->name.localPart; - if (ns && localPart) { + if (parser->m_ns && localPart) { /* localPart and prefix may have been overwritten in tag->name.str, since this points to the binding->uri buffer which gets re-used; so we have to add them again */ uri = (XML_Char *)tag->name.str + tag->name.uriLen; /* don't need to check for space - already done in storeAtts() */ - while (*localPart) *uri++ = *localPart++; + while (*localPart) + *uri++ = *localPart++; prefix = (XML_Char *)tag->name.prefix; - if (ns_triplets && prefix) { - *uri++ = namespaceSeparator; - while (*prefix) *uri++ = *prefix++; - } + if (parser->m_ns_triplets && prefix) { + *uri++ = parser->m_namespaceSeparator; + while (*prefix) + *uri++ = *prefix++; + } *uri = XML_T('\0'); } - endElementHandler(handlerArg, tag->name.str); - } - else if (defaultHandler) + parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); while (tag->bindings) { BINDING *b = tag->bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); + if (parser->m_endNamespaceDeclHandler) + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, + b->prefix->name); tag->bindings = tag->bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); - } - break; - case XML_TOK_CHAR_REF: - { - int n = XmlCharRefNumber(enc, s); - if (n < 0) - return XML_ERROR_BAD_CHAR_REF; - if (characterDataHandler) { - XML_Char buf[XML_ENCODE_MAX]; - characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { + if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); } break; + case XML_TOK_CHAR_REF: { + int n = XmlCharRefNumber(enc, s); + if (n < 0) + return XML_ERROR_BAD_CHAR_REF; + if (parser->m_characterDataHandler) { + XML_Char buf[XML_ENCODE_MAX]; + parser->m_characterDataHandler(parser->m_handlerArg, buf, + XmlEncode(n, (ICHAR *)buf)); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; - case XML_TOK_CDATA_SECT_OPEN: - { - enum XML_Error result; - if (startCdataSectionHandler) - startCdataSectionHandler(handlerArg); -#if 0 - /* Suppose you doing a transformation on a document that involves - changing only the character data. You set up a defaultHandler - and a characterDataHandler. The defaultHandler simply copies - characters through. The characterDataHandler does the - transformation and writes the characters out escaping them as - necessary. This case will fail to work if we leave out the - following two lines (because & and < inside CDATA sections will - be incorrectly escaped). - - However, now we have a start/endCdataSectionHandler, so it seems - easier to let the user deal with this. - */ - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); -#endif - else if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore); - if (result != XML_ERROR_NONE) - return result; - else if (!next) { - processor = cdataSectionProcessor; - return result; - } + case XML_TOK_CDATA_SECT_OPEN: { + enum XML_Error result; + if (parser->m_startCdataSectionHandler) + parser->m_startCdataSectionHandler(parser->m_handlerArg); + /* BEGIN disabled code */ + /* Suppose you doing a transformation on a document that involves + changing only the character data. You set up a defaultHandler + and a characterDataHandler. The defaultHandler simply copies + characters through. The characterDataHandler does the + transformation and writes the characters out escaping them as + necessary. This case will fail to work if we leave out the + following two lines (because & and < inside CDATA sections will + be incorrectly escaped). + + However, now we have a start/endCdataSectionHandler, so it seems + easier to let the user deal with this. + */ + else if (0 && parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, + 0); + /* END disabled code */ + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + result + = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); + if (result != XML_ERROR_NONE) + return result; + else if (! next) { + parser->m_processor = cdataSectionProcessor; + return result; } - break; + } break; case XML_TOK_TRAILING_RSQB: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } - if (characterDataHandler) { + if (parser->m_characterDataHandler) { if (MUST_CONVERT(enc, s)) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); - characterDataHandler(handlerArg, dataBuf, - (int)(dataPtr - (ICHAR *)dataBuf)); - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (int)((XML_Char *)end - (XML_Char *)s)); - } - else if (defaultHandler) + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); + parser->m_characterDataHandler( + parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + } else + parser->m_characterDataHandler( + parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for XML_SUSPENDED, XML_FINISHED? @@ -2627,63 +3152,90 @@ doContent(XML_Parser parser, *eventPP = end; return XML_ERROR_NO_ELEMENTS; } - if (tagLevel != startTagLevel) { + if (parser->m_tagLevel != startTagLevel) { *eventPP = end; return XML_ERROR_ASYNC_ENTITY; } *nextPtr = end; return XML_ERROR_NONE; - case XML_TOK_DATA_CHARS: - { - XML_CharacterDataHandler charDataHandler = characterDataHandler; - if (charDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = s; - charDataHandler(handlerArg, dataBuf, - (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) - break; - *eventPP = s; - } + case XML_TOK_DATA_CHARS: { + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); + *eventEndPP = s; + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) + break; + *eventPP = s; } - else - charDataHandler(handlerArg, - (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; + } else + charDataHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + } break; case XML_TOK_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) + if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - if (!reportComment(parser, enc, s, next)) + if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; default: - if (defaultHandler) + /* All of the tokens produced by XmlContentTok() have their own + * explicit cases, so this default is not strictly necessary. + * However it is a useful safety net, so we retain the code and + * simply exclude it from the coverage tests. + * + * LCOV_EXCL_START + */ + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; - switch (ps_parsing) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; - default: ; + default:; } } /* not reached */ } +/* This function does not call free() on the allocated memory, merely + * moving it to the parser's m_freeBindingList where it can be freed or + * reused as appropriate. + */ +static void +freeBindings(XML_Parser parser, BINDING *bindings) { + while (bindings) { + BINDING *b = bindings; + + /* m_startNamespaceDeclHandler will have been called for this + * binding in addBindings(), so call the end handler now. + */ + if (parser->m_endNamespaceDeclHandler) + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); + + bindings = bindings->nextTagBinding; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } +} + /* Precondition: all arguments must be non-NULL; Purpose: - normalize attributes @@ -2695,14 +3247,13 @@ doContent(XML_Parser parser, - generate namespace aware element name (URI, prefix) */ static enum XML_Error -storeAtts(XML_Parser parser, const ENCODING *enc, - const char *attStr, TAG_NAME *tagNamePtr, - BINDING **bindingsPtr) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + TAG_NAME *tagNamePtr, BINDING **bindingsPtr, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ELEMENT_TYPE *elementType; int nDefaultAtts; - const XML_Char **appAtts; /* the attribute list for the application */ + const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; int prefixLen; int i; @@ -2713,75 +3264,120 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const XML_Char *localPart; /* lookup the element type name */ - elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0); - if (!elementType) { + elementType + = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); + if (! elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); - if (!name) + if (! name) return XML_ERROR_NO_MEMORY; elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); - if (!elementType) + if (! elementType) return XML_ERROR_NO_MEMORY; - if (ns && !setElementTypePrefix(parser, elementType)) + if (parser->m_ns && ! setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; } nDefaultAtts = elementType->nDefaultAtts; /* get the attributes from the tokenizer */ - n = XmlGetAttributes(enc, attStr, attsSize, atts); - if (n + nDefaultAtts > attsSize) { - int oldAttsSize = attsSize; + n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); + + /* Detect and prevent integer overflow */ + if (n > INT_MAX - nDefaultAtts) { + return XML_ERROR_NO_MEMORY; + } + + if (n + nDefaultAtts > parser->m_attsSize) { + int oldAttsSize = parser->m_attsSize; ATTRIBUTE *temp; #ifdef XML_ATTR_INFO XML_AttrInfo *temp2; #endif - attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (temp == NULL) + + /* Detect and prevent integer overflow */ + if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) + || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { return XML_ERROR_NO_MEMORY; - atts = temp; + } + + parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } +#endif + + temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); + if (temp == NULL) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } + parser->m_atts = temp; #ifdef XML_ATTR_INFO - temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); - if (temp2 == NULL) + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +# if UINT_MAX >= SIZE_MAX + if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } +# endif + + temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, + parser->m_attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) { + parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; - attInfo = temp2; + } + parser->m_attInfo = temp2; #endif if (n > oldAttsSize) - XmlGetAttributes(enc, attStr, n, atts); + XmlGetAttributes(enc, attStr, n, parser->m_atts); } - appAtts = (const XML_Char **)atts; + appAtts = (const XML_Char **)parser->m_atts; for (i = 0; i < n; i++) { - ATTRIBUTE *currAtt = &atts[i]; + ATTRIBUTE *currAtt = &parser->m_atts[i]; #ifdef XML_ATTR_INFO - XML_AttrInfo *currAttInfo = &attInfo[i]; + XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; #endif /* add the name and value to the attribute list */ - ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name, - currAtt->name - + XmlNameLength(enc, currAtt->name)); - if (!attId) + ATTRIBUTE_ID *attId + = getAttributeId(parser, enc, currAtt->name, + currAtt->name + XmlNameLength(enc, currAtt->name)); + if (! attId) return XML_ERROR_NO_MEMORY; #ifdef XML_ATTR_INFO - currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name); - currAttInfo->nameEnd = currAttInfo->nameStart + - XmlNameLength(enc, currAtt->name); - currAttInfo->valueStart = parseEndByteIndex - - (parseEndPtr - currAtt->valuePtr); - currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd); + currAttInfo->nameStart + = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); + currAttInfo->nameEnd + = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); + currAttInfo->valueStart = parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - currAtt->valuePtr); + currAttInfo->valueEnd = parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - currAtt->valueEnd); #endif /* Detect duplicate attributes by their QNames. This does not work when namespace processing is turned on and different prefixes for the same namespace are used. For this case we have a check further down. */ if ((attId->name)[-1]) { - if (enc == encoding) - eventPtr = atts[i].name; + if (enc == parser->m_encoding) + parser->m_eventPtr = parser->m_atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; - if (!atts[i].normalized) { + if (! parser->m_atts[i].normalized) { enum XML_Error result; XML_Bool isCdata = XML_TRUE; @@ -2797,21 +3393,21 @@ storeAtts(XML_Parser parser, const ENCODING *enc, } /* normalize the attribute value */ - result = storeAttributeValue(parser, enc, isCdata, - atts[i].valuePtr, atts[i].valueEnd, - &tempPool); + result = storeAttributeValue( + parser, enc, isCdata, parser->m_atts[i].valuePtr, + parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) return result; - appAtts[attIndex] = poolStart(&tempPool); - poolFinish(&tempPool); - } - else { + appAtts[attIndex] = poolStart(&parser->m_tempPool); + poolFinish(&parser->m_tempPool); + } else { /* the value did not need normalizing */ - appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, - atts[i].valueEnd); + appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, + parser->m_atts[i].valuePtr, + parser->m_atts[i].valueEnd); if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); } /* handle prefixed attribute names */ if (attId->prefix) { @@ -2822,49 +3418,44 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (result) return result; --attIndex; - } - else { + } else { /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; } - } - else + } else attIndex++; } /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ - nSpecifiedAtts = attIndex; + parser->m_nSpecifiedAtts = attIndex; if (elementType->idAtt && (elementType->idAtt->name)[-1]) { for (i = 0; i < attIndex; i += 2) if (appAtts[i] == elementType->idAtt->name) { - idAttIndex = i; + parser->m_idAttIndex = i; break; } - } - else - idAttIndex = -1; + } else + parser->m_idAttIndex = -1; /* do attribute defaulting */ for (i = 0; i < nDefaultAtts; i++) { const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i; - if (!(da->id->name)[-1] && da->value) { + if (! (da->id->name)[-1] && da->value) { if (da->id->prefix) { if (da->id->xmlns) { enum XML_Error result = addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr); if (result) return result; - } - else { + } else { (da->id->name)[-1] = 2; nPrefixes++; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } - } - else { + } else { (da->id->name)[-1] = 1; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; @@ -2877,108 +3468,169 @@ storeAtts(XML_Parser parser, const ENCODING *enc, and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - int j; /* hash table index */ - unsigned long version = nsAttsVersion; - int nsAttsSize = (int)1 << nsAttsPower; + int j; /* hash table index */ + unsigned long version = parser->m_nsAttsVersion; + + /* Detect and prevent invalid shift */ + if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { + return XML_ERROR_NO_MEMORY; + } + + unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; + unsigned char oldNsAttsPower = parser->m_nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ - if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ + if ((nPrefixes << 1) + >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ NS_ATT *temp; /* hash table size must also be a power of 2 and >= 8 */ - while (nPrefixes >> nsAttsPower++); - if (nsAttsPower < 3) - nsAttsPower = 3; - nsAttsSize = (int)1 << nsAttsPower; - temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); - if (!temp) + while (nPrefixes >> parser->m_nsAttsPower++) + ; + if (parser->m_nsAttsPower < 3) + parser->m_nsAttsPower = 3; + + /* Detect and prevent invalid shift */ + if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; + return XML_ERROR_NO_MEMORY; + } + + nsAttsSize = 1u << parser->m_nsAttsPower; + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; - nsAtts = temp; - version = 0; /* force re-initialization of nsAtts hash table */ + } +#endif + + temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, + nsAttsSize * sizeof(NS_ATT)); + if (! temp) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; + return XML_ERROR_NO_MEMORY; + } + parser->m_nsAtts = temp; + version = 0; /* force re-initialization of m_nsAtts hash table */ } - /* using a version flag saves us from initializing nsAtts every time */ - if (!version) { /* initialize version flags when version wraps around */ + /* using a version flag saves us from initializing m_nsAtts every time */ + if (! version) { /* initialize version flags when version wraps around */ version = INIT_ATTS_VERSION; - for (j = nsAttsSize; j != 0; ) - nsAtts[--j].version = version; + for (j = nsAttsSize; j != 0;) + parser->m_nsAtts[--j].version = version; } - nsAttsVersion = --version; + parser->m_nsAttsVersion = --version; /* expand prefixed names and check for duplicates */ for (; i < attIndex; i += 2) { const XML_Char *s = appAtts[i]; - if (s[-1] == 2) { /* prefixed */ + if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; const BINDING *b; - unsigned long uriHash = hash_secret_salt; - ((XML_Char *)s)[-1] = 0; /* clear flag */ + unsigned long uriHash; + struct siphash sip_state; + struct sipkey sip_key; + + copy_salt_to_sipkey(parser, &sip_key); + sip24_init(&sip_state, &sip_key); + + ((XML_Char *)s)[-1] = 0; /* clear flag */ id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + if (! id || ! id->prefix) { + /* This code is walking through the appAtts array, dealing + * with (in this case) a prefixed attribute name. To be in + * the array, the attribute must have already been bound, so + * has to have passed through the hash table lookup once + * already. That implies that an entry for it already + * exists, so the lookup above will return a pointer to + * already allocated memory. There is no opportunaity for + * the allocator to fail, so the condition above cannot be + * fulfilled. + * + * Since it is difficult to be certain that the above + * analysis is complete, we retain the test and merely + * remove the code from coverage tests. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } b = id->prefix->binding; - if (!b) + if (! b) return XML_ERROR_UNBOUND_PREFIX; - /* as we expand the name we also calculate its hash value */ for (j = 0; j < b->uriLen; j++) { const XML_Char c = b->uri[j]; - if (!poolAppendChar(&tempPool, c)) + if (! poolAppendChar(&parser->m_tempPool, c)) return XML_ERROR_NO_MEMORY; - uriHash = CHAR_HASH(uriHash, c); } + + sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); + while (*s++ != XML_T(ASCII_COLON)) ; - do { /* copies null terminator */ - const XML_Char c = *s; - if (!poolAppendChar(&tempPool, *s)) + + sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); + + do { /* copies null terminator */ + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; - uriHash = CHAR_HASH(uriHash, c); } while (*s++); + uriHash = (unsigned long)sip24_final(&sip_state); + { /* Check hash table for duplicate of expanded name (uriName). Derived from code in lookup(parser, HASH_TABLE *table, ...). */ unsigned char step = 0; unsigned long mask = nsAttsSize - 1; - j = uriHash & mask; /* index into hash table */ - while (nsAtts[j].version == version) { + j = uriHash & mask; /* index into hash table */ + while (parser->m_nsAtts[j].version == version) { /* for speed we compare stored hash values first */ - if (uriHash == nsAtts[j].hash) { - const XML_Char *s1 = poolStart(&tempPool); - const XML_Char *s2 = nsAtts[j].uriName; + if (uriHash == parser->m_nsAtts[j].hash) { + const XML_Char *s1 = poolStart(&parser->m_tempPool); + const XML_Char *s2 = parser->m_nsAtts[j].uriName; /* s1 is null terminated, but not s2 */ - for (; *s1 == *s2 && *s1 != 0; s1++, s2++); + for (; *s1 == *s2 && *s1 != 0; s1++, s2++) + ; if (*s1 == 0) return XML_ERROR_DUPLICATE_ATTRIBUTE; } - if (!step) - step = PROBE_STEP(uriHash, mask, nsAttsPower); + if (! step) + step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); j < step ? (j += nsAttsSize - step) : (j -= step); } } - if (ns_triplets) { /* append namespace separator and prefix */ - tempPool.ptr[-1] = namespaceSeparator; + if (parser->m_ns_triplets) { /* append namespace separator and prefix */ + parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; s = b->prefix->name; do { - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); } /* store expanded name in attribute list */ - s = poolStart(&tempPool); - poolFinish(&tempPool); + s = poolStart(&parser->m_tempPool); + poolFinish(&parser->m_tempPool); appAtts[i] = s; /* fill empty slot with new version, uriName and hash value */ - nsAtts[j].version = version; - nsAtts[j].hash = uriHash; - nsAtts[j].uriName = s; + parser->m_nsAtts[j].version = version; + parser->m_nsAtts[j].hash = uriHash; + parser->m_nsAtts[j].uriName = s; - if (!--nPrefixes) { + if (! --nPrefixes) { i += 2; break; } - } - else /* not prefixed */ - ((XML_Char *)s)[-1] = 0; /* clear flag */ + } else /* not prefixed */ + ((XML_Char *)s)[-1] = 0; /* clear flag */ } } /* clear flags for the remaining attributes */ @@ -2987,88 +3639,219 @@ storeAtts(XML_Parser parser, const ENCODING *enc, for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; - if (!ns) + if (! parser->m_ns) return XML_ERROR_NONE; /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; - if (!binding) + if (! binding) return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; while (*localPart++ != XML_T(ASCII_COLON)) ; - } - else if (dtd->defaultPrefix.binding) { + } else if (dtd->defaultPrefix.binding) { binding = dtd->defaultPrefix.binding; localPart = tagNamePtr->str; - } - else + } else return XML_ERROR_NONE; prefixLen = 0; - if (ns_triplets && binding->prefix->name) { + if (parser->m_ns_triplets && binding->prefix->name) { for (; binding->prefix->name[prefixLen++];) - ; /* prefixLen includes null terminator */ + ; /* prefixLen includes null terminator */ } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; tagNamePtr->prefix = binding->prefix->name; tagNamePtr->prefixLen = prefixLen; for (i = 0; localPart[i++];) - ; /* i includes null terminator */ + ; /* i includes null terminator */ + + /* Detect and prevent integer overflow */ + if (binding->uriLen > INT_MAX - prefixLen + || i > INT_MAX - (binding->uriLen + prefixLen)) { + return XML_ERROR_NO_MEMORY; + } + n = i + binding->uriLen + prefixLen; if (n > binding->uriAlloc) { TAG *p; - uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); - if (!uri) + + /* Detect and prevent integer overflow */ + if (n > INT_MAX - EXPAND_SPARE) { + return XML_ERROR_NO_MEMORY; + } + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + if (! uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); - for (p = tagStack; p; p = p->parent) + for (p = parser->m_tagStack; p; p = p->parent) if (p->name.str == binding->uri) p->name.str = uri; - FREE(binding->uri); + FREE(parser, binding->uri); binding->uri = uri; } - /* if namespaceSeparator != '\0' then uri includes it already */ + /* if m_namespaceSeparator != '\0' then uri includes it already */ uri = binding->uri + binding->uriLen; memcpy(uri, localPart, i * sizeof(XML_Char)); /* we always have a namespace separator between localPart and prefix */ if (prefixLen) { uri += i - 1; - *uri = namespaceSeparator; /* replace null terminator */ + *uri = parser->m_namespaceSeparator; /* replace null terminator */ memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); } tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ANBF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: + return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). */ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, - const XML_Char *uri, BINDING **bindingsPtr) -{ - static const XML_Char xmlNamespace[] = { - ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, - ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, - ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, - ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, - ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, - ASCII_e, '\0' - }; - static const int xmlLen = - (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1; - static const XML_Char xmlnsNamespace[] = { - ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, - ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, - ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0, - ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s, - ASCII_SLASH, '\0' - }; - static const int xmlnsLen = - (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1; + const XML_Char *uri, BINDING **bindingsPtr) { + // "http://www.w3.org/XML/1998/namespace" + static const XML_Char xmlNamespace[] + = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, + ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, + ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, + ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, + ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, + ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, + ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, + ASCII_e, '\0'}; + static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + // "http://www.w3.org/2000/xmlns/" + static const XML_Char xmlnsNamespace[] + = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, + ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, + ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, + ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; + static const int xmlnsLen + = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; XML_Bool mustBeXML = XML_FALSE; XML_Bool isXML = XML_TRUE; @@ -3081,14 +3864,11 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (*uri == XML_T('\0') && prefix->name) return XML_ERROR_UNDECLARING_PREFIX; - if (prefix->name - && prefix->name[0] == XML_T(ASCII_x) + if (prefix->name && prefix->name[0] == XML_T(ASCII_x) && prefix->name[1] == XML_T(ASCII_m) && prefix->name[2] == XML_T(ASCII_l)) { - /* Not allowed to bind xmlns */ - if (prefix->name[3] == XML_T(ASCII_n) - && prefix->name[4] == XML_T(ASCII_s) + if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) && prefix->name[5] == XML_T('\0')) return XML_ERROR_RESERVED_PREFIX_XMLNS; @@ -3100,9 +3880,32 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) isXML = XML_FALSE; - if (!mustBeXML && isXMLNS + if (! mustBeXML && isXMLNS && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; + + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure, that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespaces URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! is_rfc3986_uri_char(uri[len])) { + return XML_ERROR_SYNTAX; + } } isXML = isXML && len == xmlLen; isXMLNS = isXMLNS && len == xmlnsLen; @@ -3114,49 +3917,79 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (isXMLNS) return XML_ERROR_RESERVED_NAMESPACE_URI; - if (namespaceSeparator) + if (parser->m_namespaceSeparator) len++; - if (freeBindingList) { - b = freeBindingList; + if (parser->m_freeBindingList) { + b = parser->m_freeBindingList; if (len > b->uriAlloc) { - XML_Char *temp = (XML_Char *)REALLOC(b->uri, - sizeof(XML_Char) * (len + EXPAND_SPARE)); + /* Detect and prevent integer overflow */ + if (len > INT_MAX - EXPAND_SPARE) { + return XML_ERROR_NO_MEMORY; + } + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + XML_Char *temp = (XML_Char *)REALLOC( + parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; b->uri = temp; b->uriAlloc = len + EXPAND_SPARE; } - freeBindingList = b->nextTagBinding; - } - else { - b = (BINDING *)MALLOC(sizeof(BINDING)); - if (!b) + parser->m_freeBindingList = b->nextTagBinding; + } else { + b = (BINDING *)MALLOC(parser, sizeof(BINDING)); + if (! b) return XML_ERROR_NO_MEMORY; - b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); - if (!b->uri) { - FREE(b); + + /* Detect and prevent integer overflow */ + if (len > INT_MAX - EXPAND_SPARE) { + return XML_ERROR_NO_MEMORY; + } + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + b->uri + = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (! b->uri) { + FREE(parser, b); return XML_ERROR_NO_MEMORY; } b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); - if (namespaceSeparator) - b->uri[len - 1] = namespaceSeparator; + if (parser->m_namespaceSeparator) + b->uri[len - 1] = parser->m_namespaceSeparator; b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; /* NULL binding when default namespace undeclared */ - if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix) + if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; /* if attId == NULL then we are not starting a namespace scope */ - if (attId && startNamespaceDeclHandler) - startNamespaceDeclHandler(handlerArg, prefix->name, - prefix->binding ? uri : 0); + if (attId && parser->m_startNamespaceDeclHandler) + parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, + prefix->binding ? uri : 0); return XML_ERROR_NONE; } @@ -3164,22 +3997,19 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, the whole file is parsed with one call. */ static enum XML_Error PTRCALL -cdataSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doCdataSection(parser, encoding, &start, end, - endPtr, (XML_Bool)!ps_finalBuffer); +cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doCdataSection( + parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result != XML_ERROR_NONE) return result; if (start) { - if (parentParser) { /* we are parsing an external entity */ - processor = externalEntityContentProcessor; + if (parser->m_parentParser) { /* we are parsing an external entity */ + parser->m_processor = externalEntityContentProcessor; return externalEntityContentProcessor(parser, start, end, endPtr); - } - else { - processor = contentProcessor; + } else { + parser->m_processor = contentProcessor; return contentProcessor(parser, start, end, endPtr); } } @@ -3190,82 +4020,82 @@ cdataSectionProcessor(XML_Parser parser, the section is not yet closed. */ static enum XML_Error -doCdataSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr, - XML_Bool haveMore) -{ +doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore, + enum XML_Account account) { const char *s = *startPtr; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; *eventPP = s; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; *startPtr = NULL; for (;;) { - const char *next; + const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#else + UNUSED_P(account); +#endif *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: - if (endCdataSectionHandler) - endCdataSectionHandler(handlerArg); -#if 0 + if (parser->m_endCdataSectionHandler) + parser->m_endCdataSectionHandler(parser->m_handlerArg); + /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); -#endif - else if (defaultHandler) + else if (0 && parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, + 0); + /* END disabled code */ + else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; - if (ps_parsing == XML_FINISHED) + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; else return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; - case XML_TOK_DATA_CHARS: - { - XML_CharacterDataHandler charDataHandler = characterDataHandler; - if (charDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = next; - charDataHandler(handlerArg, dataBuf, - (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) - break; - *eventPP = s; - } + case XML_TOK_DATA_CHARS: { + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); + *eventEndPP = next; + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) + break; + *eventPP = s; } - else - charDataHandler(handlerArg, - (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; + } else + charDataHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + } break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; @@ -3283,18 +4113,26 @@ doCdataSection(XML_Parser parser, } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: + /* Every token returned by XmlCdataSectionTok() has its own + * explicit case, so this default case will never be executed. + * We retain it as a safety net and exclude it from the coverage + * statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; - switch (ps_parsing) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; - default: ; + default:; } } /* not reached */ @@ -3306,17 +4144,15 @@ doCdataSection(XML_Parser parser, the whole file is parsed with one call. */ static enum XML_Error PTRCALL -ignoreSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, - endPtr, (XML_Bool)!ps_finalBuffer); +ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result + = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer); if (result != XML_ERROR_NONE) return result; if (start) { - processor = prologProcessor; + parser->m_processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); } return result; @@ -3326,38 +4162,51 @@ ignoreSectionProcessor(XML_Parser parser, if the section is not yet closed. */ static enum XML_Error -doIgnoreSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr, - XML_Bool haveMore) -{ - const char *next; +doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore) { + const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok; const char *s = *startPtr; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; *eventPP = s; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + eventEndPP = &parser->m_eventEndPtr; + } else { + /* It's not entirely clear, but it seems the following two lines + * of code cannot be executed. The only occasions on which 'enc' + * is not 'encoding' are when this function is called + * from the internal entity processing, and IGNORE sections are an + * error in internal entities. + * + * Since it really isn't clear that this is true, we keep the code + * and just remove it from our coverage tests. + * + * LCOV_EXCL_START + */ + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: - if (defaultHandler) + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; - if (ps_parsing == XML_FINISHED) + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; else return XML_ERROR_NONE; @@ -3378,8 +4227,16 @@ doIgnoreSection(XML_Parser parser, } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: + /* All of the tokens that XmlIgnoreSectionTok() returns have + * explicit cases to handle them, so this default case is never + * executed. We keep it as a safety net anyway, and remove it + * from our test coverage statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } /* not reached */ } @@ -3387,127 +4244,130 @@ doIgnoreSection(XML_Parser parser, #endif /* XML_DTD */ static enum XML_Error -initializeEncoding(XML_Parser parser) -{ +initializeEncoding(XML_Parser parser) { const char *s; #ifdef XML_UNICODE char encodingBuf[128]; - if (!protocolEncodingName) + /* See comments about `protocolEncodingName` in parserInit() */ + if (! parser->m_protocolEncodingName) s = NULL; else { int i; - for (i = 0; protocolEncodingName[i]; i++) { + for (i = 0; parser->m_protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 - || (protocolEncodingName[i] & ~0x7f) != 0) { + || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { encodingBuf[0] = '\0'; break; } - encodingBuf[i] = (char)protocolEncodingName[i]; + encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; } encodingBuf[i] = '\0'; s = encodingBuf; } #else - s = protocolEncodingName; + s = parser->m_protocolEncodingName; #endif - if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s)) + if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( + &parser->m_initEncoding, &parser->m_encoding, s)) return XML_ERROR_NONE; - return handleUnknownEncoding(parser, protocolEncodingName); + return handleUnknownEncoding(parser, parser->m_protocolEncodingName); } static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next) -{ +processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, + const char *next) { const char *encodingName = NULL; const XML_Char *storedEncName = NULL; const ENCODING *newEncoding = NULL; const char *version = NULL; - const char *versionend; + const char *versionend = NULL; const XML_Char *storedversion = NULL; int standalone = -1; - if (!(ns - ? XmlParseXmlDeclNS - : XmlParseXmlDecl)(isGeneralTextEntity, - encoding, - s, - next, - &eventPtr, - &version, - &versionend, - &encodingName, - &newEncoding, - &standalone)) { + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + + if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( + isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, + &version, &versionend, &encodingName, &newEncoding, &standalone)) { if (isGeneralTextEntity) return XML_ERROR_TEXT_DECL; else return XML_ERROR_XML_DECL; } - if (!isGeneralTextEntity && standalone == 1) { - _dtd->standalone = XML_TRUE; + if (! isGeneralTextEntity && standalone == 1) { + parser->m_dtd->standalone = XML_TRUE; #ifdef XML_DTD - if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + if (parser->m_paramEntityParsing + == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } - if (xmlDeclHandler) { + if (parser->m_xmlDeclHandler) { if (encodingName != NULL) { - storedEncName = poolStoreString(&temp2Pool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (!storedEncName) - return XML_ERROR_NO_MEMORY; - poolFinish(&temp2Pool); + storedEncName = poolStoreString( + &parser->m_temp2Pool, parser->m_encoding, encodingName, + encodingName + XmlNameLength(parser->m_encoding, encodingName)); + if (! storedEncName) + return XML_ERROR_NO_MEMORY; + poolFinish(&parser->m_temp2Pool); } if (version) { - storedversion = poolStoreString(&temp2Pool, - encoding, - version, - versionend - encoding->minBytesPerChar); - if (!storedversion) + storedversion + = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, + versionend - parser->m_encoding->minBytesPerChar); + if (! storedversion) return XML_ERROR_NO_MEMORY; } - xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); - } - else if (defaultHandler) - reportDefault(parser, encoding, s, next); - if (protocolEncodingName == NULL) { + parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, + standalone); + } else if (parser->m_defaultHandler) + reportDefault(parser, parser->m_encoding, s, next); + if (parser->m_protocolEncodingName == NULL) { if (newEncoding) { - if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { - eventPtr = encodingName; + /* Check that the specified encoding does not conflict with what + * the parser has already deduced. Do we have the same number + * of bytes in the smallest representation of a character? If + * this is UTF-16, is it the same endianness? + */ + if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar + || (newEncoding->minBytesPerChar == 2 + && newEncoding != parser->m_encoding)) { + parser->m_eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } - encoding = newEncoding; - } - else if (encodingName) { + parser->m_encoding = newEncoding; + } else if (encodingName) { enum XML_Error result; - if (!storedEncName) { + if (! storedEncName) { storedEncName = poolStoreString( - &temp2Pool, encoding, encodingName, - encodingName + XmlNameLength(encoding, encodingName)); - if (!storedEncName) + &parser->m_temp2Pool, parser->m_encoding, encodingName, + encodingName + XmlNameLength(parser->m_encoding, encodingName)); + if (! storedEncName) return XML_ERROR_NO_MEMORY; } result = handleUnknownEncoding(parser, storedEncName); - poolClear(&temp2Pool); + poolClear(&parser->m_temp2Pool); if (result == XML_ERROR_UNKNOWN_ENCODING) - eventPtr = encodingName; + parser->m_eventPtr = encodingName; return result; } } if (storedEncName || storedversion) - poolClear(&temp2Pool); + poolClear(&parser->m_temp2Pool); return XML_ERROR_NONE; } static enum XML_Error -handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) -{ - if (unknownEncodingHandler) { +handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser->m_unknownEncodingHandler) { XML_Encoding info; int i; for (i = 0; i < 256; i++) @@ -3515,25 +4375,21 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) info.convert = NULL; info.data = NULL; info.release = NULL; - if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, - &info)) { + if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, + encodingName, &info)) { ENCODING *enc; - unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding()); - if (!unknownEncodingMem) { + parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); + if (! parser->m_unknownEncodingMem) { if (info.release) info.release(info.data); return XML_ERROR_NO_MEMORY; } - enc = (ns - ? XmlInitUnknownEncodingNS - : XmlInitUnknownEncoding)(unknownEncodingMem, - info.map, - info.convert, - info.data); + enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( + parser->m_unknownEncodingMem, info.map, info.convert, info.data); if (enc) { - unknownEncodingData = info.data; - unknownEncodingRelease = info.release; - encoding = enc; + parser->m_unknownEncodingData = info.data; + parser->m_unknownEncodingRelease = info.release; + parser->m_encoding = enc; return XML_ERROR_NONE; } } @@ -3544,60 +4400,54 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) } static enum XML_Error PTRCALL -prologInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +prologInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; - processor = prologProcessor; + parser->m_processor = prologProcessor; return prologProcessor(parser, s, end, nextPtr); } #ifdef XML_DTD static enum XML_Error PTRCALL -externalParEntInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; /* we know now that XML_Parse(Buffer) has been called, so we consider the external parameter entity read */ - _dtd->paramEntityRead = XML_TRUE; + parser->m_dtd->paramEntityRead = XML_TRUE; - if (prologState.inEntityValue) { - processor = entityValueInitProcessor; + if (parser->m_prologState.inEntityValue) { + parser->m_processor = entityValueInitProcessor; return entityValueInitProcessor(parser, s, end, nextPtr); - } - else { - processor = externalParEntProcessor; + } else { + parser->m_processor = externalParEntProcessor; return externalParEntProcessor(parser, s, end, nextPtr); } } static enum XML_Error PTRCALL -entityValueInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { int tok; const char *start = s; const char *next = start; - eventPtr = start; + parser->m_eventPtr = start; for (;;) { - tok = XmlPrologTok(encoding, start, end, &next); - eventEndPtr = next; + tok = XmlPrologTok(parser->m_encoding, start, end, &next); + /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: + - storeEntityValue + - processXmlDecl + */ + parser->m_eventEndPtr = next; if (tok <= 0) { - if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3608,29 +4458,28 @@ entityValueInitProcessor(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_NONE: /* start == end */ + case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, encoding, s, end); - } - else if (tok == XML_TOK_XML_DECL) { + return storeEntityValue(parser, parser->m_encoding, s, end, + XML_ACCOUNT_DIRECT); + } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; - switch (ps_parsing) { - case XML_SUSPENDED: - *nextPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: + /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For + * that to happen, a parameter entity parsing handler must have attempted + * to suspend the parser, which fails and raises an error. The parser can + * be aborted, but can't be suspended. + */ + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; - default: - *nextPtr = next; - } + *nextPtr = next; /* stop scanning for text declaration - we found one */ - processor = entityValueProcessor; + parser->m_processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); } /* If we are at the end of the buffer, this would cause XmlPrologTok to @@ -3640,27 +4489,41 @@ entityValueInitProcessor(XML_Parser parser, then, when this routine is entered the next time, XmlPrologTok will return XML_TOK_INVALID, since the BOM is still in the buffer */ - else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) { + else if (tok == XML_TOK_BOM && next == end + && ! parser->m_parsingStatus.finalBuffer) { +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif + *nextPtr = next; return XML_ERROR_NONE; } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with + "m_eventPtr = start; } } static enum XML_Error PTRCALL -externalParEntProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +externalParEntProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { const char *next = s; int tok; - tok = XmlPrologTok(encoding, s, end, &next); + tok = XmlPrologTok(parser->m_encoding, s, end, &next); if (tok <= 0) { - if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3671,40 +4534,48 @@ externalParEntProcessor(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_NONE: /* start == end */ + case XML_TOK_NONE: /* start == end */ default: break; } } /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. However, when parsing an external subset, doProlog will not accept a BOM - as valid, and report a syntax error, so we have to skip the BOM + as valid, and report a syntax error, so we have to skip the BOM, and + account for the BOM bytes. */ else if (tok == XML_TOK_BOM) { + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + s = next; - tok = XmlPrologTok(encoding, s, end, &next); + tok = XmlPrologTok(parser->m_encoding, s, end, &next); } - processor = prologProcessor; - return doProlog(parser, encoding, s, end, tok, next, - nextPtr, (XML_Bool)!ps_finalBuffer); + parser->m_processor = prologProcessor; + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error PTRCALL -entityValueProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +entityValueProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { const char *start = s; const char *next = s; - const ENCODING *enc = encoding; + const ENCODING *enc = parser->m_encoding; int tok; for (;;) { tok = XmlPrologTok(enc, start, end, &next); + /* Note: These bytes are accounted later in: + - storeEntityValue + */ if (tok <= 0) { - if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3715,12 +4586,12 @@ entityValueProcessor(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_NONE: /* start == end */ + case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, enc, s, end); + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); } start = next; } @@ -3729,64 +4600,62 @@ entityValueProcessor(XML_Parser parser, #endif /* XML_DTD */ static enum XML_Error PTRCALL -prologProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +prologProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { const char *next = s; - int tok = XmlPrologTok(encoding, s, end, &next); - return doProlog(parser, encoding, s, end, tok, next, - nextPtr, (XML_Bool)!ps_finalBuffer); + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error -doProlog(XML_Parser parser, - const ENCODING *enc, - const char *s, - const char *end, - int tok, - const char *next, - const char **nextPtr, - XML_Bool haveMore) -{ +doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + int tok, const char *next, const char **nextPtr, XML_Bool haveMore, + XML_Bool allowClosingDoctype, enum XML_Account account) { #ifdef XML_DTD - static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' }; + static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; #endif /* XML_DTD */ - static const XML_Char atypeCDATA[] = - { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; - static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' }; - static const XML_Char atypeIDREF[] = - { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; - static const XML_Char atypeIDREFS[] = - { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; - static const XML_Char atypeENTITY[] = - { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; - static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N, - ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; - static const XML_Char atypeNMTOKEN[] = { - ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; - static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, - ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; - static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T, - ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' }; - static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' }; - static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' }; + static const XML_Char atypeCDATA[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; + static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; + static const XML_Char atypeIDREF[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; + static const XML_Char atypeIDREFS[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; + static const XML_Char atypeENTITY[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; + static const XML_Char atypeENTITIES[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, + ASCII_I, ASCII_E, ASCII_S, '\0'}; + static const XML_Char atypeNMTOKEN[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; + static const XML_Char atypeNMTOKENS[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, + ASCII_E, ASCII_N, ASCII_S, '\0'}; + static const XML_Char notationPrefix[] + = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, + ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; + static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; + static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; + +#ifndef XML_DTD + UNUSED_P(account); +#endif /* save one level of indirection */ - DTD * const dtd = _dtd; + DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; enum XML_Content_Quant quant; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } for (;;) { @@ -3813,7 +4682,8 @@ doProlog(XML_Parser parser, case XML_TOK_NONE: #ifdef XML_DTD /* for internal PE NOT referenced between declarations */ - if (enc != encoding && !openInternalEntities->betweenDecl) { + if (enc != parser->m_encoding + && ! parser->m_openInternalEntities->betweenDecl) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3821,8 +4691,8 @@ doProlog(XML_Parser parser, complete markup, not only for external PEs, but also for internal PEs if the reference occurs between declarations. */ - if (isParamEntity || enc != encoding) { - if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) + if (parser->m_isParamEntity || enc != parser->m_encoding) { + if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) return XML_ERROR_INCOMPLETE_PE; *nextPtr = s; @@ -3836,142 +4706,161 @@ doProlog(XML_Parser parser, break; } } - role = XmlTokenRole(&prologState, tok, s, next, enc); + role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); +#ifdef XML_DTD switch (role) { - case XML_ROLE_XML_DECL: - { - enum XML_Error result = processXmlDecl(parser, 0, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; - handleDefault = XML_FALSE; - } + case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor + case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl break; + default: + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + } +#endif + switch (role) { + case XML_ROLE_XML_DECL: { + enum XML_Error result = processXmlDecl(parser, 0, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = parser->m_encoding; + handleDefault = XML_FALSE; + } break; case XML_ROLE_DOCTYPE_NAME: - if (startDoctypeDeclHandler) { - doctypeName = poolStoreString(&tempPool, enc, s, next); - if (!doctypeName) + if (parser->m_startDoctypeDeclHandler) { + parser->m_doctypeName + = poolStoreString(&parser->m_tempPool, enc, s, next); + if (! parser->m_doctypeName) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - doctypePubid = NULL; + poolFinish(&parser->m_tempPool); + parser->m_doctypePubid = NULL; handleDefault = XML_FALSE; } - doctypeSysid = NULL; /* always initialize to NULL */ + parser->m_doctypeSysid = NULL; /* always initialize to NULL */ break; case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: - if (startDoctypeDeclHandler) { - startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, - doctypePubid, 1); - doctypeName = NULL; - poolClear(&tempPool); + if (parser->m_startDoctypeDeclHandler) { + parser->m_startDoctypeDeclHandler( + parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, + parser->m_doctypePubid, 1); + parser->m_doctypeName = NULL; + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } break; #ifdef XML_DTD - case XML_ROLE_TEXT_DECL: - { - enum XML_Error result = processXmlDecl(parser, 1, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; - handleDefault = XML_FALSE; - } - break; + case XML_ROLE_TEXT_DECL: { + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = parser->m_encoding; + handleDefault = XML_FALSE; + } break; #endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: #ifdef XML_DTD - useForeignDTD = XML_FALSE; - declEntity = (ENTITY *)lookup(parser, - &dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!declEntity) + parser->m_useForeignDTD = XML_FALSE; + parser->m_declEntity = (ENTITY *)lookup( + parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; - if (startDoctypeDeclHandler) { + if (parser->m_startDoctypeDeclHandler) { XML_Char *pubId; - if (!XmlIsPublicId(enc, s, next, eventPP)) + if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; - pubId = poolStoreString(&tempPool, enc, + pubId = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!pubId) + if (! pubId) return XML_ERROR_NO_MEMORY; normalizePublicId(pubId); - poolFinish(&tempPool); - doctypePubid = pubId; + poolFinish(&parser->m_tempPool); + parser->m_doctypePubid = pubId; handleDefault = XML_FALSE; goto alreadyChecked; } /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: - if (!XmlIsPublicId(enc, s, next, eventPP)) + if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; alreadyChecked: - if (dtd->keepProcessing && declEntity) { - XML_Char *tem = poolStoreString(&dtd->pool, - enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!tem) + if (dtd->keepProcessing && parser->m_declEntity) { + XML_Char *tem + = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); - declEntity->publicId = tem; + parser->m_declEntity->publicId = tem; poolFinish(&dtd->pool); - if (entityDeclHandler) + /* Don't suppress the default handler if we fell through from + * the XML_ROLE_DOCTYPE_PUBLIC_ID case. + */ + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_CLOSE: - if (doctypeName) { - startDoctypeDeclHandler(handlerArg, doctypeName, - doctypeSysid, doctypePubid, 0); - poolClear(&tempPool); + if (allowClosingDoctype != XML_TRUE) { + /* Must not close doctype from within expanded parameter entities */ + return XML_ERROR_INVALID_TOKEN; + } + + if (parser->m_doctypeName) { + parser->m_startDoctypeDeclHandler( + parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, + parser->m_doctypePubid, 0); + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } - /* doctypeSysid will be non-NULL in the case of a previous - XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler + /* parser->m_doctypeSysid will be non-NULL in the case of a previous + XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler was not set, indicating an external subset */ #ifdef XML_DTD - if (doctypeSysid || useForeignDTD) { + if (parser->m_doctypeSysid || parser->m_useForeignDTD) { XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; - if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(parser, - &dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!entity) - return XML_ERROR_NO_MEMORY; - if (useForeignDTD) - entity->base = curBase; + if (parser->m_paramEntityParsing + && parser->m_externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, + externalSubsetName, sizeof(ENTITY)); + if (! entity) { + /* The external subset name "#" will have already been + * inserted into the hash table at the start of the + * external entity parsing, so no allocation will happen + * and lookup() cannot fail. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } + if (parser->m_useForeignDTD) + entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; if (dtd->paramEntityRead) { - if (!dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; } /* if we didn't read the foreign DTD then this means that there is no external subset and we must reset dtd->hasParamEntityRefs */ - else if (!doctypeSysid) + else if (! parser->m_doctypeSysid) dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } - useForeignDTD = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; } #endif /* XML_DTD */ - if (endDoctypeDeclHandler) { - endDoctypeDeclHandler(handlerArg); + if (parser->m_endDoctypeDeclHandler) { + parser->m_endDoctypeDeclHandler(parser->m_handlerArg); handleDefault = XML_FALSE; } break; @@ -3980,27 +4869,24 @@ doProlog(XML_Parser parser, /* if there is no DOCTYPE declaration then now is the last chance to read the foreign DTD */ - if (useForeignDTD) { + if (parser->m_useForeignDTD) { XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; - if (paramEntityParsing && externalEntityRefHandler) { + if (parser->m_paramEntityParsing + && parser->m_externalEntityRefHandler) { ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!entity) + externalSubsetName, sizeof(ENTITY)); + if (! entity) return XML_ERROR_NO_MEMORY; - entity->base = curBase; + entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; if (dtd->paramEntityRead) { - if (!dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; } /* if we didn't read the foreign DTD then this means that there @@ -4012,156 +4898,154 @@ doProlog(XML_Parser parser, } } #endif /* XML_DTD */ - processor = contentProcessor; + parser->m_processor = contentProcessor; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: - declElementType = getElementType(parser, enc, s, next); - if (!declElementType) + parser->m_declElementType = getElementType(parser, enc, s, next); + if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_NAME: - declAttributeId = getAttributeId(parser, enc, s, next); - if (!declAttributeId) + parser->m_declAttributeId = getAttributeId(parser, enc, s, next); + if (! parser->m_declAttributeId) return XML_ERROR_NO_MEMORY; - declAttributeIsCdata = XML_FALSE; - declAttributeType = NULL; - declAttributeIsId = XML_FALSE; + parser->m_declAttributeIsCdata = XML_FALSE; + parser->m_declAttributeType = NULL; + parser->m_declAttributeIsId = XML_FALSE; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: - declAttributeIsCdata = XML_TRUE; - declAttributeType = atypeCDATA; + parser->m_declAttributeIsCdata = XML_TRUE; + parser->m_declAttributeType = atypeCDATA; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ID: - declAttributeIsId = XML_TRUE; - declAttributeType = atypeID; + parser->m_declAttributeIsId = XML_TRUE; + parser->m_declAttributeType = atypeID; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREF: - declAttributeType = atypeIDREF; + parser->m_declAttributeType = atypeIDREF; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: - declAttributeType = atypeIDREFS; + parser->m_declAttributeType = atypeIDREFS; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: - declAttributeType = atypeENTITY; + parser->m_declAttributeType = atypeENTITY; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: - declAttributeType = atypeENTITIES; + parser->m_declAttributeType = atypeENTITIES; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: - declAttributeType = atypeNMTOKEN; + parser->m_declAttributeType = atypeNMTOKEN; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: - declAttributeType = atypeNMTOKENS; + parser->m_declAttributeType = atypeNMTOKENS; checkAttListDeclHandler: - if (dtd->keepProcessing && attlistDeclHandler) + if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ATTRIBUTE_ENUM_VALUE: case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: - if (dtd->keepProcessing && attlistDeclHandler) { + if (dtd->keepProcessing && parser->m_attlistDeclHandler) { const XML_Char *prefix; - if (declAttributeType) { + if (parser->m_declAttributeType) { prefix = enumValueSep; + } else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix + : enumValueStart); } - else { - prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE - ? notationPrefix - : enumValueStart); - } - if (!poolAppendString(&tempPool, prefix)) + if (! poolAppendString(&parser->m_tempPool, prefix)) return XML_ERROR_NO_MEMORY; - if (!poolAppend(&tempPool, enc, s, next)) + if (! poolAppend(&parser->m_tempPool, enc, s, next)) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; + parser->m_declAttributeType = parser->m_tempPool.start; handleDefault = XML_FALSE; } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { - if (!defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, declAttributeIsId, - 0, parser)) + if (! defineAttribute(parser->m_declElementType, + parser->m_declAttributeId, + parser->m_declAttributeIsCdata, + parser->m_declAttributeIsId, 0, parser)) return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T(ASCII_LPAREN) - || (*declAttributeType == XML_T(ASCII_N) - && declAttributeType[1] == XML_T(ASCII_O))) { + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) + || (*parser->m_declAttributeType == XML_T(ASCII_N) + && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) - || !poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) + || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); + parser->m_declAttributeType = parser->m_tempPool.start; + poolFinish(&parser->m_tempPool); } *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); - poolClear(&tempPool); + parser->m_attlistDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, + parser->m_declAttributeId->name, parser->m_declAttributeType, 0, + role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); handleDefault = XML_FALSE; } } + poolClear(&parser->m_tempPool); break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { const XML_Char *attVal; - enum XML_Error result = - storeAttributeValue(parser, enc, declAttributeIsCdata, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar, - &dtd->pool); + enum XML_Error result = storeAttributeValue( + parser, enc, parser->m_declAttributeIsCdata, + s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, + XML_ACCOUNT_NONE); if (result) return result; attVal = poolStart(&dtd->pool); poolFinish(&dtd->pool); /* ID attributes aren't allowed to have a default */ - if (!defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, XML_FALSE, attVal, parser)) + if (! defineAttribute( + parser->m_declElementType, parser->m_declAttributeId, + parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T(ASCII_LPAREN) - || (*declAttributeType == XML_T(ASCII_N) - && declAttributeType[1] == XML_T(ASCII_O))) { + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) + || (*parser->m_declAttributeType == XML_T(ASCII_N) + && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) - || !poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) + || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); + parser->m_declAttributeType = parser->m_tempPool.start; + poolFinish(&parser->m_tempPool); } *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - attVal, - role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); - poolClear(&tempPool); + parser->m_attlistDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, + parser->m_declAttributeId->name, parser->m_declAttributeType, + attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } } break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { - enum XML_Error result = storeEntityValue(parser, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (declEntity) { - declEntity->textPtr = poolStart(&dtd->entityValuePool); - declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); + enum XML_Error result + = storeEntityValue(parser, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar, XML_ACCOUNT_NONE); + if (parser->m_declEntity) { + parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); + parser->m_declEntity->textLen + = (int)(poolLength(&dtd->entityValuePool)); poolFinish(&dtd->entityValuePool); - if (entityDeclHandler) { + if (parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - declEntity->is_param, - declEntity->textPtr, - declEntity->textLen, - curBase, 0, 0, 0); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); handleDefault = XML_FALSE; } - } - else + } else poolDiscard(&dtd->entityValuePool); if (result != XML_ERROR_NONE) return result; @@ -4169,227 +5053,212 @@ doProlog(XML_Parser parser, break; case XML_ROLE_DOCTYPE_SYSTEM_ID: #ifdef XML_DTD - useForeignDTD = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; - if (startDoctypeDeclHandler) { - doctypeSysid = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (doctypeSysid == NULL) + if (parser->m_startDoctypeDeclHandler) { + parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (parser->m_doctypeSysid == NULL) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } #ifdef XML_DTD else - /* use externalSubsetName to make doctypeSysid non-NULL - for the case where no startDoctypeDeclHandler is set */ - doctypeSysid = externalSubsetName; + /* use externalSubsetName to make parser->m_doctypeSysid non-NULL + for the case where no parser->m_startDoctypeDeclHandler is set */ + parser->m_doctypeSysid = externalSubsetName; #endif /* XML_DTD */ - if (!dtd->standalone + if (! dtd->standalone #ifdef XML_DTD - && !paramEntityParsing + && ! parser->m_paramEntityParsing #endif /* XML_DTD */ - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) + && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; #ifndef XML_DTD break; -#else /* XML_DTD */ - if (!declEntity) { - declEntity = (ENTITY *)lookup(parser, - &dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!declEntity) +#else /* XML_DTD */ + if (! parser->m_declEntity) { + parser->m_declEntity = (ENTITY *)lookup( + parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; - declEntity->publicId = NULL; + parser->m_declEntity->publicId = NULL; } - /* fall through */ #endif /* XML_DTD */ + /* fall through */ case XML_ROLE_ENTITY_SYSTEM_ID: - if (dtd->keepProcessing && declEntity) { - declEntity->systemId = poolStoreString(&dtd->pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!declEntity->systemId) + if (dtd->keepProcessing && parser->m_declEntity) { + parser->m_declEntity->systemId + = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! parser->m_declEntity->systemId) return XML_ERROR_NO_MEMORY; - declEntity->base = curBase; + parser->m_declEntity->base = parser->m_curBase; poolFinish(&dtd->pool); - if (entityDeclHandler) + /* Don't suppress the default handler if we fell through from + * the XML_ROLE_DOCTYPE_SYSTEM_ID case. + */ + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_COMPLETE: - if (dtd->keepProcessing && declEntity && entityDeclHandler) { + if (dtd->keepProcessing && parser->m_declEntity + && parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - declEntity->is_param, - 0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - 0); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, + parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_NOTATION_NAME: - if (dtd->keepProcessing && declEntity) { - declEntity->notation = poolStoreString(&dtd->pool, enc, s, next); - if (!declEntity->notation) + if (dtd->keepProcessing && parser->m_declEntity) { + parser->m_declEntity->notation + = poolStoreString(&dtd->pool, enc, s, next); + if (! parser->m_declEntity->notation) return XML_ERROR_NO_MEMORY; poolFinish(&dtd->pool); - if (unparsedEntityDeclHandler) { + if (parser->m_unparsedEntityDeclHandler) { *eventEndPP = s; - unparsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); + parser->m_unparsedEntityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->base, parser->m_declEntity->systemId, + parser->m_declEntity->publicId, parser->m_declEntity->notation); handleDefault = XML_FALSE; - } - else if (entityDeclHandler) { + } else if (parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - 0,0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, + parser->m_declEntity->base, parser->m_declEntity->systemId, + parser->m_declEntity->publicId, parser->m_declEntity->notation); handleDefault = XML_FALSE; } } break; - case XML_ROLE_GENERAL_ENTITY_NAME: - { - if (XmlPredefinedEntityName(enc, s, next)) { - declEntity = NULL; - break; - } - if (dtd->keepProcessing) { - const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, - sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd->pool); - declEntity = NULL; - } - else { - poolFinish(&dtd->pool); - declEntity->publicId = NULL; - declEntity->is_param = XML_FALSE; - /* if we have a parent parser or are reading an internal parameter - entity, then the entity declaration is not considered "internal" - */ - declEntity->is_internal = !(parentParser || openInternalEntities); - if (entityDeclHandler) - handleDefault = XML_FALSE; - } - } - else { + case XML_ROLE_GENERAL_ENTITY_NAME: { + if (XmlPredefinedEntityName(enc, s, next)) { + parser->m_declEntity = NULL; + break; + } + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (! name) + return XML_ERROR_NO_MEMORY; + parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, + name, sizeof(ENTITY)); + if (! parser->m_declEntity) + return XML_ERROR_NO_MEMORY; + if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); - declEntity = NULL; + parser->m_declEntity = NULL; + } else { + poolFinish(&dtd->pool); + parser->m_declEntity->publicId = NULL; + parser->m_declEntity->is_param = XML_FALSE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + parser->m_declEntity->is_internal + = ! (parser->m_parentParser || parser->m_openInternalEntities); + if (parser->m_entityDeclHandler) + handleDefault = XML_FALSE; } + } else { + poolDiscard(&dtd->pool); + parser->m_declEntity = NULL; } - break; + } break; case XML_ROLE_PARAM_ENTITY_NAME: #ifdef XML_DTD if (dtd->keepProcessing) { const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); - if (!name) + if (! name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, - name, sizeof(ENTITY)); - if (!declEntity) + parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, + name, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { + if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); - declEntity = NULL; - } - else { + parser->m_declEntity = NULL; + } else { poolFinish(&dtd->pool); - declEntity->publicId = NULL; - declEntity->is_param = XML_TRUE; + parser->m_declEntity->publicId = NULL; + parser->m_declEntity->is_param = XML_TRUE; /* if we have a parent parser or are reading an internal parameter entity, then the entity declaration is not considered "internal" */ - declEntity->is_internal = !(parentParser || openInternalEntities); - if (entityDeclHandler) + parser->m_declEntity->is_internal + = ! (parser->m_parentParser || parser->m_openInternalEntities); + if (parser->m_entityDeclHandler) handleDefault = XML_FALSE; } - } - else { + } else { poolDiscard(&dtd->pool); - declEntity = NULL; + parser->m_declEntity = NULL; } -#else /* not XML_DTD */ - declEntity = NULL; +#else /* not XML_DTD */ + parser->m_declEntity = NULL; #endif /* XML_DTD */ break; case XML_ROLE_NOTATION_NAME: - declNotationPublicId = NULL; - declNotationName = NULL; - if (notationDeclHandler) { - declNotationName = poolStoreString(&tempPool, enc, s, next); - if (!declNotationName) + parser->m_declNotationPublicId = NULL; + parser->m_declNotationName = NULL; + if (parser->m_notationDeclHandler) { + parser->m_declNotationName + = poolStoreString(&parser->m_tempPool, enc, s, next); + if (! parser->m_declNotationName) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_PUBLIC_ID: - if (!XmlIsPublicId(enc, s, next, eventPP)) + if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; - if (declNotationName) { /* means notationDeclHandler != NULL */ - XML_Char *tem = poolStoreString(&tempPool, - enc, + if (parser + ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ + XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!tem) + if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); - declNotationPublicId = tem; - poolFinish(&tempPool); + parser->m_declNotationPublicId = tem; + poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_SYSTEM_ID: - if (declNotationName && notationDeclHandler) { - const XML_Char *systemId - = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!systemId) + if (parser->m_declNotationName && parser->m_notationDeclHandler) { + const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! systemId) return XML_ERROR_NO_MEMORY; *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - systemId, - declNotationPublicId); + parser->m_notationDeclHandler( + parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, + systemId, parser->m_declNotationPublicId); handleDefault = XML_FALSE; } - poolClear(&tempPool); + poolClear(&parser->m_tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: - if (declNotationPublicId && notationDeclHandler) { + if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - 0, - declNotationPublicId); + parser->m_notationDeclHandler( + parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, + 0, parser->m_declNotationPublicId); handleDefault = XML_FALSE; } - poolClear(&tempPool); + poolClear(&parser->m_tempPool); break; case XML_ROLE_ERROR: switch (tok) { @@ -4403,90 +5272,110 @@ doProlog(XML_Parser parser, return XML_ERROR_SYNTAX; } #ifdef XML_DTD - case XML_ROLE_IGNORE_SECT: - { - enum XML_Error result; - if (defaultHandler) - reportDefault(parser, enc, s, next); - handleDefault = XML_FALSE; - result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); - if (result != XML_ERROR_NONE) - return result; - else if (!next) { - processor = ignoreSectionProcessor; - return result; - } + case XML_ROLE_IGNORE_SECT: { + enum XML_Error result; + if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + handleDefault = XML_FALSE; + result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (! next) { + parser->m_processor = ignoreSectionProcessor; + return result; } - break; + } break; #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: - if (prologState.level >= groupSize) { - if (groupSize) { - char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); - if (temp == NULL) - return XML_ERROR_NO_MEMORY; - groupConnector = temp; + if (parser->m_prologState.level >= parser->m_groupSize) { + if (parser->m_groupSize) { + { + /* Detect and prevent integer overflow */ + if (parser->m_groupSize > (unsigned int)(-1) / 2u) { + return XML_ERROR_NO_MEMORY; + } + + char *const new_connector = (char *)REALLOC( + parser, parser->m_groupConnector, parser->m_groupSize *= 2); + if (new_connector == NULL) { + parser->m_groupSize /= 2; + return XML_ERROR_NO_MEMORY; + } + parser->m_groupConnector = new_connector; + } + if (dtd->scaffIndex) { - int *temp = (int *)REALLOC(dtd->scaffIndex, - groupSize * sizeof(int)); - if (temp == NULL) + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { + return XML_ERROR_NO_MEMORY; + } +#endif + + int *const new_scaff_index = (int *)REALLOC( + parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); + if (new_scaff_index == NULL) return XML_ERROR_NO_MEMORY; - dtd->scaffIndex = temp; + dtd->scaffIndex = new_scaff_index; } - } - else { - groupConnector = (char *)MALLOC(groupSize = 32); - if (!groupConnector) + } else { + parser->m_groupConnector + = (char *)MALLOC(parser, parser->m_groupSize = 32); + if (! parser->m_groupConnector) { + parser->m_groupSize = 0; return XML_ERROR_NO_MEMORY; + } } } - groupConnector[prologState.level] = 0; + parser->m_groupConnector[parser->m_prologState.level] = 0; if (dtd->in_eldecl) { int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; + assert(dtd->scaffIndex != NULL); dtd->scaffIndex[dtd->scaffLevel] = myindex; dtd->scaffLevel++; dtd->scaffold[myindex].type = XML_CTYPE_SEQ; - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == ASCII_PIPE) + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; - groupConnector[prologState.level] = ASCII_COMMA; - if (dtd->in_eldecl && elementDeclHandler) + parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; + if (dtd->in_eldecl && parser->m_elementDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ASCII_COMMA) + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) return XML_ERROR_SYNTAX; if (dtd->in_eldecl - && !groupConnector[prologState.level] + && ! parser->m_groupConnector[parser->m_prologState.level] && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type - != XML_CTYPE_MIXED) - ) { + != XML_CTYPE_MIXED)) { dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_CHOICE; - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } - groupConnector[prologState.level] = ASCII_PIPE; + parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: dtd->hasParamEntityRefs = XML_TRUE; - if (!paramEntityParsing) + if (! parser->m_paramEntityParsing) dtd->keepProcessing = dtd->standalone; else { const XML_Char *name; ENTITY *entity; - name = poolStoreString(&dtd->pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&dtd->pool); @@ -4494,20 +5383,40 @@ doProlog(XML_Parser parser, if yes, check that the entity exists, and that it is internal, otherwise call the skipped entity handler */ - if (prologState.documentEntity && - (dtd->standalone - ? !openInternalEntities - : !dtd->hasParamEntityRefs)) { - if (!entity) + if (parser->m_prologState.documentEntity + && (dtd->standalone ? ! parser->m_openInternalEntities + : ! dtd->hasParamEntityRefs)) { + if (! entity) return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; - } - else if (!entity) { + else if (! entity->is_internal) { + /* It's hard to exhaustively search the code to be sure, + * but there doesn't seem to be a way of executing the + * following line. There are two cases: + * + * If 'standalone' is false, the DTD must have no + * parameter entities or we wouldn't have passed the outer + * 'if' statement. That means the only entity in the hash + * table is the external subset name "#" which cannot be + * given as a parameter entity name in XML syntax, so the + * lookup must have returned NULL and we don't even reach + * the test for an internal entity. + * + * If 'standalone' is true, it does not seem to be + * possible to create entities taking this code path that + * are not internal entities, so fail the test above. + * + * Because this analysis is very uncertain, the code is + * being left in place and merely removed from the + * coverage test statistics. + */ + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ + } + } else if (! entity) { dtd->keepProcessing = dtd->standalone; /* cannot report skipped entities in declarations */ - if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) { - skippedEntityHandler(handlerArg, name, 1); + if ((role == XML_ROLE_PARAM_ENTITY_REF) + && parser->m_skippedEntityHandler) { + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); handleDefault = XML_FALSE; } break; @@ -4516,50 +5425,49 @@ doProlog(XML_Parser parser, return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; - XML_Bool betweenDecl = - (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); + XML_Bool betweenDecl + = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); result = processInternalEntity(parser, entity, betweenDecl); if (result != XML_ERROR_NONE) return result; handleDefault = XML_FALSE; break; } - if (externalEntityRefHandler) { + if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) { + entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; handleDefault = XML_FALSE; - if (!dtd->paramEntityRead) { + if (! dtd->paramEntityRead) { dtd->keepProcessing = dtd->standalone; break; } - } - else { + } else { dtd->keepProcessing = dtd->standalone; break; } } #endif /* XML_DTD */ - if (!dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; break; - /* Element declaration stuff */ + /* Element declaration stuff */ case XML_ROLE_ELEMENT_NAME: - if (elementDeclHandler) { - declElementType = getElementType(parser, enc, s, next); - if (!declElementType) + if (parser->m_elementDeclHandler) { + parser->m_declElementType = getElementType(parser, enc, s, next); + if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; dtd->scaffLevel = 0; dtd->scaffCount = 0; @@ -4571,19 +5479,20 @@ doProlog(XML_Parser parser, case XML_ROLE_CONTENT_ANY: case XML_ROLE_CONTENT_EMPTY: if (dtd->in_eldecl) { - if (elementDeclHandler) { - XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); - if (!content) + if (parser->m_elementDeclHandler) { + XML_Content *content + = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); + if (! content) return XML_ERROR_NO_MEMORY; content->quant = XML_CQUANT_NONE; content->name = NULL; content->numchildren = 0; content->children = NULL; - content->type = ((role == XML_ROLE_CONTENT_ANY) ? - XML_CTYPE_ANY : - XML_CTYPE_EMPTY); + content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY + : XML_CTYPE_EMPTY); *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, content); + parser->m_elementDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, content); handleDefault = XML_FALSE; } dtd->in_eldecl = XML_FALSE; @@ -4594,7 +5503,7 @@ doProlog(XML_Parser parser, if (dtd->in_eldecl) { dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_MIXED; - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; @@ -4614,24 +5523,30 @@ doProlog(XML_Parser parser, if (dtd->in_eldecl) { ELEMENT_TYPE *el; const XML_Char *name; - int nameLen; - const char *nxt = (quant == XML_CQUANT_NONE - ? next - : next - enc->minBytesPerChar); + size_t nameLen; + const char *nxt + = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar); int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; dtd->scaffold[myindex].type = XML_CTYPE_NAME; dtd->scaffold[myindex].quant = quant; el = getElementType(parser, enc, s, nxt); - if (!el) + if (! el) return XML_ERROR_NO_MEMORY; name = el->name; dtd->scaffold[myindex].name = name; nameLen = 0; - for (; name[nameLen++]; ); - dtd->contentStringLen += nameLen; - if (elementDeclHandler) + for (; name[nameLen++];) + ; + + /* Detect and prevent integer overflow */ + if (nameLen > UINT_MAX - dtd->contentStringLen) { + return XML_ERROR_NO_MEMORY; + } + + dtd->contentStringLen += (unsigned)nameLen; + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; @@ -4649,17 +5564,18 @@ doProlog(XML_Parser parser, quant = XML_CQUANT_PLUS; closeGroup: if (dtd->in_eldecl) { - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; dtd->scaffLevel--; dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; if (dtd->scaffLevel == 0) { - if (!handleDefault) { + if (! handleDefault) { XML_Content *model = build_model(parser); - if (!model) + if (! model) return XML_ERROR_NO_MEMORY; *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, model); + parser->m_elementDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, model); } dtd->in_eldecl = XML_FALSE; dtd->contentStringLen = 0; @@ -4669,12 +5585,12 @@ doProlog(XML_Parser parser, /* End element declaration stuff */ case XML_ROLE_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) + if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; handleDefault = XML_FALSE; break; case XML_ROLE_COMMENT: - if (!reportComment(parser, enc, s, next)) + if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; handleDefault = XML_FALSE; break; @@ -4686,31 +5602,31 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_DOCTYPE_NONE: - if (startDoctypeDeclHandler) + if (parser->m_startDoctypeDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ENTITY_NONE: - if (dtd->keepProcessing && entityDeclHandler) + if (dtd->keepProcessing && parser->m_entityDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_NOTATION_NONE: - if (notationDeclHandler) + if (parser->m_notationDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ATTLIST_NONE: - if (dtd->keepProcessing && attlistDeclHandler) + if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ELEMENT_NONE: - if (elementDeclHandler) + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; break; } /* end of big switch */ - if (handleDefault && defaultHandler) + if (handleDefault && parser->m_defaultHandler) reportDefault(parser, enc, s, next); - switch (ps_parsing) { + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; @@ -4725,23 +5641,27 @@ doProlog(XML_Parser parser, } static enum XML_Error PTRCALL -epilogProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - processor = epilogProcessor; - eventPtr = s; +epilogProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + parser->m_processor = epilogProcessor; + parser->m_eventPtr = s; for (;;) { const char *next = NULL; - int tok = XmlPrologTok(encoding, s, end, &next); - eventEndPtr = next; + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + parser->m_eventEndPtr = next; switch (tok) { /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: - if (defaultHandler) { - reportDefault(parser, encoding, s, next); - if (ps_parsing == XML_FINISHED) + if (parser->m_defaultHandler) { + reportDefault(parser, parser->m_encoding, s, next); + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; } *nextPtr = next; @@ -4750,28 +5670,28 @@ epilogProcessor(XML_Parser parser, *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: - if (defaultHandler) - reportDefault(parser, encoding, s, next); + if (parser->m_defaultHandler) + reportDefault(parser, parser->m_encoding, s, next); break; case XML_TOK_PI: - if (!reportProcessingInstruction(parser, encoding, s, next)) + if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - if (!reportComment(parser, encoding, s, next)) + if (! reportComment(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: - eventPtr = next; + parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (!ps_finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (!ps_finalBuffer) { + if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } @@ -4779,209 +5699,245 @@ epilogProcessor(XML_Parser parser, default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } - eventPtr = s = next; - switch (ps_parsing) { + parser->m_eventPtr = s = next; + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; - default: ; + default:; } } } static enum XML_Error -processInternalEntity(XML_Parser parser, ENTITY *entity, - XML_Bool betweenDecl) -{ +processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { const char *textStart, *textEnd; const char *next; enum XML_Error result; OPEN_INTERNAL_ENTITY *openEntity; - if (freeInternalEntities) { - openEntity = freeInternalEntities; - freeInternalEntities = openEntity->next; - } - else { - openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY)); - if (!openEntity) + if (parser->m_freeInternalEntities) { + openEntity = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity->next; + } else { + openEntity + = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); + if (! openEntity) return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif entity->processed = 0; - openEntity->next = openInternalEntities; - openInternalEntities = openEntity; + openEntity->next = parser->m_openInternalEntities; + parser->m_openInternalEntities = openEntity; openEntity->entity = entity; - openEntity->startTagLevel = tagLevel; + openEntity->startTagLevel = parser->m_tagLevel; openEntity->betweenDecl = betweenDecl; openEntity->internalEventPtr = NULL; openEntity->internalEventEndPtr = NULL; - textStart = (char *)entity->textPtr; - textEnd = (char *)(entity->textPtr + entity->textLen); + textStart = (const char *)entity->textPtr; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; #ifdef XML_DTD if (entity->is_param) { - int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, internalEncoding, textStart, textEnd, tok, - next, &next, XML_FALSE); - } - else + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else #endif /* XML_DTD */ - result = doContent(parser, tagLevel, internalEncoding, textStart, - textEnd, &next, XML_FALSE); + result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, + textStart, textEnd, &next, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { - if (textEnd != next && ps_parsing == XML_SUSPENDED) { + if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - textStart); - processor = internalEntityProcessor; - } - else { + parser->m_processor = internalEntityProcessor; + } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif /* XML_DTD */ entity->open = XML_FALSE; - openInternalEntities = openEntity->next; + parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ - openEntity->next = freeInternalEntities; - freeInternalEntities = openEntity; + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; } } return result; } static enum XML_Error PTRCALL -internalEntityProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +internalEntityProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { ENTITY *entity; const char *textStart, *textEnd; const char *next; enum XML_Error result; - OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities; - if (!openEntity) + OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; + if (! openEntity) return XML_ERROR_UNEXPECTED_STATE; entity = openEntity->entity; - textStart = ((char *)entity->textPtr) + entity->processed; - textEnd = (char *)(entity->textPtr + entity->textLen); + textStart = ((const char *)entity->textPtr) + entity->processed; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; #ifdef XML_DTD if (entity->is_param) { - int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, internalEncoding, textStart, textEnd, tok, - next, &next, XML_FALSE); - } - else + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_TRUE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else #endif /* XML_DTD */ - result = doContent(parser, openEntity->startTagLevel, internalEncoding, - textStart, textEnd, &next, XML_FALSE); + result = doContent(parser, openEntity->startTagLevel, + parser->m_internalEncoding, textStart, textEnd, &next, + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); if (result != XML_ERROR_NONE) return result; - else if (textEnd != next && ps_parsing == XML_SUSPENDED) { - entity->processed = (int)(next - (char *)entity->textPtr); + + if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + entity->processed = (int)(next - (const char *)entity->textPtr); return result; } - else { - entity->open = XML_FALSE; - openInternalEntities = openEntity->next; - /* put openEntity back in list of free instances */ - openEntity->next = freeInternalEntities; - freeInternalEntities = openEntity; + +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif + entity->open = XML_FALSE; + parser->m_openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + + // If there are more open entities we want to stop right here and have the + // upcoming call to XML_ResumeParser continue with entity content, or it would + // be ignored altogether. + if (parser->m_openInternalEntities != NULL + && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + return XML_ERROR_NONE; } #ifdef XML_DTD if (entity->is_param) { int tok; - processor = prologProcessor; - tok = XmlPrologTok(encoding, s, end, &next); - return doProlog(parser, encoding, s, end, tok, next, nextPtr, - (XML_Bool)!ps_finalBuffer); - } - else + parser->m_processor = prologProcessor; + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); + } else #endif /* XML_DTD */ { - processor = contentProcessor; + parser->m_processor = contentProcessor; /* see externalEntityContentProcessor vs contentProcessor */ - return doContent(parser, parentParser ? 1 : 0, encoding, s, end, - nextPtr, (XML_Bool)!ps_finalBuffer); + result = doContent(parser, parser->m_parentParser ? 1 : 0, + parser->m_encoding, s, end, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); + if (result == XML_ERROR_NONE) { + if (! storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } + return result; } } static enum XML_Error PTRCALL -errorProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - return errorCode; +errorProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + UNUSED_P(s); + UNUSED_P(end); + UNUSED_P(nextPtr); + return parser->m_errorCode; } static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) -{ - enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, - end, pool); + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { + enum XML_Error result + = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); if (result) return result; - if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) + if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); - if (!poolAppendChar(pool, XML_T('\0'))) + if (! poolAppendChar(pool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; return XML_ERROR_NONE; } static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ +#ifndef XML_DTD + UNUSED_P(account); +#endif + for (;;) { - const char *next; + const char *next + = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_INVALID: - if (enc == encoding) - eventPtr = next; + if (enc == parser->m_encoding) + parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (enc == encoding) - eventPtr = ptr; + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; - case XML_TOK_CHAR_REF: - { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, ptr); - if (n < 0) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - if (!isCdata - && n == 0x20 /* space */ - && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (!poolAppendChar(pool, buf[i])) - return XML_ERROR_NO_MEMORY; - } + case XML_TOK_CHAR_REF: { + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, ptr); + if (n < 0) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; } - break; + if (! isCdata && n == 0x20 /* space */ + && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + break; + n = XmlEncode(n, (ICHAR *)buf); + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ + for (i = 0; i < n; i++) { + if (! poolAppendChar(pool, buf[i])) + return XML_ERROR_NO_MEMORY; + } + } break; case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, ptr, next)) + if (! poolAppend(pool, enc, ptr, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: @@ -4989,95 +5945,134 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, /* fall through */ case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: - if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; - if (!poolAppendChar(pool, 0x20)) + if (! poolAppendChar(pool, 0x20)) return XML_ERROR_NO_MEMORY; break; - case XML_TOK_ENTITY_REF: - { - const XML_Char *name; - ENTITY *entity; - char checkEntityDecl; - XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (!poolAppendChar(pool, ch)) - return XML_ERROR_NO_MEMORY; - break; - } - name = poolStoreString(&temp2Pool, enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + case XML_TOK_ENTITY_REF: { + const XML_Char *name; + ENTITY *entity; + char checkEntityDecl; + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ + if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); - poolDiscard(&temp2Pool); - /* First, determine if a check for an existing declaration is needed; - if yes, check that the entity exists, and that it is internal. - */ - if (pool == &dtd->pool) /* are we called from prolog? */ - checkEntityDecl = + break; + } + name = poolStoreString(&parser->m_temp2Pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&parser->m_temp2Pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal. + */ + if (pool == &dtd->pool) /* are we called from prolog? */ + checkEntityDecl = #ifdef XML_DTD - prologState.documentEntity && + parser->m_prologState.documentEntity && #endif /* XML_DTD */ - (dtd->standalone - ? !openInternalEntities - : !dtd->hasParamEntityRefs); - else /* if (pool == &tempPool): we are called from content */ - checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone; - if (checkEntityDecl) { - if (!entity) - return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; - } - else if (!entity) { - /* Cannot report skipped entity here - see comments on - skippedEntityHandler. - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, name, 0); - */ - /* Cannot call the default handler because this would be - out of sync with the call to the startElementHandler. - if ((pool == &tempPool) && defaultHandler) - reportDefault(parser, enc, ptr, next); - */ - break; - } - if (entity->open) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_RECURSIVE_ENTITY_REF; - } - if (entity->notation) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BINARY_ENTITY_REF; - } - if (!entity->textPtr) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; - } - else { - enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = XML_TRUE; - result = appendAttributeValue(parser, internalEncoding, isCdata, - (char *)entity->textPtr, - (char *)textEnd, pool); - entity->open = XML_FALSE; - if (result) - return result; + (dtd->standalone ? ! parser->m_openInternalEntities + : ! dtd->hasParamEntityRefs); + else /* if (pool == &parser->m_tempPool): we are called from content */ + checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; + if (checkEntityDecl) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } else if (! entity) { + /* Cannot report skipped entity here - see comments on + parser->m_skippedEntityHandler. + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + */ + /* Cannot call the default handler because this would be + out of sync with the call to the startElementHandler. + if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) + reportDefault(parser, enc, ptr, next); + */ + break; + } + if (entity->open) { + if (enc == parser->m_encoding) { + /* It does not appear that this line can be executed. + * + * The "if (entity->open)" check catches recursive entity + * definitions. In order to be called with an open + * entity, it must have gone through this code before and + * been through the recursive call to + * appendAttributeValue() some lines below. That call + * sets the local encoding ("enc") to the parser's + * internal encoding (internal_utf8 or internal_utf16), + * which can never be the same as the principle encoding. + * It doesn't appear there is another code path that gets + * here with entity->open being TRUE. + * + * Since it is not certain that this logic is watertight, + * we keep the line and merely exclude it from coverage + * tests. + */ + parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ } + return XML_ERROR_RECURSIVE_ENTITY_REF; } - break; + if (entity->notation) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_BINARY_ENTITY_REF; + } + if (! entity->textPtr) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } else { + enum XML_Error result; + const XML_Char *textEnd = entity->textPtr + entity->textLen; + entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif + result = appendAttributeValue(parser, parser->m_internalEncoding, + isCdata, (const char *)entity->textPtr, + (const char *)textEnd, pool, + XML_ACCOUNT_ENTITY_EXPANSION); +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif + entity->open = XML_FALSE; + if (result) + return result; + } + } break; default: - if (enc == encoding) - eventPtr = ptr; + /* The only token returned by XmlAttributeValueTok() that does + * not have an explicit case here is XML_TOK_PARTIAL_CHAR. + * Getting that would require an entity name to contain an + * incomplete XML character (e.g. \xE2\x82); however previous + * tokenisers will have already recognised and rejected such + * names before XmlAttributeValueTok() gets a look-in. This + * default case should be retained as a safety net, but the code + * excluded from coverage tests. + * + * LCOV_EXCL_START + */ + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } ptr = next; } @@ -5085,87 +6080,98 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, } static enum XML_Error -storeEntityValue(XML_Parser parser, - const ENCODING *enc, - const char *entityTextPtr, - const char *entityTextEnd) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; #ifdef XML_DTD - int oldInEntityValue = prologState.inEntityValue; - prologState.inEntityValue = 1; + int oldInEntityValue = parser->m_prologState.inEntityValue; + parser->m_prologState.inEntityValue = 1; +#else + UNUSED_P(account); #endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we have to make sure that entityValuePool.start is not null */ - if (!pool->blocks) { - if (!poolGrow(pool)) + if (! pool->blocks) { + if (! poolGrow(pool)) return XML_ERROR_NO_MEMORY; } for (;;) { - const char *next; + const char *next + = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, + account)) { + accountingOnAbort(parser); + result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + goto endEntityValue; + } +#endif + switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD - if (isParamEntity || enc != encoding) { + if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; ENTITY *entity; - name = poolStoreString(&tempPool, enc, + name = poolStoreString(&parser->m_tempPool, enc, entityTextPtr + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!name) { + if (! name) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); - poolDiscard(&tempPool); - if (!entity) { + poolDiscard(&parser->m_tempPool); + if (! entity) { /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ /* cannot report skipped entity here - see comments on - skippedEntityHandler - if (skippedEntityHandler) - skippedEntityHandler(handlerArg, name, 0); + parser->m_skippedEntityHandler + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); */ dtd->keepProcessing = dtd->standalone; goto endEntityValue; } if (entity->open) { - if (enc == encoding) - eventPtr = entityTextPtr; + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_RECURSIVE_ENTITY_REF; goto endEntityValue; } if (entity->systemId) { - if (externalEntityRefHandler) { + if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) { + entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; goto endEntityValue; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; - if (!dtd->paramEntityRead) + if (! dtd->paramEntityRead) dtd->keepProcessing = dtd->standalone; - } - else + } else dtd->keepProcessing = dtd->standalone; - } - else { + } else { entity->open = XML_TRUE; - result = storeEntityValue(parser, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr - + entity->textLen)); + entityTrackingOnOpen(parser, entity, __LINE__); + result = storeEntityValue( + parser, parser->m_internalEncoding, (const char *)entity->textPtr, + (const char *)(entity->textPtr + entity->textLen), + XML_ACCOUNT_ENTITY_EXPANSION); + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (result) goto endEntityValue; @@ -5175,7 +6181,7 @@ storeEntityValue(XML_Parser parser, #endif /* XML_DTD */ /* In the internal subset, PE references are not legal within markup declarations, e.g entity values in this case. */ - eventPtr = entityTextPtr; + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_PARAM_ENTITY_REF; goto endEntityValue; case XML_TOK_NONE: @@ -5183,7 +6189,7 @@ storeEntityValue(XML_Parser parser, goto endEntityValue; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, entityTextPtr, next)) { + if (! poolAppend(pool, enc, entityTextPtr, next)) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } @@ -5192,67 +6198,75 @@ storeEntityValue(XML_Parser parser, next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: - if (pool->end == pool->ptr && !poolGrow(pool)) { - result = XML_ERROR_NO_MEMORY; + if (pool->end == pool->ptr && ! poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; goto endEntityValue; } *(pool->ptr)++ = 0xA; break; - case XML_TOK_CHAR_REF: - { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, entityTextPtr); - if (n < 0) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; - goto endEntityValue; - } - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; + case XML_TOK_CHAR_REF: { + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + n = XmlEncode(n, (ICHAR *)buf); + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && ! poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - for (i = 0; i < n; i++) { - if (pool->end == pool->ptr && !poolGrow(pool)) { - result = XML_ERROR_NO_MEMORY; - goto endEntityValue; - } - *(pool->ptr)++ = buf[i]; - } + *(pool->ptr)++ = buf[i]; } - break; + } break; case XML_TOK_PARTIAL: - if (enc == encoding) - eventPtr = entityTextPtr; + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; case XML_TOK_INVALID: - if (enc == encoding) - eventPtr = next; + if (enc == parser->m_encoding) + parser->m_eventPtr = next; result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; default: - if (enc == encoding) - eventPtr = entityTextPtr; + /* This default case should be unnecessary -- all the tokens + * that XmlEntityValueTok() can return have their own explicit + * cases -- but should be retained for safety. We do however + * exclude it from the coverage statistics. + * + * LCOV_EXCL_START + */ + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; result = XML_ERROR_UNEXPECTED_STATE; goto endEntityValue; + /* LCOV_EXCL_STOP */ } entityTextPtr = next; } endEntityValue: #ifdef XML_DTD - prologState.inEntityValue = oldInEntityValue; + parser->m_prologState.inEntityValue = oldInEntityValue; #endif /* XML_DTD */ return result; } static void FASTCALL -normalizeLines(XML_Char *s) -{ +normalizeLines(XML_Char *s) { XML_Char *p; for (;; s++) { if (*s == XML_T('\0')) @@ -5266,8 +6280,7 @@ normalizeLines(XML_Char *s) *p++ = 0xA; if (*++s == 0xA) s++; - } - else + } else *p++ = *s++; } while (*s); *p = XML_T('\0'); @@ -5275,87 +6288,100 @@ normalizeLines(XML_Char *s) static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end) -{ + const char *start, const char *end) { const XML_Char *target; XML_Char *data; const char *tem; - if (!processingInstructionHandler) { - if (defaultHandler) + if (! parser->m_processingInstructionHandler) { + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } start += enc->minBytesPerChar * 2; tem = start + XmlNameLength(enc, start); - target = poolStoreString(&tempPool, enc, start, tem); - if (!target) + target = poolStoreString(&parser->m_tempPool, enc, start, tem); + if (! target) return 0; - poolFinish(&tempPool); - data = poolStoreString(&tempPool, enc, - XmlSkipS(enc, tem), - end - enc->minBytesPerChar*2); - if (!data) + poolFinish(&parser->m_tempPool); + data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), + end - enc->minBytesPerChar * 2); + if (! data) return 0; normalizeLines(data); - processingInstructionHandler(handlerArg, target, data); - poolClear(&tempPool); + parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); + poolClear(&parser->m_tempPool); return 1; } static int -reportComment(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end) -{ +reportComment(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end) { XML_Char *data; - if (!commentHandler) { - if (defaultHandler) + if (! parser->m_commentHandler) { + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } - data = poolStoreString(&tempPool, - enc, + data = poolStoreString(&parser->m_tempPool, enc, start + enc->minBytesPerChar * 4, end - enc->minBytesPerChar * 3); - if (!data) + if (! data) return 0; normalizeLines(data); - commentHandler(handlerArg, data); - poolClear(&tempPool); + parser->m_commentHandler(parser->m_handlerArg, data); + poolClear(&parser->m_tempPool); return 1; } static void -reportDefault(XML_Parser parser, const ENCODING *enc, - const char *s, const char *end) -{ +reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, + const char *end) { if (MUST_CONVERT(enc, s)) { + enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + /* To get here, two things must be true; the parser must be + * using a character encoding that is not the same as the + * encoding passed in, and the encoding passed in must need + * conversion to the internal format (UTF-8 unless XML_UNICODE + * is defined). The only occasions on which the encoding passed + * in is not the same as the parser's encoding are when it is + * the internal encoding (e.g. a previously defined parameter + * entity, already converted to internal format). This by + * definition doesn't need conversion, so the whole branch never + * gets executed. + * + * For safety's sake we don't delete these lines and merely + * exclude them from coverage statistics. + * + * LCOV_EXCL_START + */ + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } do { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + convert_res + = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; - defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); + parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); *eventPP = s; - } while (s != end); - } - else - defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); + } while ((convert_res != XML_CONVERT_COMPLETED) + && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); + } else + parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); } - static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, - XML_Bool isId, const XML_Char *value, XML_Parser parser) -{ + XML_Bool isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; if (value || isId) { /* The handling of default attributes gets messed up if we have @@ -5364,22 +6390,40 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, for (i = 0; i < type->nDefaultAtts; i++) if (attId == type->defaultAtts[i].id) return 1; - if (isId && !type->idAtt && !attId->xmlns) + if (isId && ! type->idAtt && ! attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts - * sizeof(DEFAULT_ATTRIBUTE)); - if (!type->defaultAtts) + type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( + parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! type->defaultAtts) { + type->allocDefaultAtts = 0; return 0; - } - else { + } + } else { DEFAULT_ATTRIBUTE *temp; + + /* Detect and prevent integer overflow */ + if (type->allocDefaultAtts > INT_MAX / 2) { + return 0; + } + int count = type->allocDefaultAtts * 2; - temp = (DEFAULT_ATTRIBUTE *) - REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { + return 0; + } +#endif + + temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, + (count * sizeof(DEFAULT_ATTRIBUTE))); if (temp == NULL) return 0; type->allocDefaultAtts = count; @@ -5390,91 +6434,90 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, att->id = attId; att->value = value; att->isCdata = isCdata; - if (!isCdata) + if (! isCdata) attId->maybeTokenized = XML_TRUE; type->nDefaultAtts += 1; return 1; } static int -setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { - if (!poolAppendChar(&dtd->pool, *s)) + if (! poolAppendChar(&dtd->pool, *s)) return 0; } - if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); - if (!prefix) + if (! prefix) return 0; if (prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else poolDiscard(&dtd->pool); elementType->prefix = prefix; - + break; } } return 1; } static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ATTRIBUTE_ID *id; const XML_Char *name; - if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; name = poolStoreString(&dtd->pool, enc, start, end); - if (!name) + if (! name) return NULL; /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); - if (!id) + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, + sizeof(ATTRIBUTE_ID)); + if (! id) return NULL; if (id->name != name) poolDiscard(&dtd->pool); else { poolFinish(&dtd->pool); - if (!ns) + if (! parser->m_ns) ; - else if (name[0] == XML_T(ASCII_x) - && name[1] == XML_T(ASCII_m) - && name[2] == XML_T(ASCII_l) - && name[3] == XML_T(ASCII_n) - && name[4] == XML_T(ASCII_s) - && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { + else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) + && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) + && name[4] == XML_T(ASCII_s) + && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { if (name[5] == XML_T('\0')) id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, + sizeof(PREFIX)); id->xmlns = XML_TRUE; - } - else { + } else { int i; for (i = 0; name[i]; i++) { /* attributes without prefix are *not* in the default namespace */ if (name[i] == XML_T(ASCII_COLON)) { int j; for (j = 0; j < i; j++) { - if (!poolAppendChar(&dtd->pool, name[j])) + if (! poolAppendChar(&dtd->pool, name[j])) return NULL; } - if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, + poolStart(&dtd->pool), sizeof(PREFIX)); + if (! id->prefix) return NULL; - id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), - sizeof(PREFIX)); if (id->prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else @@ -5490,23 +6533,44 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, #define CONTEXT_SEP XML_T(ASCII_FF) static const XML_Char * -getContext(XML_Parser parser) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +getContext(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ HASH_TABLE_ITER iter; XML_Bool needSep = XML_FALSE; if (dtd->defaultPrefix.binding) { int i; int len; - if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; len = dtd->defaultPrefix.binding->uriLen; - if (namespaceSeparator) + if (parser->m_namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) - return NULL; + for (i = 0; i < len; i++) { + if (! poolAppendChar(&parser->m_tempPool, + dtd->defaultPrefix.binding->uri[i])) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. + * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } + } needSep = XML_TRUE; } @@ -5516,102 +6580,107 @@ getContext(XML_Parser parser) int len; const XML_Char *s; PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); - if (!prefix) + if (! prefix) break; - if (!prefix->binding) - continue; - if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) + if (! prefix->binding) { + /* This test appears to be (justifiable) paranoia. There does + * not seem to be a way of injecting a prefix without a binding + * that doesn't get errored long before this function is called. + * The test should remain for safety's sake, so we instead + * exclude the following line from the coverage statistics. + */ + continue; /* LCOV_EXCL_LINE */ + } + if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; for (s = prefix->name; *s; s++) - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return NULL; - if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; len = prefix->binding->uriLen; - if (namespaceSeparator) + if (parser->m_namespaceSeparator) len--; for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) + if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) return NULL; needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); - if (!e) + if (! e) break; - if (!e->open) + if (! e->open) continue; - if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) + if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; for (s = e->name; *s; s++) - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return 0; needSep = XML_TRUE; } - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return NULL; - return tempPool.start; + return parser->m_tempPool.start; } static XML_Bool -setContext(XML_Parser parser, const XML_Char *context) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +setContext(XML_Parser parser, const XML_Char *context) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0); + e = (ENTITY *)lookup(parser, &dtd->generalEntities, + poolStart(&parser->m_tempPool), 0); if (e) e->open = XML_TRUE; if (*s != XML_T('\0')) s++; context = s; - poolDiscard(&tempPool); - } - else if (*s == XML_T(ASCII_EQUALS)) { + poolDiscard(&parser->m_tempPool); + } else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; - if (poolLength(&tempPool) == 0) + if (poolLength(&parser->m_tempPool) == 0) prefix = &dtd->defaultPrefix; else { - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool), - sizeof(PREFIX)); - if (!prefix) + prefix + = (PREFIX *)lookup(parser, &dtd->prefixes, + poolStart(&parser->m_tempPool), sizeof(PREFIX)); + if (! prefix) return XML_FALSE; - if (prefix->name == poolStart(&tempPool)) { + if (prefix->name == poolStart(&parser->m_tempPool)) { prefix->name = poolCopyString(&dtd->pool, prefix->name); - if (!prefix->name) + if (! prefix->name) return XML_FALSE; } - poolDiscard(&tempPool); + poolDiscard(&parser->m_tempPool); } - for (context = s + 1; - *context != CONTEXT_SEP && *context != XML_T('\0'); + for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) - if (!poolAppendChar(&tempPool, *context)) + if (! poolAppendChar(&parser->m_tempPool, *context)) return XML_FALSE; - if (!poolAppendChar(&tempPool, XML_T('\0'))) + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - if (addBinding(parser, prefix, NULL, poolStart(&tempPool), - &inheritedBindings) != XML_ERROR_NONE) + if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), + &parser->m_inheritedBindings) + != XML_ERROR_NONE) return XML_FALSE; - poolDiscard(&tempPool); + poolDiscard(&parser->m_tempPool); if (*context != XML_T('\0')) ++context; s = context; - } - else { - if (!poolAppendChar(&tempPool, *s)) + } else { + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_FALSE; s++; } @@ -5620,8 +6689,7 @@ setContext(XML_Parser parser, const XML_Char *context) } static void FASTCALL -normalizePublicId(XML_Char *publicId) -{ +normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; for (s = publicId; *s; s++) { @@ -5642,9 +6710,8 @@ normalizePublicId(XML_Char *publicId) } static DTD * -dtdCreate(const XML_Memory_Handling_Suite *ms) -{ - DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD)); +dtdCreate(const XML_Memory_Handling_Suite *ms) { + DTD *p = ms->malloc_fcn(sizeof(DTD)); if (p == NULL) return p; poolInit(&(p->pool), ms); @@ -5675,13 +6742,12 @@ dtdCreate(const XML_Memory_Handling_Suite *ms) } static void -dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) -{ +dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!e) + if (! e) break; if (e->allocDefaultAtts != 0) ms->free_fcn(e->defaultAtts); @@ -5717,13 +6783,12 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) } static void -dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) -{ +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!e) + if (! e) break; if (e->allocDefaultAtts != 0) ms->free_fcn(e->defaultAtts); @@ -5748,8 +6813,8 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) The new DTD has already been initialized. */ static int -dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) -{ +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; /* Copy the prefix table. */ @@ -5758,12 +6823,12 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H for (;;) { const XML_Char *name; const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); - if (!oldP) + if (! oldP) break; name = poolCopyString(&(newDtd->pool), oldP->name); - if (!name) + if (! name) return 0; - if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) + if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -5776,18 +6841,18 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H const XML_Char *name; const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); - if (!oldA) + if (! oldA) break; /* Remember to allocate the scratch byte before the name. */ - if (!poolAppendChar(&(newDtd->pool), XML_T('\0'))) + if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) return 0; name = poolCopyString(&(newDtd->pool), oldA->name); - if (!name) + if (! name) return 0; ++name; newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); - if (!newA) + if (! newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { @@ -5809,58 +6874,52 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H ELEMENT_TYPE *newE; const XML_Char *name; const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!oldE) + if (! oldE) break; name = poolCopyString(&(newDtd->pool), oldE->name); - if (!name) + if (! name) return 0; newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); - if (!newE) + if (! newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *) - ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (!newE->defaultAtts) { - ms->free_fcn(newE); + newE->defaultAtts + = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! newE->defaultAtts) { return 0; } } if (oldE->idAtt) - newE->idAtt = (ATTRIBUTE_ID *) - lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); + newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), + oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { - newE->defaultAtts[i].id = (ATTRIBUTE_ID *) - lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( + oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); - if (!newE->defaultAtts[i].value) + if (! newE->defaultAtts[i].value) return 0; - } - else + } else newE->defaultAtts[i].value = NULL; } } /* Copy the entity tables. */ - if (!copyEntityTable(oldParser, - &(newDtd->generalEntities), - &(newDtd->pool), - &(oldDtd->generalEntities))) - return 0; + if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), + &(oldDtd->generalEntities))) + return 0; #ifdef XML_DTD - if (!copyEntityTable(oldParser, - &(newDtd->paramEntities), - &(newDtd->pool), - &(oldDtd->paramEntities))) - return 0; + if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), + &(oldDtd->paramEntities))) + return 0; newDtd->paramEntityRead = oldDtd->paramEntityRead; #endif /* XML_DTD */ @@ -5877,14 +6936,11 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H newDtd->scaffIndex = oldDtd->scaffIndex; return 1; -} /* End dtdCopy */ +} /* End dtdCopy */ static int -copyEntityTable(XML_Parser oldParser, - HASH_TABLE *newTable, - STRING_POOL *newPool, - const HASH_TABLE *oldTable) -{ +copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, + STRING_POOL *newPool, const HASH_TABLE *oldTable) { HASH_TABLE_ITER iter; const XML_Char *cachedOldBase = NULL; const XML_Char *cachedNewBase = NULL; @@ -5895,17 +6951,17 @@ copyEntityTable(XML_Parser oldParser, ENTITY *newE; const XML_Char *name; const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); - if (!oldE) + if (! oldE) break; name = poolCopyString(newPool, oldE->name); - if (!name) + if (! name) return 0; newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); - if (!newE) + if (! newE) return 0; if (oldE->systemId) { const XML_Char *tem = poolCopyString(newPool, oldE->systemId); - if (!tem) + if (! tem) return 0; newE->systemId = tem; if (oldE->base) { @@ -5914,29 +6970,28 @@ copyEntityTable(XML_Parser oldParser, else { cachedOldBase = oldE->base; tem = poolCopyString(newPool, cachedOldBase); - if (!tem) + if (! tem) return 0; cachedNewBase = newE->base = tem; } } if (oldE->publicId) { tem = poolCopyString(newPool, oldE->publicId); - if (!tem) + if (! tem) return 0; newE->publicId = tem; } - } - else { - const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, - oldE->textLen); - if (!tem) + } else { + const XML_Char *tem + = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); + if (! tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { const XML_Char *tem = poolCopyString(newPool, oldE->notation); - if (!tem) + if (! tem) return 0; newE->notation = tem; } @@ -5949,44 +7004,57 @@ copyEntityTable(XML_Parser oldParser, #define INIT_POWER 6 static XML_Bool FASTCALL -keyeq(KEY s1, KEY s2) -{ +keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) return XML_TRUE; return XML_FALSE; } +static size_t +keylen(KEY s) { + size_t len = 0; + for (; *s; s++, len++) + ; + return len; +} + +static void +copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { + key->k[0] = 0; + key->k[1] = get_hash_secret_salt(parser); +} + static unsigned long FASTCALL -hash(XML_Parser parser, KEY s) -{ - unsigned long h = hash_secret_salt; - while (*s) - h = CHAR_HASH(h, *s++); - return h; +hash(XML_Parser parser, KEY s) { + struct siphash state; + struct sipkey key; + (void)sip24_valid; + copy_salt_to_sipkey(parser, &key); + sip24_init(&state, &key); + sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); + return (unsigned long)sip24_final(&state); } static NAMED * -lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) -{ +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { size_t tsize; - if (!createSize) + if (! createSize) return NULL; table->power = INIT_POWER; /* table->size is a power of 2 */ table->size = (size_t)1 << INIT_POWER; tsize = table->size * sizeof(NAMED *); - table->v = (NAMED **)table->mem->malloc_fcn(tsize); - if (!table->v) { + table->v = table->mem->malloc_fcn(tsize); + if (! table->v) { table->size = 0; return NULL; } memset(table->v, 0, tsize); i = hash(parser, name) & ((unsigned long)table->size - 1); - } - else { + } else { unsigned long h = hash(parser, name); unsigned long mask = (unsigned long)table->size - 1; unsigned char step = 0; @@ -5994,21 +7062,33 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) while (table->v[i]) { if (keyeq(name, table->v[i]->name)) return table->v[i]; - if (!step) + if (! step) step = PROBE_STEP(h, mask, table->power); i < step ? (i += table->size - step) : (i -= step); } - if (!createSize) + if (! createSize) return NULL; /* check for overflow (table is half full) */ if (table->used >> (table->power - 1)) { unsigned char newPower = table->power + 1; + + /* Detect and prevent invalid shift */ + if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { + return NULL; + } + size_t newSize = (size_t)1 << newPower; unsigned long newMask = (unsigned long)newSize - 1; + + /* Detect and prevent integer overflow */ + if (newSize > (size_t)(-1) / sizeof(NAMED *)) { + return NULL; + } + size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); - if (!newV) + NAMED **newV = table->mem->malloc_fcn(tsize); + if (! newV) return NULL; memset(newV, 0, tsize); for (i = 0; i < table->size; i++) @@ -6017,7 +7097,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) size_t j = newHash & newMask; step = 0; while (newV[j]) { - if (!step) + if (! step) step = PROBE_STEP(newHash, newMask, newPower); j < step ? (j += newSize - step) : (j -= step); } @@ -6030,14 +7110,14 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) i = h & newMask; step = 0; while (table->v[i]) { - if (!step) + if (! step) step = PROBE_STEP(h, newMask, newPower); i < step ? (i += newSize - step) : (i -= step); } } } - table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); - if (!table->v[i]) + table->v[i] = table->mem->malloc_fcn(createSize); + if (! table->v[i]) return NULL; memset(table->v[i], 0, createSize); table->v[i]->name = name; @@ -6046,8 +7126,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) } static void FASTCALL -hashTableClear(HASH_TABLE *table) -{ +hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { table->mem->free_fcn(table->v[i]); @@ -6057,8 +7136,7 @@ hashTableClear(HASH_TABLE *table) } static void FASTCALL -hashTableDestroy(HASH_TABLE *table) -{ +hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) table->mem->free_fcn(table->v[i]); @@ -6066,8 +7144,7 @@ hashTableDestroy(HASH_TABLE *table) } static void FASTCALL -hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) -{ +hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { p->power = 0; p->size = 0; p->used = 0; @@ -6076,15 +7153,13 @@ hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) } static void FASTCALL -hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) -{ +hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { iter->p = table->v; - iter->end = iter->p + table->size; + iter->end = iter->p ? iter->p + table->size : NULL; } -static NAMED * FASTCALL -hashTableIterNext(HASH_TABLE_ITER *iter) -{ +static NAMED *FASTCALL +hashTableIterNext(HASH_TABLE_ITER *iter) { while (iter->p != iter->end) { NAMED *tem = *(iter->p)++; if (tem) @@ -6094,8 +7169,7 @@ hashTableIterNext(HASH_TABLE_ITER *iter) } static void FASTCALL -poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) -{ +poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { pool->blocks = NULL; pool->freeBlocks = NULL; pool->start = NULL; @@ -6105,9 +7179,8 @@ poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) } static void FASTCALL -poolClear(STRING_POOL *pool) -{ - if (!pool->freeBlocks) +poolClear(STRING_POOL *pool) { + if (! pool->freeBlocks) pool->freeBlocks = pool->blocks; else { BLOCK *p = pool->blocks; @@ -6125,8 +7198,7 @@ poolClear(STRING_POOL *pool) } static void FASTCALL -poolDestroy(STRING_POOL *pool) -{ +poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; @@ -6142,26 +7214,26 @@ poolDestroy(STRING_POOL *pool) } static XML_Char * -poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) -{ - if (!pool->ptr && !poolGrow(pool)) +poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, + const char *end) { + if (! pool->ptr && ! poolGrow(pool)) return NULL; for (;;) { - XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - if (ptr == end) + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; - if (!poolGrow(pool)) + if (! poolGrow(pool)) return NULL; } return pool->start; } -static const XML_Char * FASTCALL -poolCopyString(STRING_POOL *pool, const XML_Char *s) -{ +static const XML_Char *FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { - if (!poolAppendChar(pool, *s)) + if (! poolAppendChar(pool, *s)) return NULL; } while (*s++); s = pool->start; @@ -6170,12 +7242,23 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s) } static const XML_Char * -poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) -{ - if (!pool->ptr && !poolGrow(pool)) - return NULL; +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { + if (! pool->ptr && ! poolGrow(pool)) { + /* The following line is unreachable given the current usage of + * poolCopyStringN(). Currently it is called from exactly one + * place to copy the text of a simple general entity. By that + * point, the name of the entity is already stored in the pool, so + * pool->ptr cannot be NULL. + * + * If poolCopyStringN() is used elsewhere as it well might be, + * this line may well become executable again. Regardless, this + * sort of check shouldn't be removed lightly, so we just exclude + * it from the coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } for (; n > 0; --n, s++) { - if (!poolAppendChar(pool, *s)) + if (! poolAppendChar(pool, *s)) return NULL; } s = pool->start; @@ -6183,11 +7266,10 @@ poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) return s; } -static const XML_Char * FASTCALL -poolAppendString(STRING_POOL *pool, const XML_Char *s) -{ +static const XML_Char *FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s) { while (*s) { - if (!poolAppendChar(pool, *s)) + if (! poolAppendChar(pool, *s)) return NULL; s++; } @@ -6195,20 +7277,46 @@ poolAppendString(STRING_POOL *pool, const XML_Char *s) } static XML_Char * -poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) -{ - if (!poolAppend(pool, enc, ptr, end)) +poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, + const char *end) { + if (! poolAppend(pool, enc, ptr, end)) return NULL; - if (pool->ptr == pool->end && !poolGrow(pool)) + if (pool->ptr == pool->end && ! poolGrow(pool)) return NULL; *(pool->ptr)++ = 0; return pool->start; } +static size_t +poolBytesToAllocateFor(int blockSize) { + /* Unprotected math would be: + ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); + ** + ** Detect overflow, avoiding _signed_ overflow undefined behavior + ** For a + b * c we check b * c in isolation first, so that addition of a + ** on top has no chance of making us accept a small non-negative number + */ + const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ + + if (blockSize <= 0) + return 0; + + if (blockSize > (int)(INT_MAX / stretch)) + return 0; + + { + const int stretchedBlockSize = blockSize * (int)stretch; + const int bytesToAllocate + = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); + if (bytesToAllocate < 0) + return 0; + + return (size_t)bytesToAllocate; + } +} + static XML_Bool FASTCALL -poolGrow(STRING_POOL *pool) -{ +poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; @@ -6233,36 +7341,77 @@ poolGrow(STRING_POOL *pool) } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (int)(pool->end - pool->start)*2; - BLOCK *temp = (BLOCK *) - pool->mem->realloc_fcn(pool->blocks, - (offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char))); + BLOCK *temp; + int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); + size_t bytesToAllocate; + + /* NOTE: Needs to be calculated prior to calling `realloc` + to avoid dangling pointers: */ + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX/2 bytes have already been allocated. This isn't + * readily testable, since it is unlikely that an average + * machine will have that much memory, so we exclude it from the + * coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, + (unsigned)bytesToAllocate); if (temp == NULL) return XML_FALSE; pool->blocks = temp; pool->blocks->size = blockSize; - pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; - } - else { + } else { BLOCK *tem; int blockSize = (int)(pool->end - pool->start); + size_t bytesToAllocate; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX bytes have already been allocated (which is prevented + * by various pieces of program logic, not least this one, never + * mind the unlikelihood of actually having that much memory) or + * the pool control fields have been corrupted (which could + * conceivably happen in an extremely buggy user handler + * function). Either way it isn't readily testable, so we + * exclude it from the coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } + if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; - else + else { + /* Detect overflow, avoiding _signed_ overflow undefined behavior */ + if ((int)((unsigned)blockSize * 2U) < 0) { + return XML_FALSE; + } blockSize *= 2; - tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char)); - if (!tem) + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + tem = pool->mem->malloc_fcn(bytesToAllocate); + if (! tem) return XML_FALSE; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; if (pool->ptr != pool->start) - memcpy(tem->s, pool->start, - (pool->ptr - pool->start) * sizeof(XML_Char)); + memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; @@ -6271,15 +7420,14 @@ poolGrow(STRING_POOL *pool) } static int FASTCALL -nextScaffoldPart(XML_Parser parser) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ - CONTENT_SCAFFOLD * me; +nextScaffoldPart(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + CONTENT_SCAFFOLD *me; int next; - if (!dtd->scaffIndex) { - dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int)); - if (!dtd->scaffIndex) + if (! dtd->scaffIndex) { + dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); + if (! dtd->scaffIndex) return -1; dtd->scaffIndex[0] = 0; } @@ -6287,15 +7435,28 @@ nextScaffoldPart(XML_Parser parser) if (dtd->scaffCount >= dtd->scaffSize) { CONTENT_SCAFFOLD *temp; if (dtd->scaffold) { - temp = (CONTENT_SCAFFOLD *) - REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + /* Detect and prevent integer overflow */ + if (dtd->scaffSize > UINT_MAX / 2u) { + return -1; + } + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { + return -1; + } +#endif + + temp = (CONTENT_SCAFFOLD *)REALLOC( + parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize *= 2; - } - else { - temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS - * sizeof(CONTENT_SCAFFOLD)); + } else { + temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS + * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; @@ -6305,11 +7466,12 @@ nextScaffoldPart(XML_Parser parser) next = dtd->scaffCount++; me = &dtd->scaffold[next]; if (dtd->scaffLevel) { - CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]]; + CONTENT_SCAFFOLD *parent + = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; if (parent->lastchild) { dtd->scaffold[parent->lastchild].nextsib = next; } - if (!parent->childcnt) + if (! parent->childcnt) parent->firstchild = next; parent->lastchild = next; parent->childcnt++; @@ -6318,86 +7480,925 @@ nextScaffoldPart(XML_Parser parser) return next; } -static void -build_node(XML_Parser parser, - int src_node, - XML_Content *dest, - XML_Content **contpos, - XML_Char **strpos) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ - dest->type = dtd->scaffold[src_node].type; - dest->quant = dtd->scaffold[src_node].quant; - if (dest->type == XML_CTYPE_NAME) { - const XML_Char *src; - dest->name = *strpos; - src = dtd->scaffold[src_node].name; - for (;;) { - *(*strpos)++ = *src; - if (!*src) - break; - src++; - } - dest->numchildren = 0; - dest->children = NULL; +static XML_Content * +build_model(XML_Parser parser) { + /* Function build_model transforms the existing parser->m_dtd->scaffold + * array of CONTENT_SCAFFOLD tree nodes into a new array of + * XML_Content tree nodes followed by a gapless list of zero-terminated + * strings. */ + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + XML_Content *ret; + XML_Char *str; /* the current string writing location */ + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { + return NULL; } - else { - unsigned int i; - int cn; - dest->numchildren = dtd->scaffold[src_node].childcnt; - dest->children = *contpos; - *contpos += dest->numchildren; - for (i = 0, cn = dtd->scaffold[src_node].firstchild; - i < dest->numchildren; - i++, cn = dtd->scaffold[cn].nextsib) { - build_node(parser, cn, &(dest->children[i]), contpos, strpos); - } - dest->name = NULL; + if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { + return NULL; + } +#endif + if (dtd->scaffCount * sizeof(XML_Content) + > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { + return NULL; } -} -static XML_Content * -build_model (XML_Parser parser) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ - XML_Content *ret; - XML_Content *cpos; - XML_Char * str; - int allocsize = (dtd->scaffCount * sizeof(XML_Content) - + (dtd->contentStringLen * sizeof(XML_Char))); + const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + + (dtd->contentStringLen * sizeof(XML_Char))); - ret = (XML_Content *)MALLOC(allocsize); - if (!ret) + ret = (XML_Content *)MALLOC(parser, allocsize); + if (! ret) return NULL; - str = (XML_Char *) (&ret[dtd->scaffCount]); - cpos = &ret[1]; + /* What follows is an iterative implementation (of what was previously done + * recursively in a dedicated function called "build_node". The old recursive + * build_node could be forced into stack exhaustion from input as small as a + * few megabyte, and so that was a security issue. Hence, a function call + * stack is avoided now by resolving recursion.) + * + * The iterative approach works as follows: + * + * - We have two writing pointers, both walking up the result array; one does + * the work, the other creates "jobs" for its colleague to do, and leads + * the way: + * + * - The faster one, pointer jobDest, always leads and writes "what job + * to do" by the other, once they reach that place in the + * array: leader "jobDest" stores the source node array index (relative + * to array dtd->scaffold) in field "numchildren". + * + * - The slower one, pointer dest, looks at the value stored in the + * "numchildren" field (which actually holds a source node array index + * at that time) and puts the real data from dtd->scaffold in. + * + * - Before the loop starts, jobDest writes source array index 0 + * (where the root node is located) so that dest will have something to do + * when it starts operation. + * + * - Whenever nodes with children are encountered, jobDest appends + * them as new jobs, in order. As a result, tree node siblings are + * adjacent in the resulting array, for example: + * + * [0] root, has two children + * [1] first child of 0, has three children + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * [2] second child of 0, does not have children + * + * Or (the same data) presented in flat array view: + * + * [0] root, has two children + * + * [1] first child of 0, has three children + * [2] second child of 0, does not have children + * + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * + * - The algorithm repeats until all target array indices have been processed. + */ + XML_Content *dest = ret; /* tree node writing location, moves upwards */ + XML_Content *const destLimit = &ret[dtd->scaffCount]; + XML_Content *jobDest = ret; /* next free writing location in target array */ + str = (XML_Char *)&ret[dtd->scaffCount]; + + /* Add the starting job, the root node (index 0) of the source tree */ + (jobDest++)->numchildren = 0; + + for (; dest < destLimit; dest++) { + /* Retrieve source tree array index from job storage */ + const int src_node = (int)dest->numchildren; + + /* Convert item */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const XML_Char *src; + dest->name = str; + src = dtd->scaffold[src_node].name; + for (;;) { + *str++ = *src; + if (! *src) + break; + src++; + } + dest->numchildren = 0; + dest->children = NULL; + } else { + unsigned int i; + int cn; + dest->name = NULL; + dest->numchildren = dtd->scaffold[src_node].childcnt; + dest->children = jobDest; + + /* Append scaffold indices of children to array */ + for (i = 0, cn = dtd->scaffold[src_node].firstchild; + i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) + (jobDest++)->numchildren = (unsigned int)cn; + } + } - build_node(parser, 0, ret, &cpos, &str); return ret; } static ELEMENT_TYPE * -getElementType(XML_Parser parser, - const ENCODING *enc, - const char *ptr, - const char *end) -{ - DTD * const dtd = _dtd; /* save one level of indirection */ +getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, + const char *end) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); ELEMENT_TYPE *ret; - if (!name) + if (! name) return NULL; - ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); - if (!ret) + ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, + sizeof(ELEMENT_TYPE)); + if (! ret) return NULL; if (ret->name != name) poolDiscard(&dtd->pool); else { poolFinish(&dtd->pool); - if (!setElementTypePrefix(parser, ret)) + if (! setElementTypePrefix(parser, ret)) return NULL; } return ret; } + +static XML_Char * +copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { + size_t charsRequired = 0; + XML_Char *result; + + /* First determine how long the string is */ + while (s[charsRequired] != 0) { + charsRequired++; + } + /* Include the terminator */ + charsRequired++; + + /* Now allocate space for the copy */ + result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); + if (result == NULL) + return NULL; + /* Copy the original into place */ + memcpy(result, s, charsRequired * sizeof(XML_Char)); + return result; +} + +#ifdef XML_DTD + +static float +accountingGetCurrentAmplification(XML_Parser rootParser) { + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = rootParser->m_accounting.countBytesDirect + ? (countBytesOutput + / (float)(rootParser->m_accounting.countBytesDirect)) + : 1.0f; + assert(! rootParser->m_parentParser); + return amplificationFactor; +} + +static void +accountingReportStats(XML_Parser originParser, const char *epilog) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + if (rootParser->m_accounting.debugLevel < 1) { + return; + } + + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + fprintf(stderr, + "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( + "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, + rootParser->m_accounting.countBytesIndirect, + (double)amplificationFactor, epilog); +} + +static void +accountingOnAbort(XML_Parser originParser) { + accountingReportStats(originParser, " ABORTING\n"); +} + +static void +accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, const char *before, + const char *after, ptrdiff_t bytesMore, int source_line, + enum XML_Account account) { + assert(! rootParser->m_parentParser); + + fprintf(stderr, + " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", + bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP", + levelsAwayFromRootParser, source_line, 10, ""); + + const char ellipis[] = "[..]"; + const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; + const unsigned int contextLength = 10; + + /* Note: Performance is of no concern here */ + const char *walker = before; + if ((rootParser->m_accounting.debugLevel >= 3) + || (after - before) + <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } else { + for (; walker < before + contextLength; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + fprintf(stderr, ellipis); + walker = after - contextLength; + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } + fprintf(stderr, "\"\n"); +} + +static XML_Bool +accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, + const char *after, int source_line, + enum XML_Account account) { + /* Note: We need to check the token type *first* to be sure that + * we can even access variable , safely. + * E.g. for XML_TOK_NONE may hold an invalid pointer. */ + switch (tok) { + case XML_TOK_INVALID: + case XML_TOK_PARTIAL: + case XML_TOK_PARTIAL_CHAR: + case XML_TOK_NONE: + return XML_TRUE; + } + + if (account == XML_ACCOUNT_NONE) + return XML_TRUE; /* because these bytes have been accounted for, already */ + + unsigned int levelsAwayFromRootParser; + const XML_Parser rootParser + = getRootParserOf(originParser, &levelsAwayFromRootParser); + assert(! rootParser->m_parentParser); + + const int isDirect + = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); + const ptrdiff_t bytesMore = after - before; + + XmlBigCount *const additionTarget + = isDirect ? &rootParser->m_accounting.countBytesDirect + : &rootParser->m_accounting.countBytesIndirect; + + /* Detect and avoid integer overflow */ + if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) + return XML_FALSE; + *additionTarget += bytesMore; + + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + const XML_Bool tolerated + = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) + || (amplificationFactor + <= rootParser->m_accounting.maximumAmplificationFactor); + + if (rootParser->m_accounting.debugLevel >= 2) { + accountingReportStats(rootParser, ""); + accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, + bytesMore, source_line, account); + } + + return tolerated; +} + +unsigned long long +testingAccountingGetCountBytesDirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesDirect; +} + +unsigned long long +testingAccountingGetCountBytesIndirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesIndirect; +} + +static void +entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, + const char *action, int sourceLine) { + assert(! rootParser->m_parentParser); + if (rootParser->m_entity_stats.debugLevel < 1) + return; + +# if defined(XML_UNICODE) + const char *const entityName = "[..]"; +# else + const char *const entityName = entity->name; +# endif + + fprintf( + stderr, + "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_entity_stats.countEverOpened, + rootParser->m_entity_stats.currentDepth, + rootParser->m_entity_stats.maximumDepthSeen, + (rootParser->m_entity_stats.currentDepth - 1) * 2, "", + entity->is_param ? "%" : "&", entityName, action, entity->textLen, + sourceLine); +} + +static void +entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + rootParser->m_entity_stats.countEverOpened++; + rootParser->m_entity_stats.currentDepth++; + if (rootParser->m_entity_stats.currentDepth + > rootParser->m_entity_stats.maximumDepthSeen) { + rootParser->m_entity_stats.maximumDepthSeen++; + } + + entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); +} + +static void +entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); + rootParser->m_entity_stats.currentDepth--; +} + +static XML_Parser +getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { + XML_Parser rootParser = parser; + unsigned int stepsTakenUpwards = 0; + while (rootParser->m_parentParser) { + rootParser = rootParser->m_parentParser; + stepsTakenUpwards++; + } + assert(! rootParser->m_parentParser); + if (outLevelDiff != NULL) { + *outLevelDiff = stepsTakenUpwards; + } + return rootParser; +} + +const char * +unsignedCharToPrintable(unsigned char c) { + switch (c) { + case 0: + return "\\0"; + case 1: + return "\\x1"; + case 2: + return "\\x2"; + case 3: + return "\\x3"; + case 4: + return "\\x4"; + case 5: + return "\\x5"; + case 6: + return "\\x6"; + case 7: + return "\\x7"; + case 8: + return "\\x8"; + case 9: + return "\\t"; + case 10: + return "\\n"; + case 11: + return "\\xB"; + case 12: + return "\\xC"; + case 13: + return "\\r"; + case 14: + return "\\xE"; + case 15: + return "\\xF"; + case 16: + return "\\x10"; + case 17: + return "\\x11"; + case 18: + return "\\x12"; + case 19: + return "\\x13"; + case 20: + return "\\x14"; + case 21: + return "\\x15"; + case 22: + return "\\x16"; + case 23: + return "\\x17"; + case 24: + return "\\x18"; + case 25: + return "\\x19"; + case 26: + return "\\x1A"; + case 27: + return "\\x1B"; + case 28: + return "\\x1C"; + case 29: + return "\\x1D"; + case 30: + return "\\x1E"; + case 31: + return "\\x1F"; + case 32: + return " "; + case 33: + return "!"; + case 34: + return "\\\""; + case 35: + return "#"; + case 36: + return "$"; + case 37: + return "%"; + case 38: + return "&"; + case 39: + return "'"; + case 40: + return "("; + case 41: + return ")"; + case 42: + return "*"; + case 43: + return "+"; + case 44: + return ","; + case 45: + return "-"; + case 46: + return "."; + case 47: + return "/"; + case 48: + return "0"; + case 49: + return "1"; + case 50: + return "2"; + case 51: + return "3"; + case 52: + return "4"; + case 53: + return "5"; + case 54: + return "6"; + case 55: + return "7"; + case 56: + return "8"; + case 57: + return "9"; + case 58: + return ":"; + case 59: + return ";"; + case 60: + return "<"; + case 61: + return "="; + case 62: + return ">"; + case 63: + return "?"; + case 64: + return "@"; + case 65: + return "A"; + case 66: + return "B"; + case 67: + return "C"; + case 68: + return "D"; + case 69: + return "E"; + case 70: + return "F"; + case 71: + return "G"; + case 72: + return "H"; + case 73: + return "I"; + case 74: + return "J"; + case 75: + return "K"; + case 76: + return "L"; + case 77: + return "M"; + case 78: + return "N"; + case 79: + return "O"; + case 80: + return "P"; + case 81: + return "Q"; + case 82: + return "R"; + case 83: + return "S"; + case 84: + return "T"; + case 85: + return "U"; + case 86: + return "V"; + case 87: + return "W"; + case 88: + return "X"; + case 89: + return "Y"; + case 90: + return "Z"; + case 91: + return "["; + case 92: + return "\\\\"; + case 93: + return "]"; + case 94: + return "^"; + case 95: + return "_"; + case 96: + return "`"; + case 97: + return "a"; + case 98: + return "b"; + case 99: + return "c"; + case 100: + return "d"; + case 101: + return "e"; + case 102: + return "f"; + case 103: + return "g"; + case 104: + return "h"; + case 105: + return "i"; + case 106: + return "j"; + case 107: + return "k"; + case 108: + return "l"; + case 109: + return "m"; + case 110: + return "n"; + case 111: + return "o"; + case 112: + return "p"; + case 113: + return "q"; + case 114: + return "r"; + case 115: + return "s"; + case 116: + return "t"; + case 117: + return "u"; + case 118: + return "v"; + case 119: + return "w"; + case 120: + return "x"; + case 121: + return "y"; + case 122: + return "z"; + case 123: + return "{"; + case 124: + return "|"; + case 125: + return "}"; + case 126: + return "~"; + case 127: + return "\\x7F"; + case 128: + return "\\x80"; + case 129: + return "\\x81"; + case 130: + return "\\x82"; + case 131: + return "\\x83"; + case 132: + return "\\x84"; + case 133: + return "\\x85"; + case 134: + return "\\x86"; + case 135: + return "\\x87"; + case 136: + return "\\x88"; + case 137: + return "\\x89"; + case 138: + return "\\x8A"; + case 139: + return "\\x8B"; + case 140: + return "\\x8C"; + case 141: + return "\\x8D"; + case 142: + return "\\x8E"; + case 143: + return "\\x8F"; + case 144: + return "\\x90"; + case 145: + return "\\x91"; + case 146: + return "\\x92"; + case 147: + return "\\x93"; + case 148: + return "\\x94"; + case 149: + return "\\x95"; + case 150: + return "\\x96"; + case 151: + return "\\x97"; + case 152: + return "\\x98"; + case 153: + return "\\x99"; + case 154: + return "\\x9A"; + case 155: + return "\\x9B"; + case 156: + return "\\x9C"; + case 157: + return "\\x9D"; + case 158: + return "\\x9E"; + case 159: + return "\\x9F"; + case 160: + return "\\xA0"; + case 161: + return "\\xA1"; + case 162: + return "\\xA2"; + case 163: + return "\\xA3"; + case 164: + return "\\xA4"; + case 165: + return "\\xA5"; + case 166: + return "\\xA6"; + case 167: + return "\\xA7"; + case 168: + return "\\xA8"; + case 169: + return "\\xA9"; + case 170: + return "\\xAA"; + case 171: + return "\\xAB"; + case 172: + return "\\xAC"; + case 173: + return "\\xAD"; + case 174: + return "\\xAE"; + case 175: + return "\\xAF"; + case 176: + return "\\xB0"; + case 177: + return "\\xB1"; + case 178: + return "\\xB2"; + case 179: + return "\\xB3"; + case 180: + return "\\xB4"; + case 181: + return "\\xB5"; + case 182: + return "\\xB6"; + case 183: + return "\\xB7"; + case 184: + return "\\xB8"; + case 185: + return "\\xB9"; + case 186: + return "\\xBA"; + case 187: + return "\\xBB"; + case 188: + return "\\xBC"; + case 189: + return "\\xBD"; + case 190: + return "\\xBE"; + case 191: + return "\\xBF"; + case 192: + return "\\xC0"; + case 193: + return "\\xC1"; + case 194: + return "\\xC2"; + case 195: + return "\\xC3"; + case 196: + return "\\xC4"; + case 197: + return "\\xC5"; + case 198: + return "\\xC6"; + case 199: + return "\\xC7"; + case 200: + return "\\xC8"; + case 201: + return "\\xC9"; + case 202: + return "\\xCA"; + case 203: + return "\\xCB"; + case 204: + return "\\xCC"; + case 205: + return "\\xCD"; + case 206: + return "\\xCE"; + case 207: + return "\\xCF"; + case 208: + return "\\xD0"; + case 209: + return "\\xD1"; + case 210: + return "\\xD2"; + case 211: + return "\\xD3"; + case 212: + return "\\xD4"; + case 213: + return "\\xD5"; + case 214: + return "\\xD6"; + case 215: + return "\\xD7"; + case 216: + return "\\xD8"; + case 217: + return "\\xD9"; + case 218: + return "\\xDA"; + case 219: + return "\\xDB"; + case 220: + return "\\xDC"; + case 221: + return "\\xDD"; + case 222: + return "\\xDE"; + case 223: + return "\\xDF"; + case 224: + return "\\xE0"; + case 225: + return "\\xE1"; + case 226: + return "\\xE2"; + case 227: + return "\\xE3"; + case 228: + return "\\xE4"; + case 229: + return "\\xE5"; + case 230: + return "\\xE6"; + case 231: + return "\\xE7"; + case 232: + return "\\xE8"; + case 233: + return "\\xE9"; + case 234: + return "\\xEA"; + case 235: + return "\\xEB"; + case 236: + return "\\xEC"; + case 237: + return "\\xED"; + case 238: + return "\\xEE"; + case 239: + return "\\xEF"; + case 240: + return "\\xF0"; + case 241: + return "\\xF1"; + case 242: + return "\\xF2"; + case 243: + return "\\xF3"; + case 244: + return "\\xF4"; + case 245: + return "\\xF5"; + case 246: + return "\\xF6"; + case 247: + return "\\xF7"; + case 248: + return "\\xF8"; + case 249: + return "\\xF9"; + case 250: + return "\\xFA"; + case 251: + return "\\xFB"; + case 252: + return "\\xFC"; + case 253: + return "\\xFD"; + case 254: + return "\\xFE"; + case 255: + return "\\xFF"; + default: + assert(0); /* never gets here */ + return "dead code"; + } + assert(0); /* never gets here */ +} + +#endif /* XML_DTD */ + +static unsigned long +getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { + const char *const valueOrNull = getenv(variableName); + if (valueOrNull == NULL) { + return defaultDebugLevel; + } + const char *const value = valueOrNull; + + errno = 0; + char *afterValue = (char *)value; + unsigned long debugLevel = strtoul(value, &afterValue, 10); + if ((errno != 0) || (afterValue[0] != '\0')) { + errno = 0; + return defaultDebugLevel; + } + + return debugLevel; +} diff --git a/contrib/expat/lib/xmlrole.c b/contrib/expat/lib/xmlrole.c index 44772e21dd..3f0f5c150c 100644 --- a/contrib/expat/lib/xmlrole.c +++ b/contrib/expat/lib/xmlrole.c @@ -1,22 +1,50 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2019 David Loffredo + Copyright (c) 2021 Dong-hee Na + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" -#else -#ifdef HAVE_EXPAT_CONFIG_H -#include +#ifdef _WIN32 +# include "winconfig.h" #endif -#endif /* ndef COMPILED_FROM_DSP */ #include "expat_external.h" #include "internal.h" @@ -30,107 +58,88 @@ */ -static const char KW_ANY[] = { - ASCII_A, ASCII_N, ASCII_Y, '\0' }; -static const char KW_ATTLIST[] = { - ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; -static const char KW_CDATA[] = { - ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_DOCTYPE[] = { - ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; -static const char KW_ELEMENT[] = { - ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; -static const char KW_EMPTY[] = { - ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; -static const char KW_ENTITIES[] = { - ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, - '\0' }; -static const char KW_ENTITY[] = { - ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; -static const char KW_FIXED[] = { - ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; -static const char KW_ID[] = { - ASCII_I, ASCII_D, '\0' }; -static const char KW_IDREF[] = { - ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; -static const char KW_IDREFS[] = { - ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; +static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'}; +static const char KW_ATTLIST[] + = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'}; +static const char KW_CDATA[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_DOCTYPE[] + = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'}; +static const char KW_ELEMENT[] + = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'}; +static const char KW_EMPTY[] + = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'}; +static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, + ASCII_I, ASCII_E, ASCII_S, '\0'}; +static const char KW_ENTITY[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; +static const char KW_FIXED[] + = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'}; +static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'}; +static const char KW_IDREF[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; +static const char KW_IDREFS[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; #ifdef XML_DTD -static const char KW_IGNORE[] = { - ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; +static const char KW_IGNORE[] + = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'}; #endif -static const char KW_IMPLIED[] = { - ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; +static const char KW_IMPLIED[] + = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'}; #ifdef XML_DTD -static const char KW_INCLUDE[] = { - ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; +static const char KW_INCLUDE[] + = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'}; #endif -static const char KW_NDATA[] = { - ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_NMTOKEN[] = { - ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; -static const char KW_NMTOKENS[] = { - ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, - '\0' }; -static const char KW_NOTATION[] = - { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, - '\0' }; -static const char KW_PCDATA[] = { - ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_PUBLIC[] = { - ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; -static const char KW_REQUIRED[] = { - ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, - '\0' }; -static const char KW_SYSTEM[] = { - ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; +static const char KW_NDATA[] + = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_NMTOKEN[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; +static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, + ASCII_E, ASCII_N, ASCII_S, '\0'}; +static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, + ASCII_I, ASCII_O, ASCII_N, '\0'}; +static const char KW_PCDATA[] + = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_PUBLIC[] + = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'}; +static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, + ASCII_R, ASCII_E, ASCII_D, '\0'}; +static const char KW_SYSTEM[] + = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'}; #ifndef MIN_BYTES_PER_CHAR -#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) +# define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif #ifdef XML_DTD -#define setTopLevel(state) \ - ((state)->handler = ((state)->documentEntity \ - ? internalSubset \ - : externalSubset1)) +# define setTopLevel(state) \ + ((state)->handler \ + = ((state)->documentEntity ? internalSubset : externalSubset1)) #else /* not XML_DTD */ -#define setTopLevel(state) ((state)->handler = internalSubset) +# define setTopLevel(state) ((state)->handler = internalSubset) #endif /* not XML_DTD */ -typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, +typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok, + const char *ptr, const char *end, const ENCODING *enc); -static PROLOG_HANDLER - prolog0, prolog1, prolog2, - doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, - internalSubset, - entity0, entity1, entity2, entity3, entity4, entity5, entity6, - entity7, entity8, entity9, entity10, - notation0, notation1, notation2, notation3, notation4, - attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, - attlist7, attlist8, attlist9, - element0, element1, element2, element3, element4, element5, element6, - element7, +static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, + doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, + entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10, + notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, + attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, + attlist9, element0, element1, element2, element3, element4, element5, + element6, element7, #ifdef XML_DTD - externalSubset0, externalSubset1, - condSect0, condSect1, condSect2, + externalSubset0, externalSubset1, condSect0, condSect1, condSect2, #endif /* XML_DTD */ - declClose, - error; + declClose, error; static int FASTCALL common(PROLOG_STATE *state, int tok); static int PTRCALL -prolog0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: state->handler = prolog1; @@ -147,10 +156,8 @@ prolog0(PROLOG_STATE *state, case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: - if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_DOCTYPE_NONE; @@ -162,12 +169,8 @@ prolog0(PROLOG_STATE *state, } static int PTRCALL -prolog1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -176,12 +179,17 @@ prolog1(PROLOG_STATE *state, case XML_TOK_COMMENT: return XML_ROLE_COMMENT; case XML_TOK_BOM: - return XML_ROLE_NONE; + /* This case can never arise. To reach this role function, the + * parse must have passed through prolog0 and therefore have had + * some form of input, even if only a space. At that point, a + * byte order mark is no longer a valid character (though + * technically it should be interpreted as a non-breaking space), + * so will be rejected by the tokenizing stages. + */ + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ case XML_TOK_DECL_OPEN: - if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_DOCTYPE_NONE; @@ -193,12 +201,11 @@ prolog1(PROLOG_STATE *state, } static int PTRCALL -prolog2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -214,12 +221,11 @@ prolog2(PROLOG_STATE *state, } static int PTRCALL -doctype0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -232,12 +238,8 @@ doctype0(PROLOG_STATE *state, } static int PTRCALL -doctype1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -262,12 +264,11 @@ doctype1(PROLOG_STATE *state, } static int PTRCALL -doctype2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -279,12 +280,11 @@ doctype2(PROLOG_STATE *state, } static int PTRCALL -doctype3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -296,12 +296,11 @@ doctype3(PROLOG_STATE *state, } static int PTRCALL -doctype4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -316,12 +315,11 @@ doctype4(PROLOG_STATE *state, } static int PTRCALL -doctype5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_DOCTYPE_NONE; @@ -333,40 +331,28 @@ doctype5(PROLOG_STATE *state, } static int PTRCALL -internalSubset(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ENTITY)) { state->handler = entity0; return XML_ROLE_ENTITY_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ATTLIST)) { state->handler = attlist0; return XML_ROLE_ATTLIST_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ELEMENT)) { state->handler = element0; return XML_ROLE_ELEMENT_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_NOTATION)) { state->handler = notation0; return XML_ROLE_NOTATION_NONE; @@ -390,12 +376,8 @@ internalSubset(PROLOG_STATE *state, #ifdef XML_DTD static int PTRCALL -externalSubset0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { state->handler = externalSubset1; if (tok == XML_TOK_XML_DECL) return XML_ROLE_TEXT_DECL; @@ -403,12 +385,8 @@ externalSubset0(PROLOG_STATE *state, } static int PTRCALL -externalSubset1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_COND_SECT_OPEN: state->handler = condSect0; @@ -435,12 +413,11 @@ externalSubset1(PROLOG_STATE *state, #endif /* XML_DTD */ static int PTRCALL -entity0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -455,12 +432,11 @@ entity0(PROLOG_STATE *state, } static int PTRCALL -entity1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -472,12 +448,8 @@ entity1(PROLOG_STATE *state, } static int PTRCALL -entity2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -500,12 +472,11 @@ entity2(PROLOG_STATE *state, } static int PTRCALL -entity3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -517,12 +488,11 @@ entity3(PROLOG_STATE *state, } static int PTRCALL -entity4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -534,12 +504,8 @@ entity4(PROLOG_STATE *state, } static int PTRCALL -entity5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -557,12 +523,11 @@ entity5(PROLOG_STATE *state, } static int PTRCALL -entity6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -575,12 +540,8 @@ entity6(PROLOG_STATE *state, } static int PTRCALL -entity7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -603,12 +564,11 @@ entity7(PROLOG_STATE *state, } static int PTRCALL -entity8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -620,12 +580,11 @@ entity8(PROLOG_STATE *state, } static int PTRCALL -entity9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -637,12 +596,11 @@ entity9(PROLOG_STATE *state, } static int PTRCALL -entity10(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ENTITY_NONE; @@ -654,12 +612,11 @@ entity10(PROLOG_STATE *state, } static int PTRCALL -notation0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -671,12 +628,8 @@ notation0(PROLOG_STATE *state, } static int PTRCALL -notation1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -695,12 +648,11 @@ notation1(PROLOG_STATE *state, } static int PTRCALL -notation2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -712,12 +664,11 @@ notation2(PROLOG_STATE *state, } static int PTRCALL -notation3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -730,12 +681,11 @@ notation3(PROLOG_STATE *state, } static int PTRCALL -notation4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NOTATION_NONE; @@ -751,12 +701,11 @@ notation4(PROLOG_STATE *state, } static int PTRCALL -attlist0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -769,12 +718,11 @@ attlist0(PROLOG_STATE *state, } static int PTRCALL -attlist1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -790,34 +738,23 @@ attlist1(PROLOG_STATE *state, } static int PTRCALL -attlist2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; - case XML_TOK_NAME: - { - static const char * const types[] = { - KW_CDATA, - KW_ID, - KW_IDREF, - KW_IDREFS, - KW_ENTITY, - KW_ENTITIES, - KW_NMTOKEN, - KW_NMTOKENS, - }; - int i; - for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) - if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { - state->handler = attlist8; - return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; - } - } + case XML_TOK_NAME: { + static const char *const types[] = { + KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, + KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS, + }; + int i; + for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++) + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { + state->handler = attlist8; + return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; + } + } if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; return XML_ROLE_ATTLIST_NONE; @@ -831,12 +768,11 @@ attlist2(PROLOG_STATE *state, } static int PTRCALL -attlist3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -850,12 +786,11 @@ attlist3(PROLOG_STATE *state, } static int PTRCALL -attlist4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -870,12 +805,11 @@ attlist4(PROLOG_STATE *state, } static int PTRCALL -attlist5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -887,12 +821,11 @@ attlist5(PROLOG_STATE *state, } static int PTRCALL -attlist6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -904,12 +837,11 @@ attlist6(PROLOG_STATE *state, } static int PTRCALL -attlist7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -925,33 +857,23 @@ attlist7(PROLOG_STATE *state, /* default value */ static int PTRCALL -attlist8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; case XML_TOK_POUND_NAME: - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_FIXED)) { state->handler = attlist9; return XML_ROLE_ATTLIST_NONE; @@ -965,12 +887,11 @@ attlist8(PROLOG_STATE *state, } static int PTRCALL -attlist9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ATTLIST_NONE; @@ -982,12 +903,11 @@ attlist9(PROLOG_STATE *state, } static int PTRCALL -element0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1000,12 +920,8 @@ element0(PROLOG_STATE *state, } static int PTRCALL -element1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1030,19 +946,13 @@ element1(PROLOG_STATE *state, } static int PTRCALL -element2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; case XML_TOK_POUND_NAME: - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; @@ -1070,12 +980,11 @@ element2(PROLOG_STATE *state, } static int PTRCALL -element3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1095,12 +1004,11 @@ element3(PROLOG_STATE *state, } static int PTRCALL -element4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1113,12 +1021,11 @@ element4(PROLOG_STATE *state, } static int PTRCALL -element5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1134,12 +1041,11 @@ element5(PROLOG_STATE *state, } static int PTRCALL -element6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1164,12 +1070,11 @@ element6(PROLOG_STATE *state, } static int PTRCALL -element7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_ELEMENT_NONE; @@ -1214,12 +1119,8 @@ element7(PROLOG_STATE *state, #ifdef XML_DTD static int PTRCALL -condSect0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1238,12 +1139,11 @@ condSect0(PROLOG_STATE *state, } static int PTRCALL -condSect1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1256,12 +1156,11 @@ condSect1(PROLOG_STATE *state, } static int PTRCALL -condSect2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1275,12 +1174,11 @@ condSect2(PROLOG_STATE *state, #endif /* XML_DTD */ static int PTRCALL -declClose(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return state->role_none; @@ -1291,30 +1189,52 @@ declClose(PROLOG_STATE *state, return common(state, tok); } +/* This function will only be invoked if the internal logic of the + * parser has broken down. It is used in two cases: + * + * 1: When the XML prolog has been finished. At this point the + * processor (the parser level above these role handlers) should + * switch from prologProcessor to contentProcessor and reinitialise + * the handler function. + * + * 2: When an error has been detected (via common() below). At this + * point again the processor should be switched to errorProcessor, + * which will never call a handler. + * + * The result of this is that error() can only be called if the + * processor switch failed to happen, which is an internal error and + * therefore we shouldn't be able to provoke it simply by using the + * library. It is a necessary backstop, however, so we merely exclude + * it from the coverage statistics. + * + * LCOV_EXCL_START + */ static int PTRCALL -error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +error(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(state); + UNUSED_P(tok); + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); return XML_ROLE_NONE; } +/* LCOV_EXCL_STOP */ static int FASTCALL -common(PROLOG_STATE *state, int tok) -{ +common(PROLOG_STATE *state, int tok) { #ifdef XML_DTD - if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) + if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) return XML_ROLE_INNER_PARAM_ENTITY_REF; +#else + UNUSED_P(tok); #endif state->handler = error; return XML_ROLE_ERROR; } void -XmlPrologStateInit(PROLOG_STATE *state) -{ +XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; #ifdef XML_DTD state->documentEntity = 1; @@ -1326,8 +1246,7 @@ XmlPrologStateInit(PROLOG_STATE *state) #ifdef XML_DTD void -XmlPrologStateInitExternalEntity(PROLOG_STATE *state) -{ +XmlPrologStateInitExternalEntity(PROLOG_STATE *state) { state->handler = externalSubset0; state->documentEntity = 0; state->includeLevel = 0; diff --git a/contrib/expat/lib/xmlrole.h b/contrib/expat/lib/xmlrole.h index 4dd9f06f97..d6e1fa150a 100644 --- a/contrib/expat/lib/xmlrole.h +++ b/contrib/expat/lib/xmlrole.h @@ -1,5 +1,36 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Karl Waclawek + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef XmlRole_INCLUDED @@ -8,7 +39,7 @@ #ifdef __VMS /* 0 1 2 3 0 1 2 3 1234567890123456789012345678901 1234567890123456789012345678901 */ -#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt +# define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt #endif #include "xmltok.h" @@ -85,11 +116,8 @@ enum { }; typedef struct prolog_state { - int (PTRCALL *handler) (struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); + int(PTRCALL *handler)(struct prolog_state *state, int tok, const char *ptr, + const char *end, const ENCODING *enc); unsigned level; int role_none; #ifdef XML_DTD @@ -104,8 +132,8 @@ void XmlPrologStateInit(PROLOG_STATE *); void XmlPrologStateInitExternalEntity(PROLOG_STATE *); #endif /* XML_DTD */ -#define XmlTokenRole(state, tok, ptr, end, enc) \ - (((state)->handler)(state, tok, ptr, end, enc)) +#define XmlTokenRole(state, tok, ptr, end, enc) \ + (((state)->handler)(state, tok, ptr, end, enc)) #ifdef __cplusplus } diff --git a/contrib/expat/lib/xmltok.c b/contrib/expat/lib/xmltok.c index bf09dfc72b..2b7012a58b 100644 --- a/contrib/expat/lib/xmltok.c +++ b/contrib/expat/lib/xmltok.c @@ -1,22 +1,58 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2001-2003 Fred L. Drake, Jr. + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Don Lewis + Copyright (c) 2017 Rhodri James + Copyright (c) 2017 Alexander Bluhm + Copyright (c) 2017 Benbuck Nason + Copyright (c) 2017 José Gutiérrez de la Concha + Copyright (c) 2019 David Loffredo + Copyright (c) 2021 Dong-hee Na + Copyright (c) 2022 Martin Ettl + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include +#include /* memcpy */ +#include -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" -#else -#ifdef HAVE_EXPAT_CONFIG_H -#include +#ifdef _WIN32 +# include "winconfig.h" #endif -#endif /* ndef COMPILED_FROM_DSP */ #include "expat_external.h" #include "internal.h" @@ -24,59 +60,44 @@ #include "nametab.h" #ifdef XML_DTD -#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) +# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) #else -#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ +# define IGNORE_SECTION_TOK_VTABLE /* as nothing */ #endif -#define VTABLE1 \ - { PREFIX(prologTok), PREFIX(contentTok), \ - PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ - { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ - PREFIX(sameName), \ - PREFIX(nameMatchesAscii), \ - PREFIX(nameLength), \ - PREFIX(skipS), \ - PREFIX(getAtts), \ - PREFIX(charRefNumber), \ - PREFIX(predefinedEntityName), \ - PREFIX(updatePosition), \ - PREFIX(isPublicId) +#define VTABLE1 \ + {PREFIX(prologTok), PREFIX(contentTok), \ + PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \ + {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \ + PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \ + PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \ + PREFIX(updatePosition), PREFIX(isPublicId) #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) -#define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) +#define UCS2_GET_NAMING(pages, hi, lo) \ + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F))) /* A 2 byte UTF-8 representation splits the characters 11 bits between the bottom 5 and 6 bits of the bytes. We need 8 bits to index into pages, 3 bits to add to that index and 5 bits to generate the mask. */ -#define UTF8_GET_NAMING2(pages, byte) \ - (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ - + ((((byte)[0]) & 3) << 1) \ - + ((((byte)[1]) >> 5) & 1)] \ - & (1 << (((byte)[1]) & 0x1F))) +#define UTF8_GET_NAMING2(pages, byte) \ + (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \ + & (1u << (((byte)[1]) & 0x1F))) /* A 3 byte UTF-8 representation splits the characters 16 bits between the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index into pages, 3 bits to add to that index and 5 bits to generate the mask. */ -#define UTF8_GET_NAMING3(pages, byte) \ - (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ - + ((((byte)[1]) >> 2) & 0xF)] \ - << 3) \ - + ((((byte)[1]) & 3) << 1) \ - + ((((byte)[2]) >> 5) & 1)] \ - & (1 << (((byte)[2]) & 0x1F))) - -#define UTF8_GET_NAMING(pages, p, n) \ - ((n) == 2 \ - ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ - : ((n) == 3 \ - ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ - : 0)) +#define UTF8_GET_NAMING3(pages, byte) \ + (namingBitmap \ + [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \ + << 3) \ + + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ + & (1u << (((byte)[2]) & 0x1F))) /* Detection of invalid UTF-8 sequences is based on Table 3.1B of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ @@ -88,88 +109,76 @@ (A & 0xC0) == 0xC0 means A > 0xBF */ -#define UTF8_INVALID2(p) \ +#define UTF8_INVALID2(p) \ ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) -#define UTF8_INVALID3(p) \ - (((p)[2] & 0x80) == 0 \ - || \ - ((*p) == 0xEF && (p)[1] == 0xBF \ - ? \ - (p)[2] > 0xBD \ - : \ - ((p)[2] & 0xC0) == 0xC0) \ - || \ - ((*p) == 0xE0 \ - ? \ - (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ - : \ - ((p)[1] & 0x80) == 0 \ - || \ - ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) - -#define UTF8_INVALID4(p) \ - (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \ - || \ - ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \ - || \ - ((*p) == 0xF0 \ - ? \ - (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ - : \ - ((p)[1] & 0x80) == 0 \ - || \ - ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) +#define UTF8_INVALID3(p) \ + (((p)[2] & 0x80) == 0 \ + || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \ + : ((p)[2] & 0xC0) == 0xC0) \ + || ((*p) == 0xE0 \ + ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ + : ((p)[1] & 0x80) == 0 \ + || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) + +#define UTF8_INVALID4(p) \ + (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \ + || ((p)[2] & 0xC0) == 0xC0 \ + || ((*p) == 0xF0 \ + ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ + : ((p)[1] & 0x80) == 0 \ + || ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) static int PTRFASTCALL -isNever(const ENCODING *enc, const char *p) -{ +isNever(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + UNUSED_P(p); return 0; } static int PTRFASTCALL -utf8_isName2(const ENCODING *enc, const char *p) -{ +utf8_isName2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isName3(const ENCODING *enc, const char *p) -{ +utf8_isName3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } #define utf8_isName4 isNever static int PTRFASTCALL -utf8_isNmstrt2(const ENCODING *enc, const char *p) -{ +utf8_isNmstrt2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isNmstrt3(const ENCODING *enc, const char *p) -{ +utf8_isNmstrt3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } #define utf8_isNmstrt4 isNever static int PTRFASTCALL -utf8_isInvalid2(const ENCODING *enc, const char *p) -{ +utf8_isInvalid2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID2((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid3(const ENCODING *enc, const char *p) -{ +utf8_isInvalid3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID3((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid4(const ENCODING *enc, const char *p) -{ +utf8_isInvalid4(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID4((const unsigned char *)p); } @@ -177,50 +186,44 @@ struct normal_encoding { ENCODING enc; unsigned char type[256]; #ifdef XML_MIN_SIZE - int (PTRFASTCALL *byteType)(const ENCODING *, const char *); - int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); - int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); - int (PTRCALL *charMatches)(const ENCODING *, const char *, int); + int(PTRFASTCALL *byteType)(const ENCODING *, const char *); + int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); + int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); + int(PTRCALL *charMatches)(const ENCODING *, const char *, int); #endif /* XML_MIN_SIZE */ - int (PTRFASTCALL *isName2)(const ENCODING *, const char *); - int (PTRFASTCALL *isName3)(const ENCODING *, const char *); - int (PTRFASTCALL *isName4)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); - int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); - int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); - int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); - int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); + int(PTRFASTCALL *isName2)(const ENCODING *, const char *); + int(PTRFASTCALL *isName3)(const ENCODING *, const char *); + int(PTRFASTCALL *isName4)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); }; -#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc)) +#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc)) #ifdef XML_MIN_SIZE -#define STANDARD_VTABLE(E) \ - E ## byteType, \ - E ## isNameMin, \ - E ## isNmstrtMin, \ - E ## byteToAscii, \ - E ## charMatches, +# define STANDARD_VTABLE(E) \ + E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches, #else -#define STANDARD_VTABLE(E) /* as nothing */ +# define STANDARD_VTABLE(E) /* as nothing */ #endif -#define NORMAL_VTABLE(E) \ - E ## isName2, \ - E ## isName3, \ - E ## isName4, \ - E ## isNmstrt2, \ - E ## isNmstrt3, \ - E ## isNmstrt4, \ - E ## isInvalid2, \ - E ## isInvalid3, \ - E ## isInvalid4 +#define NORMAL_VTABLE(E) \ + E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \ + E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4 + +#define NULL_VTABLE \ + /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, \ + /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \ + /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL static int FASTCALL checkCharRefNumber(int); @@ -228,75 +231,76 @@ static int FASTCALL checkCharRefNumber(int); #include "ascii.h" #ifdef XML_MIN_SIZE -#define sb_isNameMin isNever -#define sb_isNmstrtMin isNever +# define sb_isNameMin isNever +# define sb_isNmstrtMin isNever #endif #ifdef XML_MIN_SIZE -#define MINBPC(enc) ((enc)->minBytesPerChar) +# define MINBPC(enc) ((enc)->minBytesPerChar) #else /* minimum bytes per character */ -#define MINBPC(enc) 1 +# define MINBPC(enc) 1 #endif -#define SB_BYTE_TYPE(enc, p) \ +#define SB_BYTE_TYPE(enc, p) \ (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE static int PTRFASTCALL -sb_byteType(const ENCODING *enc, const char *p) -{ +sb_byteType(const ENCODING *enc, const char *p) { return SB_BYTE_TYPE(enc, p); } -#define BYTE_TYPE(enc, p) \ - (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) +# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) #else -#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) +# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) #endif #ifdef XML_MIN_SIZE -#define BYTE_TO_ASCII(enc, p) \ - (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) +# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) static int PTRFASTCALL -sb_byteToAscii(const ENCODING *enc, const char *p) -{ +sb_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return *p; } #else -#define BYTE_TO_ASCII(enc, p) (*(p)) +# define BYTE_TO_ASCII(enc, p) (*(p)) #endif -#define IS_NAME_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p)) -#define IS_NMSTRT_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p)) -#define IS_INVALID_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p)) +#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) +#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) +#ifdef XML_MIN_SIZE +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n \ + && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#else +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#endif #ifdef XML_MIN_SIZE -#define IS_NAME_CHAR_MINBPC(enc, p) \ - (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ - (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) +# define IS_NAME_CHAR_MINBPC(enc, p) \ + (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) \ + (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) #else -#define IS_NAME_CHAR_MINBPC(enc, p) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) +# define IS_NAME_CHAR_MINBPC(enc, p) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) #endif #ifdef XML_MIN_SIZE -#define CHAR_MATCHES(enc, p, c) \ - (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) +# define CHAR_MATCHES(enc, p, c) \ + (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) static int PTRCALL -sb_charMatches(const ENCODING *enc, const char *p, int c) -{ +sb_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); return *p == c; } #else /* c is an ASCII character */ -#define CHAR_MATCHES(enc, p, c) (*(p) == c) +# define CHAR_MATCHES(enc, p, c) (*(p) == (c)) #endif -#define PREFIX(ident) normal_ ## ident +#define PREFIX(ident) normal_##ident #define XML_TOK_IMPL_C #include "xmltok_impl.c" #undef XML_TOK_IMPL_C @@ -311,223 +315,301 @@ sb_charMatches(const ENCODING *enc, const char *p, int c) #undef IS_NMSTRT_CHAR_MINBPC #undef IS_INVALID_CHAR -enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ - UTF8_cval1 = 0x00, - UTF8_cval2 = 0xc0, - UTF8_cval3 = 0xe0, - UTF8_cval4 = 0xf0 +enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ + UTF8_cval1 = 0x00, + UTF8_cval2 = 0xc0, + UTF8_cval3 = 0xe0, + UTF8_cval4 = 0xf0 }; -static void PTRCALL -utf8_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - char *to; - const char *from; - if (fromLim - *fromP > toLim - *toP) { - /* Avoid copying partial characters. */ - for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) - if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) +void +_INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef) { + const char *fromLim = *fromLimRef; + size_t walked = 0; + for (; fromLim > from; fromLim--, walked++) { + const unsigned char prev = (unsigned char)fromLim[-1]; + if ((prev & 0xf8u) + == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ + if (walked + 1 >= 4) { + fromLim += 4 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xf0u) + == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ + if (walked + 1 >= 3) { + fromLim += 3 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xe0u) + == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ + if (walked + 1 >= 2) { + fromLim += 2 - 1; break; + } else { + walked = 0; + } + } else if ((prev & 0x80u) + == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ + break; + } } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) - *to = *from; - *fromP = from; - *toP = to; + *fromLimRef = fromLim; } -static void PTRCALL -utf8_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ +static enum XML_Convert_Result PTRCALL +utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + bool input_incomplete = false; + bool output_exhausted = false; + + /* Avoid copying partial characters (due to limited space). */ + const ptrdiff_t bytesAvailable = fromLim - *fromP; + const ptrdiff_t bytesStorable = toLim - *toP; + UNUSED_P(enc); + if (bytesAvailable > bytesStorable) { + fromLim = *fromP + bytesStorable; + output_exhausted = true; + } + + /* Avoid copying partial characters (from incomplete input). */ + { + const char *const fromLimBefore = fromLim; + _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim); + if (fromLim < fromLimBefore) { + input_incomplete = true; + } + } + + { + const ptrdiff_t bytesToCopy = fromLim - *fromP; + memcpy(*toP, *fromP, bytesToCopy); + *fromP += bytesToCopy; + *toP += bytesToCopy; + } + + if (output_exhausted) /* needs to go first */ + return XML_CONVERT_OUTPUT_EXHAUSTED; + else if (input_incomplete) + return XML_CONVERT_INPUT_INCOMPLETE; + else + return XML_CONVERT_COMPLETED; +} + +static enum XML_Convert_Result PTRCALL +utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: + if (fromLim - from < 2) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: - *to++ = (unsigned short)(((from[0] & 0xf) << 12) - | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); + if (fromLim - from < 3) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) + | (from[2] & 0x3f)); from += 3; break; - case BT_LEAD4: - { - unsigned long n; - if (to + 1 == toLim) - goto after; - n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) - | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); - n -= 0x10000; - to[0] = (unsigned short)((n >> 10) | 0xD800); - to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); - to += 2; - from += 4; + case BT_LEAD4: { + unsigned long n; + if (toLim - to < 2) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; + goto after; } - break; + if (fromLim - from < 4) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) + | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); + n -= 0x10000; + to[0] = (unsigned short)((n >> 10) | 0xD800); + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); + to += 2; + from += 4; + } break; default: *to++ = *from++; break; } } + if (from < fromLim) + res = XML_CONVERT_OUTPUT_EXHAUSTED; after: *fromP = from; *toP = to; + return res; } #ifdef XML_NS -static const struct normal_encoding utf8_encoding_ns = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { -#include "asciitab.h" -#include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; +static const struct normal_encoding utf8_encoding_ns + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { +# include "asciitab.h" +# include "utf8tab.h" + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #endif -static const struct normal_encoding utf8_encoding = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding utf8_encoding + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #ifdef XML_NS -static const struct normal_encoding internal_utf8_encoding_ns = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { -#include "iasciitab.h" -#include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; +static const struct normal_encoding internal_utf8_encoding_ns + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { +# include "iasciitab.h" +# include "utf8tab.h" + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #endif -static const struct normal_encoding internal_utf8_encoding = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding internal_utf8_encoding + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "iasciitab.h" #undef BT_COLON #include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; -static void PTRCALL -latin1_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ +static enum XML_Convert_Result PTRCALL +latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + UNUSED_P(enc); for (;;) { unsigned char c; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; - } - else { + } else { if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; } } } -static void PTRCALL -latin1_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ - while (*fromP != fromLim && *toP != toLim) +static enum XML_Convert_Result PTRCALL +latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + UNUSED_P(enc); + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS -static const struct normal_encoding latin1_encoding_ns = { - { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(sb_) -}; +static const struct normal_encoding latin1_encoding_ns + = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; #endif -static const struct normal_encoding latin1_encoding = { - { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, - { +static const struct normal_encoding latin1_encoding + = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(sb_) -}; - -static void PTRCALL -ascii_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - while (*fromP != fromLim && *toP != toLim) + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; + +static enum XML_Convert_Result PTRCALL +ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + UNUSED_P(enc); + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS -static const struct normal_encoding ascii_encoding_ns = { - { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, - { -#include "asciitab.h" -/* BT_NONXML == 0 */ - }, - STANDARD_VTABLE(sb_) -}; +static const struct normal_encoding ascii_encoding_ns + = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, + { +# include "asciitab.h" + /* BT_NONXML == 0 */ + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; #endif -static const struct normal_encoding ascii_encoding = { - { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding ascii_encoding + = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON -/* BT_NONXML == 0 */ - }, - STANDARD_VTABLE(sb_) -}; + /* BT_NONXML == 0 */ + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; static int PTRFASTCALL -unicode_byte_type(char hi, char lo) -{ +unicode_byte_type(char hi, char lo) { switch ((unsigned char)hi) { - case 0xD8: case 0xD9: case 0xDA: case 0xDB: + /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: return BT_LEAD4; - case 0xDC: case 0xDD: case 0xDE: case 0xDF: + /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: return BT_TRAIL; case 0xFF: switch ((unsigned char)lo) { - case 0xFF: - case 0xFE: + case 0xFF: /* noncharacter-FFFF */ + case 0xFE: /* noncharacter-FFFE */ return BT_NONXML; } break; @@ -535,85 +617,105 @@ unicode_byte_type(char hi, char lo) return BT_NONASCII; } -#define DEFINE_UTF16_TO_UTF8(E) \ -static void PTRCALL \ -E ## toUtf8(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - char **toP, const char *toLim) \ -{ \ - const char *from; \ - for (from = *fromP; from != fromLim; from += 2) { \ - int plane; \ - unsigned char lo2; \ - unsigned char lo = GET_LO(from); \ - unsigned char hi = GET_HI(from); \ - switch (hi) { \ - case 0: \ - if (lo < 0x80) { \ - if (*toP == toLim) { \ - *fromP = from; \ - return; \ - } \ - *(*toP)++ = lo; \ - break; \ - } \ - /* fall through */ \ - case 0x1: case 0x2: case 0x3: \ - case 0x4: case 0x5: case 0x6: case 0x7: \ - if (toLim - *toP < 2) { \ - *fromP = from; \ - return; \ - } \ - *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ - *(*toP)++ = ((lo & 0x3f) | 0x80); \ - break; \ - default: \ - if (toLim - *toP < 3) { \ - *fromP = from; \ - return; \ - } \ - /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ - *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ - *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ - *(*toP)++ = ((lo & 0x3f) | 0x80); \ - break; \ - case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ - if (toLim - *toP < 4) { \ - *fromP = from; \ - return; \ - } \ - plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ - *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ - *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ - from += 2; \ - lo2 = GET_LO(from); \ - *(*toP)++ = (((lo & 0x3) << 4) \ - | ((GET_HI(from) & 0x3) << 2) \ - | (lo2 >> 6) \ - | 0x80); \ - *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ - break; \ - } \ - } \ - *fromP = from; \ -} +#define DEFINE_UTF16_TO_UTF8(E) \ + static enum XML_Convert_Result PTRCALL E##toUtf8( \ + const ENCODING *enc, const char **fromP, const char *fromLim, \ + char **toP, const char *toLim) { \ + const char *from = *fromP; \ + UNUSED_P(enc); \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ + int plane; \ + unsigned char lo2; \ + unsigned char lo = GET_LO(from); \ + unsigned char hi = GET_HI(from); \ + switch (hi) { \ + case 0: \ + if (lo < 0x80) { \ + if (*toP == toLim) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + *(*toP)++ = lo; \ + break; \ + } \ + /* fall through */ \ + case 0x1: \ + case 0x2: \ + case 0x3: \ + case 0x4: \ + case 0x5: \ + case 0x6: \ + case 0x7: \ + if (toLim - *toP < 2) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + default: \ + if (toLim - *toP < 3) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ + *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ + *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + case 0xD8: \ + case 0xD9: \ + case 0xDA: \ + case 0xDB: \ + if (toLim - *toP < 4) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (fromLim - from < 4) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ + *(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \ + *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ + from += 2; \ + lo2 = GET_LO(from); \ + *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) \ + | (lo2 >> 6) | 0x80); \ + *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ + break; \ + } \ + } \ + *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ + } -#define DEFINE_UTF16_TO_UTF16(E) \ -static void PTRCALL \ -E ## toUtf16(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - unsigned short **toP, const unsigned short *toLim) \ -{ \ - /* Avoid copying first half only of surrogate */ \ - if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ - fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ - *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ -} +#define DEFINE_UTF16_TO_UTF16(E) \ + static enum XML_Convert_Result PTRCALL E##toUtf16( \ + const ENCODING *enc, const char **fromP, const char *fromLim, \ + unsigned short **toP, const unsigned short *toLim) { \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ + UNUSED_P(enc); \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ + /* Avoid copying first half only of surrogate */ \ + if (fromLim - *fromP > ((toLim - *toP) << 1) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ + fromLim -= 2; \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ + *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ + } -#define SET2(ptr, ch) \ - (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) +#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) #define GET_LO(ptr) ((unsigned char)(ptr)[0]) #define GET_HI(ptr) ((unsigned char)(ptr)[1]) @@ -624,8 +726,7 @@ DEFINE_UTF16_TO_UTF16(little2_) #undef GET_LO #undef GET_HI -#define SET2(ptr, ch) \ - (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) +#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) #define GET_LO(ptr) ((unsigned char)(ptr)[1]) #define GET_HI(ptr) ((unsigned char)(ptr)[0]) @@ -636,317 +737,307 @@ DEFINE_UTF16_TO_UTF16(big2_) #undef GET_LO #undef GET_HI -#define LITTLE2_BYTE_TYPE(enc, p) \ - ((p)[1] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ - : unicode_byte_type((p)[1], (p)[0])) -#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) -#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) -#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \ +#define LITTLE2_BYTE_TYPE(enc, p) \ + ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ + : unicode_byte_type((p)[1], (p)[0])) +#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) +#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c)) +#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) -#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ +#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) #ifdef XML_MIN_SIZE static int PTRFASTCALL -little2_byteType(const ENCODING *enc, const char *p) -{ +little2_byteType(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TYPE(enc, p); } static int PTRFASTCALL -little2_byteToAscii(const ENCODING *enc, const char *p) -{ - return LITTLE2_BYTE_TO_ASCII(enc, p); +little2_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_BYTE_TO_ASCII(p); } static int PTRCALL -little2_charMatches(const ENCODING *enc, const char *p, int c) -{ - return LITTLE2_CHAR_MATCHES(enc, p, c); +little2_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); + return LITTLE2_CHAR_MATCHES(p, c); } static int PTRFASTCALL -little2_isNameMin(const ENCODING *enc, const char *p) -{ - return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); +little2_isNameMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_IS_NAME_CHAR_MINBPC(p); } static int PTRFASTCALL -little2_isNmstrtMin(const ENCODING *enc, const char *p) -{ - return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); +little2_isNmstrtMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p); } -#undef VTABLE -#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 +# undef VTABLE +# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 #else /* not XML_MIN_SIZE */ -#undef PREFIX -#define PREFIX(ident) little2_ ## ident -#define MINBPC(enc) 2 +# undef PREFIX +# define PREFIX(ident) little2_##ident +# define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ -#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) -#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) -#define IS_NAME_CHAR(enc, p, n) 0 -#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) -#define IS_NMSTRT_CHAR(enc, p, n) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) - -#define XML_TOK_IMPL_C -#include "xmltok_impl.c" -#undef XML_TOK_IMPL_C - -#undef MINBPC -#undef BYTE_TYPE -#undef BYTE_TO_ASCII -#undef CHAR_MATCHES -#undef IS_NAME_CHAR -#undef IS_NAME_CHAR_MINBPC -#undef IS_NMSTRT_CHAR -#undef IS_NMSTRT_CHAR_MINBPC -#undef IS_INVALID_CHAR +# define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) +# define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p) +# define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c) +# define IS_NAME_CHAR(enc, p, n) 0 +# define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p) +# define IS_NMSTRT_CHAR(enc, p, n) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) + +# define XML_TOK_IMPL_C +# include "xmltok_impl.c" +# undef XML_TOK_IMPL_C + +# undef MINBPC +# undef BYTE_TYPE +# undef BYTE_TO_ASCII +# undef CHAR_MATCHES +# undef IS_NAME_CHAR +# undef IS_NAME_CHAR_MINBPC +# undef IS_NMSTRT_CHAR +# undef IS_NMSTRT_CHAR_MINBPC +# undef IS_INVALID_CHAR #endif /* not XML_MIN_SIZE */ #ifdef XML_NS -static const struct normal_encoding little2_encoding_ns = { - { VTABLE, 2, 0, -#if BYTEORDER == 1234 - 1 -#else - 0 -#endif - }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; +static const struct normal_encoding little2_encoding_ns + = {{VTABLE, 2, 0, +# if BYTEORDER == 1234 + 1 +# else + 0 +# endif + }, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #endif -static const struct normal_encoding little2_encoding = { - { VTABLE, 2, 0, +static const struct normal_encoding little2_encoding + = {{VTABLE, 2, 0, #if BYTEORDER == 1234 - 1 + 1 #else - 0 + 0 #endif - }, - { + }, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #if BYTEORDER != 4321 -#ifdef XML_NS +# ifdef XML_NS -static const struct normal_encoding internal_little2_encoding_ns = { - { VTABLE, 2, 0, 1 }, - { -#include "iasciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; +static const struct normal_encoding internal_little2_encoding_ns + = {{VTABLE, 2, 0, 1}, + { +# include "iasciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; -#endif +# endif -static const struct normal_encoding internal_little2_encoding = { - { VTABLE, 2, 0, 1 }, - { -#define BT_COLON BT_NMSTRT -#include "iasciitab.h" -#undef BT_COLON -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) -}; +static const struct normal_encoding internal_little2_encoding + = {{VTABLE, 2, 0, 1}, + { +# define BT_COLON BT_NMSTRT +# include "iasciitab.h" +# undef BT_COLON +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #endif - -#define BIG2_BYTE_TYPE(enc, p) \ - ((p)[0] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ - : unicode_byte_type((p)[0], (p)[1])) -#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) -#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) -#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \ +#define BIG2_BYTE_TYPE(enc, p) \ + ((p)[0] == 0 \ + ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ + : unicode_byte_type((p)[0], (p)[1])) +#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) +#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c)) +#define BIG2_IS_NAME_CHAR_MINBPC(p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) -#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ +#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) #ifdef XML_MIN_SIZE static int PTRFASTCALL -big2_byteType(const ENCODING *enc, const char *p) -{ +big2_byteType(const ENCODING *enc, const char *p) { return BIG2_BYTE_TYPE(enc, p); } static int PTRFASTCALL -big2_byteToAscii(const ENCODING *enc, const char *p) -{ - return BIG2_BYTE_TO_ASCII(enc, p); +big2_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_BYTE_TO_ASCII(p); } static int PTRCALL -big2_charMatches(const ENCODING *enc, const char *p, int c) -{ - return BIG2_CHAR_MATCHES(enc, p, c); +big2_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); + return BIG2_CHAR_MATCHES(p, c); } static int PTRFASTCALL -big2_isNameMin(const ENCODING *enc, const char *p) -{ - return BIG2_IS_NAME_CHAR_MINBPC(enc, p); +big2_isNameMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_IS_NAME_CHAR_MINBPC(p); } static int PTRFASTCALL -big2_isNmstrtMin(const ENCODING *enc, const char *p) -{ - return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); +big2_isNmstrtMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_IS_NMSTRT_CHAR_MINBPC(p); } -#undef VTABLE -#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 +# undef VTABLE +# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 #else /* not XML_MIN_SIZE */ -#undef PREFIX -#define PREFIX(ident) big2_ ## ident -#define MINBPC(enc) 2 +# undef PREFIX +# define PREFIX(ident) big2_##ident +# define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ -#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) -#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) -#define IS_NAME_CHAR(enc, p, n) 0 -#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) -#define IS_NMSTRT_CHAR(enc, p, n) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) - -#define XML_TOK_IMPL_C -#include "xmltok_impl.c" -#undef XML_TOK_IMPL_C - -#undef MINBPC -#undef BYTE_TYPE -#undef BYTE_TO_ASCII -#undef CHAR_MATCHES -#undef IS_NAME_CHAR -#undef IS_NAME_CHAR_MINBPC -#undef IS_NMSTRT_CHAR -#undef IS_NMSTRT_CHAR_MINBPC -#undef IS_INVALID_CHAR +# define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) +# define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p) +# define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) +# define IS_NAME_CHAR(enc, p, n) 0 +# define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p) +# define IS_NMSTRT_CHAR(enc, p, n) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p) + +# define XML_TOK_IMPL_C +# include "xmltok_impl.c" +# undef XML_TOK_IMPL_C + +# undef MINBPC +# undef BYTE_TYPE +# undef BYTE_TO_ASCII +# undef CHAR_MATCHES +# undef IS_NAME_CHAR +# undef IS_NAME_CHAR_MINBPC +# undef IS_NMSTRT_CHAR +# undef IS_NMSTRT_CHAR_MINBPC +# undef IS_INVALID_CHAR #endif /* not XML_MIN_SIZE */ #ifdef XML_NS -static const struct normal_encoding big2_encoding_ns = { - { VTABLE, 2, 0, -#if BYTEORDER == 4321 - 1 -#else - 0 -#endif - }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; +static const struct normal_encoding big2_encoding_ns + = {{VTABLE, 2, 0, +# if BYTEORDER == 4321 + 1 +# else + 0 +# endif + }, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #endif -static const struct normal_encoding big2_encoding = { - { VTABLE, 2, 0, +static const struct normal_encoding big2_encoding + = {{VTABLE, 2, 0, #if BYTEORDER == 4321 - 1 + 1 #else - 0 + 0 #endif - }, - { + }, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #if BYTEORDER != 1234 -#ifdef XML_NS +# ifdef XML_NS -static const struct normal_encoding internal_big2_encoding_ns = { - { VTABLE, 2, 0, 1 }, - { -#include "iasciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; +static const struct normal_encoding internal_big2_encoding_ns + = {{VTABLE, 2, 0, 1}, + { +# include "iasciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; -#endif +# endif -static const struct normal_encoding internal_big2_encoding = { - { VTABLE, 2, 0, 1 }, - { -#define BT_COLON BT_NMSTRT -#include "iasciitab.h" -#undef BT_COLON -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) -}; +static const struct normal_encoding internal_big2_encoding + = {{VTABLE, 2, 0, 1}, + { +# define BT_COLON BT_NMSTRT +# include "iasciitab.h" +# undef BT_COLON +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #endif #undef PREFIX static int FASTCALL -streqci(const char *s1, const char *s2) -{ +streqci(const char *s1, const char *s2) { for (;;) { char c1 = *s1++; char c2 = *s2++; if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) - c2 += ASCII_A - ASCII_a; + /* The following line will never get executed. streqci() is + * only called from two places, both of which guarantee to put + * upper-case strings into s2. + */ + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ if (c1 != c2) return 0; - if (!c1) + if (! c1) break; } return 1; } static void PTRCALL -initUpdatePosition(const ENCODING *enc, const char *ptr, - const char *end, POSITION *pos) -{ +initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { + UNUSED_P(enc); normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); } static int -toAscii(const ENCODING *enc, const char *ptr, const char *end) -{ +toAscii(const ENCODING *enc, const char *ptr, const char *end) { char buf[1]; char *p = buf; XmlUtf8Convert(enc, &ptr, end, &p, p + 1); @@ -957,8 +1048,7 @@ toAscii(const ENCODING *enc, const char *ptr, const char *end) } static int FASTCALL -isSpace(int c) -{ +isSpace(int c) { switch (c) { case 0x20: case 0xD: @@ -973,21 +1063,16 @@ isSpace(int c) followed by name=val. */ static int -parsePseudoAttribute(const ENCODING *enc, - const char *ptr, - const char *end, - const char **namePtr, - const char **nameEndPtr, - const char **valPtr, - const char **nextTokPtr) -{ +parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, + const char **namePtr, const char **nameEndPtr, + const char **valPtr, const char **nextTokPtr) { int c; char open; if (ptr == end) { *namePtr = NULL; return 1; } - if (!isSpace(toAscii(enc, ptr, end))) { + if (! isSpace(toAscii(enc, ptr, end))) { *nextTokPtr = ptr; return 0; } @@ -1043,12 +1128,9 @@ parsePseudoAttribute(const ENCODING *enc, c = toAscii(enc, ptr, end); if (c == open) break; - if (!(ASCII_a <= c && c <= ASCII_z) - && !(ASCII_A <= c && c <= ASCII_Z) - && !(ASCII_0 <= c && c <= ASCII_9) - && c != ASCII_PERIOD - && c != ASCII_MINUS - && c != ASCII_UNDERSCORE) { + if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z) + && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD + && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1057,68 +1139,52 @@ parsePseudoAttribute(const ENCODING *enc, return 1; } -static const char KW_version[] = { - ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' -}; +static const char KW_version[] + = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'}; -static const char KW_encoding[] = { - ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' -}; +static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, + ASCII_i, ASCII_n, ASCII_g, '\0'}; -static const char KW_standalone[] = { - ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, - ASCII_n, ASCII_e, '\0' -}; +static const char KW_standalone[] + = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, + ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'}; -static const char KW_yes[] = { - ASCII_y, ASCII_e, ASCII_s, '\0' -}; +static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'}; -static const char KW_no[] = { - ASCII_n, ASCII_o, '\0' -}; +static const char KW_no[] = {ASCII_n, ASCII_o, '\0'}; static int -doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, - const char *, +doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), - int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) -{ + int isGeneralTextEntity, const ENCODING *enc, const char *ptr, + const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, + const ENCODING **encoding, int *standalone) { const char *val = NULL; const char *name = NULL; const char *nameEnd = NULL; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) - || !name) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) + || ! name) { *badPtr = ptr; return 0; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { - if (!isGeneralTextEntity) { + if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { + if (! isGeneralTextEntity) { *badPtr = name; return 0; } - } - else { + } else { if (versionPtr) *versionPtr = val; if (versionEndPtr) *versionEndPtr = ptr; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } - if (!name) { + if (! name) { if (isGeneralTextEntity) { /* a TextDecl must have an EncodingDecl */ *badPtr = ptr; @@ -1129,7 +1195,7 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, } if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); - if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { + if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } @@ -1137,14 +1203,14 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, *encodingName = val; if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } - if (!name) + if (! name) return 1; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) + if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { *badPtr = name; return 0; @@ -1152,12 +1218,10 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; - } - else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { + } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 0; - } - else { + } else { *badPtr = val; return 0; } @@ -1171,11 +1235,16 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, } static int FASTCALL -checkCharRefNumber(int result) -{ +checkCharRefNumber(int result) { switch (result >> 8) { - case 0xD8: case 0xD9: case 0xDA: case 0xDB: - case 0xDC: case 0xDD: case 0xDE: case 0xDF: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: return -1; case 0: if (latin1_encoding.type[result] == BT_NONXML) @@ -1190,8 +1259,7 @@ checkCharRefNumber(int result) } int FASTCALL -XmlUtf8Encode(int c, char *buf) -{ +XmlUtf8Encode(int c, char *buf) { enum { /* minN is minimum legal resulting value for N byte sequence */ min2 = 0x80, @@ -1200,7 +1268,7 @@ XmlUtf8Encode(int c, char *buf) }; if (c < 0) - return 0; + return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ if (c < min2) { buf[0] = (char)(c | UTF8_cval1); return 1; @@ -1223,12 +1291,11 @@ XmlUtf8Encode(int c, char *buf) buf[3] = (char)((c & 0x3f) | 0x80); return 4; } - return 0; + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ } int FASTCALL -XmlUtf16Encode(int charNum, unsigned short *buf) -{ +XmlUtf16Encode(int charNum, unsigned short *buf) { if (charNum < 0) return 0; if (charNum < 0x10000) { @@ -1252,17 +1319,15 @@ struct unknown_encoding { char utf8[256][4]; }; -#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) +#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc)) int -XmlSizeOfUnknownEncoding(void) -{ +XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } static int PTRFASTCALL -unknown_isName(const ENCODING *enc, const char *p) -{ +unknown_isName(const ENCODING *enc, const char *p) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) @@ -1271,8 +1336,7 @@ unknown_isName(const ENCODING *enc, const char *p) } static int PTRFASTCALL -unknown_isNmstrt(const ENCODING *enc, const char *p) -{ +unknown_isNmstrt(const ENCODING *enc, const char *p) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) @@ -1281,81 +1345,72 @@ unknown_isNmstrt(const ENCODING *enc, const char *p) } static int PTRFASTCALL -unknown_isInvalid(const ENCODING *enc, const char *p) -{ +unknown_isInvalid(const ENCODING *enc, const char *p) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); int c = uenc->convert(uenc->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static void PTRCALL -unknown_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ +static enum XML_Convert_Result PTRCALL +unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); - } - else { + } else { if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; } - do { - *(*toP)++ = *utf8++; - } while (--n != 0); + memcpy(*toP, utf8, n); + *toP += n; } } -static void PTRCALL -unknown_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ +static enum XML_Convert_Result PTRCALL +unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); - while (*fromP != fromLim && *toP != toLim) { + while (*fromP < fromLim && *toP < toLim) { unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { - c = (unsigned short) - uenc->convert(uenc->userData, *fromP); + c = (unsigned short)uenc->convert(uenc->userData, *fromP); *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); - } - else + } else (*fromP)++; *(*toP)++ = c; } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } ENCODING * -XmlInitUnknownEncoding(void *mem, - int *table, - CONVERTER convert, - void *userData) -{ +XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, + void *userData) { int i; struct unknown_encoding *e = (struct unknown_encoding *)mem; - for (i = 0; i < (int)sizeof(struct normal_encoding); i++) - ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; + memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding)); for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER - && latin1_encoding.type[i] != BT_NONXML - && table[i] != i) + && latin1_encoding.type[i] != BT_NONXML && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; @@ -1365,32 +1420,30 @@ XmlInitUnknownEncoding(void *mem, e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; - } - else if (c < 0) { + } else if (c < 0) { if (c < -4) return 0; + /* Multi-byte sequences need a converter function */ + if (! convert) + return 0; e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; - } - else if (c < 0x80) { + } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER - && latin1_encoding.type[c] != BT_NONXML - && c != i) + && latin1_encoding.type[c] != BT_NONXML && c != i) return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); - } - else if (checkCharRefNumber(c) < 0) { + } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; - } - else { + } else { if (c > 0xFFFF) return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) @@ -1435,44 +1488,32 @@ enum { NO_ENC }; -static const char KW_ISO_8859_1[] = { - ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, - ASCII_MINUS, ASCII_1, '\0' -}; -static const char KW_US_ASCII[] = { - ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, - '\0' -}; -static const char KW_UTF_8[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' -}; -static const char KW_UTF_16[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' -}; -static const char KW_UTF_16BE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, - '\0' -}; -static const char KW_UTF_16LE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, - '\0' -}; +static const char KW_ISO_8859_1[] + = {ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, + ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'}; +static const char KW_US_ASCII[] + = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, + ASCII_C, ASCII_I, ASCII_I, '\0'}; +static const char KW_UTF_8[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'}; +static const char KW_UTF_16[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'}; +static const char KW_UTF_16BE[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, + ASCII_6, ASCII_B, ASCII_E, '\0'}; +static const char KW_UTF_16LE[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, + ASCII_6, ASCII_L, ASCII_E, '\0'}; static int FASTCALL -getEncodingIndex(const char *name) -{ - static const char * const encodingNames[] = { - KW_ISO_8859_1, - KW_US_ASCII, - KW_UTF_8, - KW_UTF_16, - KW_UTF_16BE, - KW_UTF_16LE, +getEncodingIndex(const char *name) { + static const char *const encodingNames[] = { + KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, }; int i; if (name == NULL) return NO_ENC; - for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) + for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) return i; return UNKNOWN_ENC; @@ -1492,18 +1533,12 @@ getEncodingIndex(const char *name) XML_PROLOG_STATE otherwise. */ - static int -initScan(const ENCODING * const *encodingTable, - const INIT_ENCODING *enc, - int state, - const char *ptr, - const char *end, - const char **nextTokPtr) -{ +initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, + int state, const char *ptr, const char *end, const char **nextTokPtr) { const ENCODING **encPtr; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; encPtr = enc->encPtr; if (ptr + 1 == end) { @@ -1525,20 +1560,17 @@ initScan(const ENCODING * const *encodingTable, case 0xFE: case 0xFF: case 0xEF: /* possibly first byte of UTF-8 BOM */ - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; /* fall through */ case 0x00: case 0x3C: return XML_TOK_PARTIAL; } - } - else { + } else { switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { case 0xFEFF: - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16BE_ENC]; @@ -1552,8 +1584,7 @@ initScan(const ENCODING * const *encodingTable, *encPtr = encodingTable[UTF_16LE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFFFE: - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16LE_ENC]; @@ -1568,8 +1599,8 @@ initScan(const ENCODING * const *encodingTable, */ if (state == XML_CONTENT_STATE) { int e = INIT_ENC_INDEX(enc); - if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC - || e == UTF_16LE_ENC || e == UTF_16_ENC) + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC + || e == UTF_16_ENC) break; } if (ptr + 2 == end) @@ -1592,8 +1623,7 @@ initScan(const ENCODING * const *encodingTable, break; *encPtr = encodingTable[UTF_16BE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); - } - else if (ptr[1] == '\0') { + } else if (ptr[1] == '\0') { /* We could recover here in the case: - parsing an external entity - second byte is 0 @@ -1615,7 +1645,6 @@ initScan(const ENCODING * const *encodingTable, return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } - #define NS(x) x #define ns(x) x #define XML_TOK_NS_C @@ -1626,22 +1655,19 @@ initScan(const ENCODING * const *encodingTable, #ifdef XML_NS -#define NS(x) x ## NS -#define ns(x) x ## _ns +# define NS(x) x##NS +# define ns(x) x##_ns -#define XML_TOK_NS_C -#include "xmltok_ns.c" -#undef XML_TOK_NS_C +# define XML_TOK_NS_C +# include "xmltok_ns.c" +# undef XML_TOK_NS_C -#undef NS -#undef ns +# undef NS +# undef ns ENCODING * -XmlInitUnknownEncodingNS(void *mem, - int *table, - CONVERTER convert, - void *userData) -{ +XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, + void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; diff --git a/contrib/expat/lib/xmltok.h b/contrib/expat/lib/xmltok.h index ca867aa6b4..6f630c2f9b 100644 --- a/contrib/expat/lib/xmltok.h +++ b/contrib/expat/lib/xmltok.h @@ -1,5 +1,37 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2005 Karl Waclawek + Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef XmlTok_INCLUDED @@ -10,16 +42,18 @@ extern "C" { #endif /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be - start of illegal ]]> sequence */ +#define XML_TOK_TRAILING_RSQB \ + -5 /* ] or ]] at the end of the scan; might be \ + start of illegal ]]> sequence */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; - might be part of CRLF sequence */ -#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ +#define XML_TOK_NONE -4 /* The string to be scanned is empty */ +#define XML_TOK_TRAILING_CR \ + -3 /* A CR at the end of the scan; \ + might be part of CRLF sequence */ +#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +#define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also @@ -34,24 +68,24 @@ extern "C" { #define XML_TOK_DATA_NEWLINE 7 #define XML_TOK_CDATA_SECT_OPEN 8 #define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +#define XML_TOK_CHAR_REF 10 /* numeric character reference */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +#define XML_TOK_PI 11 /* processing instruction */ +#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +#define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ #define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* */ +#define XML_TOK_DECL_OPEN 16 /* */ #define XML_TOK_NAME 18 #define XML_TOK_NMTOKEN 19 -#define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ +#define XML_TOK_POUND_NAME 20 /* #name */ +#define XML_TOK_OR 21 /* | */ #define XML_TOK_PERCENT 22 #define XML_TOK_OPEN_PAREN 23 #define XML_TOK_CLOSE_PAREN 24 @@ -62,14 +96,14 @@ extern "C" { #define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* */ -#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +#define XML_TOK_NAME_QUESTION 30 /* name? */ +#define XML_TOK_NAME_ASTERISK 31 /* name* */ +#define XML_TOK_NAME_PLUS 32 /* name+ */ +#define XML_TOK_COND_SECT_OPEN 33 /* */ +#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ @@ -84,20 +118,20 @@ extern "C" { #define XML_TOK_PREFIXED_NAME 41 #ifdef XML_DTD -#define XML_TOK_IGNORE_SECT 42 +# define XML_TOK_IGNORE_SECT 42 #endif /* XML_DTD */ #ifdef XML_DTD -#define XML_N_STATES 4 +# define XML_N_STATES 4 #else /* not XML_DTD */ -#define XML_N_STATES 3 +# define XML_N_STATES 3 #endif /* not XML_DTD */ #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 #ifdef XML_DTD -#define XML_IGNORE_SECTION_STATE 3 +# define XML_IGNORE_SECTION_STATE 3 #endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 @@ -125,49 +159,41 @@ typedef struct { struct encoding; typedef struct encoding ENCODING; -typedef int (PTRCALL *SCANNER)(const ENCODING *, - const char *, - const char *, - const char **); +typedef int(PTRCALL *SCANNER)(const ENCODING *, const char *, const char *, + const char **); + +enum XML_Convert_Result { + XML_CONVERT_COMPLETED = 0, + XML_CONVERT_INPUT_INCOMPLETE = 1, + XML_CONVERT_OUTPUT_EXHAUSTED + = 2 /* and therefore potentially input remaining as well */ +}; struct encoding { SCANNER scanners[XML_N_STATES]; SCANNER literalScanners[XML_N_LITERAL_TYPES]; - int (PTRCALL *sameName)(const ENCODING *, - const char *, - const char *); - int (PTRCALL *nameMatchesAscii)(const ENCODING *, - const char *, - const char *, - const char *); - int (PTRFASTCALL *nameLength)(const ENCODING *, const char *); + int(PTRCALL *nameMatchesAscii)(const ENCODING *, const char *, const char *, + const char *); + int(PTRFASTCALL *nameLength)(const ENCODING *, const char *); const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); - int (PTRCALL *getAtts)(const ENCODING *enc, - const char *ptr, - int attsMax, - ATTRIBUTE *atts); - int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); - int (PTRCALL *predefinedEntityName)(const ENCODING *, - const char *, - const char *); - void (PTRCALL *updatePosition)(const ENCODING *, - const char *ptr, - const char *end, - POSITION *); - int (PTRCALL *isPublicId)(const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr); - void (PTRCALL *utf8Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - char **toP, - const char *toLim); - void (PTRCALL *utf16Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - unsigned short **toP, - const unsigned short *toLim); + int(PTRCALL *getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts); + int(PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); + int(PTRCALL *predefinedEntityName)(const ENCODING *, const char *, + const char *); + void(PTRCALL *updatePosition)(const ENCODING *, const char *ptr, + const char *end, POSITION *); + int(PTRCALL *isPublicId)(const ENCODING *enc, const char *ptr, + const char *end, const char **badPtr); + enum XML_Convert_Result(PTRCALL *utf8Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, char **toP, + const char *toLim); + enum XML_Convert_Result(PTRCALL *utf16Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + unsigned short **toP, + const unsigned short *toLim); int minBytesPerChar; char isUtf8; char isUtf16; @@ -194,68 +220,62 @@ struct encoding { the prolog outside literals, comments and processing instructions. */ - -#define XmlTok(enc, state, ptr, end, nextTokPtr) \ +#define XmlTok(enc, state, ptr, end, nextTokPtr) \ (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) -#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) +#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) -#define XmlContentTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) +#define XmlContentTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) -#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) #ifdef XML_DTD -#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) #endif /* XML_DTD */ /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. */ -#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ +#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) -#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) +#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) +#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) - -#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ +#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) -#define XmlNameLength(enc, ptr) \ - (((enc)->nameLength)(enc, ptr)) +#define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) -#define XmlSkipS(enc, ptr) \ - (((enc)->skipS)(enc, ptr)) +#define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) -#define XmlGetAttributes(enc, ptr, attsMax, atts) \ +#define XmlGetAttributes(enc, ptr, attsMax, atts) \ (((enc)->getAtts)(enc, ptr, attsMax, atts)) -#define XmlCharRefNumber(enc, ptr) \ - (((enc)->charRefNumber)(enc, ptr)) +#define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) -#define XmlPredefinedEntityName(enc, ptr, end) \ +#define XmlPredefinedEntityName(enc, ptr, end) \ (((enc)->predefinedEntityName)(enc, ptr, end)) -#define XmlUpdatePosition(enc, ptr, end, pos) \ +#define XmlUpdatePosition(enc, ptr, end, pos) \ (((enc)->updatePosition)(enc, ptr, end, pos)) -#define XmlIsPublicId(enc, ptr, end, badPtr) \ +#define XmlIsPublicId(enc, ptr, end, badPtr) \ (((enc)->isPublicId)(enc, ptr, end, badPtr)) -#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ +#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) -#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ +#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) typedef struct { @@ -263,16 +283,11 @@ typedef struct { const ENCODING **encPtr; } INIT_ENCODING; -int XmlParseXmlDecl(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, +int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); + const ENCODING **namedEncodingPtr, int *standalonePtr); int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING *XmlGetUtf8InternalEncoding(void); @@ -281,34 +296,22 @@ int FASTCALL XmlUtf8Encode(int charNumber, char *buf); int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); int XmlSizeOfUnknownEncoding(void); +typedef int(XMLCALL *CONVERTER)(void *userData, const char *p); -typedef int (XMLCALL *CONVERTER) (void *userData, const char *p); - -ENCODING * -XmlInitUnknownEncoding(void *mem, - int *table, - CONVERTER convert, - void *userData); +ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, + void *userData); -int XmlParseXmlDeclNS(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, +int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); + const ENCODING **namedEncodingPtr, int *standalonePtr); int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void); -ENCODING * -XmlInitUnknownEncodingNS(void *mem, - int *table, - CONVERTER convert, - void *userData); +ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, + void *userData); #ifdef __cplusplus } #endif diff --git a/contrib/expat/lib/xmltok_impl.c b/contrib/expat/lib/xmltok_impl.c index 9c2895b877..1971d74bf8 100644 --- a/contrib/expat/lib/xmltok_impl.c +++ b/contrib/expat/lib/xmltok_impl.c @@ -1,114 +1,165 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2019 David Loffredo + Copyright (c) 2020 Boris Kolpackov + Copyright (c) 2022 Martin Ettl + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* This file is included! */ #ifdef XML_TOK_IMPL_C -#ifndef IS_INVALID_CHAR -#define IS_INVALID_CHAR(enc, ptr, n) (0) -#endif +# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined +# define IS_INVALID_CHAR(enc, ptr, n) (0) +# endif -#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_INVALID_CHAR(enc, ptr, n)) { \ - *(nextTokPtr) = (ptr); \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *(nextTokPtr) = (ptr); \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define INVALID_CASES(ptr, nextTokPtr) \ - INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ - case BT_NONXML: \ - case BT_MALFORM: \ - case BT_TRAIL: \ - *(nextTokPtr) = (ptr); \ +# define INVALID_CASES(ptr, nextTokPtr) \ + INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ + case BT_NONXML: \ + case BT_MALFORM: \ + case BT_TRAIL: \ + *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; -#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NAME_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; + +# define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + case BT_DIGIT: \ + case BT_NAME: \ + case BT_MINUS: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) -#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - case BT_DIGIT: \ - case BT_NAME: \ - case BT_MINUS: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) +# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if ((end) - (ptr) < (n)) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) -#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) +# ifndef PREFIX +# define PREFIX(ident) ident +# endif -#ifndef PREFIX -#define PREFIX(ident) ident -#endif +# define HAS_CHARS(enc, ptr, end, count) \ + ((end) - (ptr) >= ((count)*MINBPC(enc))) + +# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) + +# define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ + } + +# define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) /* ptr points to character following " */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { - case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ - case BT_S: case BT_CR: case BT_LF: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: @@ -175,12 +228,12 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, - const char *end, int *tokPtr) -{ +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, + int *tokPtr) { int upper = 0; + UNUSED_P(enc); *tokPtr = XML_TOK_PI; - if (end - ptr != MINBPC(enc)*3) + if (end - ptr != MINBPC(enc) * 3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: @@ -220,35 +273,34 @@ PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, /* ptr points to character following "= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -319,14 +368,12 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + REQUIRE_CHAR(enc, ptr, end); + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + REQUIRE_CHAR(enc, ptr, end); + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -334,8 +381,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; @@ -343,23 +389,25 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: @@ -380,24 +428,26 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, /* ptr points to character following "= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -803,7 +840,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -814,50 +851,52 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr; return XML_TOK_INVALID; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_RSQB: - if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { - ptr += MINBPC(enc); - break; - } - if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } + if (HAS_CHARS(enc, ptr, end, 2)) { + if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { + ptr += MINBPC(enc); + break; + } + if (HAS_CHARS(enc, ptr, end, 3)) { + if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { + ptr += MINBPC(enc); + break; + } + *nextTokPtr = ptr + 2 * MINBPC(enc); + return XML_TOK_INVALID; + } } /* fall through */ case BT_AMP: @@ -882,22 +921,23 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; + const char **nextTokPtr) { + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_S: + case BT_LF: + case BT_CR: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_PARAM_ENTITY_REF; @@ -911,21 +951,24 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; + const char **nextTokPtr) { + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_CR: case BT_LF: case BT_S: - case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_CR: + case BT_LF: + case BT_S: + case BT_RPAR: + case BT_GT: + case BT_PERCNT: + case BT_VERBAR: *nextTokPtr = ptr; return XML_TOK_POUND_NAME; default: @@ -937,25 +980,27 @@ PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, } static int PTRCALL -PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) -{ - while (ptr != end) { +PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_QUOT: case BT_APOS: ptr += MINBPC(enc); if (t != open) break; - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - case BT_GT: case BT_PERCNT: case BT_LSQB: + case BT_S: + case BT_CR: + case BT_LF: + case BT_GT: + case BT_PERCNT: + case BT_LSQB: return XML_TOK_LITERAL; default: return XML_TOK_INVALID; @@ -970,10 +1015,9 @@ PREFIX(scanLit)(int open, const ENCODING *enc, static int PTRCALL PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -989,28 +1033,26 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LT: - { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_NMSTRT: - case BT_HEX: - case BT_NONASCII: - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; + case BT_LT: { + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); + switch (BYTE_TYPE(enc, ptr)) { + case BT_EXCL: + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_QUEST: + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_NMSTRT: + case BT_HEX: + case BT_NONASCII: + case BT_LEAD2: + case BT_LEAD3: + case BT_LEAD4: + *nextTokPtr = ptr - MINBPC(enc); + return XML_TOK_INSTANCE_START; } + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } case BT_CR: if (ptr + MINBPC(enc) == end) { *nextTokPtr = end; @@ -1018,13 +1060,15 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return -XML_TOK_PROLOG_S; } /* fall through */ - case BT_S: case BT_LF: + case BT_S: + case BT_LF: for (;;) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_LF: + case BT_S: + case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ @@ -1048,13 +1092,12 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { - *nextTokPtr = ptr + 2*MINBPC(enc); + *nextTokPtr = ptr + 2 * MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } } @@ -1065,7 +1108,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: @@ -1077,8 +1120,12 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_PLUS: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_PLUS; - case BT_CR: case BT_LF: case BT_S: - case BT_GT: case BT_COMMA: case BT_VERBAR: + case BT_CR: + case BT_LF: + case BT_S: + case BT_GT: + case BT_COMMA: + case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; @@ -1093,24 +1140,30 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NAME; \ - break; \ - } \ - if (IS_NAME_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NMTOKEN; \ - break; \ - } \ - *nextTokPtr = ptr; \ +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NAME; \ + break; \ + } \ + if (IS_NAME_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NMTOKEN; \ + break; \ + } \ + *nextTokPtr = ptr; \ return XML_TOK_INVALID; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; @@ -1119,9 +1172,9 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_DIGIT: case BT_NAME: case BT_MINUS: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif tok = XML_TOK_NMTOKEN; ptr += MINBPC(enc); break; @@ -1141,24 +1194,29 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: case BT_RPAR: case BT_COMMA: - case BT_VERBAR: case BT_LSQB: case BT_PERCNT: - case BT_S: case BT_CR: case BT_LF: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_GT: + case BT_RPAR: + case BT_COMMA: + case BT_VERBAR: + case BT_LSQB: + case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return tok; -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; @@ -1169,23 +1227,23 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, break; } break; -#endif +# endif case BT_PLUS: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -1200,19 +1258,30 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, } static int PTRCALL -PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1232,7 +1301,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1258,19 +1327,30 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1278,8 +1358,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { - int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), - end, nextTokPtr); + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; } *nextTokPtr = ptr; @@ -1294,7 +1373,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1312,12 +1391,11 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, return XML_TOK_DATA_CHARS; } -#ifdef XML_DTD +# ifdef XML_DTD static int PTRCALL -PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int level = 0; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -1326,15 +1404,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); @@ -1342,11 +1420,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, } break; case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { @@ -1365,15 +1443,14 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_PARTIAL; } -#endif /* XML_DTD */ +# endif /* XML_DTD */ static int PTRCALL PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) -{ + const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1393,9 +1470,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_AST: case BT_PERCNT: case BT_NUM: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { @@ -1405,8 +1482,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, break; case BT_NAME: case BT_NMSTRT: - if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) + if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; + /* fall through */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ @@ -1428,9 +1506,8 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, */ static int PTRCALL -PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) -{ +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; @@ -1438,32 +1515,35 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { -#define START_NAME \ - if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ - } -#define LEAD_CASE(n) \ - case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define START_NAME \ + if (state == other) { \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].normalized = 1; \ + } \ + state = inName; \ + } +# define LEAD_CASE(n) \ + case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ + START_NAME ptr += (n - MINBPC(enc)); \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; -#undef START_NAME +# undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; - } - else if (open == BT_QUOT) { + } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1476,8 +1556,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; - } - else if (open == BT_APOS) { + } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1491,16 +1570,15 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, case BT_S: if (state == inName) state = other; - else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized + else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; - case BT_CR: case BT_LF: + case BT_CR: + case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) @@ -1521,29 +1599,44 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, } static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) -{ +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ - ptr += 2*MINBPC(enc); + UNUSED_P(enc); + ptr += 2 * MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { - for (ptr += MINBPC(enc); - !CHAR_MATCHES(enc, ptr, ASCII_SEMI); + for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: - case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: + case ASCII_0: + case ASCII_1: + case ASCII_2: + case ASCII_3: + case ASCII_4: + case ASCII_5: + case ASCII_6: + case ASCII_7: + case ASCII_8: + case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; - case ASCII_A: case ASCII_B: case ASCII_C: - case ASCII_D: case ASCII_E: case ASCII_F: + case ASCII_A: + case ASCII_B: + case ASCII_C: + case ASCII_D: + case ASCII_E: + case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; - case ASCII_a: case ASCII_b: case ASCII_c: - case ASCII_d: case ASCII_e: case ASCII_f: + case ASCII_a: + case ASCII_b: + case ASCII_c: + case ASCII_d: + case ASCII_e: + case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; @@ -1551,9 +1644,8 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) if (result >= 0x110000) return -1; } - } - else { - for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { + } else { + for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); @@ -1566,9 +1658,9 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) static int PTRCALL PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, - const char *end) -{ - switch ((end - ptr)/MINBPC(enc)) { + const char *end) { + UNUSED_P(enc); + switch ((end - ptr) / MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { @@ -1618,98 +1710,43 @@ PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, return 0; } -static int PTRCALL -PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr1)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (*ptr1++ != *ptr2++) \ - return 0; - LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) -#undef LEAD_CASE - /* fall through */ - if (*ptr1++ != *ptr2++) - return 0; - break; - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } - } - break; - default: - if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; - switch (BYTE_TYPE(enc, ptr2)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } - } - } - /* not reached */ -} - static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, - const char *end1, const char *ptr2) -{ + const char *end1, const char *ptr2) { + UNUSED_P(enc); for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) - return 0; - if (!CHAR_MATCHES(enc, ptr1, *ptr2)) + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however. + */ + return 0; /* LCOV_EXCL_LINE */ + } + if (! CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; } static int PTRFASTCALL -PREFIX(nameLength)(const ENCODING *enc, const char *ptr) -{ +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1722,9 +1759,8 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) } } -static const char * PTRFASTCALL -PREFIX(skipS)(const ENCODING *enc, const char *ptr) -{ +static const char *PTRFASTCALL +PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: @@ -1739,45 +1775,45 @@ PREFIX(skipS)(const ENCODING *enc, const char *ptr) } static void PTRCALL -PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) -{ - while (ptr < end) { +PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + pos->columnNumber++; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_LF: - pos->columnNumber = (XML_Size)-1; + pos->columnNumber = 0; pos->lineNumber++; ptr += MINBPC(enc); break; case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); - pos->columnNumber = (XML_Size)-1; + pos->columnNumber = 0; break; default: ptr += MINBPC(enc); + pos->columnNumber++; break; } - pos->columnNumber++; } } -#undef DO_LEAD_CASE -#undef MULTIBYTE_CASES -#undef INVALID_CASES -#undef CHECK_NAME_CASE -#undef CHECK_NAME_CASES -#undef CHECK_NMSTRT_CASE -#undef CHECK_NMSTRT_CASES +# undef DO_LEAD_CASE +# undef MULTIBYTE_CASES +# undef INVALID_CASES +# undef CHECK_NAME_CASE +# undef CHECK_NAME_CASES +# undef CHECK_NMSTRT_CASE +# undef CHECK_NMSTRT_CASES #endif /* XML_TOK_IMPL_C */ diff --git a/contrib/expat/lib/xmltok_impl.h b/contrib/expat/lib/xmltok_impl.h index da0ea60a65..3469c4ae13 100644 --- a/contrib/expat/lib/xmltok_impl.h +++ b/contrib/expat/lib/xmltok_impl.h @@ -1,46 +1,74 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017-2019 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ enum { - BT_NONXML, - BT_MALFORM, - BT_LT, - BT_AMP, - BT_RSQB, - BT_LEAD2, - BT_LEAD3, - BT_LEAD4, - BT_TRAIL, - BT_CR, - BT_LF, - BT_GT, - BT_QUOT, - BT_APOS, - BT_EQUALS, - BT_QUEST, - BT_EXCL, - BT_SOL, - BT_SEMI, - BT_NUM, - BT_LSQB, - BT_S, - BT_NMSTRT, - BT_COLON, - BT_HEX, - BT_DIGIT, - BT_NAME, - BT_MINUS, - BT_OTHER, /* known not to be a name or name start character */ + BT_NONXML, /* e.g. noncharacter-FFFF */ + BT_MALFORM, /* illegal, with regard to encoding */ + BT_LT, /* less than = "<" */ + BT_AMP, /* ampersand = "&" */ + BT_RSQB, /* right square bracket = "[" */ + BT_LEAD2, /* lead byte of a 2-byte UTF-8 character */ + BT_LEAD3, /* lead byte of a 3-byte UTF-8 character */ + BT_LEAD4, /* lead byte of a 4-byte UTF-8 character */ + BT_TRAIL, /* trailing unit, e.g. second 16-bit unit of a 4-byte char. */ + BT_CR, /* carriage return = "\r" */ + BT_LF, /* line feed = "\n" */ + BT_GT, /* greater than = ">" */ + BT_QUOT, /* quotation character = "\"" */ + BT_APOS, /* apostrophe = "'" */ + BT_EQUALS, /* equal sign = "=" */ + BT_QUEST, /* question mark = "?" */ + BT_EXCL, /* exclamation mark = "!" */ + BT_SOL, /* solidus, slash = "/" */ + BT_SEMI, /* semicolon = ";" */ + BT_NUM, /* number sign = "#" */ + BT_LSQB, /* left square bracket = "[" */ + BT_S, /* white space, e.g. "\t", " "[, "\r"] */ + BT_NMSTRT, /* non-hex name start letter = "G".."Z" + "g".."z" + "_" */ + BT_COLON, /* colon = ":" */ + BT_HEX, /* hex letter = "A".."F" + "a".."f" */ + BT_DIGIT, /* digit = "0".."9" */ + BT_NAME, /* dot and middle dot = "." + chr(0xb7) */ + BT_MINUS, /* minus = "-" */ + BT_OTHER, /* known not to be a name or name start character */ BT_NONASCII, /* might be a name or name start character */ - BT_PERCNT, - BT_LPAR, - BT_RPAR, - BT_AST, - BT_PLUS, - BT_COMMA, - BT_VERBAR + BT_PERCNT, /* percent sign = "%" */ + BT_LPAR, /* left parenthesis = "(" */ + BT_RPAR, /* right parenthesis = "(" */ + BT_AST, /* asterisk = "*" */ + BT_PLUS, /* plus sign = "+" */ + BT_COMMA, /* comma = "," */ + BT_VERBAR /* vertical bar = "|" */ }; #include diff --git a/contrib/expat/lib/xmltok_ns.c b/contrib/expat/lib/xmltok_ns.c index c3b88fdf4e..fbdd3e3c7b 100644 --- a/contrib/expat/lib/xmltok_ns.c +++ b/contrib/expat/lib/xmltok_ns.c @@ -1,61 +1,83 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* This file is included! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2017-2021 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* This file is included! */ #ifdef XML_TOK_NS_C const ENCODING * -NS(XmlGetUtf8InternalEncoding)(void) -{ +NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } const ENCODING * -NS(XmlGetUtf16InternalEncoding)(void) -{ -#if BYTEORDER == 1234 +NS(XmlGetUtf16InternalEncoding)(void) { +# if BYTEORDER == 1234 return &ns(internal_little2_encoding).enc; -#elif BYTEORDER == 4321 +# elif BYTEORDER == 4321 return &ns(internal_big2_encoding).enc; -#else +# else const short n = 1; - return (*(const char *)&n - ? &ns(internal_little2_encoding).enc - : &ns(internal_big2_encoding).enc); -#endif + return (*(const char *)&n ? &ns(internal_little2_encoding).enc + : &ns(internal_big2_encoding).enc); +# endif } -static const ENCODING * const NS(encodings)[] = { - &ns(latin1_encoding).enc, - &ns(ascii_encoding).enc, - &ns(utf8_encoding).enc, - &ns(big2_encoding).enc, - &ns(big2_encoding).enc, - &ns(little2_encoding).enc, - &ns(utf8_encoding).enc /* NO_ENC */ +static const ENCODING *const NS(encodings)[] = { + &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, + &ns(utf8_encoding).enc, &ns(big2_encoding).enc, + &ns(big2_encoding).enc, &ns(little2_encoding).enc, + &ns(utf8_encoding).enc /* NO_ENC */ }; static int PTRCALL NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - return initScan(NS(encodings), (const INIT_ENCODING *)enc, - XML_PROLOG_STATE, ptr, end, nextTokPtr); + const char **nextTokPtr) { + return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, + ptr, end, nextTokPtr); } static int PTRCALL NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - return initScan(NS(encodings), (const INIT_ENCODING *)enc, - XML_CONTENT_STATE, ptr, end, nextTokPtr); + const char **nextTokPtr) { + return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, + ptr, end, nextTokPtr); } int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, - const char *name) -{ + const char *name) { int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) return 0; @@ -69,10 +91,9 @@ NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, } static const ENCODING * -NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) -{ -#define ENCODING_MAX 128 - char buf[ENCODING_MAX]; +NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { +# define ENCODING_MAX 128 + char buf[ENCODING_MAX] = ""; char *p = buf; int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); @@ -88,28 +109,14 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) } int -NS(XmlParseXmlDecl)(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) -{ - return doParseXmlDecl(NS(findEncoding), - isGeneralTextEntity, - enc, - ptr, - end, - badPtr, - versionPtr, - versionEndPtr, - encodingName, - encoding, - standalone); +NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, + const char **encodingName, const ENCODING **encoding, + int *standalone) { + return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end, + badPtr, versionPtr, versionEndPtr, encodingName, + encoding, standalone); } #endif /* XML_TOK_NS_C */ -- 2.41.0