File manager - Edit - /home/newsbmcs.com/public_html/static/img/logo/bs4.zip
Back
PK z�ZMa�I I tests/test_element.pynu �[��� """Tests of classes in element.py. The really big classes -- Tag, PageElement, and NavigableString -- are tested in separate files. """ from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, NamespacedAttribute, ) from . import SoupTest class TestNamedspacedAttribute(object): def test_name_may_be_none_or_missing(self): a = NamespacedAttribute("xmlns", None) assert a == "xmlns" a = NamespacedAttribute("xmlns", "") assert a == "xmlns" a = NamespacedAttribute("xmlns") assert a == "xmlns" def test_namespace_may_be_none_or_missing(self): a = NamespacedAttribute(None, "tag") assert a == "tag" a = NamespacedAttribute("", "tag") assert a == "tag" def test_attribute_is_equivalent_to_colon_separated_string(self): a = NamespacedAttribute("a", "b") assert "a:b" == a def test_attributes_are_equivalent_if_prefix_and_name_identical(self): a = NamespacedAttribute("a", "b", "c") b = NamespacedAttribute("a", "b", "c") assert a == b # The actual namespace is not considered. c = NamespacedAttribute("a", "b", None) assert a == c # But name and prefix are important. d = NamespacedAttribute("a", "z", "c") assert a != d e = NamespacedAttribute("z", "b", "c") assert a != e class TestAttributeValueWithCharsetSubstitution(object): """Certain attributes are designed to have the charset of the final document substituted into their value. """ def test_content_meta_attribute_value(self): # The value of a CharsetMetaAttributeValue is whatever # encoding the string is in. value = CharsetMetaAttributeValue("euc-jp") assert "euc-jp" == value assert "euc-jp" == value.original_value assert "utf8" == value.encode("utf8") assert "ascii" == value.encode("ascii") def test_content_meta_attribute_value(self): value = ContentMetaAttributeValue("text/html; charset=euc-jp") assert "text/html; charset=euc-jp" == value assert "text/html; charset=euc-jp" == value.original_value assert "text/html; charset=utf8" == value.encode("utf8") assert "text/html; charset=ascii" == value.encode("ascii") PK z�Z��w�g g tests/test_docs.pynu �[��� "Test harness for doctests." # TODO: Pretty sure this isn't used and should be deleted. # pylint: disable-msg=E0611,W0142 __metaclass__ = type __all__ = [ 'additional_tests', ] import atexit import doctest import os #from pkg_resources import ( # resource_filename, resource_exists, resource_listdir, cleanup_resources) import unittest DOCTEST_FLAGS = ( doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) # def additional_tests(): # "Run the doc tests (README.txt and docs/*, if any exist)" # doctest_files = [ # os.path.abspath(resource_filename('bs4', 'README.txt'))] # if resource_exists('bs4', 'docs'): # for name in resource_listdir('bs4', 'docs'): # if name.endswith('.txt'): # doctest_files.append( # os.path.abspath( # resource_filename('bs4', 'docs/%s' % name))) # kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) # atexit.register(cleanup_resources) # return unittest.TestSuite(( # doctest.DocFileSuite(*doctest_files, **kwargs))) PK z�Z��U~< < tests/test_fuzz.pynu �[��� """This file contains test cases reported by third parties using fuzzing tools, primarily from Google's oss-fuzz project. Some of these represent real problems with Beautiful Soup, but many are problems in libraries that Beautiful Soup depends on, and many of the test cases represent different ways of triggering the same problem. Grouping these test cases together makes it easy to see which test cases represent the same problem, and puts the test cases in close proximity to code that can trigger the problems. """ import os import pytest from bs4 import ( BeautifulSoup, ParserRejectedMarkup, ) try: from soupsieve.util import SelectorSyntaxError import lxml import html5lib fully_fuzzable = True except ImportError: fully_fuzzable = False @pytest.mark.skipif(not fully_fuzzable, reason="Prerequisites for fuzz tests are not installed.") class TestFuzz(object): # Test case markup files from fuzzers are given this extension so # they can be included in builds. TESTCASE_SUFFIX = ".testcase" # Copied 20230512 from # https://github.com/google/oss-fuzz/blob/4ac6a645a197a695fe76532251feb5067076b3f3/projects/bs4/bs4_fuzzer.py # # Copying the code lets us precisely duplicate the behavior of # oss-fuzz. The downside is that this code changes over time, so # multiple copies of the code must be kept around to run against # older tests. I'm not sure what to do about this, but I may # retire old tests after a time. def fuzz_test_with_css(self, filename): data = self.__markup(filename) parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml'] try: idx = int(data[0]) % len(parsers) except ValueError: return css_selector, data = data[1:10], data[10:] try: soup = BeautifulSoup(data[1:], features=parsers[idx]) except ParserRejectedMarkup: return except ValueError: return list(soup.find_all(True)) try: soup.css.select(css_selector.decode('utf-8', 'replace')) except SelectorSyntaxError: return soup.prettify() # This class of error has been fixed by catching a less helpful # exception from html.parser and raising ParserRejectedMarkup # instead. @pytest.mark.parametrize( "filename", [ "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912", "crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a", ] ) def test_rejected_markup(self, filename): markup = self.__markup(filename) with pytest.raises(ParserRejectedMarkup): BeautifulSoup(markup, 'html.parser') # This class of error has to do with very deeply nested documents # which overflow the Python call stack when the tree is converted # to a string. This is an issue with Beautiful Soup which was fixed # as part of [bug=1471755]. # # These test cases are in the older format that doesn't specify # which parser to use or give a CSS selector. @pytest.mark.parametrize( "filename", [ "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440", "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632", "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464", "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400", ] ) def test_deeply_nested_document_without_css(self, filename): # Parsing the document and encoding it back to a string is # sufficient to demonstrate that the overflow problem has # been fixed. markup = self.__markup(filename) BeautifulSoup(markup, 'html.parser').encode() # This class of error has to do with very deeply nested documents # which overflow the Python call stack when the tree is converted # to a string. This is an issue with Beautiful Soup which was fixed # as part of [bug=1471755]. @pytest.mark.parametrize( "filename", [ "clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016", "clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000", "clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624", ] ) def test_deeply_nested_document(self, filename): self.fuzz_test_with_css(filename) @pytest.mark.parametrize( "filename", [ "clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256", "clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824", ] ) def test_soupsieve_errors(self, filename): self.fuzz_test_with_css(filename) # This class of error represents problems with html5lib's parser, # not Beautiful Soup. I use # https://github.com/html5lib/html5lib-python/issues/568 to notify # the html5lib developers of these issues. # # These test cases are in the older format that doesn't specify # which parser to use or give a CSS selector. @pytest.mark.skip(reason="html5lib-specific problems") @pytest.mark.parametrize( "filename", [ # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'""" "clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320", # b')<a><math><TR><a><mI><a><p><a>' "clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456", # b'-<math><sElect><mi><sElect><sElect>' "clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896", # b'ñ<table><svg><html>' "clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224", # <TABLE>, some ^@ characters, some <math> tags. "clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744", # Nested table "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08" ] ) def test_html5lib_parse_errors_without_css(self, filename): markup = self.__markup(filename) print(BeautifulSoup(markup, 'html5lib').encode()) # This class of error represents problems with html5lib's parser, # not Beautiful Soup. I use # https://github.com/html5lib/html5lib-python/issues/568 to notify # the html5lib developers of these issues. @pytest.mark.skip(reason="html5lib-specific problems") @pytest.mark.parametrize( "filename", [ # b'- \xff\xff <math>\x10<select><mi><select><select>t' "clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640", ] ) def test_html5lib_parse_errors(self, filename): self.fuzz_test_with_css(filename) def __markup(self, filename): if not filename.endswith(self.TESTCASE_SUFFIX): filename += self.TESTCASE_SUFFIX this_dir = os.path.split(__file__)[0] path = os.path.join(this_dir, 'fuzz', filename) return open(path, 'rb').read() PK z�Z��_9 9 6 tests/__pycache__/test_navigablestring.cpython-310.pycnu �[��� o �h� � @ sh d dl Z d dlmZmZmZmZmZmZmZm Z m Z mZ ddlm Z G dd� de �ZG dd� de �ZdS ) � N) �CData�Comment�Declaration�Doctype�NavigableString�RubyParenthesisString�RubyTextString�Script� Stylesheet�TemplateString� )�SoupTestc @ s e Zd Zdd� Zdd� ZdS )�TestNavigableStringc C sV t d�}td�}td�}d|�� ksJ �d|jdd�ksJ �dgt|j�ks(J �dgt|j�ks2J �dgt|�� �ks=J �d|�� ksEJ �d|jdd�ksOJ �dgt|j�ksYJ �dgt|j�kscJ �dgt|�� �ksnJ �d|�� ksvJ �g t|j�ksJ �g t|j�ks�J �g t|�� �ks�J �d |jdtd �ks�J �d|jtt fd�ks�J �d S )Nzfee zfie zfoe �feeT��strip�fie� �foe�r �types�r )r r r �get_text�list�strings�stripped_strings�_all_strings)�self�s�cdata�comment� r! �Q/usr/local/CyberCP/lib/python3.10/site-packages/bs4/tests/test_navigablestring.py�test_text_acquisition_methods s&