Current File : //usr/local/apps/python3/lib/python3.11/test/__pycache__/test_robotparser.cpython-311.opt-2.pyc
�

�Ke,��H�ddlZddlZddlZddlZddlZddlmZddlm	Z	ddlm
Z
ddlmZm
Z
Gd�d��ZGd�d	eej��ZGd
�deej��ZGd�d
eej��ZGd�deej��ZGd�de��ZGd�deej��ZGd�deej��ZGd�de��ZGd�deej��ZGd�deej��ZGd�deej��ZGd�deej��ZGd �d!e��ZGd"�d#eej��ZGd$�d%eej��ZGd&�d'eej��ZGd(�d)eej��Z Gd*�d+eej��Z!Gd,�d-eej��Z"Gd.�d/e��Z#ej$ej%d0��Gd1�d2ej����Z&ej'��Gd3�d4ej����Z(e)d5krej*��dSdS)6�N)�support)�
socket_helper)�threading_helper)�BaseHTTPRequestHandler�
HTTPServerc�@�eZdZdZdZgZgZdZd�Zd�Z	d�Z
d�Zd�ZdS)	�
BaseRobotTest��test_robotparserNc���tj|j�����}tj���|_|j�|��dS�N)	�io�StringIO�
robots_txt�	readlines�urllib�robotparser�RobotFileParser�parser�parse)�self�liness  �?/usr/local/apps/python3/lib/python3.11/test/test_robotparser.py�setUpzBaseRobotTest.setUpsQ����D�O�,�,�6�6�8�8���(�8�8�:�:�������%� � � � � �c�P�t|t��r	|\}}||fS|j|fSr
)�
isinstance�tuple�agent�r�urlrs   r�get_agent_and_urlzBaseRobotTest.get_agent_and_urls3���c�5�!�!�	��J�E�3��#�:���z�3��rc��|jD]w}|�|��\}}|�||���5|�|j�||����ddd��n#1swxYwY�xdS�N)r!r)�goodr"�subTest�
assertTruer�	can_fetchr s   r�test_good_urlszBaseRobotTest.test_good_urlss����9�	C�	C�C��/�/��4�4�J�E�3����#�U��3�3�
C�
C������ 5� 5�e�S� A� A�B�B�B�
C�
C�
C�
C�
C�
C�
C�
C�
C�
C�
C����
C�
C�
C�
C��	C�	C��/A4�4A8	�;A8	c��|jD]w}|�|��\}}|�||���5|�|j�||����ddd��n#1swxYwY�xdSr$)�badr"r&�assertFalserr(r s   r�
test_bad_urlszBaseRobotTest.test_bad_urls$s����8�	D�	D�C��/�/��4�4�J�E�3����#�U��3�3�
D�
D�� � ���!6�!6�u�c�!B�!B�C�C�C�
D�
D�
D�
D�
D�
D�
D�
D�
D�
D�
D����
D�
D�
D�
D��	D�	Dr*c�j�|�|j���|j��dSr
)�assertEqualr�	site_maps�rs r�test_site_mapszBaseRobotTest.test_site_maps*s.�������.�.�0�0�$�.�A�A�A�A�Ar)
�__name__�
__module__�__qualname__rrr%r,r1rr"r)r.r3�rrr	r	s��������J��E�
�D�
�C��I�!�!�!�
���C�C�C�D�D�D�B�B�B�B�Brr	c�"�eZdZdZddgZgd�ZdS)�UserAgentWildcardTestz�User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    �/�
/test.html)�/cyberworld/map/index.htmlz/tmp/xxx�	/foo.htmlN�r4r5r6rr%r,r7rrr9r9.s,�������J�
���D�
A�
A�
A�C�C�Crr9c� �eZdZdZgd�ZdgZdS)�CrawlDelayAndCustomAgentTestz�# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )r:r;)�cybermapperr<r<Nr>r7rrr@r@9s+�������J�N�M�M�D�'�
(�C�C�Crr@c�(�eZdZdZddgZdgZddgZdS)�SitemapTesta# robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    r:r;r<z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r4r5r6rr%r,r1r7rrrCrCJs7������	�J�
���D�'�
(�C�J�E�G�I�I�IrrCc��eZdZdZgZgd�ZdS)�RejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    )r<r:�/tmp/Nr>r7rrrErE[s'�������J�
�D�
6�
6�
6�C�C�CrrEc��eZdZdZdZd�ZdS)�BaseRequestRateTestNc�r�|j}|j|jzD�]}|�|��\}}|�||���5|�|�|��|j��|�|��}|�||j��|j�o|�|tj
j��|�|j|jj��|�|j
|jj
��ddd��n#1swxYwY�� dSr$)rr%r,r"r&r0�crawl_delay�request_rate�assertIsInstancerr�RequestRate�requests�seconds)rrr!r�parsed_request_rates     r�test_request_ratez%BaseRequestRateTest.test_request_rateis�������9�t�x�'�	�	�C��/�/��4�4�J�E�3����#�U��3�3�
�
�� � ��!3�!3�E�!:�!:�D�<L�M�M�M�&,�&9�&9�%�&@�&@�#�� � �!4�d�6G�H�H�H��$�0��)�)�+��*�6�����$�$�+�4��)�2�����$�$�+�3��)�1����
�
�
�
�
�
�
�
�
�
�
����
�
�
�
��	�	s�	CD*�*D.	�1D.	)r4r5r6rKrJrQr7rrrHrHes-�������L��K�����rrHc��eZdZdZdgZdS)�
EmptyFileTestr
z/fooN)r4r5r6rr%r7rrrSrS�s�������J��8�D�D�DrrSc�^�eZdZdZdZej�dd��ZdZ	dgZ
gd�ZdS)	�CrawlDelayAndRequestRateTestz�User-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    �figtree�	��)rVr=)�/tmpz	/tmp.html�/tmp/a.html�/a%3cd.html�/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r4r5r6rrrrrMrKrJr%r,r7rrrUrU�sR�������J�
�E��%�1�1�!�R�8�8�L��K�$�%�D�.�.�.�C�C�CrrUc��eZdZdZdS)�DifferentAgentTestzFigTree Robot libwww-perl/5.04N�r4r5r6rr7rrr_r_�s������,�E�E�Err_c�$�eZdZdZdgZgd�ZdZdS)�InvalidRequestRateTestz�User-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rZ)rFr[r\r]z	/a/b.htmlz/%7Ejoe/index.htmlrYN)r4r5r6rr%r,rJr7rrrbrb�s4�������J�
�8�D�!�!�!�C��K�K�Krrbc��eZdZdZdgZgZdS)�InvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    r=Nr>r7rrrdrd�s#�������J�

�=�D�
�C�C�Crrdc�"�eZdZdZdZdgZdgZdS)�AnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    �	Googlebot�/folder1/myfile.html�/folder1/anotherfile.htmlN�r4r5r6rrr%r,r7rrrfrf�s,�������J�
�E�"�#�D�&�
'�C�C�Crrfc��eZdZdZdZdgZdS)�UserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    rgz/something.jpgN)r4r5r6rrr,r7rrrlrl�s$�������J�
�E��
�C�C�Crrlc��eZdZdZdS)�UserAgentGoogleMobileTestzGooglebot-MobileNr`r7rrrnrn�s�������E�E�Errnc�"�eZdZdZdZdgZdgZdS)�GoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    �	googlebotrhriNrjr7rrrprp�s,�������J�

�E�"�#�D�&�
'�C�C�Crrpc��eZdZdZdgZdgZdS)�DisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    �
/some/pathz/some/path?name=valueNr>r7rrrsrs�s&�������J�
�>�D�"�
#�C�C�Crrsc��eZdZdZdgZdgZdS)�UseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z
/another/pathrtNr>r7rrrvrv�s&�������J�
��D��.�C�C�Crrvc��eZdZdZdgZdgZdS)�EmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?Nr>r7rrrxrx�s&�������J�

�?�D��
�C�C�Crrxc�Z�eZdZdZej�dd��ZdZddgZ	dgZ
dS)	�DefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rY��r:r;r<N)r4r5r6rrrrMrKrJr%r,r7rrrzrzsF�������J��%�1�1�!�R�8�8�L��K����D�'�
(�C�C�Crrzc��eZdZdZdZd�ZdS)�StringFormattingTestz�User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/c�`�|�t|j��|j��dSr
)r0�strr�expected_outputr2s r�test_string_formattingz+StringFormattingTest.test_string_formatting*s+������T�[�)�)�4�+?�@�@�@�@�@rN)r4r5r6rr�r�r7rrr~r~s6������	�J��O�A�A�A�A�Arr~c��eZdZd�Zd�ZdS)�RobotHandlerc�2�|�dd��dS)Ni�zForbidden access)�
send_errorr2s r�do_GETzRobotHandler.do_GET0s������/�0�0�0�0�0rc��dSr
r7)r�format�argss   r�log_messagezRobotHandler.log_message3s���rN)r4r5r6r�r�r7rrr�r�.s2������1�1�1�
�
�
�
�
rr�z&Socket server requires working socket.c�:�eZdZd�Zd�Zejd���ZdS)�PasswordProtectedSiteTestCasec�4�|�tjj��t	t
jdft��|_tj
d|jjddi���|_d|j_
|j���dS)NrzHTTPServer serving�
poll_intervalg{�G�z�?)�name�target�kwargsT)�
addCleanupr�request�
urlcleanuprr�HOSTr��server�	threading�Thread�
serve_forever�t�daemon�startr2s rrz#PasswordProtectedSiteTestCase.setUp=s~�������1�2�2�2� �-�"4�a�!8�,�G�G����!�%��;�,�$�D�)�
+�+�+������
���������rc��|j���|j���|j���dSr
)r��shutdownr��join�server_closer2s r�tearDownz&PasswordProtectedSiteTestCase.tearDownMs?�������������
�
�
��� � �"�"�"�"�"rc�X�|jj}dtjzdzt	|d��z}|dz}t
j���}|�|��|�	��|�
|�d|����dS)Nzhttp://�:r|z/robots.txt�*)r��server_addressrr�r�rrr�set_url�readr-r()r�addrr!�
robots_urlrs     r�testPasswordProtectedSitez7PasswordProtectedSiteTestCase.testPasswordProtectedSiteRs����{�)���-�,�,�s�2�S��a��\�\�A���=�(�
��#�3�3�5�5�����s�������
�
�
�����)�)�#�z�:�:�;�;�;�;�;rN)r4r5r6rr�r�reap_threadsr�r7rrr�r�7sQ��������� #�#�#�
�"�<�<�#�"�<�<�<rr�c�j�eZdZdZd�e��Zed���Zd�Zd�Z	d�Z
d�ZdS)	�NetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc��tjd��tj|j��5t
j�|j��|_	|j	�
��ddd��dS#1swxYwYdS)N�network)r�requiresr�transient_internet�base_urlrrrrrr�)�clss r�
setUpClasszNetworkTestCase.setUpClasscs������#�#�#�
�
-�c�l�
;�
;�	�	��+�;�;�C�N�K�K�C�J��J�O�O����	�	�	�	�	�	�	�	�	�	�	�	����	�	�	�	�	�	s�AA>�>B�Bc��d�|j|tj�|��dsdnd��S)Nz{}{}{}r|r:r
)r�r��os�path�splitext)rr�s  rr!zNetworkTestCase.urljs?������M�4�B�G�,<�,<�T�,B�,B�1�,E�!M���2�
�
�	
rc��|�|jj��|�|jj��|�|j���d��|�|j�d����|�|j�d����dS)Nrr�)r-r�disallow_all�	allow_all�
assertGreater�mtimerJrKr2s r�
test_basiczNetworkTestCase.test_basicos��������1�2�2�2������.�/�/�/����4�;�,�,�.�.��2�2�2������0�0��5�5�6�6�6������1�1�#�6�6�7�7�7�7�7rc���|�|j�d|�d������|�|j�d|j����|�|j�d|�d������|�|j�d|�d������|�|j�d|�d������|�|j�d|j����dS)Nr��	elsewhere�Nutch�brian�webstats)r'rr(r!r-r�r2s r�test_can_fetchzNetworkTestCase.test_can_fetchvs�������-�-�c�4�8�8�K�3H�3H�I�I�J�J�J������.�.�w��
�F�F�G�G�G������.�.�w�����8I�8I�J�J�K�K�K������.�.�w�����8L�8L�M�M�N�N�N������.�.�s�D�H�H�Z�4H�4H�I�I�J�J�J������-�-�c�4�=�A�A�B�B�B�B�Brc���tj�|�d����}|���|�|j��|�|j��|�	|�
��d��|�|�d����|�|�
d����dS)Nzi-robot.txtrr�)rrrr!r�r'r�r-r�r0r��assertIsNonerJrK)rrs  r�
test_read_404zNetworkTestCase.test_read_404~s����#�3�3�D�H�H�]�4K�4K�L�L�����
�
�
�����(�)�)�)�����,�-�-�-����������+�+�+����&�,�,�S�1�1�2�2�2����&�-�-�c�2�2�3�3�3�3�3rN)r4r5r6r�r�r�classmethodr�r!r�r�r�r7rrr�r�]s�������,�H�)�0�0��:�:�J�����[��
�
�
�
8�8�8�C�C�C�4�4�4�4�4rr��__main__)+rr�r��unittest�urllib.robotparserr�testr�test.supportrr�http.serverrrr	�TestCaser9r@rCrErHrSrUr_rbrdrfrlrnrprsrvrxrzr~r��
skipUnless�has_socket_supportr��requires_working_socketr�r4�mainr7rr�<module>r�s.��	�	�	�	�	�	�	�	�������������������&�&�&�&�&�&�)�)�)�)�)�)�:�:�:�:�:�:�:�:�B�B�B�B�B�B�B�B�DB�B�B�B�B�M�8�+<�B�B�B�)�)�)�)�)�=�(�2C�)�)�)�"G�G�G�G�G�-��!2�G�G�G�"7�7�7�7�7�-��):�7�7�7������-����8�����'��):����
.�.�.�.�.�#6��8I�.�.�.�$-�-�-�-�-�5�-�-�-�
�
�
�
�
�]�H�,=�
�
�
� 	
�	
�	
�	
�	
�M�8�+<�	
�	
�	
�
(�
(�
(�
(�
(�M�8�3D�
(�
(�
(������M�8�+<��������� 5����
(�
(�
(�
(�
(�M�8�+<�
(�
(�
(�$�$�$�$�$�m�X�->�$�$�$�
�
�
�
�
�M�8�3D�
�
�
������=�(�*;����
)�
)�
)�
)�
)�*�H�,=�
)�
)�
)�A�A�A�A�A�=�(�*;�A�A�A�4
�
�
�
�
�)�
�
�
������,���<�<�<�<�<�H�$5�<�<�	��<�D!�� �"�"�'4�'4�'4�'4�'4�h�'�'4�'4�#�"�'4�R�Z����H�M�O�O�O�O�O��r