| Current File : //proc/thread-self/root/lib64/python3.6/html/__pycache__/parser.cpython-36.pyc | 
3
  \AE  �               @   s�   d Z ddlZddlZddlZddlmZ dgZejd�Zejd�Z	ejd�Z
ejd�Zejd	�Zejd
�Z
ejd�Zejd�Zejd
�Zejdej�Zejd
�Zejd�ZG dd� dej�ZdS )zA parser for HTML and XHTML.�    N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c               @   s�   e Zd ZdZd:Zdd�dd�Zdd	� Zd
d� Zdd
� ZdZ	dd� Z
dd� Zdd� Zdd� Z
dd� Zd;dd�Zdd� Zdd� Zd d!� Zd"d#� Zd$d%� Zd&d'� Zd(d)� Zd*d+� Zd,d-� Zd.d/� Zd0d1� Zd2d3� Zd4d5� Zd6d7� Zd8d9� ZdS )<r   aE  Find tags and other markup and call handler functions.
    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()
    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    �script�styleT)�convert_charrefsc            C   s   || _ | j�  dS )z�Initialize and reset this instance.
        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   �reset)�selfr   � r
   �#/usr/lib64/python3.6/html/parser.py�__init__W   s    zHTMLParser.__init__c             C   s(   d| _ d| _t| _d| _tjj| � dS )z1Reset this instance.  Loses all unprocessed data.� z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser   )r	   r
   r
   r   r   `   s
    zHTMLParser.resetc             C   s   | j | | _ | jd� dS )z�Feed data to the parser.
        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   �goahead)r	   �datar
   r
   r   �feedh   s    zHTMLParser.feedc             C   s   | j d� dS )zHandle any buffered data.�   N)r   )r	   r
   r
   r   �closeq   s    zHTMLParser.closeNc             C   s   | j S )z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_text)r	   r
   r
   r   �get_starttag_textw   s    zHTMLParser.get_starttag_textc             C   s$   |j � | _tjd| j tj�| _d S )Nz</\s*%s\s*>)�lowerr   �re�compile�Ir   )r	   �elemr
   r
   r   �set_cdata_mode{   s    
zHTMLParser.set_cdata_modec             C   s   t | _d | _d S )N)r   r   r   )r	   r
   r
   r   �clear_cdata_mode   s    zHTMLParser.clear_cdata_modec             C   sZ  | j }d}t|�}�x�||k �r�| jr|| j r||jd|�}|dk r�|jdt||d ��}|dkrvtjd�j	||� rvP |}n(| j
j	||�}|r�|j� }n| jr�P |}||k r�| jr�| j r�| jt
|||� �� n| j|||� � | j||�}||kr�P |j}|d|��rLtj||��r&| j|�}	n�|d|��r>| j|�}	nl|d|��rV| j|�}	nT|d|��rn| j|�}	n<|d	|��r�| j|�}	n$|d
 |k �r�| jd� |d
 }	nP |	dk �r>|�s�P |jd|d
 �}	|	dk �r�|jd|d
 �}	|	dk �r|d
 }	n|	d
7 }	| j�r,| j �r,| jt
|||	� �� n| j|||	� � | j||	�}q|d|��r�tj||�}|�r�|j� d
d� }
| j|
� |j� }	|d|	d
 ��s�|	d
 }	| j||	�}qn:d||d � k�r�| j|||d
 � � | j||d
 �}P q|d|��r�tj||�}|�rP|jd
�}
| j|
� |j� }	|d|	d
 ��sB|	d
 }	| j||	�}qtj||�}|�r�|�r�|j� ||d � k�r�|j� }	|	|k�r�|}	| j||d
 �}P n,|d
 |k �r�| jd� | j||d
 �}nP qdstd��qW |�rH||k �rH| j �rH| j�r*| j �r*| jt
|||� �� n| j|||� � | j||�}||d � | _ d S )Nr   �<�&�"