U
    cN(                     @  sV  d Z ddlmZ ddlZddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZmZmZmZ erdd
lmZmZmZm Z m!Z! dZ"ddddddZ#dde"dfdddddddddZ$de"fdddddddZ%d dddd!d"d#Z&de"dfd$ddddd%d&d'Z'de"dfd(ddddd%d)d*Z(dS )+z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKINGHashableIterableIteratorcast)lib)hash_object_array)	ArrayLikenpt)is_categorical_dtypeis_list_like)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)Categorical	DataFrameIndex
MultiIndexSeriesZ0123456789123456zIterator[np.ndarray]intznpt.NDArray[np.uint64])arrays	num_itemsreturnc                 C  s   zt | }W n$ tk
r0   tjg tjd Y S X t|g| } td}t|td }t| D ]6\}}|| }||N }||9 }|td| | 7 }qf|d |kst	d|td7 }|S )z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    dtypeiCB ixV4 iXB    zFed in wrong num_itemsi| )
nextStopIterationnparrayuint64	itertoolschainZ
zeros_like	enumerateAssertionError)r   r   firstZmultoutiaZ	inverse_i r,   </tmp/pip-unpacked-wheel-g7fro6k3/pandas/core/util/hashing.pycombine_hash_arrays2   s    
r.   Tutf8zIndex | DataFrame | Seriesboolstrz
str | Noner   )objindexencodinghash_key
categorizer   c                   s  ddl m} dkrtttr8|tdddS ttrptj j	ddd}||ddd}ntt
rtj j	ddd}|rȇ fd	d
dD }t|g|}	t|	d}||jddd}nttrj fdd
 D }
tj}|rL fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}ntdt |S )a~  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object
    r   )r   Nr#   F)r   copyr7   )r3   r   r7   c                 3  s$   | ]}t jd  djV  qdS F)r3   r4   r5   r6   Nhash_pandas_objectr3   _values.0_r6   r4   r5   r2   r,   r-   	<genexpr>   s   z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s"   | ]\}}t |j V  qd S rB   )
hash_arrayr<   )r>   r?   Zseries)r6   r4   r5   r,   r-   rA      s   c                 3  s$   | ]}t jd  djV  qdS r9   r:   r=   r@   r,   r-   rA      s   r   c                 s  s   | ]
}|V  qd S rB   r,   )r>   xr,   r,   r-   rA      s     zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr   rD   r<   astyper   r$   r%   r.   r3   r   itemslencolumns	TypeErrortype)r2   r3   r4   r5   r6   r   hZserZ
index_iterr   hashesr   Zindex_hash_generator_hashesr,   r@   r-   r;   T   sN    

 
 




r;   z+MultiIndex | Iterable[tuple[Hashable, ...]])valsr4   r5   r   c                   sz   t | stdddlm m} t| ts6|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )r   r   c                   s(   g | ] } j | j| d ddqS )FTZorderedZfastpath)codeslevels)r>   level)r   mir,   r-   
<listcomp>   s   zhash_tuples.<locals>.<listcomp>c                 3  s   | ]}t | d V  qdS )r4   r5   N)_hash_categorical)r>   catrZ   r,   r-   rA      s    zhash_tuples.<locals>.<genexpr>)r   rN   rF   r   r   rH   r   from_tuplesrangeZnlevelsr.   rL   )rS   r4   r5   r   Zcat_valsrQ   rP   r,   )r   r4   r5   rX   r-   rI      s    
rI   r   )r\   r4   r5   r   c                 C  sd   t | jj}t|||dd}|  }t|r<|| j}nt j	t|dd}|
 r`tj||< |S )a  
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    cat : Categorical
    encoding : str
    hash_key : str

    Returns
    -------
    ndarray[np.uint64] of hashed values, same size as len(c)
    F)r6   r#   r   )r!   Zasarray
categoriesr<   rD   ZisnarL   ZtakerU   zerosanyr   Zu8max)r\   r4   r5   valueshashedmaskresultr,   r,   r-   r[      s    	
r[   r
   )rS   r4   r5   r6   r   c                 C  s~   t | dstd| j}t|r6td| } t| ||S t| trN|  \} }n"t| t	j
sptdt| j dt| |||S )aK  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    r   zmust pass a ndarray-liker   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)hasattrrN   r   r   r   r[   rH   r   Z_values_for_factorizer!   ZndarrayrO   __name___hash_ndarray)rS   r4   r5   r6   r   r?   r,   r,   r-   rD     s    


rD   z
np.ndarrayc                 C  st  | j }t|tjr4tt| dtt|   S |tkrH| d} nt	|j
tjtjfrr| djddd} nt	|j
tjr|jdkr| d| j j d} n|rdd	lm}m}m} || dd
\}}	||||	ddd}
t|
||S zt| ||} W n. tk
r.   t| tt||} Y nX | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8Fr8      ur   )r   r   	factorize)sortTrT      l   e9z    l   b&&&	    )r   r!   Z
issubdtypeZ
complex128rD   realimagr0   rJ   
issubclassrO   Z
datetime64Ztimedelta64viewnumberitemsizerF   r   r   rn   Z_with_inferr[   r	   rN   r1   objectr#   )rS   r4   r5   r6   r   r   r   rn   rU   r_   r\   r,   r,   r-   rh   7  sB    	      
rh   ))__doc__
__future__r   r$   typingr   r   r   r   r   Znumpyr!   Zpandas._libsr   Zpandas._libs.hashingr	   Zpandas._typingr
   r   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.genericr   r   r   r   r   rF   r   r   r   r   r   rG   r.   r;   rI   r[   rD   rh   r,   r,   r,   r-   <module>   s<   
$^.*3