U
    Ïøïc(5  ã                   @  s<  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZmZmZmZ ejejejej ej!ej"ej"dœZ#ejej$dfej"ej%e	fejej&dfejej&dfej ej&dfej'ej%dfej!ej(d fiZ)ej&dej$dej%diZ*G dd„ deƒZ+dS )é    )Úannotations)ÚAnyN)Úinfer_dtype)ÚiNaT)Úcache_readonly)Úis_categorical_dtypeÚis_string_dtype)ÚPandasBuffer)ÚColumnÚColumnBuffersÚColumnNullTypeÚ	DtypeKind)ÚArrowCTypesÚ
EndiannessÚNoBufferPresentÚdtype_to_arrow_c_fmt)ÚiÚuÚfÚbÚUÚMÚméÿÿÿÿzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  sö   e Zd ZdZd.ddddœdd„Zd	d
œdd„Zed	d
œdd„ƒZedd
œdd„ƒZ	dd
œdd„Z
edd„ ƒZedd„ ƒZed	d
œdd„ƒZedd
œdd„ƒZd	d
œdd„Zd/d d!œd"d#„Zd$d
œd%d&„Zd'd
œd(d)„Zd'd
œd*d+„Zd'd
œd,d-„ZdS )0ÚPandasColumnaö  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tz	pd.SeriesÚboolÚNone)ÚcolumnÚ
allow_copyÚreturnc                 C  s0   t |tjƒs tdt|ƒ› dƒ‚|| _|| _dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zColumns of type ú not handled yetN)Ú
isinstanceÚpdÚSeriesÚNotImplementedErrorÚtypeÚ_colÚ_allow_copy)Úselfr   r   © r)   úB/tmp/pip-unpacked-wheel-g7fro6k3/pandas/core/interchange/column.pyÚ__init__H   s    zPandasColumn.__init__Úint)r   c                 C  s   | j jS )z2
        Size of the column, in elements.
        )r&   Úsize©r(   r)   r)   r*   r-   T   s    zPandasColumn.sizec                 C  s   dS )z7
        Offset of first element. Always zero.
        r   r)   r.   r)   r)   r*   ÚoffsetZ   s    zPandasColumn.offsetztuple[DtypeKind, int, str, str]c                 C  s€   | j j}t|ƒr>| j jj}|  |j¡\}}}}tj||tj	fS t
|ƒrrt| j ƒdkrhtjdt|ƒtj	fS tdƒ‚n
|  |¡S d S )NÚstringé   z.Non-string object dtypes are not supported yet)r&   Údtyper   ÚvaluesÚcodesÚ_dtype_from_pandasdtyper   ÚCATEGORICALr   ÚNATIVEr   r   ÚSTRINGr   r$   )r(   r2   r4   Ú_ZbitwidthZc_arrow_dtype_f_strr)   r)   r*   r2   b   s.    

ûüü
zPandasColumn.dtypec                 C  s>   t  |jd¡}|dkr&td|› dƒ‚||jd t|ƒ|jfS )z/
        See `self.dtype` for details.
        Nú
Data type z& not supported by interchange protocolr1   )Ú	_NP_KINDSÚgetÚkindÚ
ValueErrorÚitemsizer   Ú	byteorder)r(   r2   r=   r)   r)   r*   r5   €   s    z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstdƒ‚| jjjdtt 	| jjj
¡ƒdœS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)Z
is_orderedZis_dictionaryÚ
categories)r2   r   r6   Ú	TypeErrorr&   ÚcatZorderedr   r"   r#   rA   r.   r)   r)   r*   Údescribe_categorical   s    ÿýz!PandasColumn.describe_categoricalc                 C  sH   | j d }zt| \}}W n$ tk
r>   td|› dƒ‚Y nX ||fS )Nr   r:   z not yet supported)r2   Ú_NULL_DESCRIPTIONÚKeyErrorr$   )r(   r=   ÚnullÚvaluer)   r)   r*   Údescribe_null¬   s    
zPandasColumn.describe_nullc                 C  s   | j  ¡  ¡  ¡ S )zB
        Number of null elements. Should always be known.
        )r&   ZisnaÚsumÚitemr.   r)   r)   r*   Ú
null_count¶   s    zPandasColumn.null_countzdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r&   Úindexr.   r)   r)   r*   Úmetadata½   s    zPandasColumn.metadatac                 C  s   dS )zE
        Return the number of chunks the column consists of.
        é   r)   r.   r)   r)   r*   Ú
num_chunksÄ   s    zPandasColumn.num_chunksNz
int | None)Ún_chunksc                 c  sr   |rh|dkrht | jƒ}|| }|| dkr2|d7 }td|| |ƒD ]"}t| jj||| … | jƒV  qBn| V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rO   r   N)Úlenr&   Úranger   Zilocr'   )r(   rQ   r-   ÚstepÚstartr)   r)   r*   Ú
get_chunksÊ   s    
 ÿ
zPandasColumn.get_chunksr   c                 C  s`   |   ¡ dddœ}z|  ¡ |d< W n tk
r4   Y nX z|  ¡ |d< W n tk
rZ   Y nX |S )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)ÚdataÚvalidityÚoffsetsrX   rY   )Ú_get_data_bufferÚ_get_validity_bufferr   Ú_get_offsets_buffer)r(   Úbuffersr)   r)   r*   Úget_buffersÛ   s    ýzPandasColumn.get_buffersztuple[PandasBuffer, Any]c                 C  sü   | j d tjtjtjtjtjfkr>t| j 	¡ | j
d}| j }n¶| j d tjkrt| jjj}t|| j
d}|  |j ¡}n€| j d tjkrà| j 	¡ }tƒ }|D ] }t|tƒr˜| |jdd¡ q˜ttj|ddƒ}tjdtjtjf}ntd| jj › d	ƒ‚||fS )
zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r   )r   úutf-8©ÚencodingZuint8)r2   r1   r:   r    )r2   r   ÚINTÚUINTÚFLOATÚBOOLÚDATETIMEr	   r&   Úto_numpyr'   r6   r3   Ú_codesr5   r8   Ú	bytearrayr!   ÚstrÚextendÚencodeÚnpZ
frombufferr   r   r7   r$   )r(   Úbufferr2   r4   Úbufr   Úobjr)   r)   r*   rZ      s6    û


üzPandasColumn._get_data_bufferc                 C  sÌ   | j \}}| jd tjkr’| j ¡ }|dk}| }tjt|ƒftj	d}t
|ƒD ]\}}t|tƒrf|n|||< qPt|ƒ}tjdtjtjf}	||	fS zt| d }
W n tk
r¾   tdƒ‚Y nX t|
ƒ‚dS )zÒ
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   ©Úshaper2   r1   z! so does not have a separate maskzSee self.describe_nullN)rI   r2   r   r8   r&   rg   rm   ÚzerosrR   Zbool_Ú	enumerater!   rj   r	   re   r   r   r7   Ú_NO_VALIDITY_BUFFERrF   r$   r   )r(   rG   Úinvalidro   ZvalidÚmaskr   rp   rn   r2   Úmsgr)   r)   r*   r[   -  s     

z!PandasColumn._get_validity_bufferc           	      C  s¤   | j d tjkr”| j ¡ }d}tjt|ƒd ftjd}t	|ƒD ]6\}}t
|tƒrj|jdd}|t|ƒ7 }|||d < q@t|ƒ}tjdtjtjf}ntdƒ‚||fS )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rO   rq   r_   r`   é@   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r2   r   r8   r&   rg   rm   rs   rR   Zint64rt   r!   rj   rl   r	   rb   r   ZINT64r   r7   r   )	r(   r3   ÚptrrY   r   Úvr   rn   r2   r)   r)   r*   r\   S  s&    

üÿz PandasColumn._get_offsets_buffer)T)N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r+   r-   Úpropertyr/   r   r2   r5   rD   rI   rL   rN   rP   rV   r^   rZ   r[   r\   r)   r)   r)   r*   r   <   s,   

	%-&r   ),Ú
__future__r   Útypingr   Znumpyrm   Zpandas._libs.libr   Zpandas._libs.tslibsr   Zpandas.util._decoratorsr   Zpandasr"   Zpandas.api.typesr   r   Zpandas.core.interchange.bufferr	   Z*pandas.core.interchange.dataframe_protocolr
   r   r   r   Zpandas.core.interchange.utilsr   r   r   r   rb   rc   rd   re   r8   rf   r;   ZUSE_NANZUSE_SENTINELZNON_NULLABLEr6   ZUSE_BYTEMASKrE   ru   r   r)   r)   r)   r*   Ú<module>   sR   ù       ö   ý