U
    c(5                     @  s<  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZmZmZmZ ejejejej ej!ej"ej"dZ#ejej$dfej"ej%e	fejej&dfejej&dfej ej&dfej'ej%dfej!ej(d fiZ)ej&dej$dej%diZ*G dd deZ+dS )    )annotations)AnyN)infer_dtype)iNaT)cache_readonly)is_categorical_dtypeis_string_dtype)PandasBuffer)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
EndiannessNoBufferPresentdtype_to_arrow_c_fmt)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s   e Zd ZdZd.ddddddZd	d
ddZed	d
ddZedd
ddZ	dd
ddZ
edd Zedd Zed	d
ddZedd
ddZd	d
ddZd/d d!d"d#Zd$d
d%d&Zd'd
d(d)Zd'd
d*d+Zd'd
d,d-ZdS )0PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tz	pd.SeriesboolNone)column
allow_copyreturnc                 C  s0   t |tjs tdt| d|| _|| _dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zColumns of type  not handled yetN)
isinstancepdSeriesNotImplementedErrortype_col_allow_copy)selfr   r    r)   B/tmp/pip-unpacked-wheel-g7fro6k3/pandas/core/interchange/column.py__init__H   s    zPandasColumn.__init__int)r   c                 C  s   | j jS )z2
        Size of the column, in elements.
        )r&   sizer(   r)   r)   r*   r-   T   s    zPandasColumn.sizec                 C  s   dS )z7
        Offset of first element. Always zero.
        r   r)   r.   r)   r)   r*   offsetZ   s    zPandasColumn.offsetztuple[DtypeKind, int, str, str]c                 C  s   | j j}t|r>| j jj}| |j\}}}}tj||tj	fS t
|rrt| j dkrhtjdt|tj	fS tdn
| |S d S )Nstring   z.Non-string object dtypes are not supported yet)r&   dtyper   valuescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r$   )r(   r2   r4   _ZbitwidthZc_arrow_dtype_f_strr)   r)   r*   r2   b   s.    


zPandasColumn.dtypec                 C  s>   t |jd}|dkr&td| d||jd t||jfS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolr1   )	_NP_KINDSgetkind
ValueErroritemsizer   	byteorder)r(   r2   r=   r)   r)   r*   r5      s    z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstd| jjjdtt	| jjj
dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)Z
is_orderedZis_dictionary
categories)r2   r   r6   	TypeErrorr&   catZorderedr   r"   r#   rA   r.   r)   r)   r*   describe_categorical   s    z!PandasColumn.describe_categoricalc                 C  sH   | j d }zt| \}}W n$ tk
r>   td| dY nX ||fS )Nr   r:   z not yet supported)r2   _NULL_DESCRIPTIONKeyErrorr$   )r(   r=   nullvaluer)   r)   r*   describe_null   s    
zPandasColumn.describe_nullc                 C  s   | j    S )zB
        Number of null elements. Should always be known.
        )r&   Zisnasumitemr.   r)   r)   r*   
null_count   s    zPandasColumn.null_countzdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r&   indexr.   r)   r)   r*   metadata   s    zPandasColumn.metadatac                 C  s   dS )zE
        Return the number of chunks the column consists of.
           r)   r.   r)   r)   r*   
num_chunks   s    zPandasColumn.num_chunksNz
int | None)n_chunksc                 c  sr   |rh|dkrht | j}|| }|| dkr2|d7 }td|| |D ]"}t| jj|||  | jV  qBn| V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rO   r   N)lenr&   ranger   Zilocr'   )r(   rQ   r-   stepstartr)   r)   r*   
get_chunks   s    
 
zPandasColumn.get_chunksr   c                 C  s`   |   ddd}z|  |d< W n tk
r4   Y nX z|  |d< W n tk
rZ   Y nX |S )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsrX   rY   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r(   buffersr)   r)   r*   get_buffers   s    zPandasColumn.get_buffersztuple[PandasBuffer, Any]c                 C  s   | j d tjtjtjtjtjfkr>t| j	 | j
d}| j }n| j d tjkrt| jjj}t|| j
d}| |j }n| j d tjkr| j	 }t }|D ] }t|tr||jdd qttj|dd}tjdtjtjf}ntd| jj  d	||fS )
zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r   )r   utf-8encodingZuint8)r2   r1   r:   r    )r2   r   INTUINTFLOATBOOLDATETIMEr	   r&   to_numpyr'   r6   r3   _codesr5   r8   	bytearrayr!   strextendencodenpZ
frombufferr   r   r7   r$   )r(   bufferr2   r4   bufr   objr)   r)   r*   rZ      s6    


zPandasColumn._get_data_bufferc                 C  s   | j \}}| jd tjkr| j }|dk}| }tjt|ftj	d}t
|D ]\}}t|trf|n|||< qPt|}tjdtjtjf}	||	fS zt| d }
W n tk
r   tdY nX t|
dS )z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   shaper2   r1   z! so does not have a separate maskzSee self.describe_nullN)rI   r2   r   r8   r&   rg   rm   zerosrR   Zbool_	enumerater!   rj   r	   re   r   r   r7   _NO_VALIDITY_BUFFERrF   r$   r   )r(   rG   invalidro   Zvalidmaskr   rp   rn   r2   msgr)   r)   r*   r[   -  s     

z!PandasColumn._get_validity_bufferc           	      C  s   | j d tjkr| j }d}tjt|d ftjd}t	|D ]6\}}t
|trj|jdd}|t|7 }|||d < q@t|}tjdtjtjf}ntd||fS )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rO   rq   r_   r`   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r2   r   r8   r&   rg   rm   rs   rR   Zint64rt   r!   rj   rl   r	   rb   r   ZINT64r   r7   r   )	r(   r3   ptrrY   r   vr   rn   r2   r)   r)   r*   r\   S  s&    

z PandasColumn._get_offsets_buffer)T)N)__name__
__module____qualname____doc__r+   r-   propertyr/   r   r2   r5   rD   rI   rL   rN   rP   rV   r^   rZ   r[   r\   r)   r)   r)   r*   r   <   s,   

	%-&r   ),
__future__r   typingr   Znumpyrm   Zpandas._libs.libr   Zpandas._libs.tslibsr   Zpandas.util._decoratorsr   Zpandasr"   Zpandas.api.typesr   r   Zpandas.core.interchange.bufferr	   Z*pandas.core.interchange.dataframe_protocolr
   r   r   r   Zpandas.core.interchange.utilsr   r   r   r   rb   rc   rd   re   r8   rf   r;   ZUSE_NANZUSE_SENTINELZNON_NULLABLEr6   ZUSE_BYTEMASKrE   ru   r   r)   r)   r)   r*   <module>   sR             