U
    c                     @  sz  d Z ddlmZ ddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZ ddlZddlmZmZ ddlm  mZ ddlm  mZ ddlmZmZmZmZmZ ddl m!Z! ddl"m#Z# dd	l$m%Z%m&Z& dd
l'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z: ddl;m<Z<m=Z=m>Z>m?Z?m@Z@ ddlAmBZB ddlCmDZD ddlEmFZF ddlGmHZHmIZI ddlJmKZK ddlLmMZM ddlNmOZO ddlPmQZQmRZRmSZSmTZT ddlUmVZV ddlWmXZXmYZYmZZZm[Z[m\Z\m]Z] erddl^m_Z_ G dd dZ`G dd dZaG dd deaZbdd d!d"d#ZcG d$d% d%ee ZdG d&d' d'edZeG d(d) d)edZfd/d*d+ddd%d,d-d.ZgdS )0a  
Provide classes to perform the groupby aggregate operations.

These are not exposed to the user and provide implementations of the grouping
operations, primarily in cython. These classes (BaseGrouper and BinGrouper)
are contained *in* the SeriesGroupBy and DataFrameGroupBy objects.
    )annotationsN)TYPE_CHECKINGCallableGenericHashableIteratorNoReturnSequencefinal)NaTlib)	ArrayLikeDtypeObjNDFrameTShapenptAbstractMethodError)cache_readonly)maybe_cast_pointwise_resultmaybe_downcast_to_dtype)ensure_float64ensure_int64ensure_platform_intensure_uint64is_1d_only_ea_dtypeis_bool_dtypeis_complex_dtypeis_datetime64_any_dtypeis_float_dtypeis_integer_dtypeis_numeric_dtype	is_sparseis_timedelta64_dtypeneeds_i8_conversion)CategoricalDtype)isna
maybe_fill)CategoricalDatetimeArrayExtensionArrayPeriodArrayTimedeltaArray)BooleanDtype)FloatingDtype)IntegerDtype)BaseMaskedArrayBaseMaskedDtype)StringDtype)	DataFrame)grouper)CategoricalIndexIndex
MultiIndexensure_index)Series)compress_group_indexdecons_obs_group_idsget_flattened_listget_group_indexget_group_index_sorterget_indexer_dict)NDFramec                   @  s  e Zd ZdZedddddgZdddd	d
ddZddddddddddd
dddddddZdd d!d"d#d$dd%d&d'd(hZd&d)iZ	e
ejd*d+ddd,dd-d.d/Zd0d0d1d2d3Zd_d5dd6d7d8Zd9d0d:d;d<d=Zd,d,d>d?d@Zd,d,d>dAdBZddCdDdEZedFd9d9d0dGdHdIdJZdFd0d1dKdLZdFd0dFdMdNdOZedPd9d9d0dPdHdQdRZed*d*dSd0d9d9d0dTdTd0dUdVdWZed0d9d9d0dTdTd0dUdXdYZedZd[dGd9d9d0d9dGd\d]d^Zd*S )`WrappedCythonOpaB  
    Dispatch logic for functions defined in _libs.groupby

    Parameters
    ----------
    kind: str
        Whether the operation is an aggregate or transform.
    how: str
        Operation name, e.g. "mean".
    has_dropped_na: bool
        True precisely when dropna=True and the grouper contains a null value.
    rankcountsizeZidxminZidxmaxstrboolNone)kindhowhas_dropped_nareturnc                 C  s   || _ || _|| _d S NrH   rI   rJ   )selfrH   rI   rJ    rO   ;/tmp/pip-unpacked-wheel-g7fro6k3/pandas/core/groupby/ops.py__init__z   s    zWrappedCythonOp.__init__Z	group_sumZ
group_prodZ	group_minZ	group_maxZ
group_meanZgroup_median_float64Z	group_varZ	group_nthZ
group_lastZ
group_ohlc)
sumprodminmaxmeanmedianvarfirstlastohlcZgroup_cumprod_float64Zgroup_cumsumZgroup_cumminZgroup_cummaxZ
group_rank)cumprodcumsumcummincummaxrB   )	aggregate	transformr^   r_   rT   rU   rZ   rY   rR   r[   r]   rS      N)maxsizeznp.dtype)rH   rI   dtype
is_numericc                 C  s   |j }| j| | }tt|}|r&|S |ttkrx|dkrTtd| d| dn d|jkrttd| d| d|S td|d S )NrW   r\   z2function is not implemented for this dtype: [how->z,dtype->]objectzPThis should not be reached. Please report a bug at github.com/pandas-dev/pandas/)	name_CYTHON_FUNCTIONSgetattr
libgroupbynprd   rh   NotImplementedErrorZ__signatures__)clsrH   rI   rd   re   Z	dtype_strZftypefrO   rO   rP   _get_cython_function   s&    

z$WrappedCythonOp._get_cython_function
np.ndarray)valuesrK   c                 C  sp   | j }|dkrt|}nT|jjdkrl|dks<| jdkrF| jrFt|}n&|dkrl|jjdkrdt|}nt|}|S )z
        Cast numeric dtypes to float64 for functions that only support that.

        Parameters
        ----------
        values : np.ndarray

        Returns
        -------
        values : np.ndarray
        rf   )iu)rX   rV   ra   )rR   r[   rS   r]   rt   )rI   r   rd   rH   rJ   r   r   )rN   rs   rI   rO   rO   rP   _get_cython_vals   s    


z WrappedCythonOp._get_cython_valsFr   )rd   re   c                 C  s   | j }|rdS t|trd|dkr6t| d| dq|dkrNt| dq|jst| dnZt|r|t| dnBt|r|dkrtd| dn t|r|dkrtd	| ddS )
z
        Check if we can do this operation with our cython functions.

        Raises
        ------
        NotImplementedError
            This is either not a valid function for this dtype, or
            valid but not implemented in cython.
        N)rR   rS   r]   r\   z type does not support z operations)rB   z dtype not supportedz!datetime64 type does not support )rS   r\   z"timedelta64 type does not support )	rI   
isinstancer%   	TypeErrorrn   orderedr"   r   r#   )rN   rd   re   rI   rO   rO   rP   _disallow_invalid_ops   s$    

z%WrappedCythonOp._disallow_invalid_opsintr   )ngroupsrs   rK   c                 C  sf   | j }| j}| j|d}|dkr,|df}n6|dkr>tdn$|dkrN|j}n|f|jdd   }|S )N   r[   rb   z<arity of more than 1 is not supported for the 'how' argumentra   )rI   rH   _cython_aritygetrn   shape)rN   r|   rs   rI   rH   arity	out_shaperO   rO   rP   _get_output_shape  s    
z!WrappedCythonOp._get_output_shape)rd   rK   c                 C  s<   | j }|dkrd}nt|r.|j |j }nd}t|S )NrB   float64rh   )rI   r!   rH   itemsizerm   rd   )rN   rd   rI   	out_dtyperO   rO   rP   _get_out_dtype.  s    zWrappedCythonOp._get_out_dtypec                 C  s^   | j }|dkr*|ttkrZttjS n0|dkrZt|sBt|rF|S t|rZttjS |S )a  
        Get the desired dtype of a result based on the
        input dtype and how it was computed.

        Parameters
        ----------
        dtype : np.dtype

        Returns
        -------
        np.dtype
            The desired dtype of the result.
        )rR   r]   rR   rS   )rV   rW   rX   )	rI   rm   rd   rF   int64r   r   r!   r   )rN   rd   rI   rO   rO   rP   _get_result_dtype:  s    z!WrappedCythonOp._get_result_dtyperK   c                 C  s   | j | jkS rL   )rI   _MASKED_CYTHON_FUNCTIONSrN   rO   rO   rP   	uses_maskT  s    zWrappedCythonOp.uses_maskr*   r   )rs   	min_countr|   comp_idsrK   c           	      K  s   t |tr,|  r,| j|f|||d|S t |tr|  r| jdksLt|jsVt| }|j	}| j
|f||||d|}|S | |}| j
|f|||dd|}| j| jkr|S | ||S )zs
        If we have an ExtensionArray, unwrap, call _cython_operation, and
        re-wrap if appropriate.
        r   r|   r   rB   r   r|   r   maskN)rw   r0   r    _masked_ea_wrap_cython_operationr(   rI   AssertionErrorry   r&   _ndarray_cython_op_ndim_compat_ea_to_cython_valuescast_blocklist_reconstruct_ea_result)	rN   rs   r   r|   r   kwargsr   npvalues
res_valuesrO   rO   rP   _ea_wrap_cython_operationW  sP    

	z)WrappedCythonOp._ea_wrap_cython_operationc                 C  s   t |tttfr|jd}nrt |jttfr@|j	dt
jd}nPt |jtrb|j	|jjt
jd}n.t |jtr|j	tt
jd}ntd|j |S )NM8[ns]r   )Zna_valuez,function is not implemented for this dtype: )rw   r)   r+   r,   r   viewrd   r-   r/   Zto_numpyrm   nanr.   numpy_dtyper2   rh   rn   )rN   rs   r   rO   rO   rP   r     s    
z$WrappedCythonOp._ea_to_cython_values)rs   r   rK   c                 C  s   t |jtr(|j}| }|j||dS t |jtrb| |jj}t|}| }|j||dS t |t	t
tfr|jdkst||jj}||S tdS )zL
        Construct an ExtensionArray result from an ndarray result.
        rd   r   N)rw   rd   r2   Zconstruct_array_typeZ_from_sequencer1   r   r   Zfrom_numpy_dtyper)   r,   r+   r   r   r   Z_from_backing_datarn   )rN   rs   r   rd   Zstring_array_clsZ	new_dtypeZmasked_array_clsrO   rO   rP   r     s    

z&WrappedCythonOp._reconstruct_ea_resultr0   c                 K  st   |}|j }| jdkr| }ntj|td}|j}	| j|	f|||||d|}
| jdkrht	|dj
}||
|S )z
        Equivalent of `_ea_wrap_cython_operation`, but optimized for masked EA's
        and cython algorithms which accept a mask.
        r`   r   r   r|   r   r   result_maskr[   )rb   r}   )Z_maskrH   copyrm   zerosrF   _datar   rI   ZtileTZ_maybe_mask_result)rN   rs   r   r|   r   r   orig_valuesr   r   Zarrr   rO   rO   rP   r     s(    



z0WrappedCythonOp._masked_ea_wrap_cython_operation)r   r   znpt.NDArray[np.bool_] | None)rs   r   r|   r   r   r   rK   c          
      K  s   |j dkr|d d d f }|d k	r2|d d d f }|d k	rJ|d d d f }| j|f|||||d|}	|	jd dkr~|	d S |	jS | j|f|||||d|S )Nr}   r   r   )ndim_call_cython_opr   r   )
rN   rs   r   r|   r   r   r   r   Zvalues2dresrO   rO   rP   r     s>    
	z&WrappedCythonOp._cython_op_ndim_compatc             
   K  st  |}|j }	t|	}
t|	}|r.|d}d}
nt|	r@|d}|j dkrV|tj}|j}|d k	rx|j}|d k	rx|j}| 	||}| 
| j| j|j |
}| |}| |j }ttj||d}| jdkrdtj|tjd}| jdkr|||||||||d n^| jd	kr*|||||||||d
 n8| jdkrN||||||||d n||||||f| nV|  r| jdkr||d< |f ||||||d| n|f |||||d| | jdkr@t|j r@|s@t| jdkrdnd|}||k }| r@|d k	r,|  r,||  s@tn|d}tj||< |j}| j| jkrl| |j }t||}n|}|S )Nr   TZuint8Zfloat16r   r`   )rT   rU   rV   rZ   rY   )outcountsrs   labelsr   r   r   is_datetimelike)rR   )r   r   rs   r   r   r   r   r   )r[   rS   )r   r   r   rB   r   )r   rs   r   r|   r   r   )r   rs   r   r|   r   )rR   rS   r   r}   r   )rd   r!   r$   r   r   Zastyperm   Zfloat32r   r   rq   rH   rI   rv   r   r'   emptyr   r   r   r    rU   anyallr   r   r   r   r   )rN   rs   r   r|   r   r   r   r   r   rd   re   r   r   funcr   resultr   cutoffZempty_groupsZ	res_dtype	op_resultrO   rO   rP   r     s    








	


zWrappedCythonOp._call_cython_op)r   )rs   axisr   r   r|   rK   c          	      K  s   |j dkrtdn2|j dkr0|dksFt|nt|jsF|dksFt|j}t|}| || t|tj	s| j
|f|||d|S | j|f|||dd|S )zW
        Call our cython function, with appropriate pre- and post- processing.
           z.number of dimensions is currently limited to 2r}   r   r   Nr   )r   rn   r   r   rd   r!   rz   rw   rm   ndarrayr   r   )	rN   rs   r   r   r   r|   r   rd   re   rO   rO   rP   cython_operation  s:    



z WrappedCythonOp.cython_operation)F)__name__
__module____qualname____doc__	frozensetr   rQ   rj   r   r~   classmethod	functools	lru_cacherq   rv   rz   r   r   r   r   r
   r   r   r   r   r   r   r   rO   rO   rO   rP   rA   h   sv   
!%*;( + rA   c                
   @  sZ  e Zd ZU dZded< dgdddddd	dd
dddZeddddZeddddZddddZ	eddddZ
dhdddddd Zedid!dd"dd#d$Zd%d& Zeed'd( Zedjd)d*dd+d,d-d.Zed/dd0d1Zed2dd3d4Zeed5dd6d7Zed8dd9d:Zed;dd<d=Zed>dd?d@ZedAddBdCZeedddDdEZeedddFdGZedHddIdJZeed2ddKdLZedMddNdOZeedddPdQZedRddSdTZedddUdVZ edWddXdYZ!edkd[d[ddd\d]d^d_Z"edld>d)dd\d`dadbZ#ed>d)dcdddedfZ$dS )mBaseGroupera  
    This is an internal Grouper class, which actually holds
    the generated groups

    Parameters
    ----------
    axis : Index
    groupings : Sequence[Grouping]
        all the grouping instances to handle in this grouper
        for example for grouper list to groupby, need to pass the list
    sort : bool, default True
        whether this grouper will give sorted result or not
    group_keys : bool, default True
    mutated : bool, default False
    indexer : np.ndarray[np.intp], optional
        the indexer created by Grouper
        some groupers (TimeGrouper) will sort its axis and its
        group_info is also sorted, so need the indexer to reorder

    r6   r   TFNzSequence[grouper.Grouping]rF   znpt.NDArray[np.intp] | NonerG   )r   	groupingssort
group_keysmutatedindexerdropnarK   c                 C  sD   t |tst||| _t|| _|| _|| _|| _|| _	|| _
d S rL   )rw   r6   r   r   list
_groupings_sortr   r   r   r   )rN   r   r   r   r   r   r   r   rO   rO   rP   rQ     s    

zBaseGrouper.__init__list[grouper.Grouping]r   c                 C  s   | j S rL   )r   r   rO   rO   rP   r     s    zBaseGrouper.groupingsr   c                 C  s   t dd | jD S )Nc                 s  s   | ]}|j V  qd S rL   )r|   .0pingrO   rO   rP   	<genexpr>   s     z$BaseGrouper.shape.<locals>.<genexpr>)tupler   r   rO   rO   rP   r     s    zBaseGrouper.shapezIterator[Hashable]c                 C  s
   t | jS rL   )iterindicesr   rO   rO   rP   __iter__  s    zBaseGrouper.__iter__r{   c                 C  s
   t | jS rL   )lenr   r   rO   rO   rP   nkeys  s    zBaseGrouper.nkeysr   r   z#Iterator[tuple[Hashable, NDFrameT]])datar   rK   c                 c  s(   | j ||d}| j}t||E dH  dS )
        Groupby iterator

        Returns
        -------
        Generator yielding sequence of (name, subsetted object)
        for each group
        r   N)_get_splittergroup_keys_seqzip)rN   r   r   splitterkeysrO   rO   rP   get_iterator	  s    zBaseGrouper.get_iteratorr@   DataSplitterc                 C  s   | j \}}}t||||dS )zV
        Returns
        -------
        Generator yielding subsetted objects
        r   )
group_infoget_splitter)rN   r   r   ids_r|   rO   rO   rP   r     s    zBaseGrouper._get_splitterc                 C  s   | j d jS )
        We are a grouper as part of another's groupings.

        We have a specific method of grouping, so cannot
        convert to a Index for our grouper.
        r   )r   grouping_vectorr   rO   rO   rP   _get_grouper"  s    zBaseGrouper._get_grouperc                 C  s:   t | jdkr| jd S | j\}}}t||| j| jS d S Nr}   r   )r   r   levelsr   r<   codes)rN   r   r   r|   rO   rO   rP   r   +  s    
zBaseGrouper.group_keys_seqr   zDataFrame | Seriesztuple[list, bool])rp   r   r   rK   c                 C  s   | j }| j||d}| j}g }t||}|D ]B\}	}
t|
d|	 |
j}||
}|sdt|||sdd}|| q,t	|dkrt
|dd dkr||jd d  ||fS )Nr   ri   Tr   r   )ZmadZskewrR   rS   )r   r   r   r   rh   __setattr__axes_is_indexed_likeappendr   rk   iloc)rN   rp   r   r   r   r   r   Zresult_valuesZzippedkeygroupZ
group_axesr   rO   rO   rP   apply6  s    
	zBaseGrouper.applyz$dict[Hashable, npt.NDArray[np.intp]]c                 C  sP   t | jdkr&t| jtr&| jd jS dd | jD }dd | jD }t||S )z"dict {group name -> group indices}r}   r   c                 S  s   g | ]
}|j qS rO   r   r   rO   rO   rP   
<listcomp>_  s     z'BaseGrouper.indices.<locals>.<listcomp>c                 S  s   g | ]
}|j qS rO   group_indexr   rO   rO   rP   r   `  s     )r   r   rw   result_indexr5   r   r?   )rN   Z
codes_listr   rO   rO   rP   r   Y  s
    zBaseGrouper.indicesnpt.NDArray[np.intp]c                 C  s|   t | j| j| jdd}t|| jd\}}| jrVt|dk}t|dk| }|| }t	|| j
}| jrx|t||7 }|S )zR
        Get the original integer locations of result_index in the input.
        Tr   xnullr   r   r   )r=   r   r   r   r:   rJ   rm   wherer]   r>   r|   take)rN   r   r   r   Z	null_gapsr   rO   rO   rP   result_ilocsc  s       zBaseGrouper.result_ilocsz#list[npt.NDArray[np.signedinteger]]c                 C  s   dd | j D S )Nc                 S  s   g | ]
}|j qS rO   r   r   rO   rO   rP   r     s     z%BaseGrouper.codes.<locals>.<listcomp>r   r   rO   rO   rP   r   ~  s    zBaseGrouper.codeslist[Index]c                 C  s   dd | j D S )Nc                 S  s   g | ]
}|j qS rO   r   r   rO   rO   rP   r     s     z&BaseGrouper.levels.<locals>.<listcomp>r  r   rO   rO   rP   r     s    zBaseGrouper.levelslist[Hashable]c                 C  s   dd | j D S )Nc                 S  s   g | ]
}|j qS rO   )ri   r   rO   rO   rP   r     s     z%BaseGrouper.names.<locals>.<listcomp>r  r   rO   rO   rP   names  s    zBaseGrouper.namesr9   c                 C  s<   | j \}}}|r(tj||dk |d}ng }t|| jddS )z&
        Compute group sizes.
        r   )Z	minlengthr   )indexrd   )r   rm   Zbincountr9   r   )rN   r   r   r|   r   rO   rO   rP   rD     s
    zBaseGrouper.sizezdict[Hashable, np.ndarray]c                 C  sF   t | jdkr| jd jS tdd | jD  }t|}| j|S dS )!dict {group name -> group labels}r}   r   c                 s  s   | ]}|j V  qd S rL   )r   r   rO   rO   rP   r     s     z%BaseGrouper.groups.<locals>.<genexpr>N)r   r   groupsr   r6   r   groupby)rN   Z
to_groupbyr  rO   rO   rP   r    s
    zBaseGrouper.groupsc                 C  s   t | jd jS Nr   )r6   r   Zis_monotonic_increasingr   rO   rO   rP   is_monotonic  s    zBaseGrouper.is_monotonicc                 C  s   t | jd dk  S )zE
        Whether grouper has null value(s) that are dropped.
        r   )rF   r   r   r   rO   rO   rP   rJ     s    zBaseGrouper.has_dropped_na6tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]c                 C  s&   |   \}}t|}t|}|||fS rL   )_get_compressed_codesr   r   )rN   r   obs_group_idsr|   rO   rO   rP   r     s    zBaseGrouper.group_infoc                 C  s:   | j \}}}| jd k	r6t|| jf}|| }t|}|S rL   )r   r   rm   Zlexsortr   )rN   r   r   ZsorterrO   rO   rP   
codes_info  s    
zBaseGrouper.codes_infoz:tuple[npt.NDArray[np.signedinteger], npt.NDArray[np.intp]]c                 C  sV   t | jdkr0t| j| jddd}t|| jdS | jd }|jtjt |j	tj
dfS )Nr}   Tr   r   r   r   )r   r   r=   r   r   r:   r   rm   aranger   intp)rN   r   r   rO   rO   rP   r    s
    
z!BaseGrouper._get_compressed_codesc                 C  s
   t | jS rL   )r   r   r   rO   rO   rP   r|     s    zBaseGrouper.ngroupszlist[npt.NDArray[np.intp]]c                 C  s&   | j }| j\}}}t||| j|ddS )NT)r   )r   r   r;   r   )rN   r   r   Zobs_idsr   rO   rO   rP   reconstructed_codes  s    zBaseGrouper.reconstructed_codesc                 C  sN   t | jdkr&| jd j| jd S | j}dd | jD }t||d| jdS )Nr}   r   c                 S  s   g | ]
}|j qS rO   )r   r   rO   rO   rP   r     s     z,BaseGrouper.result_index.<locals>.<listcomp>F)r   r   Zverify_integrityr  )r   r   r   renamer  r  r7   )rN   r   r   rO   rO   rP   r     s       zBaseGrouper.result_indexzlist[ArrayLike]c                 C  sZ   t | jdkr| jd jgS g }t| j| jD ]&\}}t|}|j|}|| q.|S r   )r   r   Zgroup_arrayliker   r  r   r   r   )rN   	name_listr   r   r   rO   rO   rP   get_group_levels  s    zBaseGrouper.get_group_levelsr   rE   r   )rH   rI   r   r   rK   c                 K  sJ   |dkst t||| jd}| j\}}	}	| j}
|jf |||||
d|S )z;
        Returns the values of a cython operation.
        )ra   r`   rM   )rs   r   r   r   r|   )r   rA   rJ   r   r|   r   )rN   rH   rs   rI   r   r   r   Zcy_opr   r   r|   rO   rO   rP   _cython_operation  s    zBaseGrouper._cython_operation)objr   preserve_dtyperK   c                 C  sr   t |dkr| ||}n,t|jtjs:| ||}d}n| ||}tj|dd}|rjt||j	dd}n|}|S )a1  
        Parameters
        ----------
        obj : Series
        func : function taking a Series and returning a scalar-like
        preserve_dtype : bool
            Whether the aggregation is known to be dtype-preserving.

        Returns
        -------
        np.ndarray or ExtensionArray
        r   TF)Z	try_float)Znumeric_only)
r   _aggregate_series_pure_pythonrw   Z_valuesrm   r   r   Zmaybe_convert_objectsr   rd   )rN   r  r   r  r   r   r   rO   rO   rP   
agg_series  s    zBaseGrouper.agg_seriesznpt.NDArray[np.object_]r  r   rK   c                 C  s   | j \}}}tj|td}tj|dd}d}t|||dd}	t|	D ]F\}
}||}t|}|stt	||j
 d}|jd ||
< |||
< qD|S )Nr   OFr   r   T)r   rm   r   r{   r   r   	enumeratelibreductionZextract_resultZcheck_result_arrayrd   r   )rN   r  r   r   r   r|   r   r   Zinitializedr   rt   r   r   rO   rO   rP   r  B  s    

z)BaseGrouper._aggregate_series_pure_python)TTFNT)r   )r   )r   )r   )F)%r   r   r   r   __annotations__rQ   propertyr   r   r   r   r   r
   r   r   r   r   r   r   r  r   r   r  rD   r  r  rJ   r   r  r  r|   r  r   r  r  r  r  rO   rO   rO   rP   r     s   
      			 "		
  )r   c                   @  s   e Zd ZU dZded< ded< ded< d4dd
dddZedd ZeddddZ	dd Z
d5dddddZedd ZeddddZed dd!d"Zeddd#d$Zed%dd&d'Zed(dd)d*Zed+dd,d-Zd.d/d0d1d2d3Zd	S )6
BinGrouperaU  
    This is an internal Grouper class

    Parameters
    ----------
    bins : the split index of binlabels to group the item of axis
    binlabels : the label list
    mutated : bool, default False
    indexer : np.ndarray[np.intp]

    Examples
    --------
    bins: [2, 4, 6, 8, 10]
    binlabels: DatetimeIndex(['2005-01-01', '2005-01-03',
        '2005-01-05', '2005-01-07', '2005-01-09'],
        dtype='datetime64[ns]', freq='2D')

    the group_info, which contains the label of each item in grouped
    axis, the index of label in label list, group number, is

    (array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5)

    means that, the grouped axis has 10 items, can be grouped into 5
    labels, the first and second items belong to the first label, the
    third and forth items belong to the second label, and so on

    znpt.NDArray[np.int64]binsr6   	binlabelsrF   r   FNrG   )r   rK   c                 C  s<   t || _t|| _|| _|| _t| jt| jks8td S rL   )r   r"  r8   r#  r   r   r   r   )rN   r"  r#  r   r   rO   rO   rP   rQ     s
    

zBinGrouper.__init__c                 C  s   dd t | j| jD }|S )r  c                 S  s   i | ]\}}|t k	r||qS rO   )r   )r   r   valuerO   rO   rP   
<dictcomp>  s    z%BinGrouper.groups.<locals>.<dictcomp>)r   r#  r"  )rN   r   rO   rO   rP   r    s    zBinGrouper.groupsr{   r   c                 C  s   dS )Nr}   rO   r   rO   rO   rP   r     s    zBinGrouper.nkeysc                 C  s   | S )r   rO   r   rO   rO   rP   r     s    zBinGrouper._get_grouperr   r@   )r   r   c                 #  s   |dkr fdd}n fdd}t  j| }d}t| j| jD ]$\}}|tk	rb||||fV  |}qB||k r| jd ||dfV  dS )r   r   c                   s    j | | S rL   r   startedger   rO   rP   <lambda>      z)BinGrouper.get_iterator.<locals>.<lambda>c                   s    j d d | |f S rL   r&  r'  r*  rO   rP   r+    r,  r   N)r   r   r   r"  r#  r   )rN   r   r   Zslicerlengthr(  r)  labelrO   r*  rP   r     s    	zBinGrouper.get_iteratorc                 C  sP   t t}d}t| j| jD ].\}}||k r|tk	rFtt||||< |}q|S r
  )collectionsdefaultdictr   r   r#  r"  r   range)rN   r   rt   r.  binrO   rO   rP   r     s    
zBinGrouper.indicesr  c                 C  s   | j }tj|tjd}ttjd| jf }t|}|t| jkrVt	t||}nt	tjdt|f |}t|||fS )Nr   r   r   )
r|   rm   r  r  Zdiffr_r"  r   r   repeat)rN   r|   r  repr   rO   rO   rP   r     s    zBinGrouper.group_infozlist[np.ndarray]c                 C  s2   t jdt | jdd  | jd d kd f gS )Nr   r}   r   )rm   r3  Zflatnonzeror"  r   rO   rO   rP   r    s    zBinGrouper.reconstructed_codesc                 C  s0   t | jdkr*t| jd r*| jdd  S | jS )Nr   r}   )r   r#  r&   r   rO   rO   rP   r     s    zBinGrouper.result_indexr  c                 C  s   | j gS rL   )r#  r   rO   rO   rP   r     s    zBinGrouper.levelsr  c                 C  s
   | j jgS rL   )r#  ri   r   rO   rO   rP   r    s    zBinGrouper.namesr   c                 C  s   | j }tj||dd d}|gS )NF)Zin_axislevel)r#  r4   ZGrouping)rN   Zlevr   rO   rO   rP   r     s    zBinGrouper.groupingsr9   r   r   r  c                 C  s   t dd S )Nz=This should not be reached; use _aggregate_series_pure_python)rn   )rN   r  r   rO   rO   rP   _aggregate_series_fast  s    z!BinGrouper._aggregate_series_fast)FN)r   )r   r   r   r   r  rQ   r   r  r   r   r   r   r   r   r  r   r   r  r   r7  rO   rO   rO   rP   r!  ^  s8   
  
	
r!  r{   rF   )r   rK   c                 C  sP   t | tr.t|dkrdS | j| || S t | trL| j| || S dS )Nr}   F)rw   r9   r   r   equalsr3   )r  r   r   rO   rO   rP   r     s    

r   c                   @  sr   e Zd ZdddddddddZedd	d
dZedd	ddZdd Zedd	ddZdddddZ	dS )r   r   r   r   r{   rG   r   r   r|   r   rK   c                 C  s2   || _ t|| _|| _|| _t|ts.t|d S rL   )r   r   r   r|   r   rw   r{   r   )rN   r   r   r|   r   rO   rO   rP   rQ     s
    
zDataSplitter.__init__r   c                 C  s   | j | jS rL   )r   r   	_sort_idxr   rO   rO   rP   slabels  s    zDataSplitter.slabelsc                 C  s   t | j| jS rL   )r>   r   r|   r   rO   rO   rP   r:  $  s    zDataSplitter._sort_idxc                 c  sT   | j }| jdkrd S t| j| j\}}t||D ]\}}| |t||V  q2d S r
  )sorted_datar|   r   Zgenerate_slicesr;  r   _chopslice)rN   sdataZstartsZendsr(  endrO   rO   rP   r   )  s    
zDataSplitter.__iter__c                 C  s   | j j| j| jdS )Nr   )r   r   r:  r   r   rO   rO   rP   r<  6  s    zDataSplitter.sorted_datar>  r@   )	slice_objrK   c                 C  s   t | d S rL   r   )rN   r?  rA  rO   rO   rP   r=  :  s    zDataSplitter._chopN)r   )
r   r   r   rQ   r   r;  r:  r   r<  r=  rO   rO   rO   rP   r     s    r   c                   @  s   e Zd ZddddddZdS )SeriesSplitterr9   r>  r?  rA  rK   c                 C  s,   |j |}|j||jdd}|j|ddS )NT)ri   Zfastpathr	  method)_mgr	get_slice_constructorri   __finalize__)rN   r?  rA  mgrZserrO   rO   rP   r=  ?  s    zSeriesSplitter._chopNr   r   r   r=  rO   rO   rO   rP   rB  >  s   rB  c                   @  s   e Zd ZddddddZdS )FrameSplitterr3   r>  rC  c                 C  s.   |j j|d| j d}||}|j|ddS )Nr}   r   r	  rD  )rF  rG  r   rH  rI  )rN   r?  rA  rJ  ZdfrO   rO   rP   r=  G  s    
zFrameSplitter._chopNrK  rO   rO   rO   rP   rL  F  s   rL  r@   rr   r9  c                 C  s"   t | trt}nt}|| |||S rL   )rw   r9   rB  rL  )r   r   r|   r   klassrO   rO   rP   r   R  s    
r   )r   )hr   
__future__r   r/  r   typingr   r   r   r   r   r   r	   r
   Znumpyrm   Zpandas._libsr   r   Zpandas._libs.groupbyZ_libsr	  rl   Zpandas._libs.reductionZ	reductionr  Zpandas._typingr   r   r   r   r   Zpandas.errorsr   Zpandas.util._decoratorsr   Zpandas.core.dtypes.castr   r   Zpandas.core.dtypes.commonr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   Zpandas.core.dtypes.dtypesr%   Zpandas.core.dtypes.missingr&   r'   Zpandas.core.arraysr(   r)   r*   r+   r,   Zpandas.core.arrays.booleanr-   Zpandas.core.arrays.floatingr.   Zpandas.core.arrays.integerr/   Zpandas.core.arrays.maskedr0   r1   Zpandas.core.arrays.string_r2   Zpandas.core.framer3   Zpandas.core.groupbyr4   Zpandas.core.indexes.apir5   r6   r7   r8   Zpandas.core.seriesr9   Zpandas.core.sortingr:   r;   r<   r=   r>   r?   Zpandas.core.genericr@   rA   r   r!  r   r   rB  rL  r   rO   rO   rO   rP   <module>   sZ   (@ 	    j    $. 