U
    ck                     @  s@  d dl mZ d dlmZmZmZmZmZ d dlZ	d dl
mZmZmZmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZmZ d dlm   m!Z" d d
l#m$Z$ d dl%m&Z& d dl'm(Z(m)Z)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 erd dl1m2Z2 edee$d dddFdddddddddddZ3dddddddddd d!Z4dGd"ddd#d$d%Z5dHddd&d'd(Z6dIdd)d*d+Z7dJddd,d-d.Z8d/d0 Z9edee$d1 ddedd2gd3dKdd4d4d4dd5d6d1Z:dLddddd7d8d9Z;dMdddd:d;d<Z<dNdd>d?d@Z=dAdAdBdCdDdEZ>dS )O    )annotations)TYPE_CHECKINGCallableHashableSequencecastN)AggFuncTypeAggFuncTypeBaseAggFuncTypeDict
IndexLabel)AppenderSubstitutiondeprecate_nonkeyword_arguments)rewrite_warning)maybe_downcast_to_dtype)is_integer_dtypeis_list_likeis_nested_list_like	is_scalar)ABCDataFrame	ABCSeries)_shared_docs)Grouper)Index
MultiIndexget_objs_combined_axis)concat)cartesian_product)Series	DataFramez
data : DataFramepivot_table   )indentsmeanFTAllr    r   boolstr)dataaggfuncmarginsdropnamargins_nameobservedsortreturnc                 C  s   t |}t |}t|trg }g }|D ]>}t| |||||||||	|
d}|| |t|d| q&t||dd}|j| ddS t| |||||||||	|
}|j| ddS )N)
valuesindexcolumns
fill_valuer)   r*   r+   r,   r-   r.   __name__r"   )keysaxisr!   )method)_convert_by
isinstancelist__internal_pivot_tableappendgetattrr   Z__finalize__)r(   r0   r1   r2   r)   r3   r*   r+   r,   r-   r.   piecesr5   funcZ_tabletable rA   =/tmp/pip-unpacked-wheel-g7fro6k3/pandas/core/reshape/pivot.pyr!   8   sJ    

z!AggFuncTypeBase | AggFuncTypeDictc                 C  s  || }|dk	}|rt |r*d}t|}n
d}|g}|D ]}|| kr8t|q8g }|| D ]@}t|trn|j}z|| kr|| W qZ tk
r   Y qZX qZt|t| j	k r| | } nB| j	}|D ].}z|
|}W q tttfk
r   Y qX qt|}| j||	|
d}d}tdt|d ||}W 5 Q R X |rt|trt|j	r|jdd	}|D ]r}|| kr`t| | r`||kr`t|| s`t|| ts`t| | jtjr`t|| | | j||< q`|}|jjd
kr^|r^|jjdt| }g }tt|t|D ]<}|jj| }|dks:||krF|| n
|| q||}|st|jtrtjt|jj|jjd}|j|dd}t|j	trtjt|j	j|j	jd}|j|d
d}|
dkrt|tr|j d
d}|dk	r
|j!|dd}|rD|r*| | " j#d
d } t$|| |||||||d	}|rl|sl|j	jd
krl|j%dd
d}t|dkrt|dkr|j&}t|tr|r|jdd
d}|S )zL
    Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``.
    NTF)r-   r.   zpivot_table dropped a column because it failed to aggregate. This behavior is deprecated and will raise in a future version of pandas. Select only the columns that can be aggregated.z!The default value of numeric_only)Ztarget_messageZtarget_categoryZnew_messageall)howr"   namesr   r6   Zinfer)Zdowncast)rowscolsr)   r-   r,   r3   )rD   r6   )'r   r:   KeyErrorr9   r   keyr<   	TypeErrorlenr2   Zdrop
ValueErrorgroupbyr   FutureWarningaggr   r+   r   dtypenpr   r1   nlevelsrF   rangeunstackr   from_arraysr   levelsreindexZ
sort_indexfillnaZnotnarC   _add_marginsZ	droplevelT)r(   r0   r1   r2   r)   r3   r*   r+   r,   r-   r.   r5   Zvalues_passedZvalues_multiiZ	to_filterxrK   ZgroupedmsgZaggedvr@   Zindex_namesZ
to_unstacknamemrA   rA   rB   r;   q   s    





 

 
 
r;   zDataFrame | Series)r@   r(   r,   c	              	   C  s  t |tstdd| d}	| jjD ]}
|| j|
kr&t|	q&t||||}| jdkr| jjdd  D ]}
|| j|
krlt|	qlt	|dkr|fdt	|d   }n|}|st | t
r| t||| iS |rt| |||||||}t |ts|S |\}}}n>t | tstt| ||||||}t |tsB|S |\}}}|j|j|d}|D ]0}t |tr~|| ||< n||d  ||< q`dd	lm} ||t|gd
j}|jj}t|jD ],}||gj}|| jt|fd||< q||}||j_|S )Nz&margins_name argument must be a stringzConflicting name "z" in margins   r"    )r3   r   r   )r2   )args)r9   r'   rN   r1   rF   get_level_values_compute_grand_marginndimr2   rM   r   _appendr   _generate_marginal_resultstupler   AssertionError)_generate_marginal_results_without_valuesrY   pandasr    r   r\   setZdtypesZselect_dtypesapplyr   )r@   r(   r0   rH   rI   r)   r-   r,   r3   r_   levelgrand_marginrK   Zmarginal_result_setresultmargin_keys
row_marginkr    Zmargin_dummyZ	row_namesrR   rA   rA   rB   r[     sx    



             
 
r[   )r(   r,   c              	   C  s   |ri }| |   D ]\}}zlt|tr:t|| ||< nLt|trzt|| trht|||  ||< q|| |||< n||||< W q tk
r   Y qX q|S ||| jiS d S N)itemsr9   r'   r=   dictrL   r1   )r(   r0   r)   r,   rs   rw   r`   rA   rA   rB   rh   \  s    

rh   )r,   c                   s  t  dkrBg }g }	 fdd}
t |dkr|||  j||d|}d}| jd||dD ]8\}}|
|}| }|| ||< || |	| q`nddlm} d}| jd||dD ]d\}}t  dkr|
|}n}|| |||j}t	|g|j
jd|_
|| |	| qt||d	}t |dkrL|S n
| }| j}	t  dkr| |  j |d|}| }t  gttt   }|j
||_
nttj|jd
}||	|fS )Nr   c                   s   | fdt  d   S )Nrd   r"   rM   )rK   rI   r,   rA   rB   _all_keyz  s    z,_generate_marginal_results.<locals>._all_keyr-   r"   rr   r6   r-   r   ra   rG   r1   )rM   rO   rQ   copyr<   ro   r    rq   r\   r   r1   ra   r   r2   stackr:   rU   Zreorder_levelsr   rS   nan)r@   r(   r0   rH   rI   r)   r-   r,   Ztable_piecesru   r}   marginZcat_axisrK   Zpieceall_keyr    Ztransformed_piecert   rv   Z	new_orderrA   r|   rB   rk   r  sH    



rk   )r@   r,   c                   s   t  dkrg } fdd}t |dkr`|| j||d|}	| }
|	| |
< | }||
 q|jdd|d|}	| }
|	| |
< | }||
 |S n
| }| j}t  r|  j |d|}nttj|jd}|||fS )Nr   c                     s&   t  dkrS fdt  d   S )Nr"   rd   r{   rA   r|   rA   rB   r}     s    z;_generate_marginal_results_without_values.<locals>._all_keyr~   r   r   )rM   rO   rq   r<   r2   r   rS   r   )r@   r(   rH   rI   r)   r-   r,   ru   r}   r   r   rt   rv   rA   r|   rB   rn     s*    
rn   c                 C  sF   | d krg } n4t | s2t| tjtttfs2t| r:| g} nt| } | S rx   )	r   r9   rS   Zndarrayr   r   r   callabler:   )ZbyrA   rA   rB   r8     s    r8   pivotr(   )versionZallowed_argszIndexLabel | None)r(   r1   r2   r0   r/   c                   s4  |d krt dt|}|d krV|d k	r6t|}ng }|d k} j|| |d}n|d krt jtr fddt jjD }qt	 j jj
dg}n fddt|D } fdd|D }	||	 t|}
t|rt|tsttt |} j | j|
|d}n j | j|
d	}||S )
Nz.pivot() missing 1 required argument: 'columns')r<   c                   s   g | ]} j |qS rA   )r1   rg   ).0r]   r(   rA   rB   
<listcomp>  s    zpivot.<locals>.<listcomp>r   c                   s   g | ]} | qS rA   rA   )r   idxr   rA   rB   r     s     c                   s   g | ]} | qS rA   rA   )r   colr   rA   rB   r     s     )r1   r2   r   )rL   comZconvert_to_list_likeZ	set_indexr9   r1   r   rU   rT   r   ra   extendrW   r   rl   r   r   r   Z_constructorZ_valuesZ_constructor_slicedrV   )r(   r1   r2   r0   Zcolumns_listlikerI   r<   ZindexedZ
index_listZdata_columnsZ
multiindexrA   r   rB   r     s>    	
 



  )r*   r,   r+   r/   c
                 C  sX  |dkr|dk	rt d|dk	r0|dkr0t dt| s>| g} t|sL|g}d}
dd | | D }|rtt|ddd}
t| |d	d
}t||dd
}t||\}}}}ddlm} tt|| tt||}|||
d}|dkrd|d< t	dd}n||d< d|i}|j
d|||||d|}|	dk	r8t||	||d}|j|dd}|j|dd}|S )a  
    Compute a simple cross tabulation of two (or more) factors.

    By default, computes a frequency table of the factors unless an
    array of values and an aggregation function are passed.

    Parameters
    ----------
    index : array-like, Series, or list of arrays/Series
        Values to group by in the rows.
    columns : array-like, Series, or list of arrays/Series
        Values to group by in the columns.
    values : array-like, optional
        Array of values to aggregate according to the factors.
        Requires `aggfunc` be specified.
    rownames : sequence, default None
        If passed, must match number of row arrays passed.
    colnames : sequence, default None
        If passed, must match number of column arrays passed.
    aggfunc : function, optional
        If specified, requires `values` be specified as well.
    margins : bool, default False
        Add row/column margins (subtotals).
    margins_name : str, default 'All'
        Name of the row/column that will contain the totals
        when margins is True.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.
    normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False
        Normalize by dividing all values by the sum of values.

        - If passed 'all' or `True`, will normalize over all values.
        - If passed 'index' will normalize over each row.
        - If passed 'columns' will normalize over each column.
        - If margins is `True`, will also normalize margin values.

    Returns
    -------
    DataFrame
        Cross tabulation of the data.

    See Also
    --------
    DataFrame.pivot : Reshape data based on column values.
    pivot_table : Create a pivot table as a DataFrame.

    Notes
    -----
    Any Series passed will have their name attributes used unless row or column
    names for the cross-tabulation are specified.

    Any input passed containing Categorical data will have **all** of its
    categories included in the cross-tabulation, even if the actual data does
    not contain any instances of a particular category.

    In the event that there aren't overlapping indexes an empty DataFrame will
    be returned.

    Reference :ref:`the user guide <reshaping.crosstabulations>` for more examples.

    Examples
    --------
    >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
    ...               "bar", "bar", "foo", "foo", "foo"], dtype=object)
    >>> b = np.array(["one", "one", "one", "two", "one", "one",
    ...               "one", "two", "two", "two", "one"], dtype=object)
    >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
    ...               "shiny", "dull", "shiny", "shiny", "shiny"],
    ...              dtype=object)
    >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
    b   one        two
    c   dull shiny dull shiny
    a
    bar    1     2    1     0
    foo    2     2    1     2

    Here 'c' and 'f' are not represented in the data and will not be
    shown in the output because dropna is True by default. Set
    dropna=False to preserve categories with no data.

    >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
    >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
    >>> pd.crosstab(foo, bar)
    col_0  d  e
    row_0
    a      1  0
    b      0  1
    >>> pd.crosstab(foo, bar, dropna=False)
    col_0  d  e  f
    row_0
    a      1  0  0
    b      0  1  0
    c      0  0  0
    Nz&aggfunc cannot be used without values.z)values cannot be used without an aggfunc.c                 S  s   g | ]}t |ttfr|qS rA   )r9   r   r   )r   r^   rA   rA   rB   r     s      zcrosstab.<locals>.<listcomp>TF)Z	intersectr.   rowprefixr   r   r   r   	__dummy__)r)   r3   r)   )r1   r2   r*   r,   r+   )	normalizer*   r,   )r1   r6   r"   )r2   r6   )r   )rN   r   r   
_get_names_build_names_mapperro   r    rz   ziprM   r!   
_normalizeZrename_axis)r1   r2   r0   rownamescolnamesr)   r*   r,   r+   r   Z
common_idxZ	pass_objsrownames_mapperunique_rownamescolnames_mapperunique_colnamesr    r(   Zdfkwargsr@   rA   rA   rB   crosstab  sf    j 
   r   )r@   r*   r/   c              
   C  sH  t |ttfsRddd}z|| }W n, tk
rP } ztd|W 5 d }~X Y nX |dkrdd dd d	d d
}|d |d< z|| }W n, tk
r } ztd|W 5 d }~X Y nX || } | d} nv|dkr<| j}| j}	| jdd d f j	}
||
k||
k@ rt| d| jd ddf }| jdd df }| jd dd df } t
| |dd} |dkr||  }t| |gdd} | d} |	| _n|dkr||  }| |} | d} || _np|dks|dkr2||  }||  }d|j|< t| |gdd} | |} | d} || _|	| _ntdntd| S )Nr1   r2   )r   r"   zNot a valid normalize argumentFc                 S  s   | | j ddj dd S Nr"   rG   r   sumr^   rA   rA   rB   <lambda>      z_normalize.<locals>.<lambda>c                 S  s   | |    S rx   r   r   rA   rA   rB   r     r   c                 S  s   | j | jddddS r   )divr   r   rA   rA   rB   r     r   )rC   r2   r1   rC   Tr   z not in pivoted DataFrame)r   r*   r"   rG   zNot a valid margins argument)r9   r&   r'   rJ   rN   rZ   r1   r2   Zilocra   r   r   r   rj   loc)r@   r   r*   r,   Z	axis_subserrZnormalizersfZtable_indexZtable_columnsZlast_ind_or_colZcolumn_marginZindex_marginrA   rA   rB   r     sd    










r   r   r   c                 C  s   |d krVg }t | D ]>\}}t|tr>|jd k	r>||j q|| d|  qn*t|t| krntdt|tst|}|S )N_z*arrays and names must have the same length)	enumerater9   r   ra   r<   rM   rm   r:   )ZarrsrF   r   r]   ZarrrA   rA   rB   r     s    
r   z	list[str]z;tuple[dict[str, str], list[str], dict[str, str], list[str]])r   r   r/   c                   s   dd }t | t |}|| ||B |B   fddt| D } fddt| D } fddt|D } fddt|D }||||fS )	a  
    Given the names of a DataFrame's rows and columns, returns a set of unique row
    and column names and mappers that convert to original names.

    A row or column name is replaced if it is duplicate among the rows of the inputs,
    among the columns of the inputs or between the rows and the columns.

    Parameters
    ----------
    rownames: list[str]
    colnames: list[str]

    Returns
    -------
    Tuple(Dict[str, str], List[str], Dict[str, str], List[str])

    rownames_mapper: dict[str, str]
        a dictionary with new row names as keys and original rownames as values
    unique_rownames: list[str]
        a list of rownames with duplicate names replaced by dummy names
    colnames_mapper: dict[str, str]
        a dictionary with new column names as keys and original column names as values
    unique_colnames: list[str]
        a list of column names with duplicate names replaced by dummy names

    c                   s   t    fdd| D S )Nc                   s   h | ]}| kr|qS rA   rA   )r   ra   seenrA   rB   	<setcomp>L  s      z>_build_names_mapper.<locals>.get_duplicates.<locals>.<setcomp>)rp   rE   rA   r   rB   get_duplicatesJ  s    z+_build_names_mapper.<locals>.get_duplicatesc                   s$   i | ]\}}| krd | |qS Zrow_rA   r   r]   ra   Z	dup_namesrA   rB   
<dictcomp>Q  s      z'_build_names_mapper.<locals>.<dictcomp>c                   s&   g | ]\}}| krd | n|qS r   rA   r   r   rA   rB   r   T  s    z'_build_names_mapper.<locals>.<listcomp>c                   s$   i | ]\}}| krd | |qS Zcol_rA   r   r   rA   rB   r   X  s      c                   s&   g | ]\}}| krd | n|qS r   rA   r   r   rA   rB   r   [  s    )rp   intersectionr   )r   r   r   Zshared_namesr   r   r   r   rA   r   rB   r   ,  s     



r   )
NNNr$   NFTr%   FT)Nr%   N)r%   )r%   )r%   )NNN)NNNNFr%   TF)r%   )r   )?
__future__r   typingr   r   r   r   r   ZnumpyrS   Zpandas._typingr   r	   r
   r   Zpandas.util._decoratorsr   r   r   Zpandas.util._exceptionsr   Zpandas.core.dtypes.castr   Zpandas.core.dtypes.commonr   r   r   r   Zpandas.core.dtypes.genericr   r   Zpandas.core.commoncorecommonr   Zpandas.core.framer   Zpandas.core.groupbyr   Zpandas.core.indexes.apir   r   r   Zpandas.core.reshape.concatr   Zpandas.core.reshape.utilr   Zpandas.core.seriesr   ro   r    r!   r;   r[   rh   rk   rn   r8   r   r   r   r   r   rA   rA   rA   rB   <module>   s~             "7 #   P A &   9         , R