U
    ceb                     @   s  d dl Zd dlZd dlmZmZmZmZmZ d dl	m
Z ejdd Zdd Zdd Zd	d
 Zdd Zejddddgejdddgejddddgejdddgejdddgdd Zejdddgejdddd dddd gdddddgdddddgfddd dddd gdddddgdddddgfddd dddd gdddddgdddddgfgd!d" Zejd#d$ Zejd%ddddddgd&d'd(gfddddddgd&d)d(gfddddddgd&d*d+gfdddd,d-d-gd&d*d+gfgd.d/ Zejd0d1 Zejd2ddd ddd3d4d5d6dd g	d,d,d7d-d-d-d-d7d7g	fddd ddd3dd gd,d,d7d7d7d7gfddd dd3d4d5d6gd,d,d-d-d-d-gfddd dd3gd,d,d7gfgd8d9 Zejd:d; Zejd<dddgejd=d>d?gd@dAdBgdCfdddddgeedgedDdEdFgedGdHejggd d d d gd dddgdd ddggd@dAdBgdIfgejdddgdJdK ZejdddgejdLddgejdMdejdddd d d ddd d d d gejdNfded,d-d-dOdOdOd,d,dOdOdOdOgfgdPdQ Z dRdS Z!ejdddgejdMdejdddd d d ddd d d d gejdNfded,d-d-dOdOdOd,d,dOdOdOdOgfgdTdU Z"ejdddgejdMdejdddd d d ddd d d d d d d d d d gejdNfded,d-d-dOdOdOd,d,dOdOdOdOdOdOdOdOdOdOgfgdVdW Z#ejdddgejdXddYdZd[d\d]d^d_d`dadbdcddgfddYd[d]d_dagfgejdMdejdd dd dd dd dd d d gejdNfded7dOd7dOd7dOd7dOd7dOdOdOgfgdedf Z$ejdddgejdLddgejdMdejdddd d d ddd d d d gejdNfded,d-d-dOdOdOd,d,dOdOdOdOgfgdgdh Z%ejdiddjdddgfddkd,d,d7gfgdldm Z&ejdndoe'dpdqddrdsdsdtgfdue'dvdwg dqddrdsdxdwgfgejdddgdydz Z(ejd{d|d}gd~d Z)dd Z*dd Z+dd Z,dd Z-dd Z.dS )    N)CategoricalIndex	DataFrameIndex
MultiIndexSeriesc                	   C   s4   t ddddddgddddddgddddddgdS )	NmalefemalelowmediumhighUSFR)gender	educationcountryr    r   r   P/tmp/pip-unpacked-wheel-g7fro6k3/pandas/tests/groupby/test_frame_value_counts.pyeducation_df   s    r   c              	   C   s4   | j ddd}tjtdd |  W 5 Q R X d S )Nr      axisr   match)groupbypytestraisesNotImplementedErrorvalue_countsr   gpr   r   r   	test_axis   s    r!   c              	   C   s6   |  d}tjtdd |jdgd W 5 Q R X d S )Nr   subsetr   r"   )r   r   r   
ValueErrorr   r   r   r   r   test_bad_subset   s    
r%   c                 C   sZ   |  dddg jdd}tdddddgtjdd	d
ddgdddgdd}t|| d S )Nr   r   r   T	normalize      ?      ?r   r   r	   r   r   r   r   r   r
   r   r   r   r   r   r	   namesdataindex)r   r   r   r   from_tuplestmassert_series_equal)r   resultexpectedr   r   r   
test_basic%   s     r9   c                 C   s   | | j |||dS )Nr'   sort	ascending)r   )dfkeysr'   r;   r<   r   r   r   _frame_value_counts:   s    r?   r   columnarrayfunctionr'   TFzsort, ascending)FN)TT)TFas_indexframec                    s  d d j  fddd| } j||d}|ddg j|||d}	|r|tddg|||}
|rrt|	|
 n|rzd	nd
}|
 jd|idd}
|dkr|
jddidd}
t	
|
d dd|
d< n0|dkr|
d dk|
d< nt	
|
d dd|
d< t|	|
 n d d  d   d< |d j|||d}
d |
_|r|
jjdd}|d jdjd|d< |d jdjd|d< |d= |jdd idd}t||
_t|	|
 nV|
dd|
d jdjd |
dd|
d jdjd |
d= t|	|
 d S )Nr   c                    s    d |  dkS )Nr   r   r   )xr   r   r   <lambda>X       z6test_against_frame_and_seriesgroupby.<locals>.<lambda>)r@   rA   rB   )byrC   r   r   r:   
proportioncountr   r   r   r@   level_0r   r   rB   -ZbothFr3      )valuesr   r   applyr?   r5   r6   reset_indexrenamenpwhereassert_frame_equalnamer3   Zto_framestrsplitgetr   
from_frameinsert)r   r   r'   r;   r<   rC   rD   rI   r    r7   r8   rW   Zindex_framer   rF   r   $test_against_frame_and_seriesgroupby>   sd    
        ""r]   zCsort, ascending, expected_rows, expected_count, expected_group_sizer   rO         c           
         s   j ddgddd}|d j|||d}t }	dD ]  fdd	|D |	 < q2|rn||	d
< |	d
  |  < n||	d< t||	 d S )Nr   r   F)rC   r;   r   r:   )r   r   r   c                    s   g | ]}  | qS r   r   .0rowr@   r   r   r   
<listcomp>   s     z!test_compound.<locals>.<listcomp>rJ   rK   )r   r   r   r5   rV   )
r   r'   r;   r<   expected_rowsZexpected_countZexpected_group_sizer    r7   r8   r   rc   r   test_compound   s      rf   c                   C   s4   t ddddgddddgddddgdddd	d
gdS )Nr   rO   r_      r   )keynum_legs	num_wingsZfalconZdogcatZantrN   r   r   r   r   r   
animals_df   s    "
rl   z9sort, ascending, normalize, expected_data, expected_index)r   r   r   )rO   r_   rg   )rO   r   r   )rO   rg   r_   )r_   rO   rg   )r   rO   r   r(   r)   c           	      C   s^   | j |||d}t|tj|dddgdd}t|| | dj |||d}t|| d S )N)r;   r<   r'   rh   ri   rj   r/   r1   )r   r   r   from_arraysr5   r6   r   )	rl   r;   r<   r'   expected_dataexpected_indexresult_framer8   result_frame_groupbyr   r   r   test_data_frame_value_counts   s&       
  rr   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r_   rg   r^   rO            )ABCD)rT   nanr   )nr   r   r   nulls_df   s    r|   z:group_dropna, count_dropna, expected_rows, expected_valuesrs   ru   rg   rt   g      ?c           
         sr   j ddg|d}|jdd|d}t }jD ]  fdd|D | < q.t|}t||d}	t||	 d S )	Nrv   rw   )dropnaT)r'   r;   r}   c                    s   g | ]}  | qS r   r   r`   r@   r|   r   r   rd      s     z,test_dropna_combinations.<locals>.<listcomp>r1   )	r   r   r   columnsr   r[   r   r5   r6   )
r|   Zgroup_dropnaZcount_dropnare   expected_valuesr    r7   r   r3   r8   r   r~   r   test_dropna_combinations   s    

r   c                 C   s(   t ddddgddddgd| | dgdS )Nr   JohnAnneBethSmithLouise)rh   
first_namemiddle_namer   )Znulls_fixturer   r   r   names_with_nulls_df   s    


r   z%dropna, expected_data, expected_indexr   r   )r   r   )r   r   rh   r   r   r/   r   r   r   r   r   )levelscodesr0   c                 C   s^   | j ||d}t||d}|r.|tt| }t|| | dj ||d}t|| d S )N)r}   r'   r1   rh   )r   r   floatlenr5   r6   r   )r   r}   r'   rn   ro   rp   r8   rq   r   r   r   #test_data_frame_value_counts_dropna  s    !
 r   observedznormalize, expected_data)Zdtype        c                 C   s   |  djd||d}|j|d}tjddddd	d
ddddddgdddgd}t||d}tdD ]"}	|jjt	|jj
|	 |	d|_qb|rt|| n |j|rdndd}
t||
 d S )Ncategoryr   rC   r   r&   r*   r+   r,   r   r   r	   r   r   r
   r   r   r   r-   r.   r   r   r	   r   r   r
   r   r   r   r   r   r
   r   r   r/   r1   r^   levelrJ   rK   rW   )astyper   r   r   r4   r   ranger3   
set_levelsr   r   r5   r6   rR   rV   r   rC   r   r'   rn   r    r7   ro   expected_seriesir8   r   r   r   =test_categorical_single_grouper_with_only_observed_categories7  sJ    
   

r   c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|dddgdd	}t	d
D ]@}	t
|jj|	 }
|	dkr|
| d jj}
|jj|
|	d|_qd|rt|| n |j|rdndd}t|| d S )Nr   r   ASIAr   r&   r   r   r/   r1   r^   r   r   rJ   rK   r   )copyr   rk   Zadd_categoriesr   r   r   r   r4   r   r   r3   r   Zset_categories
categoriesr   r5   r6   rR   rV   )r   rC   r   ro   r'   rn   r    r7   r   r   Zindex_levelr8   r   r   r   !assert_categorical_single_groupers  s0    

r   c                 C   s4   ddddddddd	d
ddg}t | |d|||d d S )Nr*   r+   r,   r   r   r   r-   r.   r   r   r   r   Tr   rC   r   ro   r'   rn   r   r   rC   r'   rn   ro   r   r   r   -test_categorical_single_grouper_observed_true  s*    r   c                 C   s@   ddddddddd	d
ddddddddg}t | |d|||d d S )Nr*   r+   r,   r   r   r   r-   r.   r   r   r   r   )r   r   r	   )r   r   r   )r   r   r
   )r   r   r	   )r   r   r   )r   r   r
   Fr   r   r   r   r   r   .test_categorical_single_grouper_observed_false  s6    +r   zobserved, expected_index)r   r   r   )r   r   r   )r   r	   r   )r   r	   r   )r   r
   r   )r   r
   r   )r   r   r   )r   r   r   )r   r	   r   )r   r	   r   )r   r
   r   )r   r
   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r^||dk n|tj|dddgdd	}td
D ]"}	|jj	t
|jj|	 |	d|_q|rt|| n |j|rdndd}
t||
 d S )Nr   r   r   r   r&   r   r   r/   r1   rO   r   rJ   rK   r   r   r   r   r   r   r   r4   r   r3   r   r   r   r5   r6   rR   rV   )r   rC   r   ro   r'   rn   r    r7   r   r   r8   r   r   r   "test_categorical_multiple_groupers	  s6    2   

r   c                 C   s   |   } | d d| d< | d d| d< | jd||d}|j|d}ddd	d
ddddddddg}t|tj|dddgdd}tddD ]"}	|jj	t
|jj|	 |	d|_q|rt|| n |j|rdndd}
t||
 d S )Nr   r   r   r   r   r&   r*   r+   r,   r   r   r   r-   r.   r   r   r   r   r/   r1   r   r^   r   rJ   rK   r   r   r   r   r   r   test_categorical_non_groupersY  sH     

r   z*normalize, expected_label, expected_valuesrK   rJ   c                 C   s   t dddgdddgd}|jdddgddd	 gd
d}|jd| d}t ddddgddddgddddgddddg||i}t|| d S )Nr   rO   r^   )rv   rw   r_   rs   rv   c                 S   s   | dkrdS dS )Nr   ru   rt   r   )r   r   r   r   rG     rH   z&test_mixed_groupings.<locals>.<lambda>FrC   T)r;   r'   rL   Zlevel_2rt   ru   rw   )r   r   r   r5   rV   )r'   expected_labelr   r=   r    r7   r8   r   r   r   test_mixed_groupings  s"    	     	r   ztest, columns, expected_namesrepeatZabbdeadber   Zabcdlevel_1cc           	      C   s   t dddddgdddd	d
gg|d}ddg}|jdddgdg|d }|rptdtj||dd}t|| n@dd |D }t|}d|d< |	d t ||d}t
|| d S )Nr   r^   rs   ru   	   rO   r_   rg   rt   
   r   )r   r   ru   r^   rs   r   )rO   r   rt   r_   rg   r   r   r   r   r   r   r/   r1   c                 S   s   g | ]}t |d g qS )r   )listr`   r   r   r   rd     s     z0test_column_label_duplicates.<locals>.<listcomp>r   rK   )r   r   r   r   r   r4   r5   r6   r   appendrV   )	testr   Zexpected_namesrC   r=   rn   r7   r8   Zexpected_columnsr   r   r   test_column_label_duplicates  s$    $
r   znormalize, expected_label)FrK   )TrJ   c              	   C   sZ   t dddggdd|gdjddd}d	| d
}tjt|d |j| d W 5 Q R X d S )Nr   rO   r^   r   r   r   Fr   zColumn label 'z' is duplicate of result columnr   r&   )r   r   r   r   r$   r   )r'   r   gbmsgr   r   r   test_result_label_duplicates  s    	 r   c                  C   sX   t dddgi} | ddg}| }tdgtjddggd dgdd}t|| d S )Nr   r   rO   r/   rN   )r   r   r   r   r   r4   r5   r6   )r=   r   r7   r8   r   r   r   test_ambiguous_grouping  s
    "r   c               	   C   sZ   t dddgdddgddddgd	} d
}tjt|d | djdgd W 5 Q R X d S )Nr   r   r   rE   yc1c2r   r   rN   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r   r#   r   r   r   r$   r   r   r=   r   r   r   r   "test_subset_overlaps_gb_key_raises  s    $r   c               	   C   sZ   t dddgdddgddddgd	} d
}tjt|d | djdgd W 5 Q R X d S )Nr   r   r   rE   r   r   r   r   rN   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r   c3r#   r   r   r   r   r   !test_subset_doesnt_exist_in_frame  s    $r   c                  C   st   t dddgdddgddddgd	} | jdd
jdgd}tddgtjddgddggd dgdd	}t|| d S )Nr   r   r   rE   r   r   r   r   rN   r   r   r#   rO   r/   r   r   r   r   r   rm   r5   r6   r=   r7   r8   r   r   r   test_subset  s    $ r   c                  C   s   t dddgdddgdddggdddgdddgd	} | jdd
jdgd}tddgtjddgddgddggd ddgdd}t|| d S )Nr   rE   r   r   r   r   r   r   )r3   r   r   r#   rO   r/   rN   r   r   r   r   r   test_subset_duplicate_columns  s     r   )/ZnumpyrT   r   Zpandasr   r   r   r   r   Zpandas._testingZ_testingr5   Zfixturer   r!   r%   r9   r?   markZparametrizer]   rf   rl   rr   r|   r   r   rm   rz   r   rA   Zint64r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s  

>***
	

""


	
 * 
0$* 
& $$ * $* /


	
