U
    cP                  
   @   s  d Z ddlZddlZddlmZmZ ddlmZ ddl	m
  mZ ddlmZ ddlZddlmZ ddlmZ ejdd Zejd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Z dd Z!ej"j#dddd  Z$ej"j#ddd!d" Z%d#d$ Z&d%d& Z'd'd( Z(d)d* Z)d+d, Z*ej"+d-ej,e-d.e.d.dej/gd/d0 Z0ej"+d1d2d3gd4d5 Z1d6d7 Z2d8d9 Z3d:d; Z4ej"+d<d2d3gej"j#d=dd>d? Z5ej"+d<d2d3gej"j#d=dd@dA Z6ej"+dBdCdDgej"+d<d2d3gdEdF Z7ej"+dBdCdDgej"+dGej8ej9gdHdI Z:dJdK Z;e<dLdMdN Z=e<dLdOdP Z>e<dLdQdR Z?dSdT Z@dUdV ZAej"+dWdXdYdZge9d3d3d3gfdXdYdge9d3d3d2gfgd[d\ ZBd]d^ ZCej"+d_ejDejEej-gd`da ZFdbdc ZGddde ZHdfdg ZIdhdi ZJdS )jz
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)pa_version_under2p0pa_version_under6p0)PerformanceWarning)is_dtype_equal)ArrowStringArrayc                 C   s   t j| dS )z=Fixture giving StringDtype from parametrized 'string_storage')storage)pdStringDtype)string_storage r   K/tmp/pip-unpacked-wheel-g7fro6k3/pandas/tests/arrays/string_/test_string.pydtype   s    r   c                 C   s   |   S )z3Fixture giving array type from parametrized 'dtype')Zconstruct_array_typer   r   r   r   cls   s    r   c                 C   s   t dt jdt jdg| di}d}t||ks4td}t|j|ksJt| jdkrXdnd	}d
| d}t|jj|ks|td S )NAabr   z      A
0     a
1  <NA>
2     bz40       a
1    <NA>
2       b
Name: A, dtype: stringpyarrowr   StringArray<z+>
['a', <NA>, 'b']
Length: 3, dtype: string)r   	DataFramearrayNAreprAssertionErrorr   r   )r   dfexpectedZarr_namer   r   r   	test_repr"   s     r   c                 C   s6   |  dd dg}|d d k	s t|d tjks2td S )Nr   r      )_from_sequencer   r   r   )r   r   r   r   r   test_none_to_nan/   s    r    c              	   C   s   |  ddg}| tjjkr d}nd}tjt|d d|d< W 5 Q R X | tjjkrXd}nd}tjt|d td	d
g|d d < W 5 Q R X d S )Nr   r   z4Cannot set non-string value '10' into a StringArray.Scalar must be NA or strmatch
   r   zMust provide strings.r      )	r   r   arraysr   pytestraises
ValueErrornpr   )r   arrmsgr   r   r   test_setitem_validates5   s    r-   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr   cr   dr   )r   r   tmassert_extension_array_equal)r   r+   r   r   r   r   test_setitem_with_scalar_stringG   s    r2   c                 C   s   | j dkr,d}tjj|td}|j| ntjjdtd}|j| ttj	ddd}d |d< |
| }t|j| s~t|
d	}t|| d S )
Nr   z6ValueError: Could not convert object to NumPy datetime)reasonr(   z/GH#36153 casting from StringArray to dt64 fails2000   )Zperiodsr   zdatetime64[ns])r   r'   markxfailr)   node
add_markerr   SeriesZ
date_rangeastyper   r   r   r0   assert_series_equal)r   requestr3   r6   serZcastedresultr   r   r   test_astype_roundtripP   s    
 

r@   c                 C   s   | j dkr*d}tjjt|d}|j| tjdddd d g| d}tjdd	d d
d g| d}|| }tjddd d d g| d}t	
|| ||}t	
|| ||}tjddd d d g| d}t	
|| |j|dd}tjddddd g| d}t	
|| d S )Nr   zLunsupported operand type(s) for +: 'ArrowStringArray' and 'ArrowStringArray'r(   r3   r   r   r.   r   xyzaxZbyxaZyb-)Z
fill_valuezc-z-z)r   r'   r6   r7   NotImplementedErrorr8   r9   r   r:   r0   r<   addZradd)r   r=   r3   r6   r   r   r?   r   r   r   r   test_adde   s$    


rJ   c              	   C   s   | j dkr*d}tjjd |d}|j| tjdddg| d}tjdddggt	d}tj
tdd	 ||  W 5 Q R X t|}tj
tdd	 ||  W 5 Q R X d S )
Nr   z*Failed: DID NOT RAISE <class 'ValueError'>rA   r   r   r.   r   z3 != 1r"   )r   r'   r6   r7   r8   r9   r   r   r*   objectr(   r)   r:   )r   r=   r3   r6   r   r   sr   r   r   test_add_2d   s    

rM   c                 C   s   | j dkr*d}tjjt|d}|j| tjddd d g| d}dd dd g}|| }tjd	d d d g| d}t	
|| || }tjd
d d d g| d}t	
|| d S )Nr   z@unsupported operand type(s) for +: 'ArrowStringArray' and 'list'rA   r   r   r   rB   rC   rE   rF   r   r'   r6   r7   rH   r8   r9   r   r   r0   r1   )r   r=   r3   r6   r   otherr?   r   r   r   r   test_add_sequence   s    
rP   c                 C   s~   | j dkr*d}tjjt|d}|j| tjddd g| d}|d }tjdd	d g| d}t	
|| d| }t	
|| d S )
Nr   z?unsupported operand type(s) for *: 'ArrowStringArray' and 'int'rA   r   r   r   r%   ZaaZbbrN   )r   r=   r3   r6   r   r?   r   r   r   r   test_mul   s    
rQ   zGH-28527)r3   c                 C   s   t jddddg| d}t dddd	gg}||tks<t|| }t d
dddgg| }t|| || }t ddddgg| }t|| d S )Nr   r   r.   r/   r   tuvwatZbuZcvZdwtaZubZvcwd)	r   r   r   __add__NotImplementedr   r;   r0   assert_frame_equalr   r+   r   r?   r   r   r   r   test_add_strings   s    r]   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tksDt|| }t dtjtjtjgg| }t	
|| || }t dtjtjtjgg| }t	
|| d S )Nr   r   r   rB   rC   rE   rF   )r   r   r*   nanr   rY   rZ   r   r;   r0   r[   r\   r   r   r   test_add_frame   s      r_   c                    sp   d| j  d tjdd dg|d}dt| }tj fdd|D td}tj|dd}t|| d S )N__r   r.   r   c                    s   g | ]}t | qS r   )getattr).0itemop_namerO   r   r   
<listcomp>   s     z2test_comparison_methods_scalar.<locals>.<listcomp>boolean)__name__r   r   ra   r*   rK   r0   r1   )comparison_opr   r   r?   r   r   rd   r   test_comparison_methods_scalar   s    rj   c                 C   sV   d| j  d}tjdd dg|d}t||tj}tjd d d gdd}t|| d S )Nr`   r   r.   r   rg   )rh   r   r   ra   r   r0   r1   )ri   r   re   r   r?   r   r   r   r   $test_comparison_methods_scalar_pd_na   s
    rk   c              	   C   s   d| j  d}tjdd dg|d}d}|dkrZtjtdd t||| W 5 Q R X d S t|||}d	d d	gd
d d
gd| }tj|dd}t|| d S )Nr`   r   r.   r   *   )__eq____ne__znot supported betweenr"   FTrg   )	rh   r   r   r'   r(   	TypeErrorra   r0   r1   )ri   r   re   r   rO   r?   Zexpected_datar   r   r   r   )test_comparison_methods_scalar_not_string   s    rp   c                 C   s   d| j  d}tjdd dg|d}d d dg}t|||}tj|dd}t|d ||d |d< tj|dd}t|| t||tj}tjd d d gdd}t|| d S )Nr`   r   r.   r   rK   rg   )	rh   r   r   ra   r*   Z
empty_liker0   r1   r   )ri   r   re   r   rO   r?   r   r   r   r   test_comparison_methods_array   s    
rr   c              	   C   s  | t jjkrd}nd}tjt|d | tjddgdd W 5 Q R X tjt|d | tg  W 5 Q R X | t jjkr| tjdtjgt	d | tjdd gt	d nbtjt|d | tjdtjgt	d W 5 Q R X tjt|d | tjdd gt	d W 5 Q R X tjt|d | tjdt j
gt	d W 5 Q R X tjt|d$ | tjdtdd	gt	d W 5 Q R X tjt|d$ | tjdtdd	gt	d W 5 Q R X d S )
Nz7StringArray requires a sequence of strings or pandas.NAzBUnsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArrayr"   r   r   ZS1r   NaTns)r   r&   r   r'   r(   r)   r*   r   r^   rK   rs   Z
datetime64Ztimedelta64)r   r,   r   r   r   test_constructor_raises  s(     " "(ru   nar^   c                 C   s>   t jtdt jg}tt jtjd| gdd| d S )Nr   rK   r   )r   r&   r   r*   r   r   r0   r1   )rv   r   r   r   r   test_constructor_nan_like)  s
     rw   copyTFc           	      C   s   |t kr,| dkr,tjjtdd}|j| tjdtj	gt
d}tjdtjgt
d}|j|| d}|t krdd l}||j|| dd	}n||}t|| | r|n|}t|| d S )
NFznumpy array are differentrA   r   r   )rx   r   TtypeZfrom_pandas)r   r'   r6   r7   r   r8   r9   r*   r   r^   rK   r   r   r   r   stringr0   r1   assert_numpy_array_equal)	rx   r   r=   r6   Znan_arrZna_arrr?   par   r   r   r   test_from_sequence_no_mutate1  s      r~   c              	   C   s   t jdddg| d}|d}tjdddgdd}t|| t jdt jdg| d}d	}tjt	|d
 |d W 5 Q R X d S )N123r   Zint64r   r%      zJint\(\) argument must be a string, a bytes-like object or a( real)? numberr"   )
r   r   r;   r*   r0   r|   r   r'   r(   ro   )r   r+   r?   r   r,   r   r   r   test_astype_intK  s    
r   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nr   r   r   Int64r   r   )r   r   r   r;   r0   r1   r   r+   r?   r   r   r   r   test_astype_nullable_intW  s    
r   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r   r:   r   r;   r*   r^   r0   r<   )r   Zany_float_dtyper>   r?   r   r   r   r   test_astype_float_  s    
r   skipnazNot implemented StringArray.sumc                 C   s0   t jdddg|d}|j| d}|dks,td S Nr   r   r.   r   r   abc)r   r:   sumr   r   r   r+   r?   r   r   r   test_reduceg  s    r   c                 C   sJ   t jd dd ddd g|d}|j| d}| r8|dksFtnt |sFtd S r   )r   r:   r   r   isnar   r   r   r   test_reduce_missingo  s
    r   methodminmaxc           	      C   s   |j dkr.tr.d}tjjt|d}|j| tj	dddd g|d}t
|| |d}|rv| d	krddnd}||kstn|tjkstd S )
Nr   0'ArrowStringArray' object has no attribute 'max'rA   r   r   r.   r   r   r   )r   r   r'   r6   r7   ro   r8   r9   r   r:   ra   r   r   )	r   r   r   r=   r3   r6   r+   r?   r   r   r   r   test_min_maxz  s    r   boxc           	      C   s   |j dkrHts|tjkrH|tjkr(d}nd}tjjt|d}|j	| |dddd g|d}t
t| |}| d	krvdnd}||kstd S )
Nr   z<'<=' not supported between instances of 'str' and 'NoneType'r   rA   r   r   r.   r   r   )r   r   r   r   r'   r6   r7   ro   r8   r9   ra   r*   r   )	r   r   r   r=   r3   r6   r+   r?   r   r   r   r   test_min_max_numpy  s    
r   c              	   C   s   | j dkr*d}tjjt|d}|j| tjdtj	g| d}|j
dd}tjddg| d}t|| |j
tdd}tjddg| d}t|| d}tjt|d	 |j
d
d W 5 Q R X d S )Nr   zmRegex pattern "Cannot set non-string value '1' into a StringArray." does not match 'Scalar must be NA or str'rA   r   r   r   )valuez3Cannot set non-string value '1' into a StringArray.r"   r   )r   r'   r6   r7   r   r8   r9   r   r   r   Zfillnar0   r1   r*   Zstr_r(   r)   )r   r=   r3   r6   r+   resr   r,   r   r   r   test_fillna_args  s    
r   r   c                 C   sd   dd l }tjdddg| d}||}|jt|| dd}| jdkrR||}||s`td S )	Nr   r   r   r.   r   Try   r   )	r   r   r   listr{   r   chunked_arrayequalsr   )r   r}   datar+   r   r   r   r   test_arrow_array  s    


r   c              	   C   s   dd l }tjddd g| d}td|i}||}|djdksHttd| |	 }W 5 Q R X t
|d jtjs~t|d| d}t|| |jd	 tjkstd S )
Nr   r   r   r   r{   r
   string[])r%   r   )r   r   r   r   tablefieldrz   r   option_context	to_pandas
isinstancer   r	   r;   r0   r[   locr   r   Zstring_storage2r}   r   r   r   r?   r   r   r   r   test_arrow_roundtrip  s    
r   c              	   C   s   dd l }tjg | d}td|i}||}|djdksBt|j|jg |	 dg|j
d}td| | }W 5 Q R X t|d jtjst|d| d	}t|| d S )
Nr   r   r   r{   )rz   )schemar
   r   r   )r   r   r   r   r   r   rz   r   r   r{   r   r   r   r   r   r	   r;   r0   r[   r   r   r   r    test_arrow_load_from_zero_chunks  s    
 r   c                 C   s   t jdddt jg| d}|jdd}t jdddg|ddd	g d
d}t|| |jdd}t jddg|d d d
d}t|| d S )Nr   r   r   F)Zdropnar%   r   r   r   r   indexr   T)r   r   r   value_countsr:   r0   r<   r   r   r   r   test_value_counts_na  s     r   c                 C   sT   t jdddt jg| d}|jdd}t jddg|d d dd	d
 }t|| d S )Nr   r   r   T)	normalizer%   r   ZFloat64r   r   )r   r:   r   r   r0   r<   )r   r>   r?   r   r   r   r    test_value_counts_with_normalize  s     r   zvalues, expectedr   r   r.   c              	   C   s   t j| |d} t ddb |  }t|| t |  }t |}t|| t |  }t |}t	|| W 5 Q R X d S )Nr   zmode.use_inf_as_naT)
r   r   r   r   r0   r|   r:   r<   r   r[   )valuesr   r   r?   r   r   r   test_use_inf_as_na  s    	

r   c                 C   sf   | j dkrtd| j   tjdddg| d}d|j  k r\|   kr\|jdd	k sbn td S )
Nr   znot applicable for r   r   r.   r   r   T)deep)r   r'   skipr   r:   nbytesZmemory_usager   )r   Zseriesr   r   r   test_memory_usage  s    
r   float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r   r:   r;   r0   r<   )r   r   r>   r?   r   r   r   r   test_astype_from_float_dtype  s    
r   c                 C   sF   t jdt jdg| d}t|}tjdt jdgtd}t|| d S )Nr   r   r   )r   r   r   r*   rK   r0   r|   r   r   r   r   "test_to_numpy_returns_pdna_default%  s    
r   c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr   r   r   )na_value)r   r   r   Zto_numpyr*   rK   r0   r|   )r   Znulls_fixturer   r+   r?   r   r   r   r   test_to_numpy_na_value,  s
    r   c              	   C   s@  t jddd g| d}tt| dko$t |ddg}W 5 Q R X t dddg}t|| tt| dkont |dt jg}W 5 Q R X t dddg}t|| tt| dkot |g }W 5 Q R X t dddg}t|| tt| dkot |d|g}W 5 Q R X t dddg}t|| d S )Nr   r   r   r   r.   TF)	r   r:   r0   Zmaybe_produces_warningr   r   isinr<   r   )r   Zfixed_now_tsrL   r?   r   r   r   r   	test_isin4  s:     
 
 
 r   c              	   C   s   t jdddg| d}tdddg}d ||< |jd t jks@tt jdddg| d}t|jt jjkrld}nd	}t	j
t|d
 d||< W 5 Q R X d S )Nr   r   r.   r   FTr   zCannot set non-string valuer!   r"   )r   r:   r*   r   r   r   rz   r&   r   r'   r(   r)   )r   r>   maskr,   r   r   r   (test_setitem_scalar_with_mask_validationT  s    r   )K__doc__Znumpyr*   r'   Zpandas.compatr   r   Zpandas.errorsr   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandas.core.dtypes.commonr   Zpandasr   Zpandas._testingZ_testingr0   Zpandas.core.arrays.string_arrowr   Zfixturer   r   r   r    r-   r2   r@   rJ   rM   rP   rQ   r6   r7   r]   r_   rj   rk   rp   rr   ru   Zparametrizer^   Zfloat64floatr   rw   r~   r   r   r   r   r   r   r:   r   r   r   Z
skip_if_nor   r   r   r   r   r   r   Zfloat16Zfloat32r   r   r   r   r   r   r   r   r   <module>   s   

	


!$

	




 