o
    jh                    @   s  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ ejfddZi dejdejd	d d
ejdejdejdejdejdejdejdejdejdejdd dejdejdejd ejejd ejdZdd Zdd Zdd Zdd  Zejd!d"d#gd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zejd.d/g d0eg d1d/d2fd/d3gg d4eg d1d/d2e	g d5gfd/d6gg d7eg d1d/d2eg d1d/d2gfgejd8d"d#gd9d: Z d;d< Z!d=d> Z"d?d@ Z#ejd!d"d#gejdAd"d#gejdBd"d#gdCdD Z$dEdF Z%dGdH Z&dIdJ Z'dKdL Z(dMdN Z)ejdOdPgee*dQd dRgdSdTgdUfee*dQ+dVdW dTgd dSgdUfee*dXd dRgdSdTgdUfgdYdZ Z,d[d\ Z-d]d^ Z.d_d` Z/ejdaddbdgfddcddgfddcdgfddbddgfgdedf Z0dgdh Z1didj Z2dkdl Z3ejd!d"d#gdmdn Z4ejd!d"d#gdodp Z5dqdr Z6dsdt Z7dudv Z8ejdwd"ee
j9eg dxdydzg d{gd/d3gd|g d1d}d~fd#eeg dxdydzg d{g d1dfgdd Z:ejddej;ej<gdd Z=ej>dd Z?ejdddgdd Z@ejdddgejdAd#dgdd ZAejdd"e
9e	dgdQ dgdQ  dydde	g ddydde	ddgdQ gg dfd#e
jBeddgd#deg dd#de	ddggg dd|dTdTdQdQej;ej;dSdSej;ej;dRdRgfde
jBeddgd#deg dd#de	ddggg dd|dTdTdQdQej;ej;dSdSej;ej;dRdRgfgdd ZCdd ZDejdg dg dgdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJejdAd#dgdd ZKdd ZLejdejMg dxfejNg d{fgdd ZOdd ZPejdddgdd ZQdd ZRdd ZSdd ZTdd ZUdd ZVejdddgdeWdAeXfddÄZYejdddgdeWdAeXfddńZZddǄ Z[ejdddgddɄ Z\dd˄ Z]dd̈́ Z^ddτ Z_ejdg dѢejd!d"d#gddӄ Z`ejdddgejd!d"d#gddׄ Zaejdg dѢejdddgejd!d"d#gdd܄ Zbejdg dѢejdg dݢejd!d"d#gdd߄ Zcejdg dѢdd ZddS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcut)get_groupby_method_argsc                 C   s.   dd }t jt|||d}| j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   f/var/www/html/smartRegister/venv/lib/python3.10/site-packages/pandas/tests/groupby/test_categorical.pyf   s   z)cartesian_product_for_groupers.<locals>.fnames
fill_value)r   from_productmapreindex
sort_index)resultargsr   r   r   indexr   r   r   cartesian_product_for_groupers   s   r"   allanycountcorrwithfirstidxmaxidxminlastmaxmeanmedianminnthnuniqueprodquantilesem)sizeskewstdsumvarc                 C   sB   t | jd}dd }| j|ddj|}|jjd dksJ d S )N   c                 S   s   |   |  |  |  dS )Nr.   r+   r%   r,   r:   )groupr   r   r   	get_statsH   s
   z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   r?   groupbyDapplyr!   r   )dfcatsr<   r   r   r   r   test_apply_use_categorical_nameE   s   rE   c                  C   s  t g dg ddd} tg d| d}ttdddd	}td
dddtjgi|d}|jddd }t	|| t g dg ddd}t g dg ddd}t||g dd}|jddd}tg dddd	}	tdt
g d|	di}|jdd}t	|| tddgddgddggddgd}
t |
j|
d< |
jdgdd}|d d! }t	||
dg  |
d}|
jd"dg }t	|| d#d$ }||}|
jd"dg  }tddgdd%|_|d d&|d< t	|| td
g d'i}tj|jg d(d)}|jj|ddt}t||d
  t|jj|ddd*d! |d
  t	|j|ddt|d
g  |j|dd}|d+d! }t	||d
g  td " |d,d! }|t}|tjj}|d-d! }W d    n	1 sxw   Y  tj	||d
g dd. tj	||d
g dd. t	||d
g  t	||d
g  t|jj|ddtj|d
  t	|j|ddtj| td
g d/i}tj|jg d0d)}|jj|ddt}t||d
  t|jj|ddd1d! |d
  t	|j|ddt|d
g  t	|j|ddd2d! |d
g  td
g d3i}tj|jg d4t tdd5}|j|ddt}t|j j!|j j"d6}t
g d3|d}d
|j_#t|| g d7}tj$j%d"dd8d9}t j&||dd6} ttj$'d8d}|j| dd }|jt(| dd }t|| j!dd}	|)|	}t	|| |j| dd}|* }| j+, }t(| -|}|-|}t |dg d7d:}|j|ddd;* }t	|| t j&t.d/d<|dd6}t|}t0|1 j2d"| tg d=d }t0|1 j2d| d S )>N	r   r   r   brG   rG   crH   rH   r   rG   rH   dTr   	   rL   rL      rM   rM      r9      r   rG   abcdrG   namer   r   rL   rM   r9   r!   Fr=   r   r   rG   rG   r   rG   zrH   rJ   rH   rJ   rH   rJ   yrL   rM   rN   r9   ABvaluesr]   r_   )rN      r   numeric_onlyzJohn P. Doez	Jane Dove	person_idperson_namecolumnsc                 S   s   | S Nr   xr   r   r   <lambda>t       ztest_basic.<locals>.<lambda>r   c                 S   s   |  djd S )Nrd   r   )drop_duplicatesilocrh   r   r   r   r   {   s   ztest_basic.<locals>.frS   object)rO         )r   
         (   )binsc                 S   
   t | S rg   r   r7   xsr   r   r   rj         
 c                 S      t j| ddS Nr   axisr   r+   ry   r   r   r   rj          c                 S   r|   r}   r   ry   r   r   r   rj      r   c                 S   s   t j| S rg   )r   maximumreducery   r   r   r   rj          )check_dtype)rO   rp   rq   )ir   rr   rs   rt   ru   c                 S   rw   rg   rx   ry   r   r   r   rj      r{   c                 S   rw   rg   rx   ry   r   r   r   rj      r{   )rL   r   r   r   r   rL   rM   rN   r9   )labelsr   foobarbazquxd   r4   )r   r   sortr>      r%   r,   r6   r.   z25%z50%z75%r+   )3r   r   r   listr   nanr@   r,   tmassert_frame_equalr   r7   rd   	transformrl   rm   rB   copyr   r!   astypepdcutr   assert_series_equalassert_produces_warningr+   r   r   filterr#   r   r_   r   r   rS   randomrandintr   randnasarrayr   describecodesargsorttaker   repeatassert_index_equalstackget_level_values)rD   data	exp_indexexpectedr   cat1cat2rC   gbexp_idxri   gr   rH   gbcresult2result3result4result5levelsr   groupeddesc_resultidx
ord_labelsord_dataexp_catsexpcexpr   r   r   
test_basicT   s   

"
$"



r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}|d}t	|| d S )NrM      r   rG   rr   r   rO   rL   Index1Index2)r   r   r   r   r!   levelr>      )
r   r   r   r   r   ranger@   	get_groupr   r   )r>   rC   r   r   r   r   r   r   test_level_get_group   s&   
r   c                  C   s   t dgd dgd  g dd tdd} t| jg dd	d
| _| dd  }|jdd	d}g d}t|g dd	d
}g dt|g}t	j
|ddgd}tdgd |dd}t|| d S )Nr]      r^   )highmedlowr9   g      (@)r;   doseoutcomes)r   r   r   Tr   r;   r   r   )r   sort_remaining)r   r   r   r   r   r   )r]   r]   r]   r^   r^   r^   r   rM   r%   r!   rS   )r   r   r   r   r   r@   value_countsr   r   r   from_arraysr   r   r   rC   r   r!   r   r   r   r   (test_sorting_with_different_categoricals   s   
r   r   TFc           	      C   s  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tg d|dgd}|	dd }t
|| | }t
|| |tj}t
|| tj||gdd	gd}td|d}|	dd }t
|| d S )Nabcr   aaar   rG   r   )missingdenser_   r   r   Tr=   r   )r   rL          @r_   )r!   rf   c                 S   r|   r}   )r   r,   rh   r   r   r   rj   "  r   ztest_apply.<locals>.<lambda>rL   rT   c                 S   s   dS NrL   r   rh   r   r   r   rj   .  rk   )r   r   r   r   r   r   r@   r   r   rB   r   r   r,   aggr   r   )	r   r   r   r_   rC   r   r   r   r   r   r   r   
test_apply  s"   r   c                 C   s@  t g dg ddd}t g dg ddd}t||g dd}d	d
gd |d< |jg d| d}tj||d	d
gd gg dd}tdtg d|di }| }| sdt|||d	d
ggt	ddd}t
|| |jddg| d}tj||gddgd}tg dg dd|d}| }| st|||gt	ddd}t
|| t g dg dddg dg dd}t|}|jd| d}	|	 }tt	d dt	d!dd"}td#d#gd$d%gd&|d}| stt	d!dt	d!dd"}
||
}t
|| |jdd'g| d}|d(}tg d)t g dg dddg d*d+dd'g}| s0t||jjd,dggdd'g}t
|| d-D ]}|\}}||}||j|k|j|k@  }t
|| q8g d.g d/g d0d1}t|}t|d	 tdd2d3}||d4< |jd4d5gd6| d7}|d(}|jd4d5gd| d7}|d( }t
|| d S )8NrU   rV   Tr   rX   rY   r[   r\   r   r   rM   r?   )r]   r^   r?   r=   r   r_   rT   ABCr   r   r]   r^   )r   r   r   r   )r_   r?   AB)r   rG   r   rG   r   rG   rH   rL   rL   rM   rM   rr   rs   rt   ru   )catintsvalr   abr   )rS   r   r   g      ?      4@rt   )r   r   r   r,   )      $@g      >@r   g      D@rL   rM   rL   rM   )r   r   r   rL   ))r   rL   )rG   rM   )rG   rL   )r   rM   )rr   r   r9   r   r9   rL   rL   rr   rs   rt   ru   2   <   F   )rJ   rH   er   r   rJ   rH   r   r   r   rr   rN   r   r   Fas_indexr>   )r   r   r@   r   r   r   r   r7   r"   r   r   r   r,   r   r   r   	set_indexr   r_   r   r   r   r   r   linspacereset_index)r>   r   r   rC   r   r   r   r   rJ   groups_single_keyr!   groups_double_keykeyrH   ir   groupsgroups2r   r   r   test_observed2  s   


	

r   c                 C   s   g dg dg dd}t |}t|d g d}d|_|j|dg| d	}tj|g dgddgd
}t g dg dd|d}| sOt||jg dgddg}|	d}t
|| d S )N)rN   rN   r9   rO   r[   )rr   r      "   )C1C2C3r  )rL   rM   rN   r   r   r  r=   r   )      @r  g      @g      @)r   g      Y@g      i@g      A@)r  r  rT   r,   )r   r   r   rS   r@   r   r   r"   r_   r   r   r   )r>   rJ   rC   r_   r   r   r   r   r   r   r   test_observed_codes_remap  s   
r  c                  C   s   t tjjddddtjjddddtjjdddddd} | jtd| d< | jg d	d
d}| }|j	j
d  | j ksEJ |j	j
d  | j ksTJ |j	j
d  | j kscJ d S )Nr      i0u  r   '  )r   int_idother_idr   categoryr   )r   r	  r
  Tr=   rL   rM   )r   r   r   r   r   r   strr@   r%   r!   r   r0   r	  r
  )rC   r   r   r   r   r   test_observed_perf  s   "r  c                 C   s   t g dg dd}t|g dd}|jd| d}|j}| r0tdd	gd
dtdgd
dd}ntdd	gd
dtg d
dtdgd
dd}t|| d S )N)r   rH   r   r   r   rL   rM   rN   r   valsr   r=   r   rM   int64dtyperL   )r   rH   )r   r   r@   r   r   r   assert_dict_equal)r>   r   rC   r   r   r   r   r   r   test_observed_groups  s   "
r  z,keys, expected_values, expected_index_levelsr   )rp   	   r   r  rn   rG   )	r`   r   r   r   r   r  r   r   r   r9   rO   r   a2)	rp   r   r   r   r  r   r   r   r   test_seriesc           
      C   s   t tg dg ddtg dg ddg dg ddddg}d	| vr,|jd	d
}|j| dd}|r9|d }| }t| dkrF|}ng ddg d g}t||| d}t d|i|d}	|re|	d }	t	||	 d S )NrL   rL   rM   r  r  r  )r`   r   r  )r   r  rG   rH   r   r  rG   re   Fr=   rH   rL   	r   r   r   rL   rL   rL   rM   rM   rM   rN   r   rL   rM   )r   r   rT   )
r   r   r   dropr@   r7   r   r   r   assert_equal)
keysexpected_valuesexpected_index_levelsr  rC   r   r   r!   r   r   r   r   r   test_unobserved_in_index  s6   
r#  c                 C   s   t tdtjdgg ddg dd}|jd| d}|j}| r*dtdd	gd
di}ntdd	gd
dtg d
dtg d
dd}t|| d S )Nr   )r   rG   rJ   r  r  r  r   r=   r   rM   r  r  )	r   r   r   r   r@   r   r   r   r  )r>   rC   r   r   r   r   r   r   test_observed_groups_with_nan  s   

r$  c                  C   sl   t dtjtjgg dd} tg d}t| |d}|jdddd	 d
}|d	 jd
g }t	|| d S )Nr   r   r  r  )r   serr   Fr=   r%  r   )
r   r   r   r   r   r@   r/   rm   r   r   )r   r%  rC   r   r   r   r   r   test_observed_nth)  s   r&  c                 C   s   t tjdtjdgg dd}tg d}t||d}|jd| d  }| r8tt dgg dddgd}ntt g dg dddtjtjgd}t	|| d S )	Nr   r   r  r[   )s1s2r'  r=   rM   )
r   r   r   r   r   r@   r'   r   r   r   )r>   r'  r(  rC   r   r   r   r   r   #test_dataframe_categorical_with_nan4  s   r)  r>   r   c           	      C   s   t g dg d| d}tg d}t||d}|jd||dd d}t|jjd	d
}t|j}|s:d|| < t||ksRd|  d| d| d| }J |d S )N)rJ   r   rG   r   rJ   rG   )r   rG   r   rJ   r   )labelr   r*  )r>   r   r   r'   ro   r  r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
)	r   r   r   r@   	aggregater!   arrayisnar#   )	r   r>   r   r*  r   rC   r   aggrmsgr   r   r   0test_dataframe_categorical_ordered_observed_sortH  s2   	
r0  c                  C   s|  t jddd} tjjdddd}tj|| dd}ttjdd}|j	|d	d

 }|j	t|d	d

 }|| }t|j|jdd|_t|| |j	|d	d
}| }|j }||}	||}
|
j	|	d	d
 }t|| t|j|j t|jd|jd tjtdd| dd}t|}t| jd| tg dd }t| jd| d S )Nz
2014-01-01r9   )periodsr   r   r   Tr   Fr=   r   r   r   rL   )r   
date_ranger   r   r   r   r   r   r   r@   r,   r   r   r   r!   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   rD   r   r   r   r   r   r   r   r   r   r   r   r   r   test_datetimek  s6   




r3  c                  C   s  t jd} g d}| jdddd}tj||dd}tt t dd	d	dt
d
d}||d< |djddd }|t
d
 j|jdd }ttjg d|dddd|_t|| |jddd }|t
d
 j|jdd }ttjg d|dddd|_t|| d S )Ni90  r   r   r9   rs   r   Tr   rQ   re   rD   Fr   r=   r   rL   rM   rN   rn   )r   r   RandomStater   r   r   r   r   r   reshaper   r   r@   r7   r   r   r!   r   r   )sr   r   rD   rC   r   r   r   r   r   test_categorical_index  s$   &r9  c                  C   sn   t g dg ddd} ttjdd| d}|g dd	  }t|	 j
|  t|	 j
j| j d S )
N)r   r   r   r   r   Tr   rs   r9   re   r[   rO   )r   r   r   r   r   r@   r   r   r   r   rf   assert_categorical_equalr_   )rD   rC   r   r   r   r   !test_describe_categorical_columns  s   r;  c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nrr   r]   r^   rO   XYXXYrM   )r   mediumartistr=  r  r>  Fr=   r   r   rS   r   r9   XYrn   rT   )r   r   r   r   r@   r%   unstackr   r   r   r   rf   r:  r_   r   r   r   )rC   gcatr   exp_columnsr   r   r   r   test_unstack_categorical  s   rE  c                  C   st   t tjtjddddddddg
} t|  jd}tjt	dd | 
|  W d    d S 1 s3w   Y  d S )NrL   rM   rN   r9   z$Grouper and axis must be same lengthmatch)r   r   r   r   r   dropnar_   pytestraises
ValueErrorr@   r,   )seriesrv   r   r   r   test_bins_unequal_len  s
    "rM  rL  r   r9   rN   rL   rM   r]   r^   c                 C   s   | d S r   r   )r   r   r   r   rj     s    rj   r`   c                 C   sD   |  ttddd}|t}t|t| d}t|| d S )NABBAr  r  rT   )r@   r   r   r+  r   r   r   r   )rL  r   r@   r   r   r   r   r   test_categorical_series  s   
rP  c                     s  t tg dg dg dg dd  jddgdd	d
 } t tddg jjjdddgddgdg dd}t| |  fdd} jd|gdd	d
 } t tddg jjjdddgddgdg dd}t| | tg ddd} jd|gdd	d
 } t| | ddg}t tddg jjjdddgddgdg dd}dD ]}t	t
d|d _ j|dd	d
 } t| | qd S )NrL   rM   rM   r  )rr      rR  )e   f   g   )r   r]   r^   r   r]   FTr   rL   rM   r  rr   rR  rS     re   c                    s    j | df S )Nr]   )loc)rrC   r   r   rj     r   ztest_as_index.<locals>.<lambda>r   )r   rG   rG   rn   )Nr@  r^   r   )r   r   r@   r7   r   r   r   r   r   r   r   r!   )r   r   r   r8  group_columnsrS   r   rY  r   test_as_index  sR   	r[  c                  C   s  t d} tdtt d| ddi}t| | ddd}tt d| ddd}t|jdddd	 j| t|jdddd	 j| tdtt d| ddi}t| | ddd}tt dt dddd}t|jdddd	 j| t|jdddd	 j| d S )
Nr   r]   baTr   r?  bacFr   )	r   r   r   r   r   r   r@   r'   r!   )r   rC   r   nosort_indexr   r   r   test_preserve_categories$  s&   r_  c               	   C   s   t g dg dttdtdddttdtdddd} t d	d
tjgddtjgttdtdddttdtdddd}dD ]/}| j|dddjdd}| j|dddjdd }|j|j	d}t
|| t
|| qDd S )N)rL   rM   rL   rL   rM   )rr      r      r  abaabr]  Fr   T)r]   r^   r  r  r   g      ?g      9@r   )r  r  )byr   r>   ra   re   )r   r   r   r   r   r@   r,   r   r   rf   r   r   )rC   exp_fullcolresult1r   r   r   r   r   test_preserve_categorical_dtypeB  s6   	

rg  zfunc, valuessecondfourththirdc                 C   s   t g ddd}tg d|d}|d}t||  }tddgt||jd	dd}t|| |dd
 }t||  }|d
 }t	|| d S )N)r'   rh  rj  ri  Tr   )r4  r4  rk  )payloadre  rl  rk  r4  r  re  )
r   r   r@   getattrr   r  r   r   r   r   )funcr_   rH   rC   r   r   r   sgbr   r   r   test_preserve_on_ordered_opsc  s   
rp  c                  C   sP  t tjd} tg d}tj|g ddd}| j|dd }| j|dd }t	|j
|j|jd|_
t|| tg d	}tj|g d
dd}| j|dd }| j|dd |j}t	|j
|j|jd|_
t|| tg dg ddd}tg d|d} | jddd }|d j}tdddtjg}t|| d S )Nr  r  r  Tr   Fr=   r   )	r   r   r   rL   rL   rL   rN   rN   rN   r5  rF   rI   rK   rP   rG   r   rL   rM   r9   )r   r   r   r   r,  r   r   r@   r,   r   r!   r   r   r   r   r   r   r_   r   assert_numpy_array_equal)r   r   rD   r   r   r   r   r   test_categorical_no_compress  s6   
rr  c                  C   sb   t d gd tg dd} | d d }ttg ddgdtg d	dd
dd}t|| d S )NrN   )trainrs  testrN  r]   r^   rt  rs  r  ro   r  rS   r   )r   r   r@   r'   r   r   r   rC   r   r   r   r   r    test_groupby_empty_with_category  s   rw  c                  C   s   t dtjdddi} dd tdddD }t||}| jdgdd	} tj| j	tdd
dd|d| d< | j
dgddd  }|t|jdd d }t|j|jjd|_t|| d S )Nvaluer   r  r   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r   r   r   r   
<listcomp>  s    ztest_sort.<locals>.<listcomp>i  T)rc  	ascendingi)  F)rightr   value_groupr=   c                 S   s   t |  d S )Nr   )floatsplitrh   r   r   r   rj     s    ztest_sort.<locals>.<lambda>)r   rn   )r   r   r   r   r   r   sort_valuesr   r   rx  r@   r%   sortedr!   r   rS   r   r   )rC   r   
cat_labelsresr   r   r   r   	test_sort  s   

r  c              	   C   s   t g dg dg dg dg dg dg dgg dd	}t|d
 |d|d
< |jd
| dd }| rFddgddgddgddgg}g d}nddgddgddgddgg}g d}t |ddgt|d
|dd}t|| d S )N)	(7.5, 10]rr   rr   )r  r   rs   )(2.5, 5]rO   rt   )(5, 7.5]r   ru   )r  r9   r   )(0, 2.5]rL   r   )r  r`   r   )r   r   r   re   r   r   Fr   rL   r   rO   rt   r   ru   rr   )r  r  r  r  )r  r  r  r  r   r   rR   rf   r!   )r   r   r@   r'   r   r   r   )r   r   rC   r   data_valuesindex_valuesr   r   r   r   
test_sort2  s0   	
r  c                 C   sP  t tdddtdddtdddtdddtdddtdddtdddgg dg ddg dd	}t|d
 |d|d
< | rdddgddgddgddgg}tdddtdddtdddtdddg}n$ddgddgddgddgg}tdddtdddtdddtdddg}t |ddgt|d
|dd}|jd
| dd }t|| d S )Ni  r`   rL   rM   rO   )rr   r   rO   r   r9   rL   r`   r   )dtr   r   re   r  r   r   rt   r   ru   rr   r   r   rR   r  Fr   )r   r   r   r   r@   r'   r   r   )r   r   rC   r  r  r   r   r   r   r   test_sort_datetimelike  sF   






	







r  c                  C   s  t tg dg ddg dd} tg ddd}| jddd	j }tg d
|dd}t|| | jddd	jjdd}tg d
|dd}t|| | jddd	jjdd}tddt	j
g|dd}t|| | jddd	jjdd}tdt	j
t	j
g|dd}t|| d S )Nr   r   rG   r   r  rL   rM   rL   rN  r]   rn   Fr=   )rN   rL   r   r^   r   	min_countrL   rN   rM   )r   r   r   r@   r^   r7   r   r   r   r   r   rC   expected_idxr   r   r   r   r   test_empty_sum  s    r  c                  C   s   t tg dg ddg dd} tg ddd}| jddd	j }tg d
|dd}t|| | jddd	jjdd}tg d
|dd}t|| | jddd	jjdd}tddt	j
g|dd}t|| d S )Nr  r   r  r  rN  r]   rn   Fr=   )rM   rL   rL   r^   r   r  rL   rM   )r   r   r   r@   r^   r1   r   r   r   r   r   r  r   r   r   test_empty_prod:  s   r  c                  C   s   t ttdtttjddddd tdd} | dd	g }t	j
tg d
ttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )N	abcbabcbaz2018-06-01 001TrN   )freqr1  r  )key1key2r_   r  r  r   r   r_   r   r9   r   rO   r   rM   rT   )r   r   r   r   r2  r   r   r@   r,   r   r   r   r   r   )rC   r   r   r   r   r   r   ,test_groupby_multiindex_categorical_datetimeR  s"   
	
$r  zas_index, expectedr  r  r  rQ  r   ri   )r!   r   rS   r   rG   ri   c                 C   sP   t tg dddg dg dd}|jddg| d	d
d  }t|| d S )Nr  r  r  rQ  r  r  r   rG   Tr   ri   )r   r   r@   r7   r   r  )r   r   rC   r   r   r   r   ,test_groupby_agg_observed_true_single_columnk  s
   r  r   c                 C   sJ   t g dg ddd}t g dg ddd}|jd| d}t|| d S )NrI   Fr   )Nr   rG   rH   rL   r   )r   shiftr   r  )r   ctr   r  r   r   r   
test_shift  s   r  c                 C   sX   |   dd }|d d|d< |d d|d< tg d|d< |jdgd	d
}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr9   r]   r  r^   r[   r?   rA   rL   r~   )r   r   r   r  )rC   df_catr   r   r   r    s   r  	operationr   rB   c                 C   s   t g d| d jdd}t g d| d jdd}t||g}tg d|dd }| jddgd	d
d }t||t}t	
|| d S )N)r   r   r   r   r]   ru  )onethreer  twor^   )rM   r9   rL   rN   r?   r   r!   rS   Tr=   )r   r  r   r   r   r   r@   rm  r7   r   r   )r  r  lev_alev_br!   r   r   r   r   r   r    test_seriesgroupby_observed_true  s   r  c                 C   s   t jtddgddtg dddgddgd \}}td	d
tjdtjdg|dd}|dkr5|jddd}| jddg|dd }t	||t
}t|| d S )Nr   r   Fr   r  r  r  r]   r^   r   rM   r9   rL   rN   r?   r  r   r   infer)downcastr=   )r   r   r   	sortlevelr   r   r   fillnar@   rm  r7   r   r   )r  r>   r  r!   _r   r   r   r   r   r   )test_seriesgroupby_observed_false_or_none  s   r  zobserved, index, datar   r   r]   ru  )r  r  r  r  r  r  r  r  r^   )rM   rM   r9   r9   rL   rL   rN   rN   r   r  )r]   r^   Nc                 C   s>   t ||dd}| jddg|dd dd }t|| d S )Nr?   r  r]   r^   r=   c                 S   s   |   |  dS )Nr.   r+   r  rh   r   r   r   rj     s    z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   r@   rB   r   r   )r  r>   r!   r   r   r   r   r   r   &test_seriesgroupby_observed_apply_dict  s
   .r  c                 C   s<   |  ddgd  }|  ddg d }t|| d S )Nr]   r^   r?   )r@   r,   r   r   )r  r   r   r   r   r   4test_groupby_categorical_series_dataframe_consistent  s   r  code)rL   r   r   )r   r   r   c                 C   sf   t g dg dg dd}tj| tdd}|j|dd }|jj|d	d j}t|| d S )
Nr[   )r4  rk  )rO   r   r`   r   r   r   r  rL   r~   r   )	r   r   r   r   r@   r,   Tr   r   )r  rC   r   r   r   r   r   r   test_groupby_categorical_axis_1  s
   r  c                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NBobGregr   rL   rM   )NameItemr  r  re   r=   T)skipna)	r   r   r   r@   r   r7   r   r   r   )r>   r   rC   r   r   r   r   r   $test_groupby_cat_preserves_structure  s   r  c                  C   s^   t g dtdd} tjtdd | ddd  W d    d S 1 s(w   Y  d S )	NrU   r9   r8   r   z'vau'rF  r8   c                 S   s&   t | jd d g| jd d gdS )Nr4  r8   vaur  )r   rm   )rowsr   r   r   rj   3  s     z/test_get_nonexistent_category.<locals>.<lambda>)r   r   rI  rJ  KeyErrorr@   rB   rY  r   r   r   test_get_nonexistent_category.  s   
"r  c           
      C   s   | dkr	t d | dkrt jjdd}|j| tttdtddttd	d
 tdddgd d}t	| |}|r@dnd}|j
ddg|dd }t|| }|| }	t|	|ks^J d S )Nngroupngroup is not truly a reductionr&   6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonAABBABCDr  r   rM   皙?r9   cat_1cat_2rx  r`  r  r  r=   rx  )rI  skipmarkxfailnode
add_markerr   r   r   r
   r@   rm  r   )
reduction_funcr>   requestr  rC   r    expected_lengthseries_groupbyr   r   r   r   r   0test_series_groupby_on_2_categoricals_unobserved9  s&   


r  c                 C   s.  | dkr	t d | dkrt jjdd}|j| tttdtddttd	d
 tdddgd d}t	dt	dt	dt	dt	dg}t
| |}|jddgddd }t|| }|| }t|  }	|D ]}
|j|
 }t|	rxt|s~||	ks~J qg|	dkr| dkrt|jtjsJ d S d S d S )Nr  r  r&   r  r  r  r   r  r   rM   r  r9   r  ACBCCACBCCr  r  Fr=   rx  r   r7   )rI  r  r  r  r  r  r   r   r   tupler
   r@   rm  -_results_for_groupbys_with_missing_categoriesrW  r   r-  r   
issubdtyper  integer)r  r  r  rC   
unobservedr    r  r   r   zero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nansV  s6   
"


 r  c                 C   s   | dkr	t d tttdtddttdtddg dd	}g d
}|jddgdd}t| |}t|| | }|D ]	}||jvsGJ q>d S )Nr  2ngroup does not return the Categories on the indexr  r   r  111112r  r  r  r  r  )r]   2)r^   r  )r?   1)r?   r  r  r  Tr=   )	rI  r  r   r   r   r@   r
   rm  r!   )r  rC   unobserved_catsdf_grpr    r  r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_true  s   

r  c                 C   s   | dkr	t d tttdtddttdtddg dd	}g d
}|jddg|d}t| |}t|| | }t|  }|t	j
u rT|j|    sRJ d S |j| |k  saJ d S )Nr  r  r  r   r  r  r  r  r  r  r  r  r=   )rI  r  r   r   r   r@   r
   rm  r  r   r   rW  isnullr#   )r  r>   rC   r  r  r    r  r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s    	


r  c                  C   s   g dg dg dd} t | }t|d tddd}||d	< |jd	d
gddd}|d d}|dd }t|| d S )N)rr   r   r9   rL   r   )rJ   rH   rJ   rH   r   r   r   rs   rO   r   r   T)r   r   r,   )	r   r   r   r   r   r@   r   r   r   )rJ   rC   r   r   r   r   r   r   r   3test_series_groupby_categorical_aggregation_getitem  s   r  zfunc, expected_valuesc                 C   sb   t g dg dtg ddd}|d| }t d|itg ddd	d
}t|| d S )Nr   )r   rL   rL   rM   rM   )r   r   r   r   rL   )idr   rx  r  r   rx  r  rn   rT   )r   r   r   r@   r   r   r   r   )rn  r!  rC   r   r   r   r   r   $test_groupby_agg_categorical_columns  s   
r  c                  C   s~   t dtg dg ddi} t dddgitddgd}| g dtj}t	|| | g d }t	|| d S )	Nr]   r  r   r  rM   rL   rT   r  )
r   r   r   r,  r@   r   r   r0   r   r   rC   r   r   r   r   r   test_groupby_agg_non_numeric  s   r  rn  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  rG   r  r  rN  r]   r^   rn   r!   rS   r  )
r   r   r   
as_orderedr@   rm  r   r  r   r   )rn  rC   
df_groupedr   r   r   r   r   <test_groupby_first_returned_categorical_instead_of_dataframe  s    r  c                  C   sv   t ddg} d| j_tg dtg dt| dd}tdd	d
git| ddd}|jddd	 }t
|| d S )NrL   rM   F)rL   rN   rO   r`   r   r  rP   r   r   g      @rG   rn   r   r   )r   r,  flags	writeabler   r   r   r   r@   r,   r   r   )rD   rC   r   r   r   r   r   test_read_only_category_no_sort  s   r  c                  C   s   t g dg dd} | d djjg ddd| d< t d	d	d
d	dd
ddd
ddd
d}|jddd}tg dg ddddd|_| ddg 	 }t
|| d S )N)smalllarger   r   r=  r   r   r=  )r?   r]   r]   r?   r]   r?   r]   r?   )r   r   r   r  )tinyr  r=  r   Tr   r   )r]   r?   rL   rN   rM   r   r!   r~   )r   r   rS   r  )r   r   r   set_categoriesrename_axisr   rf   r@   r4   rB  r   r   r  r   r   r   #test_sorted_missing_category_values  s6   
r  c                  C   s   t dg di} | d d| d< | dj }tg dtg dddd| d jd}t	|| | d
ddi}| }t|| d S )	Ncol_num)rL   rL   rM   rN   r  col_catr  rn   r  r'   )r   r   r@   r  r'   r   r   r  r   r   r   to_framer   rv  r   r   r   1test_agg_cython_category_not_implemented_fallback1  s   r  c                  C   s   t g dg dddtjdgg dg dd} | dd	i} | d
dgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )N)rL   rL   rL   rL   r   r  g?g333333?)r   r   r   fee)r]   r^   numerical_col
object_colcategorical_colr  r  r]   r^   c                 S   s   |    S rg   )r-  r7   rY  r   r   r   rj   U  r   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>rL   rM   rN  r   r   )r
  r  r  r   )
r   r   r   r   r@   r   r   r   r   r   r   r   r   r   %test_aggregate_categorical_with_isnanG  s&   
r  c                  C   s   t g dg dd} tjg ddd}| d || d< | dd t| d	< |  }t g dg dg d
d}|d ||d< |d	 ||d	< t	|| d S )N)rL   rL   rL   rM   rM   rN   )WaitingOnTheWay	Deliveredr  r  r  )
package_idstatus)r  r  r  Tr   r  r  last_status)r  r  r  r  r  r  )r  r  r  )
r   r   CategoricalDtyper   r@   r   r+   r   r   r   )rC   delivery_status_typer   r   r   r   r   test_categorical_transformb  s(   r  c                 C   s   t g d}g d}t|||d}t ddg}tj||gddgd}tdtjtjdg|d	d
tdtjtjdg|d	d
d}||  }|rL| tj	}|j
ddg|dd	 }	t|	|  }
t|
| d S N)r   r   rL   rL   )r   rL   rL   r   r   r   rL   r   rG   r   rH   rn   )r'   r*   r=   )r   r   r   r   r   r   NaNrH  r   r  r@   rm  r   r   )rn  r>   r   r   rC   r   r   expected_dictr   srs_grpr   r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s   r  c                 C   s   t g d}g d}t|||d}t ddg}tj||gddgd}tdtjtjdg|d	d
tdtjtjdg|d	d
d}||   }|rN| 	tj
}|jddg|d}	t|	|  }
t|
| d S r  )r   r   r   r   r   r   r  r  rH  r   r  r@   rm  r   r   )rn  r>   r   r   rC   r   r   r  r   r  r   r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s   r  c                  C   s   t tg dg ddtdd} | jddd}|j}tjd	d
gddtjdgddtjg ddd}| | ks=J | D ]}t	|| ||  qAd S )N)rG   rG   r   r   r  rN   )r   re  r   Fr  r   rL   intpr  rM   )rG   r   rH   )
r   r   r   r@   indicesr   r,  r   r   rq  )rC   r   r   r   r   r   r   r   2test_groupby_categorical_indices_unused_categories  s   r  c                 C   sj   t dg di}|d d|d< t|dd |  }ttg ddtg dddd}t|| d S )Nr   r  r  rG   rn   )rS   r!   )	r   r   rm  r@   r   r   r   r   r   )rn  rC   r   r   r   r   r   1test_groupby_last_first_preserve_categoricaldtype  s   r   c               	   C   s   t ddgddgddgd} | jdddd} | jd	d
gdd d }tddgttddgd	dtddgd
dgdd}t	|| d S )NrL   rM   rr   rR  r   r  rP   r  r   rG   Tr=   rH   rn   r   )
r   r   r@   r0   r   r   r   r   r   r   rv  r   r   r   )test_groupby_categorical_observed_nunique  s   r!  c                  C   s   t jddgdd} tddgddgddggddgd	d| i}|dd  }tddgtddgdd
dt jddgddd}t	|| d S )Nr  bigTr   rL   rM   grpdescriptionre   rn   r  )
r   r  r   r   r@   r+   r   r   r   r   )r  rC   r   r   r   r   r   ,test_groupby_categorical_aggregate_functions  s   
r%  c                 C   s   t ddgg dd}tt ddgg ddddgd}|jd| |d	}| }| r4td
ddgi|d}ntg dg d}td
g di|d}d|j_t|| d S )NrL   rM   r  r  rN   r9   )ri   rZ   ri   )r>   rH  rZ   rT   )rN   r9   r   )	r   r   r@   r7   r   r!   rS   r   r   )r>   rH  r   rC   r   r   r   r!   r   r   r   test_groupby_categorical_dropna
  s   "r&  
index_kind)r   singlemultic                 C   sp  |dv r|s|dkrd}| j tjj|d n |dkr,|s,d}| j tjj|d n|dkr8|s8tjdd ttg d	g d
|dtdd}|dkrRdg}	n |dkr_dg}	|	|	}n|dkrrddg}	|d |d< |	|	}t
||}
|j|	|||d}t|||
 }|r|jdj}n|d jj}tg d
}t|| |dkr|jdj}t|| d S d S )N)r(   r)   r)  z1GH#10694 - idxmax/min fail with unused categoriesr  r&   zDGH#49950 - corrwith with as_index=False may not have grouping columnr   /Result doesn't have categories, nothing to testrM   rL   rM   rN   rL   r9   rN   rM   r   r9   rP   r   r(  r  r   r   r>   )r  r  rI  r  r  r  r   r   r   r   r
   r@   rm  r!   r   r   r   r   r   r   )r  r   r   r>   r  r'  r   r/  rC   r   r    r   	op_resultr   r   r   r   r   test_category_order_reducer  sJ   

r/  r(  r)  c                 C   s   t tg dg d|dtdd}|dkrdg}||}n|dkr2dd	g}|d |d	< ||}t||}|j|| ||d
}	t|	|| }
|
jdj	}t
g d}t|| |dkrm|
jd	j	}t|| d S d S )Nr+  r,  r   r9   rP   r(  r   r)  r  r-  )r   r   r   r   r
   r@   rm  r!   r   r   r   r   r   )r   r   r>   transformation_funcr'  r   rC   r   r    r   r.  r   r   r   r   r   test_category_order_transformerK  s,   

r1  methodheadtailc                 C   s   t tg dg d|dtdd}|dkrdg}n |dkr'dg}||}n|d	kr:dd
g}|d |d
< ||}|j|| ||d}t|| }	|dkrT|	d jj}
n|	j	dj}
t
g d}t|
| |d	krz|	j	d
j}
t|
| d S d S )Nr+  r,  r   r9   rP   r   r   r(  r)  r  r-  )r   r   r   r   r@   rm  r   r   r!   r   r   r   r   r   r   r>   r2  r'  r   rC   r   r   r.  r   r   r   r   r   test_category_order_head_tailj  s2   
r6  )rB   r   r   c                 C   s0  |dkr|dks| s|dkrt d ttg dg d|dtdd}|dkr-d	g}n |d
kr:d	g}||}n|dkrMd	dg}|d	 |d< ||}|j|| ||d}t||dd }	|dkse| sp|dkrp|	d	 jj	}
n|	j
d	j	}
tg d}t|
| |dkr|	j
dj	}
t|
| d S d S )Nr   r   z(No categories in result, nothing to testr+  r,  r   r9   rP   r   r(  r)  r  r-  c                 S   s   | j ddS )NTra   )r7   rh   r   r   r   rj     r   z+test_category_order_apply.<locals>.<lambda>)rI  r  r   r   r   r   r@   rm  r   r   r!   r   r   r   r   r5  r   r   r   test_category_order_apply  s<   

r7  c                 C   sV  |dkr| st jdd tddd}tg d||d}t|tdd	}|dkr-d
g}n |dkr:d
g}||}n|dkrMd
dg}|d
 |d< ||}|j|| |dd}|	 }	|r`g dng d}
t
|
|j|d
d}| rtd|
i}|dkrtt||d|_n||_n|dkrtt|t||
d}n	tt||
d	}t|	| d S )Nr   r*  r  i'  r4  r+  r   r9   rP   r   r(  r)  r  Tr-  )rN   rM   rL   )rM   rL   rN   )r   r   rS   rG   )r   r  )r   r  rG   )rI  r  r   r   r   r   r   r   r@   r7   r   r   r   
from_framer!   r   r   r   )r   r   r'  r   r   grouperrC   r   r   r   r   r!   r   r   r   r   test_many_categories  s:   

r:  )er   numpyr   rI  pandasr   r   r   r   r   r   r   r	   pandas._testing_testingr   pandas.tests.groupbyr
   r  r"   r  rE   r   r   r   r  parametrizer   r   r  r  r  r#  r$  r&  r)  r0  r3  r9  r;  rE  rM  r   renamerP  r[  r_  rg  rp  rr  rw  r  r  r  r  r  r  r   r  r   NaTr  fixturer  r  r  r   r  r  r  r  r  r  r  r  r  r  r0   r%   r  r  r  r  r  r  r  r  r  boolr  r  r  r   r!  r%  r&  r/  r1  r6  r7  r:  r   r   r   r   <module>   s   $		
  
n" '
$

8!




	%
"
2




"
"
,	
	+
!

-5
.!#