
    eGI                        d dl mZ d dlZd dlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZmZmZ ej"                  rdd	l	mZ dd
lmZ  G d de
      Zd Zy)    )annotationsN   )version_compare)CudaCompiler   )NewExtensionModule
ModuleInfo)flattenpermittedKwargsnoKwargsInvalidArguments)ModuleState)Compilerc                       e Zd Z eddd      Z fdZe	 	 	 	 	 	 dd       Z edg      	 	 	 	 	 	 dd       Z	 edg      	 	 	 	 	 	 dd	       Z
ed
        Zed        Zed        Zd ZddZddZ xZS )
CudaModuleCUDAz0.50.0T)unstablec                    t         |           | j                  j                  | j                  | j
                  | j                  d       y )N)min_driver_versionnvcc_arch_flagsnvcc_arch_readable)super__init__methodsupdater   r   r   )selfargskwargs	__class__s      9/usr/lib/python3/dist-packages/mesonbuild/modules/cuda.pyr   zCudaModule.__init__%   s@    "&"9"9"&"6"6"&"9"9
 	    c                l   t        d      }t        |      dk7  st        |d   t              s||d   }g dddddd	d
ddddddddddddddddddddddddddddddd d!dd"d#d$dd%d&d'dd(d)d*dd+d,d*dd-d.d/dd0d1d2dd3d4d5dd6d7d8dd9d:d;dd<d=d>dd?d@dAddBdCdDddEdFdGddHdIdJddKdLdMddNdOdPddQdRdSddTdUdVddWdXdYddZd[d\dd]d^d_d}d`}|D ]=  }t	        ||da         s|j                  |j                  j                  |db         } |S  |S )cNzmin_driver_version must have exactly one positional argument: a CUDA Toolkit version string. Beware that, since CUDA 11.0, the CUDA Toolkit's components (including NVCC) are versioned independently from each other (and the CUDA Toolkit as a whole).r   r   z>=12.0.0z527.41z	525.60.13)cuda_versionwindowslinuxz>=11.8.0z522.06z	520.61.05z>=11.7.1z516.31z	515.48.07z>=11.7.0z516.01z	515.43.04z>=11.6.1z511.65z	510.47.03z>=11.6.0z511.23z	510.39.01z>=11.5.1z496.13z	495.29.05z>=11.5.0z496.04z>=11.4.3z472.50z	470.82.01z>=11.4.1z471.41z	470.57.02z>=11.4.0z471.11z	470.42.01z>=11.3.0z465.89z	465.19.01z>=11.2.2z461.33z	460.32.03z>=11.2.1z461.09z>=11.2.0z460.82z	460.27.03z>=11.1.1z456.81z455.32z>=11.1.0z456.38z455.23z>=11.0.3z451.82z	450.51.06z>=11.0.2z451.48z	450.51.05z>=11.0.1z451.22z	450.36.06z	>=10.2.89z441.22z440.33z
>=10.1.105z418.96z418.39z
>=10.0.130z411.31z410.48z	>=9.2.148z398.26z396.37z>=9.2.88z397.44z396.26z>=9.1.85z391.29z390.46z>=9.0.76z385.54z384.81z>=8.0.61z376.51z375.26z>=8.0.44z369.30z367.48z>=7.5.16z353.66z352.31z>=7.0.28z347.62z346.46unknownr#   r%   )r   len
isinstancestrr   gethost_machinesystem)	r   stater   r   argerrorr#   driver_version_tabledriver_versionds	            r    r   zCudaModule.min_driver_version-   s    $ %g h
 t9>DGS!9NAw  
'hU  
'hU  
 (hU  
 (hU	  

 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
 (hU  
  (hR!  
" (hR#  
$ (hU%  
& (hU'  
( (hU)  
* )hR+  
, *hR-  
. *hR/  
0 )hR1  
2 (hR3  
4 (hR5  
6 (hR7  
8 (hR9  
: (hR;  
< (hR=  
> (hR?  
D #% 	A|Q~->?!"u'9'9'@'@!G*!M	
 r!   detectedc                N    | j                  ||      } | j                  | d   }|S )Nr   _validate_nvcc_arch_args_nvcc_arch_flagsr   r-   r   r   nvcc_arch_argsrets         r    r   zCudaModule.nvcc_arch_flagsd   2     66tVD#d##^4Q7
r!   c                N    | j                  ||      } | j                  | d   }|S )Nr   r4   r7   s         r    r   zCudaModule.nvcc_arch_readablel   r:   r!   c                t    t        j                  dd|       } | j                  d      j                  d      } | S )Nz	[ 	
,;]+;)resubstripsplit)ss    r    _break_arch_stringzCudaModule._break_arch_stringt   s1    FF>3*GGCLs#r!   c                <    t        | t              r| j                  S y)N )r(   r   detected_cccs    r    _detected_cc_from_compilerz%CudaModule._detected_cc_from_compilerz   s    a&== r!   c                `    t        | t              r| j                  S t        | t              r| S y)Nr&   )r(   r   versionr)   rG   s    r    _version_from_compilerz!CudaModule._version_from_compiler   s(    a&99aHr!   c                   t        d      }t        |      dk  r||d   }| j                  |      }|dk(  r|t        |      dk  rg nt        |dd        }|D cg c]  }| j	                  |       }}t        |      }t        |      dkD  r't        |      j                  h d      st        d      t        |      dk(  r|d   n|}|j                  d| j                  |            }t        |g      }|D cg c]  }| j	                  |       }}t        |      }t        |      j                  h d      st        d      |||fS c c}w c c}w )NzJThe first argument must be an NVCC compiler object, or its version string!r   r   r&   >   AllAutoCommonzaThe special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!r2   )	r   r'   rL   r
   rC   set
isdisjointr*   rI   )	r   r   r   r.   compilerr#   	arch_listar2   s	            r    r5   z#CudaModule._validate_nvcc_arch_args   s`   #$pqt9q=NAwH66x@Ly(d)q.Bgd12h.?	9BCAT,,Q/C	CI&	y>Ac)n&?&?@Y&Z"  $K  L  L$'	Na$7IaLY	::j$*I*I(*STH:&8@A1D++A.AA8$8}''(AB"  $K  L  LY00 D Bs   E;Ec                    g }|D ]C  }|s|rt        |d|z         r|rt        |d|z         r|s,|}||vs3|j                  |       E |S )z
        Filter CUDA arch list (no codenames) for >= low and < hi architecture
        bounds, and deduplicate.
        If saturate is provided, architectures >= hi are replaced with saturate.
        <>=)r   append)r   cuda_arch_listlohisaturatefiltered_cuda_arch_listarchs          r    _filter_cuda_arch_listz!CudaModule._filter_cuda_arch_list   si     #%" 		9D/$b9/$r	:# #D66+2248		9 '&r!   c                   g d}g d}d}d}g d}t        |d      rd}t        |d      r!|g d	z  }|dgz  }t        |d
      r|dgz  }d}t        |d      r)|ddgz  }|ddgz  }|g dz  }t        |d      r|dgz  }d}t        |d      r*|ddgz  }|dgz  }|ddgz  }d}t        |d      r|dgz  }d}t        |d      r&|dgz  }|dgz  }|dgz  }t        |d       r|d!gz  }d}dg}	dg}
t        |d"      r(|d#gz  }|dgz  }|dgz  }d$}t        |d%      r|d&gz  }d'}t        |d(      r)|	d'gz  }	d'g}
|d'gz  }|d'gz  }t        |d)      r|d*gz  }d+}t        |d,      r#|g d-z  }|g d.z  }|g d/z  }t        |d0      rd1}t        |d2      rd3}t        |d4      rd5}|sd6}|d7k(  r|}no|d8k(  r|}ng|d6k(  rA|r<t        |t              r|}n| j                  |      }| j	                  ||||d9         }n$|}n!t        |t
              r| j                  |      }t        d: t        |      D              }g }g }|D ]  }g }g }|j                  d;      }|r|dt        d;        }t        j                  d<|      r|g|g}}n\dd=gg fd>gg fd?gg fdd$gd$gfd@gg fd3dgdgfddgdgfdAgg fdgdgfdgg fdgdgf|	|
fd+gg fdBgdBgfdCgdCgfdDj                  |dE      \  }}|t        dF| dG      ||z  }|s|s|}||z  } t        t        |            }t        t        |            }g }g }|D ]  }t        j                  dH|      j                         \  }}t        |dI|z         r:|rt        |dJ|z         rL|r>|j                  dKdL      }|j                  dKdL      }|dMdN|z   dOz   |z   gz  }|dP|z   gz  }|j                  dKdL      }|dMdN|z   dOz   |z   gz  }|dP|z   gz  } |D ]z  }t        j                  dH|      j                         \  }}|r|}t        |dI|z         r>|rt        |dJ|z         rP|j                  dKdL      }|dMdN|z   dQz   |z   gz  }|dR|z   gz  }| ||fS )Sz{
        Using the CUDA Toolkit version and the target architectures, compute
        the NVCC architecture flags.
        )FermiKeplerMaxwell)3.03.55.0Nz2.0)re   3.2rf   rg   z<7.0z5.2z>=7.0)Kepler+TegraKepler+TeslaMaxwell+Tegraz<8.0z5.2+PTX6.0z>=8.0PascalPascal+Tegra6.1)rl   ro   6.2z<9.0z6.1+PTXz7.0z>=9.0VoltaXavierz7.2re   z<10.0z7.2+PTXz8.0z>=10.0Turingz7.5z<11.0z7.5+PTXz>=11.0Ampererf   z<11.1z8.0+PTXz8.6z>=11.1z<11.8z8.6+PTX8.7z>=11.8)OrinLovelaceHopper)8.99.0z9.0+PTX)ru   ry   rz   z<12z9.1z>=12.0rg   z<13z10.0rO   rN   rP   c              3  &   K   | ]	  }|s|  y wN ).0xs     r    	<genexpr>z.CudaModule._nvcc_arch_flags.<locals>.<genexpr>0  s     Da!Ds   z+PTXz![0-9]+\.[0-9](\([0-9]+\.[0-9]\))?z2.1(2.0)rh   z3.7z5.3rp   ry   rz   )rb   ri   rj   rc   rk   rd   rm   rn   rq   rr   rs   rt   rv   rw   rx   )NNzUnknown CUDA Architecture Name !z'([0-9]+\.[0-9])(?:\(([0-9]+\.[0-9])\))?rW   rX   .rE   z-gencodezarch=compute_z	,code=sm_sm_z,code=compute_compute_)r   r(   listrC   r`   r)   sortedrQ   endswithr'   r>   	fullmatchr*   r   groupsreplace)r   r#   rZ   r2   cuda_known_gpu_architecturescuda_common_gpu_architecturescuda_hi_limit_gpu_architecturecuda_lo_limit_gpu_architecturecuda_all_gpu_architecturescuda_ampere_bincuda_ampere_ptxcuda_arch_bincuda_arch_ptx	arch_namearch_binarch_ptxadd_ptx
nvcc_flagsnvcc_archs_readabler_   codevs                        r    r6   zCudaModule._nvcc_arch_flags   sc    *H$)>%)-&).&)E"<0-2*<1(-^^()eW4)|V4-)<-16.<1(h-GG()eU^;)&-BB&|V4-)<-16.<1(gx-@@()eW4)&eU^;&-2*|W5-)<-16.<2(hZ7()eW4)&eW4&|W5-)<-16. !' '<2(hZ7()eW4)&eW4&-2*|W5-)<-16.<2w&O %wO)eW4)&eW4&|W5-)<-16.<2(-KK()-FF)&-BB&|U316.<2 .3*|U317.#Nu$9Nx':Nv%h-%-N%)%<%<X%FN!%!<!<^=[=[=Z[]=^"`
 "?,!44^DND3~+>DD' $	*IHH((0G%mF|4	||CYO&/[9+( (-j&92%>',g2%>',g2%>',enE7%C',g2%>',enE7%C',enE7%C',g2%>',gE7%C',g2%>',gE7%C&5?%K',g2%>',gE7%C',gE7%C&  #i.! #($ &)HST'UVVX%M'H)I$	*L s=12s=12
 ! 	6D,,=tEEKVX D% tS+I%IJ-/$OmHm2n||C,c2.z?U+B[+PSW+WXX
#~5#||C,z?T+AK+ORV+VWW
#~5##	6& " 	7D,,=tEEKVX D% tS+I%IJ-/$OmHm2n<<R(D:'=@P'PSW'WXXJJ$5#66	7  ...r!   )r-   'ModuleState'r   zT.Tuple[str]r   T.Dict[str, T.Any]returnr)   )r-   r   r   z-T.Tuple[T.Union[Compiler, CudaCompiler, str]]r   r   r   zT.List[str])NNN)rO   rE   )__name__
__module____qualname__r	   INFOr   r   r   r   r   r   staticmethodrC   rI   rL   r5   r`   r6   __classcell__)r   s   @r    r   r   !   s    fh6D 4!-4#54:=4 4l j\"K 27B # j\"!N#5:E #  
  
  16'(L/r!   r   c                     t        | i |S r}   )r   )r   r   s     r    
initializer     s    t&v&&r!   )
__future__r   typingTr>   mesonlibr   compilers.cudar   rE   r   r	   interpreterbaser
   r   r   r   TYPE_CHECKINGr   	compilersr   r   r   r~   r!   r    <module>r      sG    #  	 & ) , 
 ??$b/# b/H'r!   