opendilab · puyuan1996 · Feb 5, 2026 · Feb 5, 2026 · Feb 5, 2026
diff --git a/ding/policy/base_policy.py b/ding/policy/base_policy.py
@@ -9,6 +9,7 @@
 from ding.model import create_model
 from ding.utils import import_module, allreduce, allreduce_with_indicator, broadcast, get_rank, allreduce_async, \
     synchronize, deep_merge_dicts, POLICY_REGISTRY
+from ding.torch_utils import auto_device_init, move_to_device
 
 
 class Policy(ABC):
@@ -83,8 +84,12 @@ def default_config(cls: type) -> EasyDict:
     config = dict(
         # (bool) Whether the learning policy is the same as the collecting data policy (on-policy).
         on_policy=False,
-        # (bool) Whether to use cuda in policy.
+        # (bool) Whether to use cuda in policy (deprecated, use 'device' instead).
         cuda=False,
+        # (str) Device to use for policy. Can be 'auto', 'cuda', 'npu', or 'cpu'.
+        # 'auto' will automatically detect NPU > GPU > CPU.
+        # Set to None to fall back to the legacy 'cuda' flag for backward compatibility.
+        device='auto',
         # (bool) Whether to use data parallel multi-gpu mode in policy.
         multi_gpu=False,
         # (bool) Whether to synchronize update the model parameters after allreduce the gradients of model parameters.
@@ -136,25 +141,56 @@ def __init__(
 
         if len(set(self._enable_field).intersection(set(['learn', 'collect', 'eval']))) > 0:
             model = self._create_model(cfg, model)
-            self._cuda = cfg.cuda and torch.cuda.is_available()
+
+            # Device initialization with auto-detection support for NPU/GPU/CPU
+            # Backward compatibility: if 'device' not in cfg, use 'cuda' config
+            if hasattr(cfg, 'device') and cfg.device is not None:
+                # New way: use 'device' config for auto-detection or explicit setting
+                cfg_device = cfg.device
+            else:
+                # Legacy way: convert 'cuda' boolean to device string
+                cfg_device = 'cuda' if (hasattr(cfg, 'cuda') and cfg.cuda) else 'cpu'
+
             # now only support multi-gpu for only enable learn mode
             if len(set(self._enable_field).intersection(set(['learn']))) > 0:
                 multi_gpu = self._cfg.multi_gpu
                 self._rank = get_rank() if multi_gpu else 0
-                if self._cuda:
-                    # model.cuda() is an in-place operation.
-                    model.cuda()
+            else:
+                self._rank = 0
+
+            # Auto-detect or set device
+            self._device_type, self._use_accelerator, self._device = auto_device_init(cfg_device, self._rank)
+
+            # Keep backward compatibility with _cuda attribute
+            # Set _cuda=True for ANY accelerator (GPU or NPU) to ensure data transfer logic works
+            self._cuda = self._use_accelerator
+
+            # Move model to the detected/configured device
+            if self._use_accelerator:
+                move_to_device(model, self._device_type, self._rank)
+
+            # Print final device configuration summary
+            print(f"\n{'='*70}")
+            print(f"🎉 [DI-engine Policy] Device Setup Complete")
+            print(f"{'='*70}")
+            print(f"  Policy Type: {self.__class__.__name__}")
+            print(f"  Device Type: {self._device_type.upper()}")
+            print(f"  Device String: {self._device}")
+            print(f"  Using Accelerator: {self._use_accelerator}")
+            print(f"  Rank: {self._rank}")
+            print(f"  Multi-GPU: {self._cfg.multi_gpu if hasattr(self._cfg, 'multi_gpu') else False}")
+            print(f"  Legacy _cuda flag: {self._cuda}")
+            print(f"{'='*70}\n")
+
+            # Multi-GPU initialization
+            if len(set(self._enable_field).intersection(set(['learn']))) > 0:
+                multi_gpu = self._cfg.multi_gpu
                 if multi_gpu:
                     bp_update_sync = self._cfg.bp_update_sync
                     self._bp_update_sync = bp_update_sync
                     self._init_multi_gpu_setting(model, bp_update_sync)
-            else:
-                self._rank = 0
-                if self._cuda:
-                    # model.cuda() is an in-place operation.
-                    model.cuda()
+
             self._model = model
-            self._device = 'cuda:{}'.format(self._rank % torch.cuda.device_count()) if self._cuda else 'cpu'
         else:
             self._cuda = False
             self._rank = 0

diff --git a/ding/torch_utils/__init__.py b/ding/torch_utils/__init__.py
@@ -12,3 +12,5 @@
 from .dataparallel import DataParallel
 from .reshape_helper import fold_batch, unfold_batch, unsqueeze_repeat
 from .parameter import NonegativeParameter, TanhParameter
+from .device_helper import get_available_device, get_device_count, move_to_device, get_device_string, \
+    auto_device_init, is_npu_available, is_cuda_available
diff --git a/ding/torch_utils/device_helper.py b/ding/torch_utils/device_helper.py
@@ -0,0 +1,224 @@
+"""
+Copyright 2020 Sensetime X-lab. All Rights Reserved.
+
+Device helper utilities for automatic detection of NPU and GPU devices.
+Supports Huawei Ascend NPU (torch_npu) and NVIDIA GPU (torch.cuda).
+"""
+
+import torch
+from typing import Tuple, Optional
+import logging
+
+# Try to import torch_npu for Huawei NPU support
+try:
+    import torch_npu
+    TORCH_NPU_AVAILABLE = True
+except ImportError:
+    TORCH_NPU_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+
+def get_available_device() -> Tuple[str, bool]:
+    """
+    Overview:
+        Probe the runtime for an accelerator and report which backend to use.
+        Backends are tried in order of preference: NPU, then CUDA GPU, then CPU.
+    Returns:
+        - device_type (:obj:`str`): The chosen backend, one of 'npu', 'cuda', 'cpu'
+        - is_accelerator (:obj:`bool`): ``True`` for 'npu'/'cuda', ``False`` for 'cpu'
+    Examples:
+        >>> device_type, is_accelerator = get_available_device()
+        >>> print(f"Using device: {device_type}")
+    """
+    print("\n" + "="*70)
+    print("🔍 [DI-engine] Device Detection")
+    print("="*70)
+
+    # Huawei Ascend NPU is preferred, provided torch_npu could be imported.
+    if not TORCH_NPU_AVAILABLE:
+        print("✗ torch_npu module is not installed")
+    else:
+        print("✓ torch_npu module is installed")
+        if not torch.npu.is_available():
+            print("✗ NPU is not available")
+        else:
+            n_npu = torch.npu.device_count()
+            print(f"✓ NPU is available: {n_npu} device(s) detected")
+            print(f"✓ NPU device names: {[torch.npu.get_device_name(idx) for idx in range(n_npu)]}")
+            print(f"🎯 Selected device: NPU")
+            print("="*70 + "\n")
+            logger.info(f"[Device] Using NPU with {n_npu} device(s)")
+            return 'npu', True
+
+    # Second choice: an NVIDIA GPU exposed through torch.cuda.
+    if not torch.cuda.is_available():
+        print("✗ CUDA is not available")
+    else:
+        n_gpu = torch.cuda.device_count()
+        print(f"✓ CUDA is available: {n_gpu} device(s) detected")
+        print(f"✓ GPU device names: {[torch.cuda.get_device_name(idx) for idx in range(n_gpu)]}")
+        print(f"🎯 Selected device: CUDA GPU")
+        print("="*70 + "\n")
+        logger.info(f"[Device] Using CUDA GPU with {n_gpu} device(s)")
+        return 'cuda', True
+
+    # Neither accelerator is usable, so fall back to the CPU.
+    print("🎯 Selected device: CPU (no accelerator detected)")
+    print("="*70 + "\n")
+    logger.info("[Device] Using CPU (no accelerator available)")
+    return 'cpu', False
+
+
+def get_device_count(device_type: str) -> int:
+    """
+    Overview:
+        Report how many devices of the given backend the current process can see.
+    Arguments:
+        - device_type (:obj:`str`): Backend name; 'npu' and 'cuda' are queried, anything else counts as CPU
+    Returns:
+        - count (:obj:`int`): Backend device count, or 1 when no accelerator backend is queried
+    """
+    if device_type == 'cuda':
+        return torch.cuda.device_count()
+    if device_type == 'npu' and TORCH_NPU_AVAILABLE:
+        return torch.npu.device_count()
+    # CPU, unknown names, and 'npu' without torch_npu all count as a single device.
+    return 1
+
+
+def move_to_device(model: torch.nn.Module, device_type: str, rank: int = 0) -> torch.nn.Module:
+    """
+    Overview:
+        Place a PyTorch model on the accelerator selected by ``device_type``.
+        The target index is ``rank`` wrapped around the number of visible devices.
+    Arguments:
+        - model (:obj:`torch.nn.Module`): Model to relocate
+        - device_type (:obj:`str`): 'npu' or 'cuda' to relocate; any other value leaves the model untouched
+        - rank (:obj:`int`): Process rank used to pick the device index
+    Returns:
+        - model (:obj:`torch.nn.Module`): The same model object that was passed in
+    """
+    if device_type == 'cuda':
+        n_devices = torch.cuda.device_count()
+        target = 0 if n_devices <= 0 else rank % n_devices
+        print(f"📦 [DI-engine] Moving model to CUDA device {target} (rank={rank})")
+        model.cuda(target)
+        logger.info(f"[Device] Model moved to CUDA device {target}")
+    elif device_type == 'npu' and TORCH_NPU_AVAILABLE:
+        n_devices = torch.npu.device_count()
+        target = 0 if n_devices <= 0 else rank % n_devices
+        print(f"📦 [DI-engine] Moving model to NPU device {target} (rank={rank})")
+        model.npu(target)
+        logger.info(f"[Device] Model moved to NPU device {target}")
+    else:
+        # No accelerator branch matched, so the model is not moved.
+        print(f"📦 [DI-engine] Model will stay on CPU")
+        logger.info("[Device] Model stays on CPU")
+    return model
+
+
+def get_device_string(device_type: str, rank: int = 0) -> str:
+    """
+    Overview:
+        Build the device identifier string used for PyTorch tensor placement.
+    Arguments:
+        - device_type (:obj:`str`): Backend name; only 'npu' and 'cuda' yield an indexed string
+        - rank (:obj:`int`): Process rank, wrapped around the backend's device count
+    Returns:
+        - device_str (:obj:`str`): '<device_type>:<index>' for 'npu'/'cuda', otherwise 'cpu'
+    """
+    if device_type not in ('npu', 'cuda'):
+        return 'cpu'
+    n_devices = get_device_count(device_type)
+    # Guard the modulo: a backend reporting zero devices maps to index 0.
+    index = rank % n_devices if n_devices > 0 else 0
+    return f'{device_type}:{index}'
+
+
+def auto_device_init(cfg_device: Optional[str], rank: int = 0) -> Tuple[str, bool, str]:
+    """
+    Overview:
+        Resolve the device setting from config into a backend, an accelerator flag and a device string.
+        Matching is case-insensitive; a missing accelerator or an unknown name falls back to CPU with a warning.
+    Arguments:
+        - cfg_device (:obj:`Optional[str]`): 'auto', 'npu', 'cuda', 'cpu', or None (treated as 'auto').
+            A legacy boolean is also accepted: True is treated as 'cuda', False as 'cpu'.
+        - rank (:obj:`int`): Device rank for multi-device setups
+    Returns:
+        - device_type (:obj:`str`): Resolved device type ('npu', 'cuda', or 'cpu')
+        - use_accelerator (:obj:`bool`): Whether an accelerator is being used
+        - device_str (:obj:`str`): Full device string for PyTorch operations
+    Examples:
+        >>> device_type, use_accelerator, device_str = auto_device_init('auto')
+        >>> # Returns ('npu', True, 'npu:0') if NPU available
+        >>> # Returns ('cuda', True, 'cuda:0') if GPU available
+        >>> # Returns ('cpu', False, 'cpu') otherwise
+    """
+    print(f"\n⚙️  [DI-engine] Device Configuration: cfg_device='{cfg_device}', rank={rank}")
+
+    # Normalize up front so None, legacy booleans and mixed-case strings share one code path.
+    if cfg_device is None:
+        requested = 'auto'
+    elif isinstance(cfg_device, bool):
+        # Legacy ``cuda=True/False`` style flag.
+        requested = 'cuda' if cfg_device else 'cpu'
+    else:
+        requested = str(cfg_device).strip().lower()
+
+    if requested == 'auto':
+        print(f"🔧 [DI-engine] Using auto-detection mode")
+        device_type, use_accelerator = get_available_device()
+    else:
+        print(f"🔧 [DI-engine] Explicit device type requested: '{requested}'")
+
+        # Start from the CPU fallback; only a supported accelerator that is present overrides it.
+        device_type, use_accelerator = 'cpu', False
+        if requested == 'npu' and TORCH_NPU_AVAILABLE and torch.npu.is_available():
+            device_type, use_accelerator = 'npu', True
+            npu_count = torch.npu.device_count()
+            print(f"✓ NPU requested and available: {npu_count} device(s)")
+            logger.info(f"[Device] Using NPU as explicitly configured ({npu_count} device(s))")
+        elif requested == 'cuda' and torch.cuda.is_available():
+            device_type, use_accelerator = 'cuda', True
+            gpu_count = torch.cuda.device_count()
+            print(f"✓ CUDA requested and available: {gpu_count} device(s)")
+            logger.info(f"[Device] Using CUDA GPU as explicitly configured ({gpu_count} device(s))")
+        elif requested in ('npu', 'cuda'):
+            # The requested accelerator is missing: degrade to CPU instead of failing.
+            print(f"⚠️  {requested.upper()} requested but not available, falling back to CPU")
+            logger.warning(f"[Device] {requested.upper()} requested but not available, falling back to CPU")
+        elif requested == 'cpu':
+            print(f"✓ Using CPU as configured")
+            logger.info("[Device] Using CPU as configured")
+        else:
+            # Unrecognized names still fall back to CPU, but say so rather than "as configured".
+            print(f"⚠️  Unknown device type '{requested}', falling back to CPU")
+            logger.warning(f"[Device] Unknown device type '{requested}', falling back to CPU")
+
+    device_str = get_device_string(device_type, rank)
+
+    print(f"✅ [DI-engine] Device initialized: type={device_type}, accelerator={use_accelerator}, device_string='{device_str}'")
+    print("="*70 + "\n")
+
+    return device_type, use_accelerator, device_str
+
+
+def is_npu_available() -> bool:
+    """
+    Overview:
+        Tell whether a Huawei NPU can be used from this process.
+    Returns:
+        - available (:obj:`bool`): True only if torch_npu was imported and ``torch.npu.is_available()`` is true
+    """
+    return torch.npu.is_available() if TORCH_NPU_AVAILABLE else False
+
+
+def is_cuda_available() -> bool:
+    """
+    Overview:
+        Check if NVIDIA CUDA GPU is available; thin wrapper around ``torch.cuda.is_available()``.
+    Returns:
+        - available (:obj:`bool`): Whatever ``torch.cuda.is_available()`` reports
+    """
+    return torch.cuda.is_available()
diff --git a/ding/utils/default_helper.py b/ding/utils/default_helper.py
@@ -7,6 +7,13 @@
 import torch
 import treetensor.torch as ttorch
 
+# Try to import torch_npu for Huawei NPU support
+try:
+    import torch_npu
+    TORCH_NPU_AVAILABLE = True
+except ImportError:
+    TORCH_NPU_AVAILABLE = False
+
 
 def get_shape0(data: Union[List, Dict, torch.Tensor, ttorch.Tensor]) -> int:
     """
@@ -418,7 +425,7 @@ def set_pkg_seed(seed: int, use_cuda: bool = True) -> None:
         This is usaually used in entry scipt in the section of setting random seed for all package and instance
     Argument:
         - seed(:obj:`int`): Set seed
-        - use_cuda(:obj:`bool`) Whether use cude
+        - use_cuda(:obj:`bool`) Whether to also seed the available accelerators (CUDA GPU and/or NPU)
     Examples:
         >>> # ../entry/xxxenv_xxxpolicy_main.py
         >>> ...
@@ -431,11 +438,23 @@ def set_pkg_seed(seed: int, use_cuda: bool = True) -> None:
         >>> ...
 
     """
+    print(f"\n🌱 [DI-engine] Setting random seed: {seed}")
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-    if use_cuda and torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    print(f"  ✓ Set seed for: random, numpy, torch")
+
+    # Set seed for accelerators (GPU or NPU)
+    if use_cuda:
+        # Set CUDA seed if available
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed(seed)
+            print(f"  ✓ Set CUDA seed: {seed}")
+        # Set NPU seed if available
+        if TORCH_NPU_AVAILABLE and torch.npu.is_available():
+            torch.npu.manual_seed(seed)
+            print(f"  ✓ Set NPU seed: {seed}")
+    print()
 
 
 @lru_cache()

diff --git a/dizoo/classic_control/cartpole/config/cartpole_ppo_config.py b/dizoo/classic_control/cartpole/config/cartpole_ppo_config.py
@@ -9,7 +9,7 @@
         stop_value=195,
     ),
     policy=dict(
-        cuda=False,
+        device='auto',  # Auto-detect NPU > GPU > CPU
         action_space='discrete',
         model=dict(
             obs_shape=4,