---
# Launch configuration: single machine, 8 processes, FSDP with bf16 mixed
# precision.
# NOTE(review): the key set looks like a Hugging Face Accelerate config file;
# confirm against the consuming tool.

# Previous settings, kept commented out for reference. They differ from the
# active configuration below only in fsdp_activation_checkpointing (true) and
# fsdp_backward_prefetch (BACKWARD_PRE).
#
# compute_environment: LOCAL_MACHINE
# debug: false
# distributed_type: FSDP
# downcast_bf16: 'no'
# enable_cpu_affinity: false
# fsdp_config:
#   # Need fix from: https://github.com/huggingface/transformers/pull/36610
#   fsdp_activation_checkpointing: true
#   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
#   fsdp_backward_prefetch: BACKWARD_PRE
#   fsdp_cpu_ram_efficient_loading: true
#   fsdp_forward_prefetch: true
#   fsdp_offload_params: false
#   fsdp_sharding_strategy: FULL_SHARD
#   fsdp_state_dict_type: FULL_STATE_DICT
#   fsdp_sync_module_states: true
#   fsdp_use_orig_params: true
# machine_rank: 0
# main_training_function: main
# mixed_precision: bf16
# num_machines: 1
# num_processes: 8
# rdzv_backend: static
# same_network: true
# tpu_env: []
# tpu_use_cluster: false
# tpu_use_sudo: false
# use_cpu: false

# Active configuration.
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
# Quoted so the value stays the string 'no' instead of being read as a boolean.
downcast_bf16: 'no'
enable_cpu_affinity: false

fsdp_config:
  # Need fix from: https://github.com/huggingface/transformers/pull/36610
  # NOTE(review): presumably left disabled until that fix is available;
  # confirm before re-enabling.
  fsdp_activation_checkpointing: false
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_backward_prefetch: BACKWARD_POST
  fsdp_cpu_ram_efficient_loading: true
  # NOTE(review): this key was previously listed twice in this mapping
  # (false, then true). Duplicate keys are invalid YAML; parsers that accept
  # them keep the last value, so true is retained here. Confirm whether false
  # was the intended setting.
  fsdp_forward_prefetch: true
  fsdp_offload_params: false
  fsdp_sharding_strategy: FULL_SHARD
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true

machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false