# Launch configuration: one key per line (block-style mapping).
# NOTE(review): the key names match a Hugging Face Accelerate config file;
# confirm against the tool that actually consumes this file.

# Where the job runs and how it is distributed.
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU

# Kept quoted on purpose: a bare `no` is parsed as boolean false by
# YAML 1.1 loaders, while the value here is the string 'no'.
downcast_bf16: 'no'

# Machine topology: rank 0 of a total of 1 machine.
machine_rank: 0
num_machines: 1

# Total number of processes to start.
# NOTE(review): presumably one process per GPU; verify against the hardware.
num_processes: 8