Unity ML-Agents Python API 的奇怪结果

# Asker's original script: drives the "3DBall?team=0" behavior with random
# actions for 50 steps and prints the reward / action mask on every step.
env = UnityEnvironment()
env.reset()
behavior_names = env.behavior_specs
for i in range(50):
    arr = []  # one [a0, a1] action row per agent, rebuilt every step
    behavior_names = env.behavior_specs
    # NOTE: this inner loop reuses the name `i` of the outer loop.
    for i in behavior_names:
        print(i)
    # NOTE(review): the result is indexed with [0] everywhere below; the
    # answer's code unpacks get_steps() into (decision_steps, terminal_steps),
    # so [0] is presumably the decision-steps half -- confirm against the
    # installed mlagents_envs version.
    DecisionSteps = env.get_steps("3DBall?team=0")
    print(DecisionSteps[0].reward,len(DecisionSteps[0].reward))
    print(DecisionSteps[0].action_mask) #for some reason it returns action mask as false when Decisionsteps[0].reward is empty and is None when not
    # Build one random 2-value action per entry in DecisionSteps[0].
    for i in range(len(DecisionSteps[0])):
        arr.append([])
        for b in range(2):
            arr[-1].append(random.uniform(-10,10))
    # Only send actions when DecisionSteps[0] is non-empty; the simulation is
    # stepped in either case.
    if(len(DecisionSteps[0])!= 0):
        env.set_actions("3DBall?team=0",numpy.array(arr))
        env.step()
    else:
        env.step()
env.close()

1条回答

网友

1楼 · 发布于 2024-09-30 08:20:17

我认为您的问题在于：当某个智能体（agent）的回合终止并需要重置时，它不会出现在 decision_steps 中，而是出现在 terminal_steps 中。这里回合终止是因为智能体把球弄掉了，此时 terminal step 中返回的奖励为 -1.0。我在您的代码基础上做了一些修改，现在它可以正常运行（不过您可能还想再改一下，以免每次有一个智能体掉球时都重置整个环境）。

import numpy as np
import mlagents
from mlagents_envs.environment import UnityEnvironment

# ---------------------------------------------------------------------------
# Best-effort cleanup for interactive sessions (e.g. re-running a notebook
# cell): if a previous run left an environment bound to `env`, close it so
# that a new UnityEnvironment can be created below.
#
# The original guard closed `unity_env`, a name this script never defines, so
# it always hit NameError and never closed anything.
try:
    env.close()
except Exception:
    # Expected cases: `env` does not exist yet (NameError on a first run) or
    # the previous environment was already closed. Either way there is
    # nothing left to clean up. `Exception` (rather than a bare `except`)
    # lets KeyboardInterrupt / SystemExit propagate.
    pass
# ---------------------------------------------------------------------------

# Drive every behavior in the Unity environment with uniformly random actions.
#
# Each simulation step:
#   1. fetch the decision / terminal steps of every behavior,
#   2. if any agent of a behavior terminated, reset the environment (once),
#   3. send one random action per agent that requests a decision,
#   4. advance the simulation.
MAX_STEPS = 1000   # upper bound on simulation steps
ACTION_SIZE = 2    # action values sent per agent (as in the original example)

env = UnityEnvironment(file_name=None)
try:
    env.reset()

    for _ in range(MAX_STEPS):
        # Snapshot the behavior names first: env.reset() may be called inside
        # the loop, and iterating a plain list keeps the iteration independent
        # of whatever the reset does to the environment's specs.
        for behavior_name in list(env.behavior_specs):
            decision_steps, terminal_steps = env.get_steps(behavior_name)

            terminated_agents = list(terminal_steps)
            if terminated_agents:
                for _agent_id in terminated_agents:
                    print("Agent " + behavior_name + " has terminated, resetting environment.")
                # This is probably not the desired behaviour, as the other
                # agents are still active. Reset once per step (not once per
                # terminated agent) and re-read the steps, so the actions
                # below are built for the agents that exist after the reset.
                env.reset()
                decision_steps, terminal_steps = env.get_steps(behavior_name)

            # Debugging aid:
            # print(decision_steps[0].reward)
            # print(decision_steps[0].action_mask)

            n_agents = len(decision_steps)
            if n_agents > 0:
                # One row of ACTION_SIZE values in [-1, 1) per deciding agent.
                actions = np.random.uniform(-1, 1, size=(n_agents, ACTION_SIZE))
                env.set_actions(behavior_name, actions)

        try:
            env.step()
        except Exception:
            # Stop early if stepping fails; KeyboardInterrupt / SystemExit are
            # deliberately not caught here.
            print("Something happend when taking a step in the environment.")
            print("The communicatior has probably terminated, stopping simulation early.")
            break
finally:
    # Always release the Unity environment, even if an error escapes the loop.
    env.close()

相关问题更多 >

编程相关推荐

热门问题

热门文章