Hi! Hope you guys are doing well.
I am trying to speed up the learning process of my robot in a custom environment, and I want to use multiprocessing to increase the training speed. As far as I know, since openai_ros fetches the sensor data by subscribing to topics, we might need to launch multiple simulation environments in order to multiprocess. Since I don't know how to implement this, I'm currently training my robot with nenv = 1. Is there a way to speed up learning by launching multiple simulation environments at the same time and training in parallel? I have a rough idea of what this might look like (see the sketch at the end of this post), but I'm not sure it is the right approach.
Any kind of answer would be highly appreciated. Thanks!
Taehyoung
#!/usr/bin/env python
# ROS packages required
import rospy
import rospkg
# Dependencies required
import gym
import os
import numpy as np
import pandas as pd
import time
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import DDPG, PPO, A2C, TD3, SAC
from stable_baselines3.common.monitor import Monitor
# import our task environment
import hummingbird_hover_task_env_a2c
# import hummingbird_hover_task_env_ppo
# from openai_ros.task_envs.cartpole_stay_up import stay_up
# ROS ENV gets started automatically before the training
# from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment -- This has to be solved at the end
# change the directory
os.chdir('/home/ubuntu/catkin_ws/src/hummingbird_pkg/')
rospy.init_node('hummingbird_training_test', anonymous=True, log_level=rospy.FATAL)
# Create the Gym environment
environment_name = rospy.get_param('/hummingbird/task_and_robot_environment_name')
env = gym.make(environment_name)
env = DummyVecEnv([lambda: Monitor(env)])
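# Note: DummyVecEnv steps the single wrapped environment in this same process,
# i.e. this is the nenv = 1 setup mentioned above.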
# rospy.loginfo("Gym environment done")
# Set the logging system
rospack = rospkg.RosPack()
pkg_path = rospack.get_path('hummingbird_pkg')
outdir = pkg_path + '/training_results'
# # env = wrappers.Monitor(env, outdir, force=True)
# # rospy.loginfo("Monitor Wrapper started")
model_list = [
    # A2C("MlpPolicy", env, verbose=1, tensorboard_log="/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/tensorboard_logs/A2C/"),
    PPO("MlpPolicy", env, verbose=1, ent_coef=0.001, tensorboard_log="/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/tensorboard_logs/PPO/"),
    # DDPG("MlpPolicy", env, verbose=1, learning_rate=0.0005, tensorboard_log="/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/tensorboard_logs/DDPG/"),
    # TD3("MlpPolicy", env, verbose=1, tensorboard_log="/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/tensorboard_logs/TD3/"),
    # SAC("MlpPolicy", env, verbose=1, tensorboard_log="/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/tensorboard_logs/SAC/")
]
algo_list = [
    # 'A2C',
    'PPO',
    # 'DDPG',
    # 'TD3',
    # 'SAC'
]
training_time_list = []
for model, algo in zip(model_list, algo_list):
    print(model)
    start = time.time()
    model.learn(total_timesteps=300000)
    end = time.time()
    training_time_list.append((end - start) * 1000)
    model.save("/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/trained_model" + "_" + algo)
df = pd.DataFrame(list(zip(algo_list, training_time_list)), columns=['algo', 'train_time (ms)'])
df.to_csv('/home/ubuntu/catkin_ws/src/hummingbird_pkg/results/train_time.csv', index=False)
env.close()
I have customized the task and robot environments as the lecture suggests, and so far the simulation runs fine, but I want to speed up the training process with multiprocessing! Looking forward to your answer.
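For reference, here is a minimal sketch of what I imagine (only a guess, not something I have tested): each parallel simulation would be started beforehand under its own ROS/Gazebo master, and each SubprocVecEnv worker would connect to one of them. The port offsets, the worker node names, and the per-worker ROS_MASTER_URI / GAZEBO_MASTER_URI handling are all my own assumptions, not something from openai_ros.

#!/usr/bin/env python
# Sketch only: assumes N_ENVS separate Gazebo/ROS instances have already been
# launched, one per port (11311 + rank for ROS, 11345 + rank for Gazebo).
import os
import gym
import rospy
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed

N_ENVS = 4  # number of parallel simulations (assumed to exist already)

def make_env(rank, seed=0):
    """Factory returning a function that builds one env inside its own worker process."""
    def _init():
        # Assumption: worker `rank` talks to its own ROS/Gazebo master.
        os.environ['ROS_MASTER_URI'] = 'http://localhost:{}'.format(11311 + rank)
        os.environ['GAZEBO_MASTER_URI'] = 'http://localhost:{}'.format(11345 + rank)
        rospy.init_node('hummingbird_training_worker_{}'.format(rank),
                        anonymous=True, log_level=rospy.FATAL)
        # Register the task env inside the worker, same as in the single-env script
        import hummingbird_hover_task_env_a2c
        environment_name = rospy.get_param('/hummingbird/task_and_robot_environment_name')
        env = gym.make(environment_name)
        env.seed(seed + rank)
        return Monitor(env)
    set_random_seed(seed)
    return _init

if __name__ == '__main__':
    # Each entry in the list becomes its own subprocess with its own environment
    env = SubprocVecEnv([make_env(i) for i in range(N_ENVS)])
    model = PPO('MlpPolicy', env, verbose=1, ent_coef=0.001)
    model.learn(total_timesteps=300000)
    env.close()

If running several Gazebo instances on one machine is too heavy, I guess the same pattern could also be spread across machines, but again that is just a guess, so I'd appreciate any pointers on whether this is the intended way to do it with openai_ros.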